skills-templates/communication/twilio-voice/SKILL.md
Comprehensive Twilio Voice API assistance with AI integration patterns
npx skillsauth add enuno/claude-command-and-control twilio-voice
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Comprehensive assistance for building voice applications with Twilio Voice API, including AI-powered voice assistants, ConversationRelay integrations, and production-ready implementation patterns.
This skill should be triggered when:
Core Voice Development:
AI-Powered Voice Applications:
Advanced Features:
Conversational Intelligence & Analytics:
// Real-time AI voice conversation setup
// Responds to the /voice webhook with TwiML that connects the call to ConversationRelay.
app.post('/voice', (req, res) => {
  const response = new VoiceResponse();

  // <Connect><ConversationRelay>: point the call at the WebSocket endpoint.
  response.connect().conversationRelay({
    url: 'wss://your-app.ngrok.io/ws',
    voice: 'Polly.Joanna',
    language: 'en-US'
  });

  res.type('text/xml');
  res.send(response.toString());
});
// Forward transcriptions to Langflow for AI processing
// Each transcription is POSTed to the Langflow flow; the `message` field of the
// JSON reply is spoken back to the caller.
conversationRelay.on('transcription', async (data) => {
  try {
    const response = await fetch(`${LANGFLOW_URL}/api/v1/run/${FLOW_ID}`, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'Authorization': `Bearer ${LANGFLOW_API_KEY}`
      },
      body: JSON.stringify({
        message: data.text,
        session_id: data.callSid
      })
    });

    // fetch() only rejects on network failure, so HTTP errors must be checked explicitly.
    if (!response.ok) {
      throw new Error(`Langflow request failed with status ${response.status}`);
    }

    const aiResponse = await response.json();
    conversationRelay.say(aiResponse.message);
  } catch (error) {
    // Without this catch a failed request becomes an unhandled promise rejection
    // inside the event handler.
    console.error('Langflow request failed:', error);
  }
});
// Best practices for AI voice responses
// Prompt text asking the model for short, plain-language output with no
// symbols or formatting. The template literal is a runtime string.
const systemPrompt = `
You are a helpful voice assistant. Follow these guidelines:
- Answer carefully and concisely (2-3 sentences max)
- Spell out ALL numbers (say "twenty-three" not "23")
- NO emojis, bullet points, or special symbols
- Use natural conversational language
- Avoid markdown or formatting
- Keep responses under 30 seconds when spoken
`;
// Basic Twilio webhook handler
// Greets the caller, then gathers speech and posts the result to /process-speech.
app.post('/twiml', (req, res) => {
  const response = new VoiceResponse();

  const greetingOptions = { voice: 'Polly.Joanna' };
  response.say(greetingOptions, 'Hello! How can I help you today?');

  const gatherOptions = {
    input: 'speech',
    action: '/process-speech',
    timeout: 3
  };
  response.gather(gatherOptions);

  res.type('text/xml');
  res.send(response.toString());
});
# Expose local server for Twilio webhooks
# (starts an ngrok HTTP tunnel to local port 3000)
ngrok http 3000
# Configure Twilio phone number webhook URL:
# https://your-subdomain.ngrok.io/voice
# Analyze call recordings with Conversational Intelligence
from twilio.rest import Client

# account_sid / auth_token must already be defined (they are not set in this snippet).
client = Client(account_sid, auth_token)

# Create Intelligence Service (one-time setup)
service = client.intelligence.v2.services.create(
    auto_transcribe=True,
    unique_name='customer-service-analysis',
)

# Create transcript from call recording
transcript = client.intelligence.v2.transcripts.create(
    service_sid=service.sid,
    channel={
        'media_properties': {
            'source_sid': 'REXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'  # Recording SID
        }
    },
)

# Attach language operators for business insights
# NOTE(review): confirm this operator-attachment call against the installed
# twilio helper library version.
sentiment_op = (
    client.intelligence.v2
    .services(service.sid)
    .operators.create(
        operator_type='sentiment-analysis',
        config={'language_code': 'en-US'},
    )
)

# Retrieve analyzed results
results = (
    client.intelligence.v2
    .transcripts(transcript.sid)
    .operator_results.list()
)
for result in results:
    print(f"Operator: {result.operator_type}")
    print(f"Results: {result.extract_match}")
# Monitor ConversationRelay AI agents in real-time
from twilio.rest import Client

# account_sid / auth_token must already be defined (they are not set in this snippet).
client = Client(account_sid, auth_token)

# Create transcript from active ConversationRelay call
transcript = client.intelligence.v2.transcripts.create(
    service_sid='GAxxxxx',
    channel={
        'media_properties': {
            # Placeholder SID; a stray space inside the placeholder was removed.
            'source_sid': 'CAxxxx',  # Active Call SID
            'participant_label': 'ai_agent',
        }
    },
)

# Access real-time transcription
sentences = (
    client.intelligence.v2
    .transcripts(transcript.sid)
    .sentences.list()
)
for sentence in sentences:
    print(f"[{sentence.participant_label}]: {sentence.transcript}")
    print(f"Confidence: {sentence.confidence}")
# Create custom operators for business-specific analysis
from twilio.rest import Client

client = Client(account_sid, auth_token)

# Generative Custom Operator using LLM (public beta)
# The prompt text is a runtime string and is kept flush-left on purpose.
lead_qualification_config = {
    'name': 'lead-qualification',
    'description': 'Extract lead qualification criteria',
    'prompt': '''
Analyze this conversation and extract:
1. Customer budget range
2. Timeline for decision
3. Decision maker status
4. Pain points mentioned
Return as JSON.
''',
    'language_code': 'en-US'
}
custom_op = (
    client.intelligence.v2
    .services(service_sid)
    .operators.create(
        operator_type='custom-operator',
        config=lead_qualification_config,
    )
)

# Pre-built Language Operator for PII detection
pii_detection_config = {
    'redact': True,
    'pii_types': ['ssn', 'credit_card', 'email']
}
pii_op = (
    client.intelligence.v2
    .services(service_sid)
    .operators.create(
        operator_type='pii-detection',
        config=pii_detection_config,
    )
)
# Bidirectional voice streaming with OpenAI Realtime API
import asyncio
import websockets
import json
from twilio.twiml.voice_response import VoiceResponse, Connect
@app.route('/incoming-call', methods=['POST'])
def handle_incoming_call():
    """Initiate call with Media Streams.

    Returns:
        The TwiML document, as a string, that connects the call to the
        ``/media-stream`` WebSocket on ``SERVER_DOMAIN``.
    """
    response = VoiceResponse()
    connect = response.connect()
    connect.stream(url=f'wss://{SERVER_DOMAIN}/media-stream')
    return str(response)
async def handle_media_stream(websocket):
    """Relay audio between Twilio and OpenAI Realtime API.

    Args:
        websocket: The Twilio Media Streams WebSocket connection. It is
            iterated for inbound JSON messages and written to with ``send``.

    Runs until either relay coroutine finishes or raises.
    """
    # State shared by the two relay coroutines below.
    stream_sid = None          # Twilio stream id, taken from the 'start' event
    current_item_id = None     # assistant item whose audio is currently playing
    latest_media_ts = 0        # timestamp (ms) of the newest inbound Twilio frame
    response_start_ts = None   # latest_media_ts when the current item began

    # NOTE(review): newer `websockets` releases name this keyword
    # `additional_headers` — confirm against the installed version.
    async with websockets.connect(
        'wss://api.openai.com/v1/realtime?model=gpt-4o-realtime-preview-2024-10-01',
        extra_headers={
            "Authorization": f"Bearer {OPENAI_API_KEY}",
            "OpenAI-Beta": "realtime=v1"
        }
    ) as openai_ws:
        # Configure session with interruption handling
        session_update = {
            "type": "session.update",
            "session": {
                "turn_detection": {"type": "server_vad"},
                "input_audio_format": "g711_ulaw",
                "output_audio_format": "g711_ulaw"
            }
        }
        await openai_ws.send(json.dumps(session_update))

        # Relay audio bidirectionally
        async def twilio_receiver():
            nonlocal stream_sid, latest_media_ts
            async for message in websocket:
                data = json.loads(message)
                event = data.get('event')
                if event == 'start':
                    # Needed later: outbound 'media' and 'clear' messages are
                    # addressed by streamSid.
                    stream_sid = data['start']['streamSid']
                elif event == 'media':
                    latest_media_ts = int(
                        data['media'].get('timestamp', latest_media_ts)
                    )
                    # Forward audio to OpenAI
                    audio_append = {
                        "type": "input_audio_buffer.append",
                        "audio": data['media']['payload']
                    }
                    await openai_ws.send(json.dumps(audio_append))

        async def openai_receiver():
            nonlocal current_item_id, response_start_ts
            async for message in openai_ws:
                response = json.loads(message)
                event_type = response.get('type')

                # Handle interruptions
                if event_type == 'input_audio_buffer.speech_started':
                    # User started speaking - truncate AI response, but only
                    # if an assistant item is actually in flight.
                    if current_item_id is not None and response_start_ts is not None:
                        elapsed_ms = max(latest_media_ts - response_start_ts, 0)
                        await openai_ws.send(json.dumps({
                            "type": "conversation.item.truncate",
                            "item_id": current_item_id,
                            "content_index": 0,
                            "audio_end_ms": elapsed_ms
                        }))
                    current_item_id = None
                    response_start_ts = None
                    # Clear Twilio audio queue
                    await websocket.send(json.dumps({
                        "event": "clear",
                        "streamSid": stream_sid
                    }))

                # Forward AI audio to Twilio
                elif event_type == 'response.audio.delta':
                    item_id = response.get('item_id')
                    if item_id and item_id != current_item_id:
                        # First audio chunk of a new assistant item.
                        current_item_id = item_id
                        response_start_ts = latest_media_ts
                    await websocket.send(json.dumps({
                        "event": "media",
                        "streamSid": stream_sid,
                        "media": {"payload": response['delta']}
                    }))

        await asyncio.gather(twilio_receiver(), openai_receiver())
// Deepgram + GPT-4 with dynamic function calling
const { Deepgram } = require('@deepgram/sdk');
const OpenAI = require('openai');

// API clients, configured from environment variables.
const deepgram = new Deepgram(process.env.DEEPGRAM_API_KEY);
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY });

// Define available functions
// Each entry is a function schema: name, description, JSON-schema parameters.
const checkOrderStatusSpec = {
  name: 'check_order_status',
  description: 'Check the status of a customer order',
  parameters: {
    type: 'object',
    properties: {
      order_id: { type: 'string', description: 'Order ID' }
    },
    required: ['order_id']
  }
};
const functionManifest = [checkOrderStatusSpec];

let userContext = []; // Conversation history
/**
 * Wire a Twilio media connection to Deepgram transcription and GPT replies.
 *
 * Inbound audio is forwarded to Deepgram. Each non-empty transcript is added
 * to `userContext`, answered by a streamed chat completion (optionally via one
 * tool call), synthesized with `synthesizeAndPlay`, and recorded in history.
 *
 * @param {object} connection - Twilio media connection; must emit 'media'
 *   events and is passed through to `synthesizeAndPlay`.
 */
async function handleMediaStream(connection) {
  // Set up Deepgram transcription
  const dgConnection = deepgram.transcription.live({
    model: 'nova-2',
    language: 'en-US',
    smart_format: true
  });

  dgConnection.on('transcript', async (data) => {
    const transcript = data.channel.alternatives[0].transcript;
    if (!transcript) return;

    // Add user message to context
    userContext.push({ role: 'user', content: transcript });

    // Stream GPT response with function calling
    const stream = await openai.chat.completions.create({
      model: 'gpt-4',
      messages: userContext,
      tools: functionManifest.map(fn => ({ type: 'function', function: fn })),
      stream: true
    });

    let pendingText = '';  // text received but not yet synthesized
    let spokenText = '';   // everything said this turn, kept for history
    let toolCall = null;   // { id, name, arguments } accumulated across chunks

    for await (const chunk of stream) {
      const delta = chunk.choices[0]?.delta;
      if (!delta) continue;

      if (delta.tool_calls) {
        // Tool-call name and arguments arrive in fragments, so accumulate them
        // and execute only after the stream ends. Only the first tool call is
        // handled, as in the original pattern.
        const part = delta.tool_calls[0];
        toolCall = toolCall || { id: '', name: '', arguments: '' };
        if (part.id) toolCall.id = part.id;
        if (part.function?.name) toolCall.name = part.function.name;
        if (part.function?.arguments) toolCall.arguments += part.function.arguments;
      } else if (delta.content) {
        // Handle text deltas
        pendingText += delta.content;
        spokenText += delta.content;

        // Use bullet points for natural breaking
        if (delta.content.includes('•')) {
          await synthesizeAndPlay(pendingText, connection);
          pendingText = '';
        }
      }
    }

    if (toolCall && toolCall.name) {
      // Execute function
      const result = await executeFunction(
        toolCall.name,
        JSON.parse(toolCall.arguments || '{}')
      );

      // Add the assistant's tool request and the tool result to context;
      // the tool message must reference the originating tool call id.
      userContext.push({
        role: 'assistant',
        content: null,
        tool_calls: [{
          id: toolCall.id,
          type: 'function',
          function: { name: toolCall.name, arguments: toolCall.arguments }
        }]
      });
      userContext.push({
        role: 'tool',
        tool_call_id: toolCall.id,
        content: JSON.stringify(result)
      });

      // Get follow-up response
      const followUp = await openai.chat.completions.create({
        model: 'gpt-4',
        messages: userContext
      });
      const followUpText = followUp.choices[0].message.content || '';
      pendingText += followUpText;
      spokenText += followUpText;
    }

    // Synthesize remaining text
    if (pendingText) {
      await synthesizeAndPlay(pendingText, connection);
    }

    // Add assistant response to context (the full turn, not just the last chunk)
    if (spokenText) {
      userContext.push({ role: 'assistant', content: spokenText });
    }
  });

  // Forward Twilio audio to Deepgram
  connection.on('media', (msg) => {
    dgConnection.send(Buffer.from(msg.media.payload, 'base64'));
  });
}
/**
 * Convert text to speech with Deepgram and send it over the connection.
 *
 * @param {string} text - Text to synthesize.
 * @param {object} connection - Connection whose `send` receives the media event.
 */
async function synthesizeAndPlay(text, connection) {
  // Use Deepgram TTS for low latency
  const ttsOptions = {
    model: 'aura-asteria-en',
    encoding: 'mulaw',
    sample_rate: 8000
  };
  const audio = await deepgram.speak.request({ text }, ttsOptions);

  // Stream to Twilio
  const payload = audio.toString('base64');
  connection.send({ event: 'media', media: { payload } });
}
/**
 * Load and run a function module from ./functions by name.
 *
 * @param {string} name - Module name; letters, digits, '_' and '-' only.
 * @param {object} args - Argument object passed to the loaded function.
 * @returns {Promise<*>} Whatever the loaded function resolves to.
 * @throws {Error} If `name` is not a plain identifier.
 */
async function executeFunction(name, args) {
  // The name comes from model output, so reject anything that could escape
  // the ./functions directory (path separators, '..', etc.).
  if (typeof name !== 'string' || !/^[A-Za-z0-9_-]+$/.test(name)) {
    throw new Error(`Invalid function name: ${name}`);
  }

  // Dynamically require and execute function
  const fn = require(`./functions/${name}`);
  return await fn(args);
}
// Optimized streaming pattern for <1 second responses
// Prompt for the streaming pattern; it asks the model for • bullets.
const systemPrompt = `You are a helpful voice assistant.
Keep responses very concise (1-2 sentences).
Use • bullets to break responses into natural chunks.
Ask only ONE question at a time.
Be conversational and friendly.`;
// Shared playback state, read and written by streamGPTResponse and the
// speech_started handler.
let isAssistantSpeaking = false;
let currentStreamId = null;
/**
 * Stream a GPT reply and play it chunk by chunk as punctuation arrives.
 *
 * Sets `isAssistantSpeaking` while running; if it is cleared mid-stream
 * (interruption), streaming stops at the next chunk boundary.
 *
 * @param {string} userMessage - Latest user utterance.
 * @param {object} connection - Passed through to `synthesizeAndPlay`.
 */
async function streamGPTResponse(userMessage, connection) {
  const messages = [
    { role: 'system', content: systemPrompt },
    ...userContext,
    { role: 'user', content: userMessage }
  ];
  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages,
    stream: true,
    max_tokens: 100, // Limit for voice responses
    temperature: 0.7
  });

  currentStreamId = generateId();
  isAssistantSpeaking = true;

  let pending = '';
  for await (const part of completion) {
    const piece = part.choices[0]?.delta?.content;
    if (piece) {
      pending += piece;

      // Stream on sentence boundaries or bullet points
      if (/[.!?•]/.test(piece)) {
        if (!isAssistantSpeaking) break; // Interrupted
        await synthesizeAndPlay(pending.trim(), connection, currentStreamId);
        pending = '';
      }
    }
  }

  // Flush remaining buffer
  const leftover = pending.trim();
  if (leftover && isAssistantSpeaking) {
    await synthesizeAndPlay(leftover, connection, currentStreamId);
  }

  isAssistantSpeaking = false;
}
// Handle user interruptions
// When the caller starts speaking over the assistant, stop streaming and
// drop any audio already queued.
deepgram.on('speech_started', () => {
  if (!isAssistantSpeaking) {
    return;
  }
  isAssistantSpeaking = false; // Stop streaming
  connection.send({ event: 'clear' }); // Clear Twilio queue
  currentStreamId = null;
});
┌─────────────┐         ┌──────────────┐         ┌────────────┐
│   Phone     │ ──────> │   Twilio     │ ──────> │   Your     │
│   Caller    │         │   Voice      │         │   Server   │
│             │ <────── │ +Conversation│ <────── │ (Node.js)  │
└─────────────┘         │   Relay      │         └────────────┘
                        └──────────────┘                │
                                                        │
                        ┌──────────────┐                │
                        │  AI Service  │ <──────────────┘
                        │  (OpenAI/    │
                        │   Langflow)  │
                        └──────────────┘
Flow:
/voice endpoint
// Route calls based on intent
// Maps a detected intent label to the handler that serves it.
const intentRouter = {
  'billing': handleBillingInquiry,
  'support': handleTechnicalSupport,
  'sales': transferToSales
};

conversationRelay.on('transcription', async (data) => {
  const intent = await detectIntent(data.text);

  // Own-property check: an unrecognized label (or an inherited key such as
  // 'constructor') must not be invoked as a handler.
  if (!Object.prototype.hasOwnProperty.call(intentRouter, intent)) {
    console.warn(`No handler registered for intent: ${intent}`);
    return;
  }

  await intentRouter[intent](data);
});
// Replace traditional touch-tone IVR
// Gather speech input, with hints for the expected words, and nest the
// question inside the <Gather>.
const speechGather = twiml.gather({
  input: 'speech',
  hints: 'billing, support, sales, account',
  speechTimeout: 'auto'
});
speechGather.say('How can I help you today?');
// Extract structured data from conversation
// Asks the model for appointment details and parses its reply as JSON.
// JSON.parse throws if the reply is not valid JSON.
const extractAppointment = async (transcript) => {
  const prompt = `Extract appointment details: ${transcript}
Return JSON: { date, time, service }`;

  const completion = await openai.chat.completions.create({
    model: 'gpt-4',
    messages: [{ role: 'user', content: prompt }]
  });

  const [firstChoice] = completion.choices;
  return JSON.parse(firstChoice.message.content);
};
# Complete workflow: Call → Analysis → Business Action
from twilio.rest import Client

# account_sid / auth_token must already be defined (they are not set in this snippet).
client = Client(account_sid, auth_token)

# 1. Create Intelligence Service for your use case
service = client.intelligence.v2.services.create(
    auto_transcribe=True,
    unique_name='sales-call-analysis',
    auto_redaction=True,  # Automatically redact PII
)

# 2. Attach business-relevant operators
operators = [
    # Sentiment tracking
    {'type': 'sentiment-analysis', 'config': {'language_code': 'en-US'}},
    # Intent detection
    {'type': 'intent-detection', 'config': {'intents': ['purchase', 'cancel', 'complain']}},
    # Custom lead scoring
    {
        'type': 'custom-operator',
        'config': {
            'name': 'lead-score',
            'prompt': 'Rate this lead 1-10 based on budget, timeline, and authority. Explain reasoning.',
            'language_code': 'en-US',
        },
    },
]

# NOTE(review): confirm this operator-attachment call against the installed
# twilio helper library version.
for op in operators:
    client.intelligence.v2 \
        .services(service.sid) \
        .operators.create(operator_type=op['type'], config=op['config'])
# 3. Process call recording
def analyze_call(recording_sid):
    """Transcribe a recording, collect operator results, and act on them.

    Args:
        recording_sid: SID of the call recording to analyze.

    Returns:
        dict mapping each result's ``operator_type`` to its ``extract_match``.

    Side effects:
        May call ``send_slack_notification`` (lead score >= 8) and
        ``create_support_ticket`` (sentiment score < 0.3).
    """
    import time

    transcript = client.intelligence.v2.transcripts.create(
        service_sid=service.sid,
        channel={'media_properties': {'source_sid': recording_sid}},
    )

    # Wait for processing (async in production)
    time.sleep(10)

    # 4. Retrieve insights
    results = (
        client.intelligence.v2
        .transcripts(transcript.sid)
        .operator_results.list()
    )
    insights = {result.operator_type: result.extract_match for result in results}

    # 5. Take business action
    # NOTE(review): insights is keyed by operator_type; confirm that the custom
    # lead-scoring operator is really reported under 'lead-score'.
    lead = insights.get('lead-score')
    if isinstance(lead, dict) and lead.get('score', 0) >= 8:
        # High-value lead - alert sales team
        send_slack_notification(f"Hot lead detected! Score: {insights['lead-score']}")

    # The sentiment operator is attached as 'sentiment-analysis'; the legacy
    # 'sentiment' key is still honored if present.
    sentiment = insights.get('sentiment-analysis', insights.get('sentiment'))
    # Only act on an actual score: a missing result must not be read as 0
    # and escalated as negative sentiment.
    if isinstance(sentiment, dict) and 'score' in sentiment and sentiment['score'] < 0.3:
        # Negative sentiment - trigger retention workflow
        create_support_ticket(transcript.sid, priority='high')

    return insights
# Use with Twilio Voice webhook
@app.route('/call-completed', methods=['POST'])
def handle_call_completed():
    """Analyze the recording named in the webhook and store the insights.

    Returns:
        An empty body with HTTP 200, whether or not a recording was present.
    """
    recording_sid = request.form.get('RecordingSid')
    if not recording_sid:
        # No recording in this callback, so there is nothing to analyze.
        return '', 200

    insights = analyze_call(recording_sid)
    # Store in CRM, analytics platform, etc.
    store_call_insights(insights)
    return '', 200
# Monitor calls for compliance and automatically redact PII
from twilio.rest import Client

# account_sid / auth_token must already be defined (they are not set in this snippet).
client = Client(account_sid, auth_token)

# Create compliance-focused service
compliance_service = client.intelligence.v2.services.create(
    auto_transcribe=True,
    unique_name='compliance-monitoring',
    auto_redaction=True,
    data_logging=False,  # Don't log to Twilio for regulated industries
)

# Attach compliance operators
# The prompt text is a runtime string and is kept flush-left on purpose.
compliance_ops = [
    # PII detection and redaction
    {
        'type': 'pii-detection',
        'config': {
            'redact': True,
            'pii_types': ['ssn', 'credit_card', 'bank_account', 'email', 'phone'],
        },
    },
    # Custom compliance checker
    {
        'type': 'custom-operator',
        'config': {
            'name': 'tcpa-compliance',
            'prompt': '''
Check if this call follows TCPA compliance:
1. Was consent obtained before marketing?
2. Was opt-out option provided?
3. Was call within allowed hours?
Return: {compliant: true/false, violations: []}
''',
            'language_code': 'en-US',
        },
    },
]

# NOTE(review): confirm this operator-attachment call against the installed
# twilio helper library version.
for op in compliance_ops:
    client.intelligence.v2 \
        .services(compliance_service.sid) \
        .operators.create(operator_type=op['type'], config=op['config'])
# Access redacted transcripts (PII removed)
# transcript_sid must already be defined (it is not set in this snippet).
transcript_context = client.intelligence.v2.transcripts(transcript_sid)
transcript = transcript_context.fetch()

print(f"Redacted transcript: {transcript.redacted_transcript}")
print(f"PII detected: {transcript.pii_matches}")
# Required packages
npm install twilio express dotenv
# For AI integration
npm install openai # OpenAI Chat Completions or Realtime API
# OR configure Langflow endpoint
# For Deepgram STT/TTS (Call-GPT pattern)
npm install @deepgram/sdk
# For OpenAI Realtime API (Python)
pip install websockets openai
# For Conversational Intelligence (Python)
pip install twilio
# For local development
# (-g installs the ngrok CLI globally rather than into the project)
npm install -g ngrok # Webhook tunneling
# .env file
# Every value below is a placeholder; replace it with your own.
TWILIO_ACCOUNT_SID=ACxxxxxxxxxxxxx
TWILIO_AUTH_TOKEN=your_auth_token
TWILIO_PHONE_NUMBER=+1234567890
# For AI integration
OPENAI_API_KEY=sk-xxxxxxxxxxxxx # Chat Completions or Realtime API
# OR
LANGFLOW_URL=http://localhost:7860
LANGFLOW_FLOW_ID=your-flow-id
LANGFLOW_API_KEY=your-api-key
# For Deepgram (STT/TTS)
DEEPGRAM_API_KEY=your_deepgram_api_key
# For Conversational Intelligence
TWILIO_INTELLIGENCE_SERVICE_SID=GAxxxxxxxxxxxxx # Created via API
# Server configuration
PORT=3000
SERVER_DOMAIN=your-subdomain.ngrok.io # For OpenAI Realtime API
NGROK_URL=https://your-subdomain.ngrok.io
https://your-ngrok-url.ngrok.io/voice
This skill includes comprehensive documentation in references/:
Use view to read specific reference files when detailed information is needed.
// Graceful degradation for voice applications
// On a ConversationRelay error: log it, then build TwiML that apologizes and
// redirects the caller to /fallback-menu.
conversationRelay.on('error', (error) => {
console.error('ConversationRelay error:', error);
// Fallback to simple IVR
const twiml = new VoiceResponse();
twiml.say('I apologize, but I\'m having trouble right now.');
twiml.redirect('/fallback-menu');
// NOTE(review): `res` is not defined in this snippet — presumably it comes
// from an enclosing request handler; confirm before reusing this pattern.
res.type('text/xml').send(twiml.toString());
});
// Validate Twilio requests
const twilio = require('twilio');

// Rejects any /voice request whose X-Twilio-Signature header does not match
// the signature computed from the auth token, URL and body.
app.post('/voice', (req, res) => {
  const signature = req.headers['x-twilio-signature'];
  const requestUrl = `https://${req.hostname}${req.url}`;

  const isFromTwilio = twilio.validateRequest(
    process.env.TWILIO_AUTH_TOKEN,
    signature,
    requestUrl,
    req.body
  );

  if (!isFromTwilio) {
    return res.status(403).send('Forbidden');
  }

  // Process validated request
  // ...
});
Organized documentation extracted from official sources:
Example implementations and templates (added from real-world integrations):
Helper utilities for development:
To refresh this skill with updated documentation:
/create-skill --url https://www.twilio.com/docs/voice --name twilio-voice
tools
MemPalace local-first AI memory system. Use when setting up persistent memory for Claude Code sessions, mining project files or conversation transcripts, querying past context, configuring MCP tools, managing the knowledge graph, or troubleshooting palace operations.
tools
LangSmith Python SDK — trace, evaluate, and monitor LLM applications. Covers @traceable decorator, trace context manager, Client API, evaluate() / aevaluate(), comparative evaluation, custom evaluators, dataset management, prompt caching, ASGI middleware, and pytest plugin.
development
LangGraph (Python) — build stateful, controllable agent graphs with checkpointing, streaming, persistence, interrupts, fault tolerance, and durable execution. Covers both Graph API (StateGraph) and Functional API (@entrypoint/@task).
development
LangGraph Graph API (Python) — build explicit DAG agent workflows with StateGraph, typed state, nodes, edges, Command routing, Send fan-out, checkpointers, interrupts, and streaming. Use when you need explicit control flow and graph topology.