skills/skill-collections/ai-audio-speech/deepgram-core-workflow-a/SKILL.md
Implement speech-to-text transcription workflow with Deepgram. Use when building pre-recorded audio transcription, batch processing, or implementing core transcription features. Trigger with phrases like "deepgram transcription", "speech to text", "transcribe audio", "audio transcription workflow", "batch transcription".
npx skillsauth add zjunlp/Skills deepgram-core-workflow-a
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Implement a complete pre-recorded audio transcription workflow using Deepgram's Nova-2 model.
Prerequisite: completed deepgram-install-auth setup.
Create a service class to handle transcription operations.
Add methods for both local files and remote URLs.
Configure punctuation, diarization, and formatting.
Extract and format transcription results.
| Error | Cause | Solution |
|-------|-------|----------|
| Audio Too Long | Exceeds limits | Split into chunks or use async |
| Unsupported Format | Invalid audio type | Convert to WAV/MP3/FLAC |
| Empty Response | No speech detected | Check audio quality |
| Timeout | Large file processing | Use callback URL pattern |
// services/transcription.ts
import { createClient } from '@deepgram/sdk';
import { readFile } from 'fs/promises';
/** Model identifiers accepted by `TranscriptionOptions.model`. */
type TranscriptionModel = 'nova-2' | 'nova' | 'enhanced' | 'base';

/**
 * Optional per-request settings accepted by `TranscriptionService`.
 *
 * Every field may be omitted; the fallbacks noted below are the ones
 * `TranscriptionService` applies when it builds the Deepgram request.
 */
export interface TranscriptionOptions {
  /** Model name sent to Deepgram; falls back to `'nova-2'`. */
  model?: TranscriptionModel;

  /** Language code sent to Deepgram; falls back to `'en'`. */
  language?: string;

  /** Sent as `punctuate`; falls back to `true`. */
  punctuate?: boolean;

  /** Sent as `diarize`; falls back to `false`. */
  diarize?: boolean;

  /** Sent to Deepgram under the snake_case key `smart_format`; falls back to `true`. */
  smartFormat?: boolean;

  /** Sent as `utterances`; `transcribeUrl` falls back to `false`. */
  utterances?: boolean;

  /** Sent as `paragraphs`; `transcribeUrl` falls back to `false`. */
  paragraphs?: boolean;
}
/**
 * One recognized word with its position in the audio.
 * NOTE(review): `start`/`end` are presumably offsets in seconds — confirm
 * against the Deepgram response documentation.
 */
interface WordTiming {
  word: string;
  start: number;
  end: number;
  confidence: number;
}

/** One utterance segment attributed to a numbered speaker. */
interface SpeakerUtterance {
  speaker: number;
  transcript: string;
  start: number;
  end: number;
}

/**
 * Simplified transcription payload produced by `TranscriptionService`.
 *
 * `transcript`, `confidence` and `words` are taken from the first alternative
 * of the first channel of the Deepgram response; `utterances` is copied from
 * the response's `results.utterances` and may therefore be absent.
 */
export interface TranscriptionResult {
  /** Full transcript text. */
  transcript: string;

  /** Confidence reported for the selected alternative. */
  confidence: number;

  /** Per-word details; an empty array when the response carries none. */
  words: WordTiming[];

  /** Utterance segments, when the response includes them. */
  utterances?: SpeakerUtterance[];
}
/**
 * Wraps the Deepgram SDK client for pre-recorded (non-streaming) transcription
 * and reduces each response to a `TranscriptionResult`.
 */
export class TranscriptionService {
  /**
   * File extension → MIME type lookup. A `Map` is used instead of an object
   * literal so that an extension such as `constructor` can never resolve to a
   * member inherited from `Object.prototype`.
   */
  private static readonly MIME_TYPES: ReadonlyMap<string, string> = new Map([
    ['wav', 'audio/wav'],
    ['mp3', 'audio/mpeg'],
    ['flac', 'audio/flac'],
    ['ogg', 'audio/ogg'],
    ['m4a', 'audio/mp4'],
    ['webm', 'audio/webm'],
  ]);

  /** MIME type used when the extension is missing or not recognized. */
  private static readonly FALLBACK_MIME_TYPE = 'audio/wav';

  private readonly client;

  /**
   * @param apiKey Deepgram API key handed to the SDK's `createClient`.
   */
  constructor(apiKey: string) {
    this.client = createClient(apiKey);
  }

  /**
   * Transcribes audio that Deepgram fetches from a remote URL.
   *
   * @param url Location of the audio.
   * @param options Per-request settings; omitted fields use the service defaults.
   * @returns The simplified transcription result.
   * @throws Error carrying the SDK's message when the SDK reports an error, or
   *   when the response contains no transcription alternative.
   */
  async transcribeUrl(
    url: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const { result, error } = await this.client.listen.prerecorded.transcribeUrl(
      { url },
      this.buildRequestOptions(options)
    );

    if (error) throw new Error(error.message);
    return this.formatResult(result);
  }

  /**
   * Reads a local audio file and transcribes its contents.
   *
   * Honors the same options as `transcribeUrl` — including `utterances` and
   * `paragraphs` — and additionally sends a MIME type derived from the file
   * extension.
   *
   * @param filePath Path of the audio file to read.
   * @param options Per-request settings; omitted fields use the service defaults.
   * @returns The simplified transcription result.
   * @throws Whatever `readFile` rejects with when the file cannot be read; an
   *   Error carrying the SDK's message when the SDK reports an error, or when
   *   the response contains no transcription alternative.
   */
  async transcribeFile(
    filePath: string,
    options: TranscriptionOptions = {}
  ): Promise<TranscriptionResult> {
    const audio = await readFile(filePath);
    const { result, error } = await this.client.listen.prerecorded.transcribeFile(
      audio,
      {
        ...this.buildRequestOptions(options),
        mimetype: this.getMimeType(filePath),
      }
    );

    if (error) throw new Error(error.message);
    return this.formatResult(result);
  }

  /**
   * Maps caller-facing options to the request fields sent to Deepgram,
   * filling in the service defaults. Shared by both transcribe methods so
   * they cannot drift apart.
   */
  private buildRequestOptions(options: TranscriptionOptions) {
    return {
      // `||` is deliberate for the two string fields: an empty string is not a
      // usable model or language, so it falls back to the default as well.
      model: options.model || 'nova-2',
      language: options.language || 'en',
      punctuate: options.punctuate ?? true,
      diarize: options.diarize ?? false,
      smart_format: options.smartFormat ?? true,
      utterances: options.utterances ?? false,
      paragraphs: options.paragraphs ?? false,
    };
  }

  /**
   * Extracts the first alternative of the first channel from a raw response.
   *
   * @throws Error when the response has no channel or no alternative, instead
   *   of failing with an opaque property-access TypeError.
   */
  // The SDK response type is not visible here, so the parameter stays loosely
  // typed and is probed defensively.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private formatResult(result: any): TranscriptionResult {
    const alternative = result?.results?.channels?.[0]?.alternatives?.[0];
    if (!alternative) {
      throw new Error('Deepgram response contained no transcription alternatives');
    }

    return {
      transcript: alternative.transcript ?? '',
      confidence: alternative.confidence ?? 0,
      words: alternative.words ?? [],
      utterances: result.results.utterances,
    };
  }

  /**
   * Derives a MIME type from the text after the last `.` in the path
   * (case-insensitive), falling back to `audio/wav` for anything unrecognized.
   */
  private getMimeType(filePath: string): string {
    const ext = filePath.split('.').pop()?.toLowerCase() ?? '';
    return (
      TranscriptionService.MIME_TYPES.get(ext) ??
      TranscriptionService.FALLBACK_MIME_TYPE
    );
  }
}
// services/batch-transcription.ts
import { TranscriptionService, TranscriptionResult } from './transcription';
/**
 * Transcribes many local audio files, running at most `concurrency` requests
 * at a time, and never rejects because of an individual file.
 *
 * Files are processed in consecutive batches; each batch is awaited in full
 * before the next one starts.
 *
 * @param files Paths of the audio files to transcribe. A path listed more than
 *   once ends up with a single entry (the last outcome wins).
 * @param options.concurrency Maximum number of simultaneous requests. Values
 *   are rounded down; anything that is not at least 1 (including `NaN`, zero
 *   and negatives) falls back to 5. `Infinity` processes all files at once.
 * @returns A map from file path to its transcription result, or to the `Error`
 *   describing why that file failed.
 * @throws Error when the `DEEPGRAM_API_KEY` environment variable is not set.
 */
export async function batchTranscribe(
  files: string[],
  options: { concurrency?: number } = {}
): Promise<Map<string, TranscriptionResult | Error>> {
  const defaultConcurrency = 5;

  // Fail fast with a clear message instead of handing `undefined` to the SDK.
  const apiKey = process.env.DEEPGRAM_API_KEY;
  if (!apiKey) {
    throw new Error('DEEPGRAM_API_KEY environment variable is not set');
  }

  const service = new TranscriptionService(apiKey);
  const results = new Map<string, TranscriptionResult | Error>();

  // A step below 1 would make the loop stall or run forever, so clamp it.
  // (`NaN >= 1` is false, so NaN also falls back to the default.)
  const requested = Math.floor(options.concurrency ?? defaultConcurrency);
  const concurrency = requested >= 1 ? requested : defaultConcurrency;

  for (let start = 0; start < files.length; start += concurrency) {
    const batch = files.slice(start, start + concurrency);

    // Each task resolves to a [file, outcome] pair and never rejects, so one
    // failing file cannot abort the rest of its batch.
    const entries = await Promise.all(
      batch.map(
        async (file): Promise<[string, TranscriptionResult | Error]> => {
          try {
            return [file, await service.transcribeFile(file)];
          } catch (reason: unknown) {
            // Anything can be thrown; normalize so the map really holds Errors.
            return [file, reason instanceof Error ? reason : new Error(String(reason))];
          }
        }
      )
    );

    for (const [file, outcome] of entries) {
      results.set(file, outcome);
    }
  }

  return results;
}
// Diarization example: transcribe a meeting recording with both speaker
// labels and utterance segmentation requested.
const result = await service.transcribeFile('./meeting.wav', { diarize: true, utterances: true });

// Print the recording as a speaker-labelled conversation; nothing is printed
// when the result carries no utterances.
for (const { speaker, transcript } of result.utterances ?? []) {
  console.log(`Speaker ${speaker}: ${transcript}`);
}
# services/transcription.py
from deepgram import DeepgramClient, PrerecordedOptions, FileSource
from pathlib import Path
from typing import Optional
import mimetypes
class TranscriptionService:
    """Wraps a Deepgram client for pre-recorded (non-streaming) transcription.

    Both transcribe methods request smart formatting and punctuation and
    return a plain dict with the keys ``transcript``, ``confidence`` and
    ``words``.
    """

    def __init__(self, api_key: str):
        """Create the underlying ``DeepgramClient`` from ``api_key``."""
        self.client = DeepgramClient(api_key)

    def transcribe_url(
        self,
        url: str,
        model: str = 'nova-2',
        language: str = 'en',
        diarize: bool = False
    ) -> dict:
        """Transcribe audio located at a remote URL.

        Args:
            url: Location of the audio.
            model: Model name passed to Deepgram.
            language: Language code passed to Deepgram.
            diarize: Passed to Deepgram as the ``diarize`` option.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words``.
        """
        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )

        response = self.client.listen.rest.v("1").transcribe_url(
            {"url": url},
            options
        )

        return self._format_result(response)

    def transcribe_file(
        self,
        file_path: str,
        model: str = 'nova-2',
        diarize: bool = False,
        language: Optional[str] = None
    ) -> dict:
        """Read a local audio file and transcribe its contents.

        Args:
            file_path: Path of the audio file to read.
            model: Model name passed to Deepgram.
            diarize: Passed to Deepgram as the ``diarize`` option.
            language: Optional language code. Placed last so existing
                positional callers keep working; ``None`` leaves the option
                unset, as before.

        Returns:
            Dict with ``transcript``, ``confidence`` and ``words``.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(file_path, 'rb') as f:
            audio = f.read()

        # FileSource is a typing alias for a dict payload, not a callable
        # constructor: the SDK expects the raw bytes under the "buffer" key.
        source: FileSource = {"buffer": audio}

        options = PrerecordedOptions(
            model=model,
            language=language,
            smart_format=True,
            punctuate=True,
            diarize=diarize,
        )

        response = self.client.listen.rest.v("1").transcribe_file(
            source,
            options
        )

        return self._format_result(response)

    def _format_result(self, response) -> dict:
        """Reduce a response to the first alternative of its first channel."""
        channel = response.results.channels[0]
        alternative = channel.alternatives[0]

        return {
            'transcript': alternative.transcript,
            'confidence': alternative.confidence,
            'words': alternative.words,
        }
Proceed to deepgram-core-workflow-b for real-time streaming transcription.
development
Machine learning in Python with scikit-learn. Use when working with supervised learning (classification, regression), unsupervised learning (clustering, dimensionality reduction), model evaluation, hyperparameter tuning, preprocessing, or building ML pipelines. Provides comprehensive reference documentation for algorithms, preprocessing techniques, pipelines, and best practices.
development
Query Reactome REST API for pathway analysis, enrichment, gene-pathway mapping, disease pathways, molecular interactions, expression analysis, for systems biology studies.
tools
Cheminformatics toolkit for fine-grained molecular control. SMILES/SDF parsing, descriptors (MW, LogP, TPSA), fingerprints, substructure search, 2D/3D generation, similarity, reactions. For standard workflows with simpler interface, use datamol (wrapper around RDKit). Use rdkit for advanced control, custom sanitization, specialized algorithms.
development
Python interface to OpenMS for mass spectrometry data analysis. Use for LC-MS/MS proteomics and metabolomics workflows including file handling (mzML, mzXML, mzTab, FASTA, pepXML, protXML, mzIdentML), signal processing, feature detection, peptide identification, and quantitative analysis. Apply when working with mass spectrometry data, analyzing proteomics experiments, or processing metabolomics datasets.