rag-patterns/SKILL.md
Use when building Retrieval-Augmented Generation (RAG) systems, chatbots with document context, semantic search, or AI apps that query a knowledge base before generating responses.
npx skillsauth add Heldinhow/awesome-opencode-dev-skills rag-patterns
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
User Query
↓
[Embed query] → vector
↓
[Vector DB search] → top-k relevant chunks
↓
[Build prompt: system + context chunks + user query]
↓
[LLM generate] → answer grounded in retrieved docs
import { openai } from '@ai-sdk/openai'
import { embedMany } from 'ai'
/**
 * Ingest one document: split it into chunks, embed every chunk, and upsert
 * the resulting vectors.
 *
 * @param text - Raw document text to index.
 * @param metadata - Copied onto every stored record; `metadata.docId` is used
 *   as the prefix of each record id (`<docId>-<chunk index>`).
 */
async function ingestDocument(text: string, metadata: Record<string, any>) {
  // Split into 512-unit windows that overlap by 64 units.
  const pieces = chunkText(text, { size: 512, overlap: 64 })

  // One batched embedding call for all chunks.
  const embedded = await embedMany({
    model: openai.embedding('text-embedding-3-small'),
    values: pieces,
  })

  // Pair chunk i with embedding i; the chunk text is kept in the record
  // metadata under `text` so it can be read back at query time.
  const records = pieces.map((piece, index) => ({
    id: `${metadata.docId}-${index}`,
    values: embedded.embeddings[index],
    metadata: { ...metadata, text: piece },
  }))

  // NOTE(review): `vectorDB` is not defined in this snippet — presumably a
  // vector-store client created elsewhere; confirm its upsert() record shape.
  await vectorDB.upsert(records)
}
/**
 * Split `text` into fixed-size windows where each window starts
 * `size - overlap` positions after the previous one.
 *
 * Sizes are measured with `String.prototype.slice`, i.e. in UTF-16 code
 * units (characters), not model tokens.
 *
 * @param text - Text to split. An empty string yields an empty array.
 * @param options.size - Maximum length of each chunk; must be greater than 0.
 * @param options.overlap - How many trailing units of one chunk are repeated
 *   at the start of the next; must satisfy `0 <= overlap < size`.
 * @returns The chunks in document order. The last chunk may be shorter than
 *   `size`.
 * @throws RangeError if `size` or `overlap` is out of range (including NaN).
 */
function chunkText(text: string, options: { size: number; overlap: number }) {
  const { size, overlap } = options

  // Written as negated comparisons so that NaN is rejected as well.
  if (!(size > 0)) {
    throw new RangeError(`chunkText: size must be greater than 0, got ${size}`)
  }
  // overlap >= size would make the step zero or negative, so the window would
  // never advance and the loop would not terminate.
  if (!(overlap >= 0 && overlap < size)) {
    throw new RangeError(
      `chunkText: overlap must satisfy 0 <= overlap < size, got ${overlap} (size ${size})`
    )
  }

  const step = size - overlap
  const chunks: string[] = []
  for (let start = 0; start < text.length; start += step) {
    const end = start + size
    chunks.push(text.slice(start, end))
    // Once a window reaches the end of the text, stop: a further window would
    // contain only the overlap tail already present in this chunk.
    if (end >= text.length) break
  }
  return chunks
}
// Strategy selection:
// - Fixed-size chunks (~512 tokens; note that chunkText above measures characters, not tokens) → general purpose
// - Sentence/paragraph splitting → better coherence
// - Recursive text splitting → handles code, markdown
// - Semantic chunking → split at topic boundaries (advanced)
import { embed } from 'ai'
/**
 * Look up the stored chunks most similar to `query`.
 *
 * @param query - Natural-language search text.
 * @param topK - Maximum number of matches requested from the vector store
 *   (default 5). Fewer texts may be returned because of the score cut-off.
 * @returns The `metadata.text` of every match whose score is above 0.75, in
 *   the order the vector store returned them.
 */
async function retrieve(query: string, topK = 5) {
  // The query is embedded with the same model name used at ingestion time.
  const queryEmbedding = await embed({
    model: openai.embedding('text-embedding-3-small'),
    value: query,
  })

  // NOTE(review): `vectorDB` is defined outside this snippet; the response is
  // assumed to expose `matches[]` with `score` and `metadata` — confirm.
  const response = await vectorDB.query({
    vector: queryEmbedding.embedding,
    topK,
    includeMetadata: true,
  })

  const texts: string[] = []
  for (const match of response.matches) {
    // Relevance threshold: matches scoring 0.75 or below are dropped.
    if (match.score > 0.75) {
      texts.push(match.metadata.text as string)
    }
  }
  return texts
}
import { generateText } from 'ai'
/**
 * Answer a question using retrieved context: fetch the relevant chunks, put
 * them in the system prompt, and ask the model to answer from that context.
 *
 * @param userQuestion - The end user's question; used both as the retrieval
 *   query and as the model prompt.
 * @returns The generated answer text.
 */
async function ragQuery(userQuestion: string) {
  const passages = await retrieve(userQuestion)

  // Chunks are separated by a `---` rule. The continuation lines of the
  // template literal stay at column 0 so no indentation leaks into the prompt.
  const systemPrompt = `You are a helpful assistant. Answer questions based ONLY on the provided context.
If the context doesn't contain enough information, say so — do not make up answers.
Context:
${passages.join('\n\n---\n\n')}`

  const completion = await generateText({
    model: openai('gpt-4o-mini'),
    system: systemPrompt,
    prompt: userQuestion,
  })
  return completion.text
}
// Hybrid search: combine semantic (vector) search with BM25/full-text search for better recall.
// The two lookups do not depend on each other, so both are started together
// and awaited with Promise.all; each asks for up to 10 results.
// NOTE(review): `embedding`, `query`, `vectorDB`, and `db` are not defined in
// this snippet — presumably supplied by the surrounding code; confirm.
const [semanticResults, keywordResults] = await Promise.all([
vectorDB.query({ vector: embedding, topK: 10 }),
db.fullTextSearch(query, { limit: 10 }),
])
// Next step (not shown): re-rank the combined results with RRF
// (Reciprocal Rank Fusion) or a cross-encoder.
// Improve retrieval by rewriting ambiguous queries.
// generateText resolves to a result object; the rewritten question is its
// `text` field, so destructure it to make `rewrittenQuery` the string itself
// rather than the whole result.
const { text: rewrittenQuery } = await generateText({
  model: openai('gpt-4o-mini'),
  prompt: `Rewrite this question to be more specific for document search: "${userQuestion}"`,
})
// Generate multiple query variants and merge results
const queries = await generateQueries(userQuestion) // 3-5 variants
// Call retrieve through a lambda rather than passing it straight to map():
// map() invokes its callback with (element, index, array), so the index would
// land in retrieve's second parameter `topK` (0 results for the first
// variant, 1 for the second, ...).
const allResults = await Promise.all(
  queries.map((variant: string) => retrieve(variant))
)
// Flatten the per-variant result lists into one list, then drop repeated chunks.
const deduped = deduplicateByContent(allResults.flat())
// Contextual compression: after retrieval, compress chunks to only the
// sentences relevant to the question.
// NOTE(review): `compressContext` and `chunks` are not defined in this
// snippet — presumably `chunks` is the retrieved chunk texts; confirm.
const compressed = await compressContext(chunks, userQuestion)
tools
Implement WebSocket communication for real-time bidirectional client-server communication.
development
Implement webhook handlers for processing incoming events from external services.
development
Test web applications using Playwright for end-to-end browser testing.
development
Build production-quality HTML artifacts using React, Tailwind CSS, and shadcn/ui.