skills/vector-db-patterns/SKILL.md
Embedding strategies, ANN algorithms, hybrid search, RAG chunking strategies, and reranking for semantic search and retrieval.
npx skillsauth add rubicanjr/FinCognis vector-db-patterns
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Semantic search and retrieval-augmented generation (RAG) patterns with vector databases.
import { OpenAI } from 'openai'
// Shared OpenAI client used for the embedding and chat-completion requests in this file.
// NOTE(review): constructed without options, so the API key presumably comes from the environment — confirm.
const openai = new OpenAI()
// Batch embedding for efficiency.
// Sends `texts` to text-embedding-3-small in consecutive slices of at most
// 2048 inputs (the per-request maximum for that model), one request at a time,
// and returns the vectors in the order the responses list them.
// An empty `texts` array returns [] without issuing any request.
async function embedTexts(texts: string[]): Promise<number[][]> {
  const maxInputsPerRequest = 2048
  const vectors: number[][] = []
  let offset = 0
  while (offset < texts.length) {
    const response = await openai.embeddings.create({
      // Natively 1536 dimensions; good cost/quality trade-off.
      model: 'text-embedding-3-small',
      input: texts.slice(offset, offset + maxInputsPerRequest),
      // Request shortened (Matryoshka) vectors for faster search.
      dimensions: 512,
    })
    for (const item of response.data) {
      vectors.push(item.embedding)
    }
    offset += maxInputsPerRequest
  }
  return vectors
}
// Embed a query for asymmetric retrieval.
// The query is tagged with a `search_query: ` prefix before embedding so that
// queries and stored documents are embedded with different task markers.
// NOTE(review): task prefixes only help with models trained on them — confirm
// this is intended for the embedding model used by embedTexts.
async function embedForSearch(query: string): Promise<number[]> {
  const prefixed = `search_query: ${query}`
  const vectors = await embedTexts([prefixed])
  return vectors[0]
}
// Embed a document for storage.
// The text is tagged with a `search_document: ` prefix before embedding; this
// is the storage-side counterpart of the `search_query: ` prefix used for queries.
async function embedForStorage(document: string): Promise<number[]> {
  const prefixed = `search_document: ${document}`
  const vectors = await embedTexts([prefixed])
  return vectors[0]
}
// A chunk of source text as produced by the chunking helpers in this file.
interface Chunk {
// Identifier for the chunk (the chunkers in this file assign a random UUID).
id: string
// The chunk's text content.
text: string
// Provenance of the chunk.
// NOTE(review): the field meanings below are inferred from their names — confirm against the code that fills them in.
metadata: {
// Presumably the id of the document the chunk was cut from.
sourceId: string
// Presumably the chunk's position within the list of chunks for its source.
chunkIndex: number
// Presumably the character offsets of the chunk within the source text.
startChar: number
endChar: number
}
}
// Recursive character splitting with overlap.
// Delegates to recursiveSplit with a fixed separator priority: paragraph
// break, line break, sentence end ('. '), then single space.
//   text      - the text to split
//   chunkSize - chunk length budget in characters (default 512)
//   overlap   - characters carried over from the previous chunk (default 50)
function chunkText(
  text: string,
  chunkSize: number = 512,
  overlap: number = 50
): Chunk[] {
  return recursiveSplit(text, ['\n\n', '\n', '. ', ' '], chunkSize, overlap)
}
// Helper for recursiveSplit: appends to `out` the [start, end) offsets of the
// pieces of `text`, each at most `limit` characters long. `base` is the offset
// of `text` within the original input. Pieces are cut at the first separator
// (in priority order) that occurs in `text`; a piece that is still too long is
// cut again with the lower-priority separators, and finally into fixed-width
// slices when no separator is left. Separators themselves are not part of any
// piece.
function collectSplitSpans(
  text: string,
  base: number,
  separators: string[],
  limit: number,
  out: Array<[number, number]>
): void {
  if (text.length === 0) return
  if (text.length <= limit) {
    out.push([base, base + text.length])
    return
  }
  const separator = separators.find(s => text.includes(s)) ?? ''
  if (separator === '') {
    // Nothing left to split on: fall back to fixed-width slices.
    for (let i = 0; i < text.length; i += limit) {
      out.push([base + i, base + Math.min(i + limit, text.length)])
    }
    return
  }
  const lowerPriority = separators.slice(separators.indexOf(separator) + 1)
  let cursor = 0
  for (;;) {
    const hit = text.indexOf(separator, cursor)
    const stop = hit === -1 ? text.length : hit
    collectSplitSpans(text.slice(cursor, stop), base + cursor, lowerPriority, limit, out)
    if (hit === -1) break
    cursor = hit + separator.length
  }
}

// Splits `text` into chunks of at most `chunkSize` characters.
//
//   text       - the text to split
//   separators - separators in priority order; the first one present is used,
//                and oversized pieces are split again with the later ones
//   chunkSize  - maximum chunk length in characters (values below 1, or NaN,
//                are treated as 1)
//   overlap    - number of trailing characters of the previous chunk to repeat
//                at the start of the next one (clamped to 0..chunkSize-1, and
//                reduced when needed so the chunk still fits)
//
// Text that already fits is returned unchanged as a single chunk. Otherwise
// pieces are packed greedily; each chunk is a trimmed, contiguous slice of
// `text`, and whitespace-only chunks are skipped. For every returned chunk
// `text.slice(startChar, endChar)` equals the chunk text, `chunkIndex` is its
// position in the result, and `sourceId` is left as '' for the caller to set.
function recursiveSplit(
  text: string,
  separators: string[],
  chunkSize: number,
  overlap: number
): Chunk[] {
  const limit = Math.max(1, Math.floor(chunkSize) || 1)
  const carry = Math.min(Math.max(0, Math.floor(overlap) || 0), limit - 1)

  if (text.length <= limit) {
    return [{
      id: crypto.randomUUID(),
      text,
      metadata: { sourceId: '', chunkIndex: 0, startChar: 0, endChar: text.length },
    }]
  }

  const chunks: Chunk[] = []
  const pushChunk = (from: number, to: number): void => {
    const raw = text.slice(from, to)
    const body = raw.trim()
    if (body === '') return
    // Shift the start past trimmed leading whitespace so offsets match `body`.
    const startChar = from + (raw.length - raw.trimStart().length)
    chunks.push({
      id: crypto.randomUUID(),
      text: body,
      metadata: {
        sourceId: '',
        chunkIndex: chunks.length,
        startChar,
        endChar: startChar + body.length,
      },
    })
  }

  const spans: Array<[number, number]> = []
  collectSplitSpans(text, 0, separators, limit, spans)

  let start = 0
  let end = 0
  let open = false
  for (const [spanStart, spanEnd] of spans) {
    if (!open) {
      start = spanStart
      end = spanEnd
      open = true
      continue
    }
    if (spanEnd - start > limit) {
      pushChunk(start, end)
      // Reach back `carry` characters into the chunk just emitted, but never
      // so far that the new chunk would exceed the limit. With no usable
      // overlap, start cleanly at the next piece (skipping the separator).
      const overlapStart = Math.max(end - carry, spanEnd - limit)
      start = overlapStart < end ? overlapStart : spanStart
    }
    end = spanEnd
  }
  if (open) pushChunk(start, end)
  return chunks
}
// Semantic chunking: split at topic boundaries using embeddings.
//
//   text      - the text to chunk
//   threshold - adjacent sentences whose cosine similarity is below this
//               value start a new chunk (default 0.3)
//
// The text is cut into sentences at runs of '.', '!' or '?'; a trailing
// fragment without terminal punctuation is kept as its own sentence, and
// whitespace-only fragments are ignored. Each sentence is embedded, and
// consecutive sentences are grouped until the similarity to the previous
// sentence drops below `threshold`.
//
// Each chunk's text is the contiguous slice of `text` from its first sentence
// to its last, so `text.slice(startChar, endChar)` equals the chunk text.
// `chunkIndex` is the chunk's position in the result and `sourceId` is left
// as '' for the caller to set. Blank input returns [] without calling the
// embedding API.
async function semanticChunk(text: string, threshold: number = 0.3): Promise<Chunk[]> {
  const sentences: Array<{ text: string; start: number; end: number }> = []
  // `(?:[.!?]+|$)` lets the final sentence end at end-of-input, so trailing
  // text without punctuation is not dropped.
  for (const match of text.matchAll(/[^.!?]+(?:[.!?]+|$)/g)) {
    const raw = match[0]
    const body = raw.trim()
    if (body === '') continue
    const start = (match.index ?? 0) + (raw.length - raw.trimStart().length)
    sentences.push({ text: body, start, end: start + body.length })
  }

  if (sentences.length === 0) {
    // No sentence-like span (e.g. punctuation only): treat the whole text as one.
    const body = text.trim()
    if (body === '') return []
    const start = text.length - text.trimStart().length
    sentences.push({ text: body, start, end: start + body.length })
  }

  const embeddings = await embedTexts(sentences.map(s => s.text))

  const groups = [{ start: sentences[0].start, end: sentences[0].end }]
  for (let i = 1; i < sentences.length; i++) {
    const similarity = cosineSimilarity(embeddings[i - 1], embeddings[i])
    if (similarity < threshold) {
      // Low similarity = topic boundary = new chunk
      groups.push({ start: sentences[i].start, end: sentences[i].end })
    } else {
      groups[groups.length - 1].end = sentences[i].end
    }
  }

  return groups.map((group, i) => ({
    id: crypto.randomUUID(),
    text: text.slice(group.start, group.end),
    metadata: { sourceId: '', chunkIndex: i, startChar: group.start, endChar: group.end },
  }))
}
// Using Pinecone
import { Pinecone } from '@pinecone-database/pinecone'
// Pinecone client, plus a handle to the 'documents' index that the upsert and query calls in this file go through.
// NOTE(review): constructed without options, so credentials presumably come from the environment — confirm.
const pinecone = new Pinecone()
const index = pinecone.index('documents')
// Upsert with metadata.
// Embeds every chunk's text in one embedTexts call, pairs each chunk with its
// vector by position, and upserts the records into the index 100 at a time.
// Each record stores the chunk text plus the document's id, title, category
// and creation time, and the chunk's position in `chunks`.
// NOTE(review): createdAt is stored as an ISO-8601 string. Pinecone documents
// its range operators ($gt/$gte/$lt/$lte) for numeric metadata — confirm that
// range filters on this field work, or store a numeric timestamp instead.
async function indexDocument(doc: Document, chunks: Chunk[]): Promise<void> {
  const upsertBatchSize = 100
  const embeddings = await embedTexts(chunks.map(chunk => chunk.text))

  const records = chunks.map((chunk, position) => {
    const id = chunk.id
    const values = embeddings[position]
    const metadata = {
      text: chunk.text,
      sourceId: doc.id,
      sourceTitle: doc.title,
      category: doc.category,
      createdAt: doc.createdAt.toISOString(),
      chunkIndex: position,
    }
    return { id, values, metadata }
  })

  let offset = 0
  while (offset < records.length) {
    await index.upsert(records.slice(offset, offset + upsertBatchSize))
    offset += upsertBatchSize
  }
}
// Query with metadata filter.
//   query   - free-text query; embedded with embedForSearch
//   filters - optional restrictions: `category` must equal the given value,
//             `after` keeps records whose createdAt is >= the given date
//   topK    - number of matches to request (default 10)
// Returns one result per match with its id, score (0 when the match has no
// score), and the text / sourceId / sourceTitle read from the match metadata.
// NOTE(review): the `after` filter compares against an ISO-8601 string.
// Pinecone documents $gte for numeric metadata — confirm this filter is
// accepted, or switch both indexing and filtering to a numeric timestamp.
async function searchDocuments(
  query: string,
  filters?: { category?: string; after?: Date },
  topK: number = 10
): Promise<SearchResult[]> {
  const vector = await embedForSearch(query)

  const category = filters?.category
  const after = filters?.after
  const conditions: Record<string, unknown> = {
    ...(category ? { category: { $eq: category } } : {}),
    ...(after ? { createdAt: { $gte: after.toISOString() } } : {}),
  }
  // Only send a filter when at least one condition was requested.
  const hasConditions = Object.keys(conditions).length > 0

  const response = await index.query({
    vector,
    topK,
    includeMetadata: true,
    filter: hasConditions ? conditions : undefined,
  })

  return response.matches.map(match => {
    const metadata = match.metadata
    return {
      id: match.id,
      score: match.score ?? 0,
      text: metadata?.text as string,
      sourceId: metadata?.sourceId as string,
      sourceTitle: metadata?.sourceTitle as string,
    }
  })
}
// Combine vector similarity with BM25 keyword matching.
//   query - the search query, sent to both retrievers
//   topK  - number of fused results to return (default 10); each retriever is
//           asked for twice as many candidates
//   alpha - weight of the vector ranking; the keyword ranking gets 1 - alpha
//           (default 0.7 = 70% semantic, 30% keyword)
// Rankings are merged with weighted Reciprocal Rank Fusion: a result at
// zero-based rank r contributes weight * 1 / (60 + r + 1), and contributions
// for the same id are summed. The returned `score` is that fused value.
async function hybridSearch(
  query: string,
  topK: number = 10,
  alpha: number = 0.7
): Promise<SearchResult[]> {
  const rrfConstant = 60
  const candidateCount = topK * 2

  // Both retrievers run concurrently.
  const [semanticHits, lexicalHits] = await Promise.all([
    vectorSearch(query, candidateCount),
    keywordSearch(query, candidateCount), // BM25 via Elasticsearch
  ])

  const fused = new Map<string, number>()
  const weightedLists = [
    { hits: semanticHits, weight: alpha },
    { hits: lexicalHits, weight: 1 - alpha },
  ]
  for (const { hits, weight } of weightedLists) {
    hits.forEach((hit, rank) => {
      const soFar = fused.get(hit.id) ?? 0
      fused.set(hit.id, soFar + weight * (1 / (rrfConstant + rank + 1)))
    })
  }

  // When an id appears in both lists, the keyword hit's fields are the ones kept.
  const lookup = new Map(
    [...semanticHits, ...lexicalHits].map(hit => [hit.id, hit] as const)
  )

  const ranked = Array.from(fused)
  ranked.sort((left, right) => right[1] - left[1])
  return ranked.slice(0, topK).map(([id, score]) => ({
    ...lookup.get(id)!,
    score,
  }))
}
// Cross-encoder reranking: slower but much more accurate than bi-encoder.
//   query   - the user query the documents are scored against
//   results - candidate results; their `text` is what gets scored
//   topK    - number of results to request back (default 5)
// Posts the candidates to Cohere's rerank endpoint and returns the selected
// candidates in the order the API lists them, with `score` replaced by the
// API's relevance score. The API key is read from COHERE_API_KEY.
// Returns [] immediately when there are no candidates, without calling the API.
// Throws an Error when the request fails with a non-2xx status or the response
// body has no `results` array.
async function rerankResults(
  query: string,
  results: SearchResult[],
  topK: number = 5
): Promise<SearchResult[]> {
  if (results.length === 0) {
    return []
  }

  // Use Cohere Rerank or cross-encoder model
  const response = await fetch('https://api.cohere.ai/v1/rerank', {
    method: 'POST',
    headers: {
      Authorization: `Bearer ${process.env.COHERE_API_KEY}`,
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      model: 'rerank-english-v3.0',
      query,
      documents: results.map(r => r.text),
      top_n: topK,
      return_documents: false,
    }),
  })

  if (!response.ok) {
    // Include the body when available; it is only extra context for the error.
    let detail = ''
    try {
      detail = await response.text()
    } catch {
      detail = ''
    }
    throw new Error(
      `Cohere rerank request failed with status ${response.status}${detail ? `: ${detail}` : ''}`
    )
  }

  const payload: unknown = await response.json()
  const ranked = (payload as { results?: unknown } | null)?.results
  if (!Array.isArray(ranked)) {
    throw new Error('Cohere rerank response did not include a results array')
  }

  // Each entry points back at a candidate by its position in `documents`.
  return ranked.map((entry: { index: number; relevance_score: number }) => ({
    ...results[entry.index],
    score: entry.relevance_score,
  }))
}
// RAG pipeline: retrieve → rerank → generate.
//   query - the user's question
// Retrieves 20 candidates with hybridSearch, narrows them to 5 with
// rerankResults, joins their texts (blank-line separated) into the system
// prompt, and asks gpt-4o to answer the query from that context.
// Returns the first choice's message content, or '' when the model returned
// no text content, so the declared Promise<string> always holds.
async function ragQuery(query: string): Promise<string> {
  // Step 1: Retrieve candidates (broad, fast)
  const candidates = await hybridSearch(query, 20)

  // Step 2: Rerank (narrow, accurate)
  const reranked = await rerankResults(query, candidates, 5)

  // Step 3: Generate answer with context
  const context = reranked.map(r => r.text).join('\n\n')
  const response = await openai.chat.completions.create({
    model: 'gpt-4o',
    messages: [
      { role: 'system', content: `Answer based on the context below.\n\nContext:\n${context}` },
      { role: 'user', content: query },
    ],
  })

  // `content` can be null; fall back to '' instead of asserting it away.
  return response.choices[0].message.content ?? ''
}
development
Goal-based workflow orchestration - routes tasks to specialist agents based on user goals
tools
Wiring Verification
development
Connection management, room patterns, reconnection strategies, message buffering, and binary protocol design.
development
Screenshot comparison QA for frontend development. Takes a screenshot of the current implementation, scores it across multiple visual dimensions, and returns a structured PASS/REVISE/FAIL verdict with concrete fixes. Use when implementing UI from a design reference or verifying visual correctness.