skills/rag-patterns/SKILL.md
Chunking strategies, embedding model selection, hybrid search, reranking, eval metrics
npx skillsauth add rubicanjr/FinCognis rag-patterns
Install this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Strategy 1: recursive character splitting (general-purpose default).
# The separator list runs from coarse to fine: blank line, newline,
# sentence end, space, and finally the empty string.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", ". ", " ", ""],
    length_function=len,  # chunk sizes below are measured with len()
    chunk_size=512,
    chunk_overlap=64,
)
# Strategy 2: semantic chunking (better coherence within each chunk).
from langchain_experimental.text_splitter import SemanticChunker
from langchain_openai import OpenAIEmbeddings

# The chunker is driven by OpenAI embeddings and configured with the
# "percentile" breakpoint threshold type at an amount of 95.
semantic_splitter = SemanticChunker(
    OpenAIEmbeddings(),
    breakpoint_threshold_amount=95,
    breakpoint_threshold_type="percentile",
)
# Strategy 3: Parent-child chunking (preserves context)
# Documents are first cut into large parent chunks, then each parent is cut
# into small child chunks. Every child records the id of the parent it was
# cut from, so the larger parent can be looked up from a child.
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200)
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=50)
parent_docs = parent_splitter.split_documents(documents)
for parent_index, parent in enumerate(parent_docs):
    # split_documents() copies the source document's metadata onto every
    # chunk, so an inherited "id" is either missing (KeyError) or shared by
    # all parents cut from the same document. Derive a per-parent id that
    # keeps the source id (when present) as a prefix.
    source_id = parent.metadata.get("id", "doc")
    parent.metadata["id"] = f"{source_id}:p{parent_index}"
    children = child_splitter.split_documents([parent])
    for child_index, child in enumerate(children):
        child.metadata["parent_id"] = parent.metadata["id"]
        # Children inherit the parent's metadata (including "id"); give each
        # child its own id so parent and child ids never collide.
        child.metadata["id"] = f"{parent.metadata['id']}:c{child_index}"
Models by Use Case:
General (English):
- text-embedding-3-small (OpenAI, 1536d, cheap)
- text-embedding-3-large (OpenAI, 3072d, best quality)
- all-MiniLM-L6-v2 (local, 384d, fast)
Code:
- text-embedding-3-large with code-tuned prompts
- voyage-code-2 (Voyage AI)
Multilingual:
- multilingual-e5-large (local)
- text-embedding-3-large (OpenAI)
Selection Criteria:
- Latency requirement < 50ms → local model
- Quality critical → text-embedding-3-large
- Budget constrained → text-embedding-3-small
- Air-gapped → all-MiniLM-L6-v2
from rank_bm25 import BM25Okapi
import numpy as np
class HybridRetriever:
    """Blend vector-store scores with BM25 keyword scores into one ranking.

    Every document is identified by ``doc.metadata["id"]``; ids must be
    present and should be unique across ``documents``.
    """

    def __init__(self, vector_store, documents, alpha=0.5):
        """Index ``documents`` for BM25 and remember the vector store.

        Args:
            vector_store: Object exposing
                ``similarity_search_with_score(query, k=...)`` that returns
                ``(document, score)`` pairs.
            documents: Indexable sequence of documents with ``page_content``
                and ``metadata["id"]``.
            alpha: Weight of the vector score in the fused score.
        """
        self.vector_store = vector_store
        self.alpha = alpha  # 0=BM25 only, 1=vector only
        tokenized = [doc.page_content.lower().split() for doc in documents]
        self.bm25 = BM25Okapi(tokenized)
        self.documents = documents
        # id -> document, so search() can map fused ids back to documents.
        self._docs_by_id = {doc.metadata["id"]: doc for doc in documents}

    def _get_doc(self, doc_id):
        """Return the corpus document whose ``metadata["id"]`` is ``doc_id``.

        Raises:
            KeyError: If no indexed document has that id.
        """
        return self._docs_by_id[doc_id]

    def search(self, query: str, k: int = 10) -> list:
        """Return up to ``k`` documents ordered by fused score, best first.

        The fused score is ``alpha * vector_score + (1 - alpha) * bm25_score``
        where BM25 scores are divided by the best BM25 score for the query.
        A document missing from one side contributes 0 for that side.
        """
        # Vector search
        vector_results = self.vector_store.similarity_search_with_score(query, k=k)
        vector_scores = {}
        vector_docs = {}
        for doc, score in vector_results:
            doc_id = doc.metadata["id"]
            vector_scores[doc_id] = score
            vector_docs[doc_id] = doc
        # NOTE(review): vector scores are used as returned and treated as
        # "higher is better". Some stores return distances (lower is better)
        # from similarity_search_with_score -- confirm for the store in use.

        # BM25 search, scaled so the best keyword match scores 1.0
        bm25_scores_raw = self.bm25.get_scores(query.lower().split())
        bm25_peak = max(bm25_scores_raw, default=0)
        bm25_max = bm25_peak if bm25_peak > 0 else 1
        bm25_scores = {
            self.documents[i].metadata["id"]: score / bm25_max
            for i, score in enumerate(bm25_scores_raw)
        }

        # Weighted score fusion (a convex combination of the two scores; this
        # is not rank-based Reciprocal Rank Fusion). Candidates are visited
        # in a fixed order so that ties sort deterministically.
        candidate_ids = list(dict.fromkeys([*vector_scores, *bm25_scores]))
        fused = {}
        for doc_id in candidate_ids:
            v_score = vector_scores.get(doc_id, 0)
            b_score = bm25_scores.get(doc_id, 0)
            fused[doc_id] = self.alpha * v_score + (1 - self.alpha) * b_score

        sorted_ids = sorted(fused, key=fused.get, reverse=True)[:k]
        # Prefer the indexed corpus copy; fall back to the document object the
        # vector store returned when the id is not part of the BM25 corpus.
        return [
            self._get_doc(did) if did in self._docs_by_id else vector_docs[did]
            for did in sorted_ids
        ]
from sentence_transformers import CrossEncoder
# Cross-encoder used by rerank() to score (query, passage) pairs.
# Created once at module level so every rerank() call reuses the same model.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")
def rerank(query: str, documents: list, top_k: int = 5) -> list:
    """Return the ``top_k`` documents the cross-encoder scores highest.

    Args:
        query: The search query each document is scored against.
        documents: Candidate documents exposing ``page_content``.
        top_k: Maximum number of documents to return.

    Returns:
        Up to ``top_k`` documents ordered by descending score; an empty
        list when there are no candidates.
    """
    # Materialise once so an iterator argument is not exhausted by the
    # emptiness check before it is paired with the scores.
    documents = list(documents)
    if not documents:
        # Nothing to score; skip the model call entirely.
        return []
    pairs = [(query, doc.page_content) for doc in documents]
    scores = reranker.predict(pairs)
    ranked = sorted(zip(documents, scores), key=lambda x: x[1], reverse=True)
    return [doc for doc, _ in ranked[:top_k]]
# Pipeline: over-retrieve 20 candidates with hybrid search, then let the
# reranker narrow them down to the best 5.
candidates = hybrid_retriever.search(query=query, k=20)
final = rerank(query=query, documents=candidates, top_k=5)
def evaluate_rag(queries, expected_answers, retriever, generator):
    """Average retrieval and answer-quality metrics over a set of queries.

    Args:
        queries: Iterable of query strings.
        expected_answers: Iterable, parallel to ``queries``, of mappings with
            a ``"source"`` string (searched for inside the retrieved texts)
            and an ``"answer"`` value (the reference answer).
        retriever: Object exposing ``search(query, k=...)`` that returns
            documents with ``page_content``.
        generator: Object exposing ``generate(query, retrieved_texts)``.

    Returns:
        Dict with the mean of ``"retrieval_recall"``, ``"answer_correctness"``
        and ``"faithfulness"``. Each mean is 0.0 when no query was evaluated.

    Note:
        Relies on ``check_faithfulness`` and ``check_correctness`` being
        available in the enclosing module.
    """
    metrics = {"retrieval_recall": [], "answer_correctness": [], "faithfulness": []}
    for query, expected in zip(queries, expected_answers):
        retrieved = retriever.search(query, k=5)
        retrieved_texts = [d.page_content for d in retrieved]

        # Retrieval recall: did we find the right chunks?
        # Scored per query as a hit (1.0) or miss (0.0).
        relevant_found = any(expected["source"] in t for t in retrieved_texts)
        metrics["retrieval_recall"].append(1.0 if relevant_found else 0.0)

        # Generate answer
        answer = generator.generate(query, retrieved_texts)

        # Faithfulness: is answer grounded in retrieved context?
        # (Use LLM-as-judge or NLI model)
        metrics["faithfulness"].append(check_faithfulness(answer, retrieved_texts))

        # Correctness: does answer match expected?
        metrics["answer_correctness"].append(check_correctness(answer, expected["answer"]))

    # Guard the mean against an empty evaluation set (would divide by zero).
    return {
        name: (sum(values) / len(values) if values else 0.0)
        for name, values in metrics.items()
    }
development
Goal-based workflow orchestration - routes tasks to specialist agents based on user goals
tools
Wiring Verification
development
Connection management, room patterns, reconnection strategies, message buffering, and binary protocol design.
development
Screenshot comparison QA for frontend development. Takes a screenshot of the current implementation, scores it across multiple visual dimensions, and returns a structured PASS/REVISE/FAIL verdict with concrete fixes. Use when implementing UI from a design reference or verifying visual correctness.