agents/plugins/llm-application-dev/skills/rag-implementation/SKILL.md
Build Retrieval-Augmented Generation (RAG) systems for LLM applications with vector databases and semantic search. Use when implementing knowledge-grounded AI, building document Q&A systems, or integrating LLMs with external knowledge bases.
npx skillsauth add simplysmartai/5cypressautomation rag-implementationInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
Master Retrieval-Augmented Generation (RAG) to build LLM applications that provide accurate, grounded responses using external knowledge sources.
Purpose: Store and retrieve document embeddings efficiently
Options:
Purpose: Convert text to numerical vectors for similarity search
Models:
Approaches:
Purpose: Improve retrieval quality by reordering results
Methods:
from langchain.document_loaders import DirectoryLoader
from langchain.text_splitters import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
# 1. Load documents
loader = DirectoryLoader('./docs', glob="**/*.txt")
documents = loader.load()
# 2. Split into chunks
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len
)
chunks = text_splitter.split_documents(documents)
# 3. Create embeddings and vector store
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(chunks, embeddings)
# 4. Create retrieval chain
qa_chain = RetrievalQA.from_chain_type(
llm=OpenAI(),
chain_type="stuff",
retriever=vectorstore.as_retriever(search_kwargs={"k": 4}),
return_source_documents=True
)
# 5. Query
result = qa_chain({"query": "What are the main features?"})
print(result['result'])
print(result['source_documents'])
from langchain.retrievers import BM25Retriever, EnsembleRetriever
# Sparse retriever (BM25)
bm25_retriever = BM25Retriever.from_documents(chunks)
bm25_retriever.k = 5
# Dense retriever (embeddings)
embedding_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
# Combine with weights
ensemble_retriever = EnsembleRetriever(
retrievers=[bm25_retriever, embedding_retriever],
weights=[0.3, 0.7]
)
from langchain.retrievers.multi_query import MultiQueryRetriever
# Generate multiple query perspectives
retriever = MultiQueryRetriever.from_llm(
retriever=vectorstore.as_retriever(),
llm=OpenAI()
)
# Single query → multiple variations → combined results
results = retriever.get_relevant_documents("What is the main topic?")
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor
compressor = LLMChainExtractor.from_llm(llm)
compression_retriever = ContextualCompressionRetriever(
base_compressor=compressor,
base_retriever=vectorstore.as_retriever()
)
# Returns only relevant parts of documents
compressed_docs = compression_retriever.get_relevant_documents("query")
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
# Store for parent documents
store = InMemoryStore()
# Small chunks for retrieval, large chunks for context
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
retriever = ParentDocumentRetriever(
vectorstore=vectorstore,
docstore=store,
child_splitter=child_splitter,
parent_splitter=parent_splitter
)
from langchain.text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=200,
length_function=len,
separators=["\n\n", "\n", " ", ""] # Try these in order
)
from langchain.text_splitters import TokenTextSplitter
splitter = TokenTextSplitter(
chunk_size=512,
chunk_overlap=50
)
from langchain.text_splitters import SemanticChunker
splitter = SemanticChunker(
embeddings=OpenAIEmbeddings(),
breakpoint_threshold_type="percentile"
)
from langchain.text_splitters import MarkdownHeaderTextSplitter
headers_to_split_on = [
("#", "Header 1"),
("##", "Header 2"),
("###", "Header 3"),
]
splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
import pinecone
from langchain.vectorstores import Pinecone
pinecone.init(api_key="your-api-key", environment="us-west1-gcp")
index = pinecone.Index("your-index-name")
vectorstore = Pinecone(index, embeddings.embed_query, "text")
import weaviate
from langchain.vectorstores import Weaviate
client = weaviate.Client("http://localhost:8080")
vectorstore = Weaviate(client, "Document", "content", embeddings)
from langchain.vectorstores import Chroma
vectorstore = Chroma(
collection_name="my_collection",
embedding_function=embeddings,
persist_directory="./chroma_db"
)
# Add metadata during indexing
chunks_with_metadata = []
for i, chunk in enumerate(chunks):
chunk.metadata = {
"source": chunk.metadata.get("source"),
"page": i,
"category": determine_category(chunk.page_content)
}
chunks_with_metadata.append(chunk)
# Filter during retrieval
results = vectorstore.similarity_search(
"query",
filter={"category": "technical"},
k=5
)
# Balance relevance with diversity
results = vectorstore.max_marginal_relevance_search(
"query",
k=5,
fetch_k=20, # Fetch 20, return top 5 diverse
lambda_mult=0.5 # 0=max diversity, 1=max relevance
)
from sentence_transformers import CrossEncoder
reranker = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
# Get initial results
candidates = vectorstore.similarity_search("query", k=20)
# Rerank
pairs = [[query, doc.page_content] for doc in candidates]
scores = reranker.predict(pairs)
# Sort by score and take top k
reranked = sorted(zip(candidates, scores), key=lambda x: x[1], reverse=True)[:5]
prompt_template = """Use the following context to answer the question. If you cannot answer based on the context, say "I don't have enough information."
Context:
{context}
Question: {question}
Answer:"""
prompt_template = """Answer the question based on the context below. Include citations using [1], [2], etc.
Context:
{context}
Question: {question}
Answer (with citations):"""
prompt_template = """Answer the question using the context. Provide a confidence score (0-100%) for your answer.
Context:
{context}
Question: {question}
Answer:
Confidence:"""
def evaluate_rag_system(qa_chain, test_cases):
metrics = {
'accuracy': [],
'retrieval_quality': [],
'groundedness': []
}
for test in test_cases:
result = qa_chain({"query": test['question']})
# Check if answer matches expected
accuracy = calculate_accuracy(result['result'], test['expected'])
metrics['accuracy'].append(accuracy)
# Check if relevant docs were retrieved
retrieval_quality = evaluate_retrieved_docs(
result['source_documents'],
test['relevant_docs']
)
metrics['retrieval_quality'].append(retrieval_quality)
# Check if answer is grounded in context
groundedness = check_groundedness(
result['result'],
result['source_documents']
)
metrics['groundedness'].append(groundedness)
return {k: sum(v)/len(v) for k, v in metrics.items()}
content-media
This skill should be used when the user asks to "plan team structure", "determine hiring needs", "design org chart", "calculate compensation", "plan equity allocation", or requests organizational design and headcount planning for a startup.
development
This skill should be used when the user asks about "key startup metrics", "SaaS metrics", "CAC and LTV", "unit economics", "burn multiple", "rule of 40", "marketplace metrics", or requests guidance on tracking and optimizing business performance metrics.
development
This skill should be used when the user asks to "create financial projections", "build a financial model", "forecast revenue", "calculate burn rate", "estimate runway", "model cash flow", or requests 3-5 year financial planning for a startup.
research
This skill should be used when the user asks to "calculate TAM", "determine SAM", "estimate SOM", "size the market", "calculate market opportunity", "what's the total addressable market", or requests market sizing analysis for a startup or business opportunity.