skills/43-wentorai-research-plugins/skills/domains/pharma/drug-target-interaction/SKILL.md
Computational drug-target interaction prediction and virtual screening
npx skillsauth add brycewang-stanford/Awesome-Agent-Skills-for-Empirical-Research drug-target-interactionInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
A skill for computational prediction of drug-target interactions (DTI), covering molecular docking, machine learning-based binding affinity prediction, compound library screening, and target identification using cheminformatics and structural biology tools.
| Database | Content | Access | |----------|---------|--------| | ChEMBL | 2.4M compounds, 15M bioactivities | REST API, SQL dump | | BindingDB | 2.8M binding data points | Bulk download, REST API | | DrugBank | 15,000+ drug entries with targets | Academic license | | PDB (Protein Data Bank) | 220,000+ 3D structures | Free download, REST API | | UniProt | 250M+ protein sequences | Free, REST API | | STITCH | Chemical-protein interactions | Free academic access |
from chembl_webresource_client.new_client import new_client
def get_target_bioactivities(target_chembl_id: str,
activity_type: str = "IC50",
max_nm: float = 10000) -> list[dict]:
"""
Retrieve bioactivity data for a protein target from ChEMBL.
Returns compounds with measured binding/inhibition values.
"""
activity = new_client.activity
results = activity.filter(
target_chembl_id=target_chembl_id,
standard_type=activity_type,
standard_relation="=",
standard_units="nM",
).only([
"molecule_chembl_id", "canonical_smiles",
"standard_value", "standard_type",
"pchembl_value", "assay_description",
])
filtered = []
for r in results:
if r.get("standard_value") and float(r["standard_value"]) <= max_nm:
filtered.append({
"molecule_id": r["molecule_chembl_id"],
"smiles": r["canonical_smiles"],
"activity_type": r["standard_type"],
"value_nM": float(r["standard_value"]),
"pchembl": float(r["pchembl_value"]) if r.get("pchembl_value") else None,
})
return filtered
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors, rdMolDescriptors
import numpy as np
def compute_fingerprints(smiles_list: list[str],
fp_type: str = "morgan",
radius: int = 2,
n_bits: int = 2048) -> np.ndarray:
"""
Compute molecular fingerprints from SMILES strings.
fp_type: 'morgan' (ECFP-like), 'maccs', 'rdkit', 'topological'
"""
fps = []
for smi in smiles_list:
mol = Chem.MolFromSmiles(smi)
if mol is None:
fps.append(np.zeros(n_bits))
continue
if fp_type == "morgan":
fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
elif fp_type == "maccs":
fp = rdMolDescriptors.GetMACCSKeysFingerprint(mol)
elif fp_type == "rdkit":
fp = Chem.RDKFingerprint(mol, fpSize=n_bits)
else:
fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
arr = np.zeros(len(fp))
Chem.DataStructs.ConvertToNumpyArray(fp, arr)
fps.append(arr)
return np.array(fps)
def compute_descriptors(smiles: str) -> dict:
"""Compute physicochemical descriptors for a molecule."""
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return {}
return {
"molecular_weight": Descriptors.MolWt(mol),
"logP": Descriptors.MolLogP(mol),
"hbd": Descriptors.NumHDonors(mol),
"hba": Descriptors.NumHAcceptors(mol),
"tpsa": Descriptors.TPSA(mol),
"rotatable_bonds": Descriptors.NumRotatableBonds(mol),
"aromatic_rings": Descriptors.NumAromaticRings(mol),
"lipinski_violations": sum([
Descriptors.MolWt(mol) > 500,
Descriptors.MolLogP(mol) > 5,
Descriptors.NumHDonors(mol) > 5,
Descriptors.NumHAcceptors(mol) > 10,
]),
}
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import roc_auc_score, average_precision_score
def train_dti_classifier(compound_fps: np.ndarray,
target_features: np.ndarray,
labels: np.ndarray) -> dict:
"""
Train a DTI classifier using compound-target pair features.
compound_fps: molecular fingerprints (n_samples, fp_dim)
target_features: protein descriptors (n_samples, target_dim)
labels: binary interaction labels (1=interacts, 0=no interaction)
"""
# Concatenate compound and target features
X = np.hstack([compound_fps, target_features])
y = labels
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
metrics = {"auroc": [], "auprc": []}
for train_idx, test_idx in skf.split(X, y):
model = RandomForestClassifier(
n_estimators=500, max_depth=20, n_jobs=-1, random_state=42
)
model.fit(X[train_idx], y[train_idx])
pred_proba = model.predict_proba(X[test_idx])[:, 1]
metrics["auroc"].append(roc_auc_score(y[test_idx], pred_proba))
metrics["auprc"].append(average_precision_score(y[test_idx], pred_proba))
return {
"mean_auroc": np.mean(metrics["auroc"]),
"mean_auprc": np.mean(metrics["auprc"]),
"model": model,
}
Modern DTI prediction architectures:
| Method | Compound Representation | Target Representation | Architecture | |--------|------------------------|----------------------|-------------| | DeepDTA | SMILES (1D CNN) | Protein sequence (1D CNN) | Concatenation + FC | | GraphDTA | Molecular graph (GCN/GAT) | Protein sequence (CNN) | Graph + sequence fusion | | MolTrans | SMILES (Transformer) | Protein sequence (Transformer) | Cross-attention | | DrugBAN | Molecular graph | Protein graph | Bilinear attention |
import subprocess
def run_autodock_vina(receptor_pdbqt: str, ligand_pdbqt: str,
center: tuple, box_size: tuple = (20, 20, 20),
exhaustiveness: int = 8) -> dict:
"""
Run AutoDock Vina for molecular docking.
receptor_pdbqt: path to prepared receptor file
ligand_pdbqt: path to prepared ligand file
center: (x, y, z) coordinates of the binding site center
Returns docking scores and poses.
"""
cmd = [
"vina",
"--receptor", receptor_pdbqt,
"--ligand", ligand_pdbqt,
"--center_x", str(center[0]),
"--center_y", str(center[1]),
"--center_z", str(center[2]),
"--size_x", str(box_size[0]),
"--size_y", str(box_size[1]),
"--size_z", str(box_size[2]),
"--exhaustiveness", str(exhaustiveness),
"--num_modes", "9",
]
result = subprocess.run(cmd, capture_output=True, text=True)
# Parse output for binding affinities
scores = []
for line in result.stdout.split("\n"):
parts = line.split()
if len(parts) >= 4 and parts[0].isdigit():
scores.append({
"mode": int(parts[0]),
"affinity_kcal_mol": float(parts[1]),
"rmsd_lb": float(parts[2]),
"rmsd_ub": float(parts[3]),
})
return {"scores": scores, "best_affinity": scores[0]["affinity_kcal_mol"] if scores else None}
Standard benchmarks for DTI prediction:
tools
Show mcp-stata identity, connected tools, and status. Use when the user asks if mcp-stata is available, asks about access to the toolkit, or asks what Stata tools are connected.
tools
Activate when users mention Stata commands, .do files, regressions, econometrics, stored results, graphs, dataset inspection, replication, or Stata errors. Route the task through mcp-stata tools and the specialized research skills instead of treating it as plain text coding.
development
Build and review paper-ready regression, balance, and summary tables from Stata outputs. Use when the user needs a clean table for a draft, appendix, or coauthor share-out.
tools
Install, configure, update, or verify mcp-stata across Claude Code, Codex, Gemini CLI, Cursor, Windsurf, and VS Code. Activate when users ask to set up the Stata toolkit or troubleshoot the installation.