skills/02-chemistry/openbabel-conversion/SKILL.md
Use this Skill for chemical file format interconversion (SDF, SMILES, MOL2, PDB), 3D coordinate generation, conformer enumeration, and reaction SMARTS.
npx skillsauth add xjtulyc/awesome-rosetta-skills openbabel-conversionInstall this skill globally with one command. Works with Claude Code, Cursor, and Windsurf.
3 of 9 scanners reported clean
Some scanners were skipped, did not run, or reported a non-clean status. Review each row below.
One-line summary: Convert chemical file formats, generate 3D structures, enumerate conformers, and apply reaction SMARTS using OpenBabel and RDKit.
Trigger keywords: format conversion, SDF to SMILES, mol2 PDB, 3D coordinate generation, conformer generation, reaction SMARTS, OpenBabel, obabel
| Format | Use case | Contains 3D? | Common tool | |:-------|:---------|:-------------|:------------| | SMILES | Compact line notation | No | Any toolkit | | SDF/MOL | Structure + properties | Yes | OpenBabel, RDKit | | MOL2 | Docking preparation | Yes | AMBER, Dock | | PDB | Protein+ligand | Yes | PyMOL, VMD | | XYZ | QM input | Yes | Gaussian, ORCA | | InChI | Canonical identifier | No | IUPAC standard |
Generating 3D coordinates from a 2D SMILES requires:
OpenBabel uses the OBBuilder class for this; RDKit uses the EmbedMolecule/ETKDG algorithm.
For flexible molecules, multiple conformers span the accessible conformational space. The ETKDG (Experimental Torsion angle Knowledge Distance Geometry) method in RDKit produces pharmacophorically relevant conformers.
# OpenBabel (recommended via conda for easiest install)
conda install -c conda-forge openbabel
# RDKit
conda install -c conda-forge rdkit
# Or pip
pip install openbabel-wheel rdkit pandas numpy matplotlib
import openbabel.openbabel as ob
from rdkit import Chem
from rdkit.Chem import AllChem
mol = Chem.MolFromSmiles("CCO")
print(f"Ethanol atoms: {mol.GetNumAtoms()}")
obconv = ob.OBConversion()
print(f"OpenBabel version: {ob.OBReleaseVersion()}")
# Expected: Ethanol atoms: 3
import openbabel.openbabel as ob
def convert_format(input_str, from_fmt, to_fmt, gen3d=False):
"""
Convert a chemical string from one format to another.
Parameters
----------
input_str : str
Input chemical string (SMILES, InChI, etc.)
from_fmt : str
Input format ('smi', 'inchi', 'sdf', 'mol2', 'pdb', 'xyz')
to_fmt : str
Output format
gen3d : bool
If True, generate 3D coordinates before conversion
Returns
-------
str
Converted chemical string
"""
obconv = ob.OBConversion()
obconv.SetInAndOutFormats(from_fmt, to_fmt)
mol = ob.OBMol()
obconv.ReadString(mol, input_str)
if mol.NumAtoms() == 0:
raise ValueError(f"Failed to parse input as {from_fmt}")
if gen3d:
builder = ob.OBBuilder()
builder.Build(mol)
ff = ob.OBForceField.FindForceField("MMFF94")
if ff:
ff.Setup(mol)
ff.ConjugateGradients(500)
ff.GetCoordinates(mol)
mol.AddHydrogens()
return obconv.WriteString(mol)
# Examples
smiles_list = [
"CC(=O)Oc1ccccc1C(=O)O", # Aspirin
"c1ccc(cc1)C(=O)O", # Benzoic acid
"CC12CCC3C(C1CCC2O)CCC4=CC(=O)CCC34C", # Testosterone
]
for smi in smiles_list:
inchi = convert_format(smi, "smi", "inchi")
pdb_3d = convert_format(smi, "smi", "pdb", gen3d=True)
print(f"SMILES: {smi[:30]}...")
print(f" InChI: {inchi.strip()[:50]}...")
print(f" PDB (first line): {pdb_3d.splitlines()[0]}")
print()
from rdkit import Chem
from rdkit.Chem import AllChem, Draw
from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMolecule
import numpy as np
def generate_3d_conformers(smiles, n_confs=50, seed=42):
"""
Generate multiple 3D conformers via ETKDG, return lowest-energy one.
Returns
-------
rdkit.Chem.Mol with 3D coordinates, conformer energies array
"""
mol = Chem.MolFromSmiles(smiles)
if mol is None:
raise ValueError(f"Invalid SMILES: {smiles}")
mol = Chem.AddHs(mol)
# ETKDG v3 (best for drug-like molecules)
params = AllChem.ETKDGv3()
params.randomSeed = seed
params.numThreads = 0 # use all cores
params.pruneRmsThresh = 0.5 # remove similar conformers
conf_ids = AllChem.EmbedMultipleConfs(mol, numConfs=n_confs, params=params)
if len(conf_ids) == 0:
raise RuntimeError("ETKDG embedding failed — check SMILES validity")
# MMFF94 optimization
energies = []
for cid in conf_ids:
res = MMFFOptimizeMolecule(mol, confId=cid, maxIters=2000)
if res == 0: # converged
ff = AllChem.MMFFGetMoleculeForceField(mol,
AllChem.MMFFGetMoleculeProperties(mol), confId=cid)
energies.append(ff.CalcEnergy() if ff else float('inf'))
else:
energies.append(float('inf'))
energies = np.array(energies)
best_conf_id = int(conf_ids[np.argmin(energies)])
# Keep only lowest-energy conformer for output
best_mol = Chem.RWMol(mol)
best_mol = best_mol.GetMol()
print(f"Generated {len(conf_ids)} conformers")
print(f"Energy range: {energies.min():.2f} – {energies.max():.2f} kcal/mol")
return best_mol, best_conf_id, energies
# Aspirin
mol_3d, cid, ens = generate_3d_conformers("CC(=O)Oc1ccccc1C(=O)O", n_confs=20)
print(f"Best conformer energy: {ens.min():.2f} kcal/mol")
# Save to SDF
from rdkit.Chem import SDWriter
with SDWriter("aspirin_confs.sdf") as w:
for conf_id in range(mol_3d.GetNumConformers()):
w.write(mol_3d, confId=conf_id)
print("Saved aspirin_confs.sdf")
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.Draw import MolToImage
import pandas as pd
def apply_reaction(reactant_smiles, rxn_smarts):
"""
Apply a reaction SMARTS to a reactant SMILES.
Returns
-------
list of product SMILES strings
"""
rxn = AllChem.ReactionFromSmarts(rxn_smarts)
if rxn is None:
raise ValueError(f"Invalid reaction SMARTS: {rxn_smarts}")
reactant = Chem.MolFromSmiles(reactant_smiles)
if reactant is None:
raise ValueError(f"Invalid SMILES: {reactant_smiles}")
products = rxn.RunReactants((reactant,))
product_smiles = []
for prod_tuple in products:
for prod in prod_tuple:
try:
Chem.SanitizeMol(prod)
smi = Chem.MolToSmiles(prod)
product_smiles.append(smi)
except Exception:
pass
return list(set(product_smiles)) # deduplicate
# Example reactions
reactions = {
"ester_hydrolysis": "[C:1](=O)[O:2][C:3]>>[C:1](=O)[O:2].[C:3]",
"amide_formation": "[C:1](=O)[OH].[N:2]>>[C:1](=O)[N:2]",
"aromatic_nitration": "c:1:c:c:c:c:c1>>[c:1]c(cc:c:c:c1)[N+](=O)[O-]",
}
test_molecules = {
"ester_hydrolysis": "CC(=O)OCC", # ethyl acetate
"amide_formation": "CC(=O)O", # acetic acid + NH3 (separate reactant)
}
for rxn_name, smarts in reactions.items():
if rxn_name in test_molecules:
reactant = test_molecules[rxn_name]
try:
products = apply_reaction(reactant, smarts)
print(f"\n{rxn_name}:")
print(f" Reactant: {reactant}")
print(f" Products: {products[:3]}")
except Exception as e:
print(f"{rxn_name}: {e}")
# Batch processing
records = []
smiles_batch = [
"CC(=O)Oc1ccccc1C(=O)O", # aspirin
"CC12CCC3C(C1CCC2O)", # steroid skeleton
"c1ccc(cc1)C(=O)O", # benzoic acid
]
for smi in smiles_batch:
mol = Chem.MolFromSmiles(smi)
if mol:
records.append({
"smiles": smi,
"n_atoms": mol.GetNumAtoms(),
"n_rings": mol.GetRingInfo().NumRings(),
"inchi": Chem.MolToInchi(mol),
})
df = pd.DataFrame(records)
print("\nBatch processing results:")
print(df[["smiles", "n_atoms", "n_rings"]].to_string(index=False))
from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
import pandas as pd
def get_murcko_scaffold(smiles, generic=False):
"""
Extract Murcko scaffold (framework) from a molecule.
generic=True returns the carbon skeleton (bemis-murcko generic scaffold).
"""
mol = Chem.MolFromSmiles(smiles)
if mol is None:
return None
scaffold = MurckoScaffold.GetScaffoldForMol(mol)
if generic:
scaffold = MurckoScaffold.MakeScaffoldGeneric(scaffold)
return Chem.MolToSmiles(scaffold)
drugs = {
"Sildenafil": "CCCc1nn(C)c2c(=O)[nH]c(-c3cc(S(=O)(=O)N4CCN(C)CC4)ccc3OCC)nc12",
"Ibuprofen": "CC(C)Cc1ccc(cc1)C(C)C(=O)O",
"Atorvastatin": "CC(C)c1c(C(=O)Nc2ccccc2F)c(-c2ccccc2)c(-c2ccc(F)cc2)n1CCC(O)CC(O)CC(=O)O",
}
scaffold_df = pd.DataFrame([
{"drug": name, "smiles": smi,
"scaffold": get_murcko_scaffold(smi),
"generic_scaffold": get_murcko_scaffold(smi, generic=True)}
for name, smi in drugs.items()
])
print(scaffold_df[["drug", "scaffold"]].to_string(index=False))
Import "openbabel.openbabel" could not be resolvedCause: OpenBabel Python bindings not installed correctly.
Fix:
# Prefer conda installation
conda install -c conda-forge openbabel
# Verify
python -c "import openbabel.openbabel as ob; print(ob.OBReleaseVersion())"
Cause: SMILES has unsupported features (e.g., unusual valence) or stereospecification issues.
Fix:
from rdkit import Chem
mol = Chem.MolFromSmiles(smiles)
# Check for sanitization warnings
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*') # suppress
mol, problems = Chem.SanitizeMol(mol, catchErrors=True), Chem.DetectChemistryProblems(mol)
print(problems)
| Package | Tested versions | Known issues | |:--------|:----------------|:-------------| | openbabel | 3.1.1, 3.1.4 | Python bindings require manual build on some platforms | | rdkit | 2023.3, 2024.3 | ETKDG v3 added in 2020 |
# =============================================
# Batch SMILES → SDF with MMFF geometry
# =============================================
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, SDWriter
from rdkit.Chem.rdForceFieldHelpers import MMFFOptimizeMolecule
smiles_data = pd.DataFrame({
"name": ["Aspirin", "Caffeine", "Dopamine"],
"smiles": ["CC(=O)Oc1ccccc1C(=O)O",
"Cn1cnc2c1c(=O)n(C)c(=O)n2C",
"NCCc1ccc(O)c(O)c1"]
})
with SDWriter("library_3d.sdf") as writer:
for _, row in smiles_data.iterrows():
mol = Chem.MolFromSmiles(row["smiles"])
if mol is None:
print(f"WARN: could not parse {row['name']}")
continue
mol.SetProp("_Name", row["name"])
mol = Chem.AddHs(mol)
AllChem.EmbedMolecule(mol, AllChem.ETKDGv3())
MMFFOptimizeMolecule(mol)
writer.write(mol)
print("Saved library_3d.sdf")
Interpreting these results: The SDF file can be opened in PyMOL, Schrödinger, or AutoDock Vina for molecular docking.
Last updated: 2026-03-17 | Maintainer: @xjtulyc Issues: GitHub Issues
tools
R research package development with devtools, roxygen2 documentation, testthat testing, CRAN submission, and vignette creation for statistical methods.
development
Reproducible research reporting with Quarto covering parameterized reports, multi-format output, inline computation, and journal article templates.
development
Python research package development with pyproject.toml, testing with pytest, documentation with Sphinx, and publishing to PyPI for academic software.
development
Write, compile, and submit LaTeX papers: IMRaD structure, key packages, bibliography management, arXiv preparation, and common error fixes.