244 lines
7.2 KiB
Python
244 lines
7.2 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Molecular Properties Calculator
|
|
|
|
Calculate comprehensive molecular properties and descriptors for molecules.
|
|
Supports single molecules or batch processing from files.
|
|
|
|
Usage:
|
|
python molecular_properties.py "CCO"
|
|
python molecular_properties.py --file molecules.smi --output properties.csv
|
|
"""
|
|
|
|
import argparse
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
try:
|
|
from rdkit import Chem
|
|
from rdkit.Chem import Descriptors, Lipinski
|
|
except ImportError:
|
|
print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
|
|
sys.exit(1)
|
|
|
|
|
|
def calculate_properties(mol):
|
|
"""Calculate comprehensive molecular properties."""
|
|
if mol is None:
|
|
return None
|
|
|
|
properties = {
|
|
# Basic properties
|
|
'SMILES': Chem.MolToSmiles(mol),
|
|
'Molecular_Formula': Chem.rdMolDescriptors.CalcMolFormula(mol),
|
|
|
|
# Molecular weight
|
|
'MW': Descriptors.MolWt(mol),
|
|
'ExactMW': Descriptors.ExactMolWt(mol),
|
|
|
|
# Lipophilicity
|
|
'LogP': Descriptors.MolLogP(mol),
|
|
'MR': Descriptors.MolMR(mol),
|
|
|
|
# Polar surface area
|
|
'TPSA': Descriptors.TPSA(mol),
|
|
'LabuteASA': Descriptors.LabuteASA(mol),
|
|
|
|
# Hydrogen bonding
|
|
'HBD': Descriptors.NumHDonors(mol),
|
|
'HBA': Descriptors.NumHAcceptors(mol),
|
|
|
|
# Atom counts
|
|
'Heavy_Atoms': Descriptors.HeavyAtomCount(mol),
|
|
'Heteroatoms': Descriptors.NumHeteroatoms(mol),
|
|
'Valence_Electrons': Descriptors.NumValenceElectrons(mol),
|
|
|
|
# Ring information
|
|
'Rings': Descriptors.RingCount(mol),
|
|
'Aromatic_Rings': Descriptors.NumAromaticRings(mol),
|
|
'Saturated_Rings': Descriptors.NumSaturatedRings(mol),
|
|
'Aliphatic_Rings': Descriptors.NumAliphaticRings(mol),
|
|
'Aromatic_Heterocycles': Descriptors.NumAromaticHeterocycles(mol),
|
|
|
|
# Flexibility
|
|
'Rotatable_Bonds': Descriptors.NumRotatableBonds(mol),
|
|
'Fraction_Csp3': Descriptors.FractionCsp3(mol),
|
|
|
|
# Complexity
|
|
'BertzCT': Descriptors.BertzCT(mol),
|
|
|
|
# Drug-likeness
|
|
'QED': Descriptors.qed(mol),
|
|
}
|
|
|
|
# Lipinski's Rule of Five
|
|
properties['Lipinski_Pass'] = (
|
|
properties['MW'] <= 500 and
|
|
properties['LogP'] <= 5 and
|
|
properties['HBD'] <= 5 and
|
|
properties['HBA'] <= 10
|
|
)
|
|
|
|
# Lead-likeness
|
|
properties['Lead-like'] = (
|
|
250 <= properties['MW'] <= 350 and
|
|
properties['LogP'] <= 3.5 and
|
|
properties['Rotatable_Bonds'] <= 7
|
|
)
|
|
|
|
return properties
|
|
|
|
|
|
def process_single_molecule(smiles):
|
|
"""Process a single SMILES string."""
|
|
mol = Chem.MolFromSmiles(smiles)
|
|
if mol is None:
|
|
print(f"Error: Failed to parse SMILES: {smiles}")
|
|
return None
|
|
|
|
props = calculate_properties(mol)
|
|
return props
|
|
|
|
|
|
def process_file(input_file, output_file=None):
|
|
"""Process molecules from a file."""
|
|
input_path = Path(input_file)
|
|
|
|
if not input_path.exists():
|
|
print(f"Error: File not found: {input_file}")
|
|
return
|
|
|
|
# Determine file type
|
|
if input_path.suffix.lower() in ['.sdf', '.mol']:
|
|
suppl = Chem.SDMolSupplier(str(input_path))
|
|
elif input_path.suffix.lower() in ['.smi', '.smiles', '.txt']:
|
|
suppl = Chem.SmilesMolSupplier(str(input_path), titleLine=False)
|
|
else:
|
|
print(f"Error: Unsupported file format: {input_path.suffix}")
|
|
return
|
|
|
|
results = []
|
|
for idx, mol in enumerate(suppl):
|
|
if mol is None:
|
|
print(f"Warning: Failed to parse molecule {idx+1}")
|
|
continue
|
|
|
|
props = calculate_properties(mol)
|
|
if props:
|
|
props['Index'] = idx + 1
|
|
results.append(props)
|
|
|
|
# Output results
|
|
if output_file:
|
|
write_csv(results, output_file)
|
|
print(f"Results written to: {output_file}")
|
|
else:
|
|
# Print to console
|
|
for props in results:
|
|
print("\n" + "="*60)
|
|
for key, value in props.items():
|
|
print(f"{key:25s}: {value}")
|
|
|
|
return results
|
|
|
|
|
|
def write_csv(results, output_file):
|
|
"""Write results to CSV file."""
|
|
import csv
|
|
|
|
if not results:
|
|
print("No results to write")
|
|
return
|
|
|
|
with open(output_file, 'w', newline='') as f:
|
|
fieldnames = results[0].keys()
|
|
writer = csv.DictWriter(f, fieldnames=fieldnames)
|
|
writer.writeheader()
|
|
writer.writerows(results)
|
|
|
|
|
|
def print_properties(props):
|
|
"""Print properties in formatted output."""
|
|
print("\nMolecular Properties:")
|
|
print("="*60)
|
|
|
|
# Group related properties
|
|
print("\n[Basic Information]")
|
|
print(f" SMILES: {props['SMILES']}")
|
|
print(f" Formula: {props['Molecular_Formula']}")
|
|
|
|
print("\n[Size & Weight]")
|
|
print(f" Molecular Weight: {props['MW']:.2f}")
|
|
print(f" Exact MW: {props['ExactMW']:.4f}")
|
|
print(f" Heavy Atoms: {props['Heavy_Atoms']}")
|
|
print(f" Heteroatoms: {props['Heteroatoms']}")
|
|
|
|
print("\n[Lipophilicity]")
|
|
print(f" LogP: {props['LogP']:.2f}")
|
|
print(f" Molar Refractivity: {props['MR']:.2f}")
|
|
|
|
print("\n[Polarity]")
|
|
print(f" TPSA: {props['TPSA']:.2f}")
|
|
print(f" Labute ASA: {props['LabuteASA']:.2f}")
|
|
print(f" H-bond Donors: {props['HBD']}")
|
|
print(f" H-bond Acceptors: {props['HBA']}")
|
|
|
|
print("\n[Ring Systems]")
|
|
print(f" Total Rings: {props['Rings']}")
|
|
print(f" Aromatic Rings: {props['Aromatic_Rings']}")
|
|
print(f" Saturated Rings: {props['Saturated_Rings']}")
|
|
print(f" Aliphatic Rings: {props['Aliphatic_Rings']}")
|
|
print(f" Aromatic Heterocycles: {props['Aromatic_Heterocycles']}")
|
|
|
|
print("\n[Flexibility & Complexity]")
|
|
print(f" Rotatable Bonds: {props['Rotatable_Bonds']}")
|
|
print(f" Fraction Csp3: {props['Fraction_Csp3']:.3f}")
|
|
print(f" Bertz Complexity: {props['BertzCT']:.1f}")
|
|
|
|
print("\n[Drug-likeness]")
|
|
print(f" QED Score: {props['QED']:.3f}")
|
|
print(f" Lipinski Pass: {'Yes' if props['Lipinski_Pass'] else 'No'}")
|
|
print(f" Lead-like: {'Yes' if props['Lead-like'] else 'No'}")
|
|
print("="*60)
|
|
|
|
|
|
def main():
|
|
parser = argparse.ArgumentParser(
|
|
description='Calculate molecular properties for molecules',
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog="""
|
|
Examples:
|
|
# Single molecule
|
|
python molecular_properties.py "CCO"
|
|
|
|
# From file
|
|
python molecular_properties.py --file molecules.smi
|
|
|
|
# Save to CSV
|
|
python molecular_properties.py --file molecules.sdf --output properties.csv
|
|
"""
|
|
)
|
|
|
|
parser.add_argument('smiles', nargs='?', help='SMILES string to analyze')
|
|
parser.add_argument('--file', '-f', help='Input file (SDF or SMILES)')
|
|
parser.add_argument('--output', '-o', help='Output CSV file')
|
|
|
|
args = parser.parse_args()
|
|
|
|
if not args.smiles and not args.file:
|
|
parser.print_help()
|
|
sys.exit(1)
|
|
|
|
if args.smiles:
|
|
# Process single molecule
|
|
props = process_single_molecule(args.smiles)
|
|
if props:
|
|
print_properties(props)
|
|
elif args.file:
|
|
# Process file
|
|
process_file(args.file, args.output)
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|