Initial commit

2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions
--- a/skills/rdkit/scripts/molecular_properties.py
+++ b/skills/rdkit/scripts/molecular_properties.py
@@ -0,0 +1,243 @@
+#!/usr/bin/env python3
+"""
+Molecular Properties Calculator
+
+Calculate comprehensive molecular properties and descriptors for molecules.
+Supports single molecules or batch processing from files.
+
+Usage:
+    python molecular_properties.py "CCO"
+    python molecular_properties.py --file molecules.smi --output properties.csv
+"""
+
+import argparse
+import sys
+from pathlib import Path
+
+try:
+    from rdkit import Chem
+    from rdkit.Chem import Descriptors, Lipinski
+except ImportError:
+    print("Error: RDKit not installed. Install with: conda install -c conda-forge rdkit")
+    sys.exit(1)
+
+
+def calculate_properties(mol):
+    """Calculate comprehensive molecular properties."""
+    if mol is None:
+        return None
+
+    properties = {
+        # Basic properties
+        'SMILES': Chem.MolToSmiles(mol),
+        'Molecular_Formula': Chem.rdMolDescriptors.CalcMolFormula(mol),
+
+        # Molecular weight
+        'MW': Descriptors.MolWt(mol),
+        'ExactMW': Descriptors.ExactMolWt(mol),
+
+        # Lipophilicity
+        'LogP': Descriptors.MolLogP(mol),
+        'MR': Descriptors.MolMR(mol),
+
+        # Polar surface area
+        'TPSA': Descriptors.TPSA(mol),
+        'LabuteASA': Descriptors.LabuteASA(mol),
+
+        # Hydrogen bonding
+        'HBD': Descriptors.NumHDonors(mol),
+        'HBA': Descriptors.NumHAcceptors(mol),
+
+        # Atom counts
+        'Heavy_Atoms': Descriptors.HeavyAtomCount(mol),
+        'Heteroatoms': Descriptors.NumHeteroatoms(mol),
+        'Valence_Electrons': Descriptors.NumValenceElectrons(mol),
+
+        # Ring information
+        'Rings': Descriptors.RingCount(mol),
+        'Aromatic_Rings': Descriptors.NumAromaticRings(mol),
+        'Saturated_Rings': Descriptors.NumSaturatedRings(mol),
+        'Aliphatic_Rings': Descriptors.NumAliphaticRings(mol),
+        'Aromatic_Heterocycles': Descriptors.NumAromaticHeterocycles(mol),
+
+        # Flexibility
+        'Rotatable_Bonds': Descriptors.NumRotatableBonds(mol),
+        'Fraction_Csp3': Descriptors.FractionCsp3(mol),
+
+        # Complexity
+        'BertzCT': Descriptors.BertzCT(mol),
+
+        # Drug-likeness
+        'QED': Descriptors.qed(mol),
+    }
+
+    # Lipinski's Rule of Five
+    properties['Lipinski_Pass'] = (
+        properties['MW'] <= 500 and
+        properties['LogP'] <= 5 and
+        properties['HBD'] <= 5 and
+        properties['HBA'] <= 10
+    )
+
+    # Lead-likeness
+    properties['Lead-like'] = (
+        250 <= properties['MW'] <= 350 and
+        properties['LogP'] <= 3.5 and
+        properties['Rotatable_Bonds'] <= 7
+    )
+
+    return properties
+
+
+def process_single_molecule(smiles):
+    """Process a single SMILES string."""
+    mol = Chem.MolFromSmiles(smiles)
+    if mol is None:
+        print(f"Error: Failed to parse SMILES: {smiles}")
+        return None
+
+    props = calculate_properties(mol)
+    return props
+
+
+def process_file(input_file, output_file=None):
+    """Process molecules from a file."""
+    input_path = Path(input_file)
+
+    if not input_path.exists():
+        print(f"Error: File not found: {input_file}")
+        return
+
+    # Determine file type
+    if input_path.suffix.lower() in ['.sdf', '.mol']:
+        suppl = Chem.SDMolSupplier(str(input_path))
+    elif input_path.suffix.lower() in ['.smi', '.smiles', '.txt']:
+        suppl = Chem.SmilesMolSupplier(str(input_path), titleLine=False)
+    else:
+        print(f"Error: Unsupported file format: {input_path.suffix}")
+        return
+
+    results = []
+    for idx, mol in enumerate(suppl):
+        if mol is None:
+            print(f"Warning: Failed to parse molecule {idx+1}")
+            continue
+
+        props = calculate_properties(mol)
+        if props:
+            props['Index'] = idx + 1
+            results.append(props)
+
+    # Output results
+    if output_file:
+        write_csv(results, output_file)
+        print(f"Results written to: {output_file}")
+    else:
+        # Print to console
+        for props in results:
+            print("\n" + "="*60)
+            for key, value in props.items():
+                print(f"{key:25s}: {value}")
+
+    return results
+
+
+def write_csv(results, output_file):
+    """Write results to CSV file."""
+    import csv
+
+    if not results:
+        print("No results to write")
+        return
+
+    with open(output_file, 'w', newline='') as f:
+        fieldnames = results[0].keys()
+        writer = csv.DictWriter(f, fieldnames=fieldnames)
+        writer.writeheader()
+        writer.writerows(results)
+
+
+def print_properties(props):
+    """Print properties in formatted output."""
+    print("\nMolecular Properties:")
+    print("="*60)
+
+    # Group related properties
+    print("\n[Basic Information]")
+    print(f"  SMILES:              {props['SMILES']}")
+    print(f"  Formula:             {props['Molecular_Formula']}")
+
+    print("\n[Size & Weight]")
+    print(f"  Molecular Weight:    {props['MW']:.2f}")
+    print(f"  Exact MW:            {props['ExactMW']:.4f}")
+    print(f"  Heavy Atoms:         {props['Heavy_Atoms']}")
+    print(f"  Heteroatoms:         {props['Heteroatoms']}")
+
+    print("\n[Lipophilicity]")
+    print(f"  LogP:                {props['LogP']:.2f}")
+    print(f"  Molar Refractivity:  {props['MR']:.2f}")
+
+    print("\n[Polarity]")
+    print(f"  TPSA:                {props['TPSA']:.2f}")
+    print(f"  Labute ASA:          {props['LabuteASA']:.2f}")
+    print(f"  H-bond Donors:       {props['HBD']}")
+    print(f"  H-bond Acceptors:    {props['HBA']}")
+
+    print("\n[Ring Systems]")
+    print(f"  Total Rings:         {props['Rings']}")
+    print(f"  Aromatic Rings:      {props['Aromatic_Rings']}")
+    print(f"  Saturated Rings:     {props['Saturated_Rings']}")
+    print(f"  Aliphatic Rings:     {props['Aliphatic_Rings']}")
+    print(f"  Aromatic Heterocycles: {props['Aromatic_Heterocycles']}")
+
+    print("\n[Flexibility & Complexity]")
+    print(f"  Rotatable Bonds:     {props['Rotatable_Bonds']}")
+    print(f"  Fraction Csp3:       {props['Fraction_Csp3']:.3f}")
+    print(f"  Bertz Complexity:    {props['BertzCT']:.1f}")
+
+    print("\n[Drug-likeness]")
+    print(f"  QED Score:           {props['QED']:.3f}")
+    print(f"  Lipinski Pass:       {'Yes' if props['Lipinski_Pass'] else 'No'}")
+    print(f"  Lead-like:           {'Yes' if props['Lead-like'] else 'No'}")
+    print("="*60)
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description='Calculate molecular properties for molecules',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+  # Single molecule
+  python molecular_properties.py "CCO"
+
+  # From file
+  python molecular_properties.py --file molecules.smi
+
+  # Save to CSV
+  python molecular_properties.py --file molecules.sdf --output properties.csv
+        """
+    )
+
+    parser.add_argument('smiles', nargs='?', help='SMILES string to analyze')
+    parser.add_argument('--file', '-f', help='Input file (SDF or SMILES)')
+    parser.add_argument('--output', '-o', help='Output CSV file')
+
+    args = parser.parse_args()
+
+    if not args.smiles and not args.file:
+        parser.print_help()
+        sys.exit(1)
+
+    if args.smiles:
+        # Process single molecule
+        props = process_single_molecule(args.smiles)
+        if props:
+            print_properties(props)
+    elif args.file:
+        # Process file
+        process_file(args.file, args.output)
+
+
+# Run the CLI only when executed as a script, not when imported as a module.
+if __name__ == '__main__':
+    main()