Initial commit
This commit is contained in:
547
skills/exploratory-data-analysis/scripts/eda_analyzer.py
Executable file
547
skills/exploratory-data-analysis/scripts/eda_analyzer.py
Executable file
@@ -0,0 +1,547 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Exploratory Data Analysis Analyzer
|
||||
Analyzes scientific data files and generates comprehensive markdown reports
|
||||
"""
|
||||
|
||||
import os
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
import json
|
||||
|
||||
|
||||
def detect_file_type(filepath):
    """
    Detect the file type based on its extension.

    Args:
        filepath: Path to the file being classified.

    Returns:
        tuple: (extension, file_category, description) where ``extension`` is
        the lowercased suffix without the leading dot, ``file_category`` is a
        known category key (or ``'unknown'``), and ``description`` is a
        human-readable format name.
    """
    extension = Path(filepath).suffix.lower()

    # Map extensions to (category, human-readable description).
    extension_map = {
        # Chemistry/Molecular
        'pdb': ('chemistry_molecular', 'Protein Data Bank'),
        'cif': ('chemistry_molecular', 'Crystallographic Information File'),
        'mol': ('chemistry_molecular', 'MDL Molfile'),
        'mol2': ('chemistry_molecular', 'Tripos Mol2'),
        'sdf': ('chemistry_molecular', 'Structure Data File'),
        'xyz': ('chemistry_molecular', 'XYZ Coordinates'),
        'smi': ('chemistry_molecular', 'SMILES String'),
        'smiles': ('chemistry_molecular', 'SMILES String'),
        'pdbqt': ('chemistry_molecular', 'AutoDock PDBQT'),
        'mae': ('chemistry_molecular', 'Maestro Format'),
        'gro': ('chemistry_molecular', 'GROMACS Coordinate File'),
        'log': ('chemistry_molecular', 'Gaussian Log File'),
        'out': ('chemistry_molecular', 'Quantum Chemistry Output'),
        'wfn': ('chemistry_molecular', 'Wavefunction Files'),
        'wfx': ('chemistry_molecular', 'Wavefunction Files'),
        'fchk': ('chemistry_molecular', 'Gaussian Formatted Checkpoint'),
        'cube': ('chemistry_molecular', 'Gaussian Cube File'),
        'dcd': ('chemistry_molecular', 'Binary Trajectory'),
        'xtc': ('chemistry_molecular', 'Compressed Trajectory'),
        'trr': ('chemistry_molecular', 'GROMACS Trajectory'),
        'nc': ('chemistry_molecular', 'Amber NetCDF Trajectory'),
        'netcdf': ('chemistry_molecular', 'Amber NetCDF Trajectory'),

        # Bioinformatics/Genomics
        'fasta': ('bioinformatics_genomics', 'FASTA Format'),
        'fa': ('bioinformatics_genomics', 'FASTA Format'),
        'fna': ('bioinformatics_genomics', 'FASTA Format'),
        'fastq': ('bioinformatics_genomics', 'FASTQ Format'),
        'fq': ('bioinformatics_genomics', 'FASTQ Format'),
        'sam': ('bioinformatics_genomics', 'Sequence Alignment/Map'),
        'bam': ('bioinformatics_genomics', 'Binary Alignment/Map'),
        'cram': ('bioinformatics_genomics', 'CRAM Format'),
        'bed': ('bioinformatics_genomics', 'Browser Extensible Data'),
        'bedgraph': ('bioinformatics_genomics', 'BED with Graph Data'),
        'bigwig': ('bioinformatics_genomics', 'Binary BigWig'),
        'bw': ('bioinformatics_genomics', 'Binary BigWig'),
        'bigbed': ('bioinformatics_genomics', 'Binary BigBed'),
        'bb': ('bioinformatics_genomics', 'Binary BigBed'),
        'gff': ('bioinformatics_genomics', 'General Feature Format'),
        'gff3': ('bioinformatics_genomics', 'General Feature Format'),
        'gtf': ('bioinformatics_genomics', 'Gene Transfer Format'),
        'vcf': ('bioinformatics_genomics', 'Variant Call Format'),
        'bcf': ('bioinformatics_genomics', 'Binary VCF'),
        'gvcf': ('bioinformatics_genomics', 'Genomic VCF'),

        # Microscopy/Imaging
        'tif': ('microscopy_imaging', 'Tagged Image File Format'),
        'tiff': ('microscopy_imaging', 'Tagged Image File Format'),
        'nd2': ('microscopy_imaging', 'Nikon NIS-Elements'),
        'lif': ('microscopy_imaging', 'Leica Image Format'),
        'czi': ('microscopy_imaging', 'Carl Zeiss Image'),
        'oib': ('microscopy_imaging', 'Olympus Image Format'),
        'oif': ('microscopy_imaging', 'Olympus Image Format'),
        'vsi': ('microscopy_imaging', 'Olympus VSI'),
        'ims': ('microscopy_imaging', 'Imaris Format'),
        'lsm': ('microscopy_imaging', 'Zeiss LSM'),
        'stk': ('microscopy_imaging', 'MetaMorph Stack'),
        'dv': ('microscopy_imaging', 'DeltaVision'),
        'mrc': ('microscopy_imaging', 'Medical Research Council'),
        'dm3': ('microscopy_imaging', 'Gatan Digital Micrograph'),
        'dm4': ('microscopy_imaging', 'Gatan Digital Micrograph'),
        'dcm': ('microscopy_imaging', 'DICOM'),
        'nii': ('microscopy_imaging', 'NIfTI'),
        'nrrd': ('microscopy_imaging', 'Nearly Raw Raster Data'),

        # Spectroscopy/Analytical
        'fid': ('spectroscopy_analytical', 'NMR Free Induction Decay'),
        'mzml': ('spectroscopy_analytical', 'Mass Spectrometry Markup Language'),
        'mzxml': ('spectroscopy_analytical', 'Mass Spectrometry XML'),
        'raw': ('spectroscopy_analytical', 'Vendor Raw Files'),
        'd': ('spectroscopy_analytical', 'Agilent Data Directory'),
        'mgf': ('spectroscopy_analytical', 'Mascot Generic Format'),
        'spc': ('spectroscopy_analytical', 'Galactic SPC'),
        'jdx': ('spectroscopy_analytical', 'JCAMP-DX'),
        'jcamp': ('spectroscopy_analytical', 'JCAMP-DX'),

        # Proteomics/Metabolomics
        'pepxml': ('proteomics_metabolomics', 'Trans-Proteomic Pipeline Peptide XML'),
        'protxml': ('proteomics_metabolomics', 'Protein Inference Results'),
        'mzid': ('proteomics_metabolomics', 'Peptide Identification Format'),
        'mztab': ('proteomics_metabolomics', 'Proteomics/Metabolomics Tabular Format'),

        # General Scientific
        'npy': ('general_scientific', 'NumPy Array'),
        'npz': ('general_scientific', 'Compressed NumPy Archive'),
        'csv': ('general_scientific', 'Comma-Separated Values'),
        'tsv': ('general_scientific', 'Tab-Separated Values'),
        'xlsx': ('general_scientific', 'Excel Spreadsheets'),
        'xls': ('general_scientific', 'Excel Spreadsheets'),
        'json': ('general_scientific', 'JavaScript Object Notation'),
        'xml': ('general_scientific', 'Extensible Markup Language'),
        'hdf5': ('general_scientific', 'Hierarchical Data Format 5'),
        'h5': ('general_scientific', 'Hierarchical Data Format 5'),
        'h5ad': ('bioinformatics_genomics', 'Anndata Format'),
        'zarr': ('general_scientific', 'Chunked Array Storage'),
        'parquet': ('general_scientific', 'Apache Parquet'),
        'mat': ('general_scientific', 'MATLAB Data'),
        'fits': ('general_scientific', 'Flexible Image Transport System'),
    }

    ext_clean = extension.lstrip('.')
    # Single lookup with an explicit fallback for unrecognized extensions.
    category, description = extension_map.get(ext_clean, ('unknown', 'Unknown Format'))
    return ext_clean, category, description
|
||||
|
||||
|
||||
def get_file_basic_info(filepath):
    """Return a dict of basic filesystem facts about *filepath*."""
    path = Path(filepath)
    st = path.stat()

    # Assemble the metadata record field by field.
    details = {}
    details['filename'] = path.name
    details['path'] = str(path.absolute())
    details['size_bytes'] = st.st_size
    details['size_human'] = format_bytes(st.st_size)
    details['modified'] = datetime.fromtimestamp(st.st_mtime).isoformat()
    details['extension'] = path.suffix.lower()
    return details
|
||||
|
||||
|
||||
def format_bytes(size):
    """Convert a byte count into a human-readable string."""
    remaining = size
    for prefix in ('B', 'KB', 'MB', 'GB', 'TB'):
        if remaining < 1024.0:
            return f"{remaining:.2f} {prefix}"
        remaining /= 1024.0
    # Anything past terabytes is reported in petabytes.
    return f"{remaining:.2f} PB"
|
||||
|
||||
|
||||
def load_reference_info(category, extension):
    """
    Load reference documentation for the file type, if available.

    Args:
        category: File category key (e.g., 'chemistry_molecular')
        extension: File extension without the leading dot

    Returns:
        dict: {'raw_section': <markdown text>, 'reference_file': <filename>}
        when a matching "### .<ext>" section is found, otherwise None
        (unknown category, missing reference file, or no matching section).
    """
    import re

    # Map categories to the markdown reference files that document them.
    category_files = {
        'chemistry_molecular': 'chemistry_molecular_formats.md',
        'bioinformatics_genomics': 'bioinformatics_genomics_formats.md',
        'microscopy_imaging': 'microscopy_imaging_formats.md',
        'spectroscopy_analytical': 'spectroscopy_analytical_formats.md',
        'proteomics_metabolomics': 'proteomics_metabolomics_formats.md',
        'general_scientific': 'general_scientific_formats.md',
    }

    if category not in category_files:
        return None

    # Reference files live in ../references relative to this script.
    script_dir = Path(__file__).parent
    ref_file = script_dir.parent / 'references' / category_files[category]

    if not ref_file.exists():
        return None

    # Extract the section headed "### .<extension>" up to the next heading.
    # This is a simplified parser - could be more sophisticated.
    try:
        with open(ref_file, 'r') as f:
            content = f.read()

        # re.escape guards against regex metacharacters in the extension.
        pattern = rf'### \.{re.escape(extension)}[^#]*?(?=###|\Z)'
        match = re.search(pattern, content, re.IGNORECASE | re.DOTALL)

        if match:
            return {
                'raw_section': match.group(0),
                'reference_file': category_files[category]
            }
    except Exception as e:
        print(f"Error loading reference: {e}", file=sys.stderr)

    return None
|
||||
|
||||
|
||||
def analyze_file(filepath):
    """
    Run the full analysis pipeline for a single file.

    Returns:
        dict: Analysis results (basic info, detected type, reference
        documentation, and format-specific data analysis).
    """
    extension, category, description = detect_file_type(filepath)

    analysis = {
        'basic_info': get_file_basic_info(filepath),
        'file_type': {
            'extension': extension,
            'category': category,
            'description': description,
        },
        'reference_info': load_reference_info(category, extension),
        'data_analysis': {},
    }

    # Dispatch to the category-specific analyzer where one exists.
    # Add more specific analyzers as needed.
    analyzers = {
        'general_scientific': analyze_general_scientific,
        'bioinformatics_genomics': analyze_bioinformatics,
        'microscopy_imaging': analyze_imaging,
    }
    try:
        analyzer = analyzers.get(category)
        if analyzer is not None:
            analysis['data_analysis'] = analyzer(filepath, extension)
    except Exception as e:
        analysis['data_analysis']['error'] = str(e)

    return analysis
|
||||
|
||||
|
||||
def analyze_general_scientific(filepath, extension):
    """
    Analyze general scientific data formats (.npy, .npz, .csv/.tsv, .json,
    .h5/.hdf5).

    Args:
        filepath: Path to the data file.
        extension: Lowercased extension without the leading dot.

    Returns:
        dict: Format-specific summary; contains an 'error' key when a
        required library is missing or the analysis fails.
    """
    results = {}

    try:
        if extension == 'npy':
            import numpy as np
            data = np.load(filepath)
            # Only compute numeric statistics for numeric dtypes.
            is_numeric = np.issubdtype(data.dtype, np.number)
            results = {
                'shape': data.shape,
                'dtype': str(data.dtype),
                'size': data.size,
                'ndim': data.ndim,
                'statistics': {
                    'min': float(np.min(data)) if is_numeric else None,
                    'max': float(np.max(data)) if is_numeric else None,
                    'mean': float(np.mean(data)) if is_numeric else None,
                    'std': float(np.std(data)) if is_numeric else None,
                }
            }

        elif extension == 'npz':
            import numpy as np
            # NpzFile keeps the archive open; the context manager closes it.
            with np.load(filepath) as data:
                results = {
                    'arrays': list(data.files),
                    'array_count': len(data.files),
                    'array_shapes': {name: data[name].shape for name in data.files}
                }

        elif extension in ('csv', 'tsv'):
            import pandas as pd
            sep = '\t' if extension == 'tsv' else ','
            df = pd.read_csv(filepath, sep=sep, nrows=10000)  # Sample first 10k rows
            has_numeric = len(df.select_dtypes(include='number').columns) > 0

            results = {
                'shape': df.shape,
                'columns': list(df.columns),
                'dtypes': {col: str(dtype) for col, dtype in df.dtypes.items()},
                'missing_values': df.isnull().sum().to_dict(),
                'summary_statistics': df.describe().to_dict() if has_numeric else {}
            }

        elif extension == 'json':
            with open(filepath, 'r') as f:
                data = json.load(f)

            results = {
                'type': type(data).__name__,
                'keys': list(data.keys()) if isinstance(data, dict) else None,
                'length': len(data) if isinstance(data, (list, dict)) else None
            }

        elif extension in ('h5', 'hdf5'):
            import h5py
            with h5py.File(filepath, 'r') as f:
                def get_structure(group, prefix=''):
                    # Recursively flatten the HDF5 hierarchy into a path->info dict.
                    items = {}
                    for key in group.keys():
                        path = f"{prefix}/{key}"
                        node = group[key]
                        if isinstance(node, h5py.Dataset):
                            items[path] = {
                                'type': 'dataset',
                                'shape': node.shape,
                                'dtype': str(node.dtype)
                            }
                        elif isinstance(node, h5py.Group):
                            items[path] = {'type': 'group'}
                            items.update(get_structure(node, path))
                    return items

                results = {
                    'structure': get_structure(f),
                    'attributes': dict(f.attrs)
                }

    except ImportError as e:
        results['error'] = f"Required library not installed: {e}"
    except Exception as e:
        results['error'] = f"Analysis error: {e}"

    return results
|
||||
|
||||
|
||||
def analyze_bioinformatics(filepath, extension):
    """Analyze bioinformatics/genomics formats (FASTA and FASTQ)."""
    results = {}

    try:
        if extension in ('fasta', 'fa', 'fna'):
            from Bio import SeqIO
            records = list(SeqIO.parse(filepath, 'fasta'))
            sizes = [len(rec) for rec in records]
            total = sum(sizes)

            results = {
                'sequence_count': len(records),
                'total_length': total,
                'mean_length': total / len(sizes) if sizes else 0,
                'min_length': min(sizes) if sizes else 0,
                'max_length': max(sizes) if sizes else 0,
                'sequence_ids': [rec.id for rec in records[:10]]  # First 10
            }

        elif extension in ('fastq', 'fq'):
            from Bio import SeqIO
            # Sample at most the first 10k reads to bound memory and time.
            records = []
            for record in SeqIO.parse(filepath, 'fastq'):
                records.append(record)
                if len(records) >= 10000:
                    break

            sizes = [len(rec) for rec in records]
            # Per-read mean Phred quality.
            avg_quals = [
                sum(rec.letter_annotations['phred_quality']) / len(rec)
                for rec in records
            ]

            results = {
                'read_count_sampled': len(records),
                'mean_length': sum(sizes) / len(sizes) if sizes else 0,
                'mean_quality': sum(avg_quals) / len(avg_quals) if avg_quals else 0,
                'min_length': min(sizes) if sizes else 0,
                'max_length': max(sizes) if sizes else 0,
            }

    except ImportError as e:
        results['error'] = f"Required library not installed (try: pip install biopython): {e}"
    except Exception as e:
        results['error'] = f"Analysis error: {e}"

    return results
|
||||
|
||||
|
||||
def analyze_imaging(filepath, extension):
    """
    Analyze microscopy/imaging formats readable by Pillow.

    Args:
        filepath: Path to the image file.
        extension: Lowercased extension without the leading dot.

    Returns:
        dict: Image metadata and intensity statistics; contains an 'error'
        key when Pillow is missing or the analysis fails.
    """
    results = {}

    try:
        if extension in ('tif', 'tiff', 'png', 'jpg', 'jpeg'):
            from PIL import Image
            import numpy as np

            # Pillow opens the file lazily and keeps the handle alive;
            # the context manager guarantees it is closed.
            with Image.open(filepath) as img:
                img_array = np.array(img)

                results = {
                    'size': img.size,
                    'mode': img.mode,
                    'format': img.format,
                    'shape': img_array.shape,
                    'dtype': str(img_array.dtype),
                    'value_range': [int(img_array.min()), int(img_array.max())],
                    'mean_intensity': float(img_array.mean()),
                }

                # Count pages in a multi-page TIFF by seeking until EOF.
                if extension in ('tif', 'tiff'):
                    try:
                        frame_count = 0
                        while True:
                            img.seek(frame_count)
                            frame_count += 1
                    except EOFError:
                        results['page_count'] = frame_count

    except ImportError as e:
        results['error'] = f"Required library not installed (try: pip install pillow): {e}"
    except Exception as e:
        results['error'] = f"Analysis error: {e}"

    return results
|
||||
|
||||
|
||||
def generate_markdown_report(analysis, output_path=None):
    """
    Generate a comprehensive markdown report from analysis results.

    Args:
        analysis: Analysis results dictionary (as produced by analyze_file)
        output_path: Path to save the report (if None, prints to stdout)

    Returns:
        str: The rendered markdown report.
    """
    lines = []

    # Title — interpolate the analyzed file's name (previously the title was
    # hard-coded and never used the computed filename).
    filename = analysis['basic_info']['filename']
    lines.append(f"# Exploratory Data Analysis Report: {filename}\n")
    lines.append(f"**Generated:** {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
    lines.append("---\n")

    # Basic Information
    lines.append("## Basic Information\n")
    basic = analysis['basic_info']
    lines.append(f"- **Filename:** `{basic['filename']}`")
    lines.append(f"- **Full Path:** `{basic['path']}`")
    lines.append(f"- **File Size:** {basic['size_human']} ({basic['size_bytes']:,} bytes)")
    lines.append(f"- **Last Modified:** {basic['modified']}")
    lines.append(f"- **Extension:** `.{analysis['file_type']['extension']}`\n")

    # File Type Information
    lines.append("## File Type\n")
    ft = analysis['file_type']
    lines.append(f"- **Category:** {ft['category'].replace('_', ' ').title()}")
    lines.append(f"- **Description:** {ft['description']}\n")

    # Reference Information (only when a matching reference section was found)
    if analysis.get('reference_info'):
        lines.append("## Format Reference\n")
        ref = analysis['reference_info']
        if 'raw_section' in ref:
            lines.append(ref['raw_section'])
            lines.append(f"\n*Reference: {ref['reference_file']}*\n")

    # Data Analysis
    if analysis.get('data_analysis'):
        lines.append("## Data Analysis\n")
        data = analysis['data_analysis']

        if 'error' in data:
            lines.append(f"⚠️ **Analysis Error:** {data['error']}\n")
        else:
            # Dump the raw analysis dict as pretty-printed JSON.
            lines.append("### Summary Statistics\n")
            lines.append("```json")
            lines.append(json.dumps(data, indent=2, default=str))
            lines.append("```\n")

    # Recommendations
    lines.append("## Recommendations for Further Analysis\n")
    lines.append(f"Based on the file type (`.{analysis['file_type']['extension']}`), consider the following analyses:\n")

    # Category-specific suggestions (categories without an entry get none).
    category = analysis['file_type']['category']
    if category == 'general_scientific':
        lines.append("- Statistical distribution analysis")
        lines.append("- Missing value imputation strategies")
        lines.append("- Correlation analysis between variables")
        lines.append("- Outlier detection and handling")
        lines.append("- Dimensionality reduction (PCA, t-SNE)")
    elif category == 'bioinformatics_genomics':
        lines.append("- Sequence quality control and filtering")
        lines.append("- GC content analysis")
        lines.append("- Read alignment and mapping statistics")
        lines.append("- Variant calling and annotation")
        lines.append("- Differential expression analysis")
    elif category == 'microscopy_imaging':
        lines.append("- Image quality assessment")
        lines.append("- Background correction and normalization")
        lines.append("- Segmentation and object detection")
        lines.append("- Colocalization analysis")
        lines.append("- Intensity measurements and quantification")

    lines.append("")

    # Footer
    lines.append("---")
    lines.append("*This report was generated by the exploratory-data-analysis skill.*")

    report = '\n'.join(lines)

    if output_path:
        with open(output_path, 'w') as f:
            f.write(report)
        print(f"Report saved to: {output_path}")
    else:
        print(report)

    return report
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: analyze a file and write a markdown report."""
    args = sys.argv[1:]
    if not args:
        print("Usage: python eda_analyzer.py <filepath> [output.md]")
        print("  filepath: Path to the data file to analyze")
        print("  output.md: Optional output path for markdown report")
        sys.exit(1)

    filepath = args[0]
    output_path = args[1] if len(args) > 1 else None

    if not os.path.exists(filepath):
        print(f"Error: File not found: {filepath}")
        sys.exit(1)

    # Default the report next to the input file as <stem>_eda_report.md.
    if output_path is None:
        source = Path(filepath)
        output_path = source.parent / f"{source.stem}_eda_report.md"

    print(f"Analyzing: {filepath}")
    analysis = analyze_file(filepath)

    print("\nGenerating report...")
    generate_markdown_report(analysis, output_path)

    print("\n✓ Analysis complete!")


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user