Initial commit
195
skills/deeptools/scripts/validate_files.py
Normal file
@@ -0,0 +1,195 @@
#!/usr/bin/env python3
"""
deepTools File Validation Script

Validates BAM, bigWig, and BED files for deepTools analysis.
Checks for file existence, proper indexing, and basic format requirements.
"""

import os
import sys
import argparse
from pathlib import Path


def check_file_exists(filepath):
    """Check if file exists and is readable."""
    if not os.path.exists(filepath):
        return False, f"File not found: {filepath}"
    if not os.access(filepath, os.R_OK):
        return False, f"File not readable: {filepath}"
    return True, f"✓ File exists: {filepath}"


def check_bam_index(bam_file):
    """Check if BAM file has an index (.bai or .bam.bai)."""
    bai_file1 = bam_file + ".bai"
    bai_file2 = bam_file.replace(".bam", ".bai")

    if os.path.exists(bai_file1):
        return True, f"✓ BAM index found: {bai_file1}"
    elif os.path.exists(bai_file2):
        return True, f"✓ BAM index found: {bai_file2}"
    else:
        return False, f"✗ BAM index missing for: {bam_file}\n Run: samtools index {bam_file}"


def check_bigwig_file(bw_file):
    """Basic check for bigWig file."""
    # Check file size (bigWig files should have reasonable size)
    file_size = os.path.getsize(bw_file)
    if file_size < 100:
        return False, f"✗ bigWig file suspiciously small: {bw_file} ({file_size} bytes)"
    return True, f"✓ bigWig file appears valid: {bw_file} ({file_size} bytes)"


def check_bed_file(bed_file):
    """Basic validation of BED file format."""
    try:
        with open(bed_file, 'r') as f:
            lines = [line.strip() for line in f if line.strip() and not line.startswith('#')]

        if len(lines) == 0:
            return False, f"✗ BED file is empty: {bed_file}"

        # Check first few lines for basic format
        for i, line in enumerate(lines[:10], 1):
            fields = line.split('\t')
            if len(fields) < 3:
                return False, f"✗ BED file format error at line {i}: expected at least 3 columns\n Line: {line}"

            # Check if start and end are integers
            try:
                start = int(fields[1])
                end = int(fields[2])
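                # BED intervals are 0-based and half-open, so end must be greater than start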
                if start >= end:
                    return False, f"✗ BED file error at line {i}: start >= end ({start} >= {end})"
            except ValueError:
                return False, f"✗ BED file format error at line {i}: start and end must be integers\n Line: {line}"

        return True, f"✓ BED file format appears valid: {bed_file} ({len(lines)} regions)"

    except Exception as e:
        return False, f"✗ Error reading BED file: {bed_file}\n Error: {str(e)}"


def validate_files(bam_files=None, bigwig_files=None, bed_files=None):
    """
    Validate all provided files.

    Args:
        bam_files: List of BAM file paths
        bigwig_files: List of bigWig file paths
        bed_files: List of BED file paths

    Returns:
        Tuple of (success: bool, messages: list)
    """
    all_success = True
    messages = []

    # Validate BAM files
    if bam_files:
        messages.append("\n=== Validating BAM Files ===")
        for bam_file in bam_files:
            # Check existence
            success, msg = check_file_exists(bam_file)
            messages.append(msg)
            if not success:
                all_success = False
                continue

            # Check index
            success, msg = check_bam_index(bam_file)
            messages.append(msg)
            if not success:
                all_success = False

    # Validate bigWig files
    if bigwig_files:
        messages.append("\n=== Validating bigWig Files ===")
        for bw_file in bigwig_files:
            # Check existence
            success, msg = check_file_exists(bw_file)
            messages.append(msg)
            if not success:
                all_success = False
                continue

            # Basic bigWig check
            success, msg = check_bigwig_file(bw_file)
            messages.append(msg)
            if not success:
                all_success = False

    # Validate BED files
    if bed_files:
        messages.append("\n=== Validating BED Files ===")
        for bed_file in bed_files:
            # Check existence
            success, msg = check_file_exists(bed_file)
            messages.append(msg)
            if not success:
                all_success = False
                continue

            # Check BED format
            success, msg = check_bed_file(bed_file)
            messages.append(msg)
            if not success:
                all_success = False

    return all_success, messages


def main():
    parser = argparse.ArgumentParser(
        description="Validate files for deepTools analysis",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Validate BAM files
  python validate_files.py --bam sample1.bam sample2.bam

  # Validate all file types
  python validate_files.py --bam input.bam chip.bam --bed peaks.bed --bigwig signal.bw

  # Validate all matching files in the current directory (shell glob expansion)
  python validate_files.py --bam *.bam --bed *.bed
"""
    )

    parser.add_argument('--bam', nargs='+', help='BAM files to validate')
    parser.add_argument('--bigwig', '--bw', nargs='+', help='bigWig files to validate')
    parser.add_argument('--bed', nargs='+', help='BED files to validate')

    args = parser.parse_args()

    # Check if any files were provided
    if not any([args.bam, args.bigwig, args.bed]):
        parser.print_help()
        sys.exit(1)

    # Run validation
    success, messages = validate_files(
        bam_files=args.bam,
        bigwig_files=args.bigwig,
        bed_files=args.bed
    )

    # Print results
    for msg in messages:
        print(msg)

    # Summary
    print("\n" + "="*50)
    if success:
        print("✓ All validations passed!")
        sys.exit(0)
    else:
        print("✗ Some validations failed. Please fix the issues above.")
        sys.exit(1)


if __name__ == "__main__":
    main()
454
skills/deeptools/scripts/workflow_generator.py
Normal file
@@ -0,0 +1,454 @@
#!/usr/bin/env python3
"""
deepTools Workflow Generator

Generates bash script templates for common deepTools workflows.
"""

import argparse
import sys


WORKFLOWS = {
    'chipseq_qc': {
        'name': 'ChIP-seq Quality Control',
        'description': 'Complete QC workflow for ChIP-seq experiments',
    },
    'chipseq_analysis': {
        'name': 'ChIP-seq Complete Analysis',
        'description': 'Full ChIP-seq analysis from BAM to heatmaps',
    },
    'rnaseq_coverage': {
        'name': 'RNA-seq Coverage Tracks',
        'description': 'Generate strand-specific RNA-seq coverage',
    },
    'atacseq': {
        'name': 'ATAC-seq Analysis',
        'description': 'ATAC-seq workflow with Tn5 correction',
    },
}


def generate_chipseq_qc_workflow(output_file, params):
    """Generate ChIP-seq QC workflow script."""

    script = f"""#!/bin/bash
# deepTools ChIP-seq Quality Control Workflow
# Generated by deepTools workflow generator

# Configuration
INPUT_BAM="{params.get('input_bam', 'Input.bam')}"
CHIP_BAM=("{params.get('chip_bams', 'ChIP1.bam ChIP2.bam')}")
GENOME_SIZE={params.get('genome_size', '2913022398')}
THREADS={params.get('threads', '8')}
OUTPUT_DIR="{params.get('output_dir', 'deeptools_qc')}"

# Create output directory
mkdir -p $OUTPUT_DIR

echo "=== Starting ChIP-seq QC workflow ==="

# Step 1: Correlation analysis
echo "Step 1: Computing correlation matrix..."
multiBamSummary bins \\
    --bamfiles $INPUT_BAM ${{CHIP_BAM[@]}} \\
    -o $OUTPUT_DIR/readCounts.npz \\
    --numberOfProcessors $THREADS

echo "Step 2: Generating correlation heatmap..."
plotCorrelation \\
    -in $OUTPUT_DIR/readCounts.npz \\
    --corMethod pearson \\
    --whatToPlot heatmap \\
    --plotFile $OUTPUT_DIR/correlation_heatmap.png \\
    --plotNumbers

echo "Step 3: Generating PCA plot..."
plotPCA \\
    -in $OUTPUT_DIR/readCounts.npz \\
    -o $OUTPUT_DIR/PCA_plot.png \\
    -T "PCA of ChIP-seq samples"

# Step 2: Coverage assessment
echo "Step 4: Assessing coverage..."
plotCoverage \\
    --bamfiles $INPUT_BAM ${{CHIP_BAM[@]}} \\
    --plotFile $OUTPUT_DIR/coverage.png \\
    --ignoreDuplicates \\
    --numberOfProcessors $THREADS

# Step 3: Fragment size (for paired-end data)
echo "Step 5: Analyzing fragment sizes..."
bamPEFragmentSize \\
    --bamfiles $INPUT_BAM ${{CHIP_BAM[@]}} \\
    --histogram $OUTPUT_DIR/fragmentSizes.png \\
    --plotTitle "Fragment Size Distribution"

# Step 4: ChIP signal strength
echo "Step 6: Evaluating ChIP enrichment..."
plotFingerprint \\
    --bamfiles $INPUT_BAM ${{CHIP_BAM[@]}} \\
    --plotFile $OUTPUT_DIR/fingerprint.png \\
    --extendReads 200 \\
    --ignoreDuplicates \\
    --numberOfProcessors $THREADS \\
    --outQualityMetrics $OUTPUT_DIR/fingerprint_metrics.txt

echo "=== ChIP-seq QC workflow complete ==="
echo "Results are in: $OUTPUT_DIR"
"""

    with open(output_file, 'w') as f:
        f.write(script)

    return f"✓ Generated ChIP-seq QC workflow: {output_file}"


def generate_chipseq_analysis_workflow(output_file, params):
    """Generate complete ChIP-seq analysis workflow script."""

    script = f"""#!/bin/bash
# deepTools ChIP-seq Complete Analysis Workflow
# Generated by deepTools workflow generator

# Configuration
INPUT_BAM="{params.get('input_bam', 'Input.bam')}"
CHIP_BAM="{params.get('chip_bam', 'ChIP.bam')}"
GENES_BED="{params.get('genes_bed', 'genes.bed')}"
PEAKS_BED="{params.get('peaks_bed', 'peaks.bed')}"
GENOME_SIZE={params.get('genome_size', '2913022398')}
THREADS={params.get('threads', '8')}
OUTPUT_DIR="{params.get('output_dir', 'chipseq_analysis')}"

# Create output directory
mkdir -p $OUTPUT_DIR

echo "=== Starting ChIP-seq analysis workflow ==="

# Step 1: Generate normalized coverage tracks
echo "Step 1: Generating coverage tracks..."

bamCoverage \\
    --bam $INPUT_BAM \\
    --outFileName $OUTPUT_DIR/Input_coverage.bw \\
    --normalizeUsing RPGC \\
    --effectiveGenomeSize $GENOME_SIZE \\
    --binSize 10 \\
    --extendReads 200 \\
    --ignoreDuplicates \\
    --numberOfProcessors $THREADS

bamCoverage \\
    --bam $CHIP_BAM \\
    --outFileName $OUTPUT_DIR/ChIP_coverage.bw \\
    --normalizeUsing RPGC \\
    --effectiveGenomeSize $GENOME_SIZE \\
    --binSize 10 \\
    --extendReads 200 \\
    --ignoreDuplicates \\
    --numberOfProcessors $THREADS

# Step 2: Create log2 ratio track
echo "Step 2: Creating log2 ratio track..."
bamCompare \\
    --bamfile1 $CHIP_BAM \\
    --bamfile2 $INPUT_BAM \\
    --outFileName $OUTPUT_DIR/ChIP_vs_Input_log2ratio.bw \\
    --operation log2 \\
    --scaleFactorsMethod readCount \\
    --binSize 10 \\
    --extendReads 200 \\
    --ignoreDuplicates \\
    --numberOfProcessors $THREADS

# Step 3: Compute matrix around TSS
echo "Step 3: Computing matrix around TSS..."
computeMatrix reference-point \\
    --referencePoint TSS \\
    --scoreFileName $OUTPUT_DIR/ChIP_coverage.bw \\
    --regionsFileName $GENES_BED \\
    --beforeRegionStartLength 3000 \\
    --afterRegionStartLength 3000 \\
    --binSize 10 \\
    --sortRegions descend \\
    --sortUsing mean \\
    --outFileName $OUTPUT_DIR/matrix_TSS.gz \\
    --numberOfProcessors $THREADS

# Step 4: Generate heatmap
echo "Step 4: Generating heatmap..."
plotHeatmap \\
    --matrixFile $OUTPUT_DIR/matrix_TSS.gz \\
    --outFileName $OUTPUT_DIR/heatmap_TSS.png \\
    --colorMap RdBu \\
    --whatToShow 'plot, heatmap and colorbar' \\
    --yAxisLabel "Genes" \\
    --xAxisLabel "Distance from TSS (bp)" \\
    --refPointLabel "TSS" \\
    --heatmapHeight 15 \\
    --kmeans 3

# Step 5: Generate profile plot
echo "Step 5: Generating profile plot..."
plotProfile \\
    --matrixFile $OUTPUT_DIR/matrix_TSS.gz \\
    --outFileName $OUTPUT_DIR/profile_TSS.png \\
    --plotType lines \\
    --perGroup \\
    --colors blue \\
    --plotTitle "ChIP-seq signal around TSS" \\
    --yAxisLabel "Average signal" \\
    --refPointLabel "TSS"

# Step 6: Enrichment at peaks (if peaks provided)
if [ -f "$PEAKS_BED" ]; then
    echo "Step 6: Calculating enrichment at peaks..."
    plotEnrichment \\
        --bamfiles $INPUT_BAM $CHIP_BAM \\
        --BED $PEAKS_BED \\
        --labels Input ChIP \\
        --plotFile $OUTPUT_DIR/enrichment.png \\
        --outRawCounts $OUTPUT_DIR/enrichment_counts.tab \\
        --extendReads 200 \\
        --ignoreDuplicates
fi

echo "=== ChIP-seq analysis complete ==="
echo "Results are in: $OUTPUT_DIR"
"""

    with open(output_file, 'w') as f:
        f.write(script)

    return f"✓ Generated ChIP-seq analysis workflow: {output_file}"


def generate_rnaseq_coverage_workflow(output_file, params):
    """Generate RNA-seq coverage workflow script."""

    script = f"""#!/bin/bash
# deepTools RNA-seq Coverage Workflow
# Generated by deepTools workflow generator

# Configuration
RNASEQ_BAM="{params.get('rnaseq_bam', 'rnaseq.bam')}"
THREADS={params.get('threads', '8')}
OUTPUT_DIR="{params.get('output_dir', 'rnaseq_coverage')}"

# Create output directory
mkdir -p $OUTPUT_DIR

echo "=== Starting RNA-seq coverage workflow ==="

# Generate strand-specific coverage tracks
echo "Step 1: Generating forward strand coverage..."
bamCoverage \\
    --bam $RNASEQ_BAM \\
    --outFileName $OUTPUT_DIR/forward_coverage.bw \\
    --filterRNAstrand forward \\
    --normalizeUsing CPM \\
    --binSize 1 \\
    --numberOfProcessors $THREADS

echo "Step 2: Generating reverse strand coverage..."
bamCoverage \\
    --bam $RNASEQ_BAM \\
    --outFileName $OUTPUT_DIR/reverse_coverage.bw \\
    --filterRNAstrand reverse \\
    --normalizeUsing CPM \\
    --binSize 1 \\
    --numberOfProcessors $THREADS

echo "=== RNA-seq coverage workflow complete ==="
echo "Results are in: $OUTPUT_DIR"
echo ""
echo "Note: These bigWig files can be loaded into genome browsers"
echo "for strand-specific visualization of RNA-seq data."
"""

    with open(output_file, 'w') as f:
        f.write(script)

    return f"✓ Generated RNA-seq coverage workflow: {output_file}"


def generate_atacseq_workflow(output_file, params):
    """Generate ATAC-seq workflow script."""

    script = f"""#!/bin/bash
# deepTools ATAC-seq Analysis Workflow
# Generated by deepTools workflow generator

# Configuration
ATAC_BAM="{params.get('atac_bam', 'atacseq.bam')}"
PEAKS_BED="{params.get('peaks_bed', 'peaks.bed')}"
GENOME_SIZE={params.get('genome_size', '2913022398')}
THREADS={params.get('threads', '8')}
OUTPUT_DIR="{params.get('output_dir', 'atacseq_analysis')}"

# Create output directory
mkdir -p $OUTPUT_DIR

echo "=== Starting ATAC-seq analysis workflow ==="

# Step 1: Shift reads for Tn5 correction
echo "Step 1: Applying Tn5 offset correction..."
alignmentSieve \\
    --bam $ATAC_BAM \\
    --outFile $OUTPUT_DIR/atacseq_shifted.bam \\
    --ATACshift \\
    --minFragmentLength 38 \\
    --maxFragmentLength 2000 \\
    --ignoreDuplicates

# Index the shifted BAM
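# (samtools index needs a coordinate-sorted BAM; if indexing fails here, run
# samtools sort on the shifted BAM first)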
samtools index $OUTPUT_DIR/atacseq_shifted.bam

# Step 2: Generate coverage track
echo "Step 2: Generating coverage track..."
bamCoverage \\
    --bam $OUTPUT_DIR/atacseq_shifted.bam \\
    --outFileName $OUTPUT_DIR/atacseq_coverage.bw \\
    --normalizeUsing RPGC \\
    --effectiveGenomeSize $GENOME_SIZE \\
    --binSize 1 \\
    --numberOfProcessors $THREADS

# Step 3: Fragment size analysis
echo "Step 3: Analyzing fragment sizes..."
bamPEFragmentSize \\
    --bamfiles $ATAC_BAM \\
    --histogram $OUTPUT_DIR/fragmentSizes.png \\
    --maxFragmentLength 1000

# Step 4: Compute matrix at peaks (if peaks provided)
if [ -f "$PEAKS_BED" ]; then
    echo "Step 4: Computing matrix at peaks..."
    computeMatrix reference-point \\
        --referencePoint center \\
        --scoreFileName $OUTPUT_DIR/atacseq_coverage.bw \\
        --regionsFileName $PEAKS_BED \\
        --beforeRegionStartLength 2000 \\
        --afterRegionStartLength 2000 \\
        --binSize 10 \\
        --outFileName $OUTPUT_DIR/matrix_peaks.gz \\
        --numberOfProcessors $THREADS

    echo "Step 5: Generating heatmap..."
    plotHeatmap \\
        --matrixFile $OUTPUT_DIR/matrix_peaks.gz \\
        --outFileName $OUTPUT_DIR/heatmap_peaks.png \\
        --colorMap YlOrRd \\
        --refPointLabel "Peak Center" \\
        --heatmapHeight 15
fi

echo "=== ATAC-seq analysis complete ==="
echo "Results are in: $OUTPUT_DIR"
echo ""
echo "Expected fragment size pattern:"
echo " ~50bp: nucleosome-free regions"
echo " ~200bp: mono-nucleosome"
echo " ~400bp: di-nucleosome"
"""

    with open(output_file, 'w') as f:
        f.write(script)

    return f"✓ Generated ATAC-seq workflow: {output_file}"


def main():
    parser = argparse.ArgumentParser(
        description="Generate deepTools workflow scripts",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=f"""
Available workflows:
{chr(10).join(f" {key}: {value['name']}" for key, value in WORKFLOWS.items())}

Examples:
  # Generate ChIP-seq QC workflow
  python workflow_generator.py chipseq_qc -o chipseq_qc.sh

  # Generate ChIP-seq analysis with custom parameters
  python workflow_generator.py chipseq_analysis -o analysis.sh \\
      --chip-bam H3K4me3.bam --input-bam Input.bam

  # List all available workflows
  python workflow_generator.py --list
"""
    )

    parser.add_argument('workflow', nargs='?', choices=list(WORKFLOWS.keys()),
                        help='Workflow type to generate')
    parser.add_argument('-o', '--output', default='deeptools_workflow.sh',
                        help='Output script filename (default: deeptools_workflow.sh)')
    parser.add_argument('--list', action='store_true',
                        help='List all available workflows')

    # Common parameters
    parser.add_argument('--threads', type=int, default=8,
                        help='Number of threads (default: 8)')
    parser.add_argument('--genome-size', type=int, default=2913022398,
                        help='Effective genome size (default: 2913022398 for hg38)')
    parser.add_argument('--output-dir', default=None,
                        help='Output directory for results')

    # Workflow-specific parameters
    parser.add_argument('--input-bam', help='Input/control BAM file')
    parser.add_argument('--chip-bam', help='ChIP BAM file')
    parser.add_argument('--chip-bams', help='Multiple ChIP BAM files (space-separated)')
    parser.add_argument('--rnaseq-bam', help='RNA-seq BAM file')
    parser.add_argument('--atac-bam', help='ATAC-seq BAM file')
    parser.add_argument('--genes-bed', help='Genes BED file')
    parser.add_argument('--peaks-bed', help='Peaks BED file')

    args = parser.parse_args()

    # List workflows
    if args.list:
        print("\nAvailable deepTools workflows:\n")
        for key, value in WORKFLOWS.items():
            print(f" {key}")
            print(f" {value['name']}")
            print(f" {value['description']}\n")
        sys.exit(0)

    # Check if workflow was specified
    if not args.workflow:
        parser.print_help()
        sys.exit(1)

    # Prepare parameters; drop unset values so the generators fall back to
    # their own defaults instead of embedding the literal string "None"
    params = {
        'threads': args.threads,
        'genome_size': args.genome_size,
        'output_dir': args.output_dir or f"{args.workflow}_output",
        'input_bam': args.input_bam,
        'chip_bam': args.chip_bam,
        'chip_bams': args.chip_bams,
        'rnaseq_bam': args.rnaseq_bam,
        'atac_bam': args.atac_bam,
        'genes_bed': args.genes_bed,
        'peaks_bed': args.peaks_bed,
    }
    params = {key: value for key, value in params.items() if value is not None}

    # Generate workflow
    if args.workflow == 'chipseq_qc':
        message = generate_chipseq_qc_workflow(args.output, params)
    elif args.workflow == 'chipseq_analysis':
        message = generate_chipseq_analysis_workflow(args.output, params)
    elif args.workflow == 'rnaseq_coverage':
        message = generate_rnaseq_coverage_workflow(args.output, params)
    elif args.workflow == 'atacseq':
        message = generate_atacseq_workflow(args.output, params)

    print(message)
    print(f"\nTo run the workflow:")
    print(f"  chmod +x {args.output}")
    print(f"  ./{args.output}")
    print(f"\nNote: Edit the script to customize file paths and parameters.")


if __name__ == "__main__":
    main()