Initial commit
This commit is contained in:
97
skills/arboreto/scripts/basic_grn_inference.py
Normal file
97
skills/arboreto/scripts/basic_grn_inference.py
Normal file
@@ -0,0 +1,97 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Basic GRN inference example using Arboreto.
|
||||
|
||||
This script demonstrates the standard workflow for inferring gene regulatory
|
||||
networks from expression data using GRNBoost2.
|
||||
|
||||
Usage:
|
||||
python basic_grn_inference.py <expression_file> <output_file> [--tf-file TF_FILE] [--seed SEED]
|
||||
|
||||
Arguments:
|
||||
expression_file: Path to expression matrix (TSV format, genes as columns)
|
||||
output_file: Path for output network (TSV format)
|
||||
--tf-file: Optional path to transcription factors file (one per line)
|
||||
--seed: Random seed for reproducibility (default: 777)
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import pandas as pd
|
||||
from arboreto.algo import grnboost2
|
||||
from arboreto.utils import load_tf_names
|
||||
|
||||
|
||||
def run_grn_inference(expression_file, output_file, tf_file=None, seed=777):
|
||||
"""
|
||||
Run GRN inference using GRNBoost2.
|
||||
|
||||
Args:
|
||||
expression_file: Path to expression matrix TSV file
|
||||
output_file: Path for output network file
|
||||
tf_file: Optional path to TF names file
|
||||
seed: Random seed for reproducibility
|
||||
"""
|
||||
print(f"Loading expression data from {expression_file}...")
|
||||
expression_data = pd.read_csv(expression_file, sep='\t')
|
||||
|
||||
print(f"Expression matrix shape: {expression_data.shape}")
|
||||
print(f"Number of genes: {expression_data.shape[1]}")
|
||||
print(f"Number of observations: {expression_data.shape[0]}")
|
||||
|
||||
# Load TF names if provided
|
||||
tf_names = 'all'
|
||||
if tf_file:
|
||||
print(f"Loading transcription factors from {tf_file}...")
|
||||
tf_names = load_tf_names(tf_file)
|
||||
print(f"Number of TFs: {len(tf_names)}")
|
||||
|
||||
# Run GRN inference
|
||||
print(f"Running GRNBoost2 with seed={seed}...")
|
||||
network = grnboost2(
|
||||
expression_data=expression_data,
|
||||
tf_names=tf_names,
|
||||
seed=seed,
|
||||
verbose=True
|
||||
)
|
||||
|
||||
# Save results
|
||||
print(f"Saving network to {output_file}...")
|
||||
network.to_csv(output_file, sep='\t', index=False, header=False)
|
||||
|
||||
print(f"Done! Network contains {len(network)} regulatory links.")
|
||||
print(f"\nTop 10 regulatory links:")
|
||||
print(network.head(10).to_string(index=False))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
parser = argparse.ArgumentParser(
|
||||
description='Infer gene regulatory network using GRNBoost2'
|
||||
)
|
||||
parser.add_argument(
|
||||
'expression_file',
|
||||
help='Path to expression matrix (TSV format, genes as columns)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'output_file',
|
||||
help='Path for output network (TSV format)'
|
||||
)
|
||||
parser.add_argument(
|
||||
'--tf-file',
|
||||
help='Path to transcription factors file (one per line)',
|
||||
default=None
|
||||
)
|
||||
parser.add_argument(
|
||||
'--seed',
|
||||
help='Random seed for reproducibility (default: 777)',
|
||||
type=int,
|
||||
default=777
|
||||
)
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
run_grn_inference(
|
||||
expression_file=args.expression_file,
|
||||
output_file=args.output_file,
|
||||
tf_file=args.tf_file,
|
||||
seed=args.seed
|
||||
)
|
||||
Reference in New Issue
Block a user