9.2 KiB
9.2 KiB
NetworkX Input/Output
Reading Graphs from Files
Adjacency List Format
# Read adjacency list (simple text format)
G = nx.read_adjlist('graph.adjlist')
# With node type conversion
G = nx.read_adjlist('graph.adjlist', nodetype=int)
# For directed graphs
G = nx.read_adjlist('graph.adjlist', create_using=nx.DiGraph())
# Write adjacency list
nx.write_adjlist(G, 'graph.adjlist')
Example adjacency list format:
# node neighbors
0 1 2
1 0 3 4
2 0 3
3 1 2 4
4 1 3
Edge List Format
# Read edge list
G = nx.read_edgelist('graph.edgelist')
# With node types and edge data
G = nx.read_edgelist('graph.edgelist',
nodetype=int,
data=(('weight', float),))
# Read weighted edge list
G = nx.read_weighted_edgelist('weighted.edgelist')
# Write edge list
nx.write_edgelist(G, 'graph.edgelist')
# Write weighted edge list
nx.write_weighted_edgelist(G, 'weighted.edgelist')
Example edge list format:
# source target
0 1
1 2
2 3
3 0
Example weighted edge list:
# source target weight
0 1 0.5
1 2 1.0
2 3 0.75
GML (Graph Modelling Language)
# Read GML (preserves all attributes)
G = nx.read_gml('graph.gml')
# Write GML
nx.write_gml(G, 'graph.gml')
GraphML Format
# Read GraphML (XML-based format)
G = nx.read_graphml('graph.graphml')
# Write GraphML
nx.write_graphml(G, 'graph.graphml')
# With specific encoding
nx.write_graphml(G, 'graph.graphml', encoding='utf-8')
GEXF (Graph Exchange XML Format)
# Read GEXF
G = nx.read_gexf('graph.gexf')
# Write GEXF
nx.write_gexf(G, 'graph.gexf')
Pajek Format
# Read Pajek .net files
G = nx.read_pajek('graph.net')
# Write Pajek format
nx.write_pajek(G, 'graph.net')
LEDA Format
# Read LEDA format
G = nx.read_leda('graph.leda')
# Writing LEDA is not supported: NetworkX provides only read_leda() and
# parse_leda(). To export, use another format such as GML or GraphML.
Working with Pandas
From Pandas DataFrame
import pandas as pd
# Create graph from edge list DataFrame
df = pd.DataFrame({
'source': [1, 2, 3, 4],
'target': [2, 3, 4, 1],
'weight': [0.5, 1.0, 0.75, 0.25]
})
# Create graph
G = nx.from_pandas_edgelist(df,
source='source',
target='target',
edge_attr='weight')
# With multiple edge attributes (df must also contain 'color' and 'type' columns)
G = nx.from_pandas_edgelist(df,
source='source',
target='target',
edge_attr=['weight', 'color', 'type'])
# Create directed graph
G = nx.from_pandas_edgelist(df,
source='source',
target='target',
create_using=nx.DiGraph())
To Pandas DataFrame
# Convert graph to edge list DataFrame
df = nx.to_pandas_edgelist(G)
# With custom names for the source and target columns
df = nx.to_pandas_edgelist(G, source='node1', target='node2')
Adjacency Matrix with Pandas
# Create DataFrame from adjacency matrix
df = nx.to_pandas_adjacency(G, dtype=int)
# Create graph from adjacency DataFrame
G = nx.from_pandas_adjacency(df)
# For directed graphs
G = nx.from_pandas_adjacency(df, create_using=nx.DiGraph())
NumPy and SciPy Integration
Adjacency Matrix
import numpy as np
# To NumPy adjacency matrix
A = nx.to_numpy_array(G, dtype=int)
# With specific node order
nodelist = [1, 2, 3, 4, 5]
A = nx.to_numpy_array(G, nodelist=nodelist)
# From NumPy array
G = nx.from_numpy_array(A)
# For directed graphs
G = nx.from_numpy_array(A, create_using=nx.DiGraph())
Sparse Matrix (SciPy)
from scipy import sparse
# To sparse matrix
A = nx.to_scipy_sparse_array(G)
# With specific format (csr, csc, coo, etc.)
A_csr = nx.to_scipy_sparse_array(G, format='csr')
# From sparse matrix
G = nx.from_scipy_sparse_array(A)
JSON Format
Node-Link Format
import json
# To node-link format (good for d3.js)
data = nx.node_link_data(G)
with open('graph.json', 'w') as f:
json.dump(data, f)
# From node-link format
with open('graph.json', 'r') as f:
data = json.load(f)
G = nx.node_link_graph(data)
Adjacency Data Format
# To adjacency format
data = nx.adjacency_data(G)
with open('graph.json', 'w') as f:
json.dump(data, f)
# From adjacency format
with open('graph.json', 'r') as f:
data = json.load(f)
G = nx.adjacency_graph(data)
Tree Data Format
# For tree graphs
data = nx.tree_data(G, root=0)
with open('tree.json', 'w') as f:
json.dump(data, f)
# From tree format
with open('tree.json', 'r') as f:
data = json.load(f)
G = nx.tree_graph(data)
Pickle Format
Binary Pickle
import pickle
# Write pickle (preserves all Python objects)
with open('graph.pkl', 'wb') as f:
pickle.dump(G, f)
# Read pickle
with open('graph.pkl', 'rb') as f:
G = pickle.load(f)
# NetworkX convenience functions (NetworkX < 3.0 only; removed in 3.0, so use the pickle module directly on 3.x)
nx.write_gpickle(G, 'graph.gpickle')
G = nx.read_gpickle('graph.gpickle')
CSV Files
Custom CSV Reading
import csv
# Read edges from CSV
G = nx.Graph()
with open('edges.csv', 'r') as f:
reader = csv.DictReader(f)
for row in reader:
G.add_edge(row['source'], row['target'], weight=float(row['weight']))
# Write edges to CSV
with open('edges.csv', 'w', newline='') as f:
writer = csv.writer(f)
writer.writerow(['source', 'target', 'weight'])
for u, v, data in G.edges(data=True):
writer.writerow([u, v, data.get('weight', 1.0)])
Database Integration
SQL Databases
import sqlite3
import pandas as pd
# Read from SQL database via pandas
conn = sqlite3.connect('network.db')
df = pd.read_sql_query("SELECT source, target, weight FROM edges", conn)
G = nx.from_pandas_edgelist(df, 'source', 'target', edge_attr='weight')
conn.close()
# Write to SQL database
df = nx.to_pandas_edgelist(G)
conn = sqlite3.connect('network.db')
df.to_sql('edges', conn, if_exists='replace', index=False)
conn.close()
Graph Formats for Visualization
DOT Format (Graphviz)
# Write DOT file for Graphviz
nx.drawing.nx_pydot.write_dot(G, 'graph.dot')
# Read DOT file
G = nx.drawing.nx_pydot.read_dot('graph.dot')
# Generate directly to image (requires Graphviz)
from networkx.drawing.nx_pydot import to_pydot
pydot_graph = to_pydot(G)
pydot_graph.write_png('graph.png')
Cytoscape Integration
Cytoscape JSON
# Export for Cytoscape
data = nx.cytoscape_data(G)
with open('cytoscape.json', 'w') as f:
json.dump(data, f)
# Import from Cytoscape
with open('cytoscape.json', 'r') as f:
data = json.load(f)
G = nx.cytoscape_graph(data)
Specialized Formats
Matrix Market Format
from scipy.io import mmread, mmwrite
# Read Matrix Market
A = mmread('graph.mtx')
G = nx.from_scipy_sparse_array(A)
# Write Matrix Market
A = nx.to_scipy_sparse_array(G)
mmwrite('graph.mtx', A)
Shapefile (for Geographic Networks)
# Requires the GDAL/OGR Python bindings (osgeo); read_shp/write_shp exist only in NetworkX < 3.0 (removed in 3.0)
# Read geographic network from shapefile
G = nx.read_shp('roads.shp')
# Write to shapefile
nx.write_shp(G, 'network')
Format Selection Guidelines
Choose Based on Requirements
Adjacency List - Simple, human-readable, no attributes
- Best for: Simple unweighted graphs, quick viewing
Edge List - Simple, supports weights, human-readable
- Best for: Weighted graphs, importing/exporting data
GML/GraphML - Full attribute preservation (GraphML is XML-based; GML is a plain-text format)
- Best for: Complete graph serialization with all metadata
JSON - Web-friendly, JavaScript integration
- Best for: Web applications, d3.js visualizations
Pickle - Fast, preserves Python objects, binary
- Best for: Python-only storage, complex attributes
Pandas - Data analysis integration, DataFrame operations
- Best for: Data processing pipelines, statistical analysis
NumPy/SciPy - Numerical computation, sparse matrices
- Best for: Matrix operations, scientific computing
DOT - Visualization, Graphviz integration
- Best for: Creating visual diagrams
Performance Considerations
Large Graphs
For large graphs, consider:
# Use compressed formats
import gzip
with gzip.open('graph.adjlist.gz', 'wt') as f:
nx.write_adjlist(G, f)
with gzip.open('graph.adjlist.gz', 'rt') as f:
G = nx.read_adjlist(f)
# Use binary formats (faster); write_gpickle exists only in NetworkX < 3.0, so on 3.x use pickle.dump(G, f) instead
nx.write_gpickle(G, 'graph.gpickle') # Faster than text formats
# Use sparse matrices for adjacency
A = nx.to_scipy_sparse_array(G, format='csr') # Memory efficient
Incremental Loading
For very large graphs:
# Load graph incrementally from edge list
G = nx.Graph()
with open('huge_graph.edgelist') as f:
for line in f:
u, v = line.strip().split()
G.add_edge(u, v)
# Report progress every 100,000 edges
if G.number_of_edges() % 100000 == 0:
print(f"Loaded {G.number_of_edges()} edges")
Error Handling
Robust File Reading
try:
G = nx.read_graphml('graph.graphml')
except nx.NetworkXError as e:
print(f"Error reading GraphML: {e}")
except FileNotFoundError:
print("File not found")
G = nx.Graph()
# Check that the file exists, then peek at its first line to detect the format (needs `import os`)
if os.path.exists('graph.txt'):
with open('graph.txt') as f:
first_line = f.readline()
# Detect format and read accordingly