9.2 KiB
9.2 KiB
Table Operations (astropy.table)
The astropy.table module provides flexible tools for working with tabular data, with support for units, masked values, and various file formats.
Creating Tables
Basic Table Creation
from astropy.table import Table, QTable
import astropy.units as u
import numpy as np
# From column arrays
a = [1, 4, 5]
b = [2.0, 5.0, 8.2]
c = ['x', 'y', 'z']
t = Table([a, b, c], names=('id', 'flux', 'name'))
# With units (use QTable)
flux = [1.2, 2.3, 3.4] * u.Jy
wavelength = [500, 600, 700] * u.nm
t = QTable([flux, wavelength], names=('flux', 'wavelength'))
From Lists of Rows
# List of tuples
rows = [(1, 10.5, 'A'), (2, 11.2, 'B'), (3, 12.3, 'C')]
t = Table(rows=rows, names=('id', 'value', 'name'))
# List of dictionaries
rows = [{'id': 1, 'value': 10.5}, {'id': 2, 'value': 11.2}]
t = Table(rows)
From NumPy Arrays
# Structured array
arr = np.array([(1, 2.0, 'x'), (4, 5.0, 'y')],
dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'U10')])
t = Table(arr)
# 2D array with column names
data = np.random.random((100, 3))
t = Table(data, names=['col1', 'col2', 'col3'])
From Pandas DataFrame
import pandas as pd
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
t = Table.from_pandas(df)
Accessing Table Data
Basic Access
# Column access
ra_col = t['ra'] # Returns Column object
dec_col = t['dec']
# Row access
first_row = t[0] # Returns Row object
row_slice = t[10:20] # Returns new Table
# Cell access
value = t['ra'][5] # 6th value in 'ra' column
value = t[5]['ra'] # Same thing
# Multiple columns
subset = t['ra', 'dec', 'mag']
Table Properties
len(t) # Number of rows
t.colnames # List of column names
t.dtype # Column data types
t.info # Detailed information
t.meta # Metadata dictionary
Iteration
# Iterate over rows
for row in t:
print(row['ra'], row['dec'])
# Iterate over columns
for colname in t.colnames:
print(t[colname])
Modifying Tables
Adding Columns
# Add new column
t['new_col'] = [1, 2, 3, 4, 5]
t['calc'] = t['a'] + t['b'] # Calculated column
# Add column with units
t['velocity'] = [10, 20, 30] * u.km / u.s
# Add empty column
from astropy.table import Column
t['empty'] = Column(length=len(t), dtype=float)
# Insert at specific position
t.add_column([7, 8, 9], name='inserted', index=2)
Removing Columns
# Remove single column
t.remove_column('old_col')
# Remove multiple columns
t.remove_columns(['col1', 'col2'])
# Delete syntax
del t['col_name']
# Keep only specific columns
t.keep_columns(['ra', 'dec', 'mag'])
Renaming Columns
t.rename_column('old_name', 'new_name')
# Rename multiple
t.rename_columns(['old1', 'old2'], ['new1', 'new2'])
Adding Rows
# Add single row
t.add_row([1, 2.5, 'new'])
# Add row as dict
t.add_row({'ra': 10.5, 'dec': 41.2, 'mag': 18.5})
# Note: Adding rows one at a time is slow!
# Better to collect rows and create table at once
Modifying Data
# Modify column values
t['flux'] = t['flux'] * gain
t['mag'][t['mag'] < 0] = np.nan
# Modify single cell
t['ra'][5] = 10.5
# Modify entire row
t[0] = [new_id, new_ra, new_dec]
Sorting and Filtering
Sorting
# Sort by single column
t.sort('mag')
# Sort descending
t.sort('mag', reverse=True)
# Sort by multiple columns
t.sort(['priority', 'mag'])
# Get sorted indices without modifying table
indices = t.argsort('mag')
sorted_table = t[indices]
Filtering
# Boolean indexing
bright = t[t['mag'] < 18]
nearby = t[t['distance'] < 100*u.pc]
# Multiple conditions
selected = t[(t['mag'] < 18) & (t['dec'] > 0)]
# Using numpy functions
high_snr = t[np.abs(t['flux'] / t['error']) > 5]
Reading and Writing Files
Supported Formats
FITS, HDF5, ASCII (CSV, ECSV, IPAC, etc.), VOTable, Parquet, ASDF
Reading Files
# Automatic format detection
t = Table.read('catalog.fits')
t = Table.read('data.csv')
t = Table.read('table.vot')
# Specify format explicitly
t = Table.read('data.txt', format='ascii')
t = Table.read('catalog.hdf5', path='/dataset/table')
# Read specific HDU from FITS
t = Table.read('file.fits', hdu=2)
Writing Files
# Automatic format from extension
t.write('output.fits')
t.write('output.csv')
# Specify format
t.write('output.txt', format='ascii.csv')
t.write('output.hdf5', path='/data/table', serialize_meta=True)
# Overwrite existing file
t.write('output.fits', overwrite=True)
ASCII Format Options
# CSV with custom delimiter
t.write('output.csv', format='ascii.csv', delimiter='|')
# Fixed-width format
t.write('output.txt', format='ascii.fixed_width')
# IPAC format
t.write('output.tbl', format='ascii.ipac')
# LaTeX table
t.write('table.tex', format='ascii.latex')
Table Operations
Stacking Tables (Vertical)
from astropy.table import vstack
# Concatenate tables vertically
t1 = Table([[1, 2], [3, 4]], names=('a', 'b'))
t2 = Table([[5, 6], [7, 8]], names=('a', 'b'))
t_combined = vstack([t1, t2])
Joining Tables (Horizontal)
from astropy.table import hstack
# Concatenate tables horizontally
t1 = Table([[1, 2]], names=['a'])
t2 = Table([[3, 4]], names=['b'])
t_combined = hstack([t1, t2])
Database-Style Joins
from astropy.table import join
# Inner join on common column
t1 = Table([[1, 2, 3], ['a', 'b', 'c']], names=('id', 'data1'))
t2 = Table([[1, 2, 4], ['x', 'y', 'z']], names=('id', 'data2'))
t_joined = join(t1, t2, keys='id')
# Left/right/outer joins
t_joined = join(t1, t2, join_type='left')
t_joined = join(t1, t2, join_type='outer')
Grouping and Aggregating
# Group by column
g = t.group_by('filter')
# Aggregate groups
means = g.groups.aggregate(np.mean)
# Iterate over groups
for group in g.groups:
print(f"Filter: {group['filter'][0]}")
print(f"Mean mag: {np.mean(group['mag'])}")
Unique Rows
# Get unique rows
t_unique = t.unique('id')
# Multiple columns
t_unique = t.unique(['ra', 'dec'])
Units and Quantities
Use QTable for unit-aware operations:
from astropy.table import QTable
# Create table with units
t = QTable()
t['flux'] = [1.2, 2.3, 3.4] * u.Jy
t['wavelength'] = [500, 600, 700] * u.nm
# Unit conversions
t['flux'].to(u.mJy)
t['wavelength'].to(u.angstrom)
# Calculations preserve units
t['freq'] = t['wavelength'].to(u.Hz, equivalencies=u.spectral())
Masking Missing Data
from astropy.table import MaskedColumn
import numpy as np
# Create masked column
flux = MaskedColumn([1.2, np.nan, 3.4], mask=[False, True, False])
t = Table([flux], names=['flux'])
# Operations automatically handle masks
mean_flux = np.ma.mean(t['flux'])
# Fill masked values
t['flux'].filled(0) # Replace masked with 0
Indexing for Fast Lookup
Create indices for fast row retrieval:
# Add index on column
t.add_index('id')
# Fast lookup by index
row = t.loc[12345] # Find row where id=12345
# Range queries
subset = t.loc[100:200]
Table Metadata
# Set table-level metadata
t.meta['TELESCOPE'] = 'HST'
t.meta['FILTER'] = 'F814W'
t.meta['EXPTIME'] = 300.0
# Set column-level metadata
t['ra'].meta['unit'] = 'deg'
t['ra'].meta['description'] = 'Right Ascension'
t['ra'].description = 'Right Ascension' # Shortcut
Performance Tips
Fast Table Construction
# SLOW: Adding rows one at a time
t = Table(names=['a', 'b'])
for i in range(1000):
t.add_row([i, i**2])
# FAST: Build from lists
rows = [(i, i**2) for i in range(1000)]
t = Table(rows=rows, names=['a', 'b'])
Memory-Mapped FITS Tables
# Don't load entire table into memory
t = Table.read('huge_catalog.fits', memmap=True)
# Only loads data when accessed
subset = t[10000:10100] # Efficient
Copy vs. View
# Create view (shares data, fast)
t_view = t['ra', 'dec']
# Create copy (independent data)
t_copy = t['ra', 'dec'].copy()
Displaying Tables
# Print to console
print(t)
# Show in interactive browser
t.show_in_browser()
t.show_in_browser(jsviewer=True) # Interactive sorting/filtering
# Paginated viewing
t.more()
# Custom formatting
t['flux'].format = '%.3f'
t['ra'].format = '{:.6f}'
Converting to Other Formats
# To NumPy array
arr = np.array(t)
# To Pandas DataFrame
df = t.to_pandas()
# To dictionary
d = {name: t[name] for name in t.colnames}
Common Use Cases
Cross-Matching Catalogs
from astropy.coordinates import SkyCoord, match_coordinates_sky
# Create coordinate objects from table columns
coords1 = SkyCoord(t1['ra'], t1['dec'], unit='deg')
coords2 = SkyCoord(t2['ra'], t2['dec'], unit='deg')
# Find matches
idx, sep, _ = coords1.match_to_catalog_sky(coords2)
# Filter by separation
max_sep = 1 * u.arcsec
matches = sep < max_sep
t1_matched = t1[matches]
t2_matched = t2[idx[matches]]
Binning Data
from astropy.table import Table
import numpy as np
# Bin by magnitude
mag_bins = np.arange(10, 20, 0.5)
binned = t.group_by(np.digitize(t['mag'], mag_bins))
counts = binned.groups.aggregate(len)