Initial commit
This commit is contained in:
489
skills/astropy/references/tables.md
Normal file
489
skills/astropy/references/tables.md
Normal file
@@ -0,0 +1,489 @@
|
||||
# Table Operations (astropy.table)
|
||||
|
||||
The `astropy.table` module provides flexible tools for working with tabular data, with support for units, masked values, and various file formats.
|
||||
|
||||
## Creating Tables
|
||||
|
||||
### Basic Table Creation
|
||||
|
||||
```python
|
||||
from astropy.table import Table, QTable
|
||||
import astropy.units as u
|
||||
import numpy as np
|
||||
|
||||
# From column arrays
|
||||
a = [1, 4, 5]
|
||||
b = [2.0, 5.0, 8.2]
|
||||
c = ['x', 'y', 'z']
|
||||
|
||||
t = Table([a, b, c], names=('id', 'flux', 'name'))
|
||||
|
||||
# With units (use QTable)
|
||||
flux = [1.2, 2.3, 3.4] * u.Jy
|
||||
wavelength = [500, 600, 700] * u.nm
|
||||
t = QTable([flux, wavelength], names=('flux', 'wavelength'))
|
||||
```
|
||||
|
||||
### From Lists of Rows
|
||||
|
||||
```python
|
||||
# List of tuples
|
||||
rows = [(1, 10.5, 'A'), (2, 11.2, 'B'), (3, 12.3, 'C')]
|
||||
t = Table(rows=rows, names=('id', 'value', 'name'))
|
||||
|
||||
# List of dictionaries
|
||||
rows = [{'id': 1, 'value': 10.5}, {'id': 2, 'value': 11.2}]
|
||||
t = Table(rows)
|
||||
```
|
||||
|
||||
### From NumPy Arrays
|
||||
|
||||
```python
|
||||
# Structured array
|
||||
arr = np.array([(1, 2.0, 'x'), (4, 5.0, 'y')],
|
||||
dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'U10')])
|
||||
t = Table(arr)
|
||||
|
||||
# 2D array with column names
|
||||
data = np.random.random((100, 3))
|
||||
t = Table(data, names=['col1', 'col2', 'col3'])
|
||||
```
|
||||
|
||||
### From Pandas DataFrame
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
|
||||
df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
|
||||
t = Table.from_pandas(df)
|
||||
```
|
||||
|
||||
## Accessing Table Data
|
||||
|
||||
### Basic Access
|
||||
|
||||
```python
|
||||
# Column access
|
||||
ra_col = t['ra'] # Returns Column object
|
||||
dec_col = t['dec']
|
||||
|
||||
# Row access
|
||||
first_row = t[0] # Returns Row object
|
||||
row_slice = t[10:20] # Returns new Table
|
||||
|
||||
# Cell access
|
||||
value = t['ra'][5] # 6th value in 'ra' column
|
||||
value = t[5]['ra'] # Same thing
|
||||
|
||||
# Multiple columns
|
||||
subset = t['ra', 'dec', 'mag']
|
||||
```
|
||||
|
||||
### Table Properties
|
||||
|
||||
```python
|
||||
len(t) # Number of rows
|
||||
t.colnames # List of column names
|
||||
t.dtype # Column data types
|
||||
t.info # Detailed information
|
||||
t.meta # Metadata dictionary
|
||||
```
|
||||
|
||||
### Iteration
|
||||
|
||||
```python
|
||||
# Iterate over rows
|
||||
for row in t:
|
||||
print(row['ra'], row['dec'])
|
||||
|
||||
# Iterate over columns
|
||||
for colname in t.colnames:
|
||||
print(t[colname])
|
||||
```
|
||||
|
||||
## Modifying Tables
|
||||
|
||||
### Adding Columns
|
||||
|
||||
```python
|
||||
# Add new column
|
||||
t['new_col'] = [1, 2, 3, 4, 5]
|
||||
t['calc'] = t['a'] + t['b'] # Calculated column
|
||||
|
||||
# Add column with units
|
||||
t['velocity'] = [10, 20, 30] * u.km / u.s
|
||||
|
||||
# Add empty column
|
||||
from astropy.table import Column
|
||||
t['empty'] = Column(length=len(t), dtype=float)
|
||||
|
||||
# Insert at specific position
|
||||
t.add_column([7, 8, 9], name='inserted', index=2)
|
||||
```
|
||||
|
||||
### Removing Columns
|
||||
|
||||
```python
|
||||
# Remove single column
|
||||
t.remove_column('old_col')
|
||||
|
||||
# Remove multiple columns
|
||||
t.remove_columns(['col1', 'col2'])
|
||||
|
||||
# Delete syntax
|
||||
del t['col_name']
|
||||
|
||||
# Keep only specific columns
|
||||
t.keep_columns(['ra', 'dec', 'mag'])
|
||||
```
|
||||
|
||||
### Renaming Columns
|
||||
|
||||
```python
|
||||
t.rename_column('old_name', 'new_name')
|
||||
|
||||
# Rename multiple
|
||||
t.rename_columns(['old1', 'old2'], ['new1', 'new2'])
|
||||
```
|
||||
|
||||
### Adding Rows
|
||||
|
||||
```python
|
||||
# Add single row
|
||||
t.add_row([1, 2.5, 'new'])
|
||||
|
||||
# Add row as dict
|
||||
t.add_row({'ra': 10.5, 'dec': 41.2, 'mag': 18.5})
|
||||
|
||||
# Note: Adding rows one at a time is slow!
|
||||
# Better to collect rows and create table at once
|
||||
```
|
||||
|
||||
### Modifying Data
|
||||
|
||||
```python
|
||||
# Modify column values
|
||||
t['flux'] = t['flux'] * gain
|
||||
t['mag'][t['mag'] < 0] = np.nan
|
||||
|
||||
# Modify single cell
|
||||
t['ra'][5] = 10.5
|
||||
|
||||
# Modify entire row
|
||||
t[0] = [new_id, new_ra, new_dec]
|
||||
```
|
||||
|
||||
## Sorting and Filtering
|
||||
|
||||
### Sorting
|
||||
|
||||
```python
|
||||
# Sort by single column
|
||||
t.sort('mag')
|
||||
|
||||
# Sort descending
|
||||
t.sort('mag', reverse=True)
|
||||
|
||||
# Sort by multiple columns
|
||||
t.sort(['priority', 'mag'])
|
||||
|
||||
# Get sorted indices without modifying table
|
||||
indices = t.argsort('mag')
|
||||
sorted_table = t[indices]
|
||||
```
|
||||
|
||||
### Filtering
|
||||
|
||||
```python
|
||||
# Boolean indexing
|
||||
bright = t[t['mag'] < 18]
|
||||
nearby = t[t['distance'] < 100*u.pc]
|
||||
|
||||
# Multiple conditions
|
||||
selected = t[(t['mag'] < 18) & (t['dec'] > 0)]
|
||||
|
||||
# Using numpy functions
|
||||
high_snr = t[np.abs(t['flux'] / t['error']) > 5]
|
||||
```
|
||||
|
||||
## Reading and Writing Files
|
||||
|
||||
### Supported Formats
|
||||
|
||||
FITS, HDF5, ASCII (CSV, ECSV, IPAC, etc.), VOTable, Parquet, ASDF
|
||||
|
||||
### Reading Files
|
||||
|
||||
```python
|
||||
# Automatic format detection
|
||||
t = Table.read('catalog.fits')
|
||||
t = Table.read('data.csv')
|
||||
t = Table.read('table.vot')
|
||||
|
||||
# Specify format explicitly
|
||||
t = Table.read('data.txt', format='ascii')
|
||||
t = Table.read('catalog.hdf5', path='/dataset/table')
|
||||
|
||||
# Read specific HDU from FITS
|
||||
t = Table.read('file.fits', hdu=2)
|
||||
```
|
||||
|
||||
### Writing Files
|
||||
|
||||
```python
|
||||
# Automatic format from extension
|
||||
t.write('output.fits')
|
||||
t.write('output.csv')
|
||||
|
||||
# Specify format
|
||||
t.write('output.txt', format='ascii.csv')
|
||||
t.write('output.hdf5', path='/data/table', serialize_meta=True)
|
||||
|
||||
# Overwrite existing file
|
||||
t.write('output.fits', overwrite=True)
|
||||
```
|
||||
|
||||
### ASCII Format Options
|
||||
|
||||
```python
|
||||
# CSV with custom delimiter
|
||||
t.write('output.csv', format='ascii.csv', delimiter='|')
|
||||
|
||||
# Fixed-width format
|
||||
t.write('output.txt', format='ascii.fixed_width')
|
||||
|
||||
# IPAC format
|
||||
t.write('output.tbl', format='ascii.ipac')
|
||||
|
||||
# LaTeX table
|
||||
t.write('table.tex', format='ascii.latex')
|
||||
```
|
||||
|
||||
## Table Operations
|
||||
|
||||
### Stacking Tables (Vertical)
|
||||
|
||||
```python
|
||||
from astropy.table import vstack
|
||||
|
||||
# Concatenate tables vertically
|
||||
t1 = Table([[1, 2], [3, 4]], names=('a', 'b'))
|
||||
t2 = Table([[5, 6], [7, 8]], names=('a', 'b'))
|
||||
t_combined = vstack([t1, t2])
|
||||
```
|
||||
|
||||
### Joining Tables (Horizontal)
|
||||
|
||||
```python
|
||||
from astropy.table import hstack
|
||||
|
||||
# Concatenate tables horizontally
|
||||
t1 = Table([[1, 2]], names=['a'])
|
||||
t2 = Table([[3, 4]], names=['b'])
|
||||
t_combined = hstack([t1, t2])
|
||||
```
|
||||
|
||||
### Database-Style Joins
|
||||
|
||||
```python
|
||||
from astropy.table import join
|
||||
|
||||
# Inner join on common column
|
||||
t1 = Table([[1, 2, 3], ['a', 'b', 'c']], names=('id', 'data1'))
|
||||
t2 = Table([[1, 2, 4], ['x', 'y', 'z']], names=('id', 'data2'))
|
||||
t_joined = join(t1, t2, keys='id')
|
||||
|
||||
# Left/right/outer joins
|
||||
t_joined = join(t1, t2, join_type='left')
|
||||
t_joined = join(t1, t2, join_type='outer')
|
||||
```
|
||||
|
||||
### Grouping and Aggregating
|
||||
|
||||
```python
|
||||
# Group by column
|
||||
g = t.group_by('filter')
|
||||
|
||||
# Aggregate groups
|
||||
means = g.groups.aggregate(np.mean)
|
||||
|
||||
# Iterate over groups
|
||||
for group in g.groups:
|
||||
print(f"Filter: {group['filter'][0]}")
|
||||
print(f"Mean mag: {np.mean(group['mag'])}")
|
||||
```
|
||||
|
||||
### Unique Rows
|
||||
|
||||
```python
|
||||
# Get unique rows
|
||||
t_unique = t.unique('id')
|
||||
|
||||
# Multiple columns
|
||||
t_unique = t.unique(['ra', 'dec'])
|
||||
```
|
||||
|
||||
## Units and Quantities
|
||||
|
||||
Use QTable for unit-aware operations:
|
||||
|
||||
```python
|
||||
from astropy.table import QTable
|
||||
|
||||
# Create table with units
|
||||
t = QTable()
|
||||
t['flux'] = [1.2, 2.3, 3.4] * u.Jy
|
||||
t['wavelength'] = [500, 600, 700] * u.nm
|
||||
|
||||
# Unit conversions
|
||||
t['flux'].to(u.mJy)
|
||||
t['wavelength'].to(u.angstrom)
|
||||
|
||||
# Calculations preserve units
|
||||
t['freq'] = t['wavelength'].to(u.Hz, equivalencies=u.spectral())
|
||||
```
|
||||
|
||||
## Masking Missing Data
|
||||
|
||||
```python
|
||||
from astropy.table import MaskedColumn
|
||||
import numpy as np
|
||||
|
||||
# Create masked column
|
||||
flux = MaskedColumn([1.2, np.nan, 3.4], mask=[False, True, False])
|
||||
t = Table([flux], names=['flux'])
|
||||
|
||||
# Operations automatically handle masks
|
||||
mean_flux = np.ma.mean(t['flux'])
|
||||
|
||||
# Fill masked values
|
||||
t['flux'].filled(0) # Replace masked with 0
|
||||
```
|
||||
|
||||
## Indexing for Fast Lookup
|
||||
|
||||
Create indices for fast row retrieval:
|
||||
|
||||
```python
|
||||
# Add index on column
|
||||
t.add_index('id')
|
||||
|
||||
# Fast lookup by index
|
||||
row = t.loc[12345] # Find row where id=12345
|
||||
|
||||
# Range queries
|
||||
subset = t.loc[100:200]
|
||||
```
|
||||
|
||||
## Table Metadata
|
||||
|
||||
```python
|
||||
# Set table-level metadata
|
||||
t.meta['TELESCOPE'] = 'HST'
|
||||
t.meta['FILTER'] = 'F814W'
|
||||
t.meta['EXPTIME'] = 300.0
|
||||
|
||||
# Set column-level metadata
|
||||
t['ra'].meta['unit'] = 'deg'
|
||||
t['ra'].meta['description'] = 'Right Ascension'
|
||||
t['ra'].description = 'Right Ascension' # Shortcut
|
||||
```
|
||||
|
||||
## Performance Tips
|
||||
|
||||
### Fast Table Construction
|
||||
|
||||
```python
|
||||
# SLOW: Adding rows one at a time
|
||||
t = Table(names=['a', 'b'])
|
||||
for i in range(1000):
|
||||
t.add_row([i, i**2])
|
||||
|
||||
# FAST: Build from lists
|
||||
rows = [(i, i**2) for i in range(1000)]
|
||||
t = Table(rows=rows, names=['a', 'b'])
|
||||
```
|
||||
|
||||
### Memory-Mapped FITS Tables
|
||||
|
||||
```python
|
||||
# Don't load entire table into memory
|
||||
t = Table.read('huge_catalog.fits', memmap=True)
|
||||
|
||||
# Only loads data when accessed
|
||||
subset = t[10000:10100] # Efficient
|
||||
```
|
||||
|
||||
### Copy vs. View
|
||||
|
||||
```python
|
||||
# Create view (shares data, fast)
|
||||
t_view = t['ra', 'dec']
|
||||
|
||||
# Create copy (independent data)
|
||||
t_copy = t['ra', 'dec'].copy()
|
||||
```
|
||||
|
||||
## Displaying Tables
|
||||
|
||||
```python
|
||||
# Print to console
|
||||
print(t)
|
||||
|
||||
# Show in interactive browser
|
||||
t.show_in_browser()
|
||||
t.show_in_browser(jsviewer=True) # Interactive sorting/filtering
|
||||
|
||||
# Paginated viewing
|
||||
t.more()
|
||||
|
||||
# Custom formatting
|
||||
t['flux'].format = '%.3f'
|
||||
t['ra'].format = '{:.6f}'
|
||||
```
|
||||
|
||||
## Converting to Other Formats
|
||||
|
||||
```python
|
||||
# To NumPy array
|
||||
arr = np.array(t)
|
||||
|
||||
# To Pandas DataFrame
|
||||
df = t.to_pandas()
|
||||
|
||||
# To dictionary
|
||||
d = {name: t[name] for name in t.colnames}
|
||||
```
|
||||
|
||||
## Common Use Cases
|
||||
|
||||
### Cross-Matching Catalogs
|
||||
|
||||
```python
|
||||
from astropy.coordinates import SkyCoord, match_coordinates_sky
|
||||
|
||||
# Create coordinate objects from table columns
|
||||
coords1 = SkyCoord(t1['ra'], t1['dec'], unit='deg')
|
||||
coords2 = SkyCoord(t2['ra'], t2['dec'], unit='deg')
|
||||
|
||||
# Find matches
|
||||
idx, sep, _ = coords1.match_to_catalog_sky(coords2)
|
||||
|
||||
# Filter by separation
|
||||
max_sep = 1 * u.arcsec
|
||||
matches = sep < max_sep
|
||||
t1_matched = t1[matches]
|
||||
t2_matched = t2[idx[matches]]
|
||||
```
|
||||
|
||||
### Binning Data
|
||||
|
||||
```python
|
||||
from astropy.table import Table
|
||||
import numpy as np
|
||||
|
||||
# Bin by magnitude
|
||||
mag_bins = np.arange(10, 20, 0.5)
|
||||
binned = t.group_by(np.digitize(t['mag'], mag_bins))
|
||||
counts = binned.groups.aggregate(len)
|
||||
```
|
||||
Reference in New Issue
Block a user