zhongwei/gh-k-dense-ai-claude-scientific-skills-scientific-skills

Files

Zhongwei Li f0bd18fb4e Initial commit

2025-11-30 08:30:10 +08:00

9.2 KiB

Raw Permalink Blame History

Table Operations (astropy.table)

The astropy.table module provides flexible tools for working with tabular data, with support for units, masked values, and various file formats.

Creating Tables

Basic Table Creation

from astropy.table import Table, QTable
import astropy.units as u
import numpy as np

# From column arrays
a = [1, 4, 5]
b = [2.0, 5.0, 8.2]
c = ['x', 'y', 'z']

t = Table([a, b, c], names=('id', 'flux', 'name'))

# With units (use QTable)
flux = [1.2, 2.3, 3.4] * u.Jy
wavelength = [500, 600, 700] * u.nm
t = QTable([flux, wavelength], names=('flux', 'wavelength'))

From Lists of Rows

# List of tuples
rows = [(1, 10.5, 'A'), (2, 11.2, 'B'), (3, 12.3, 'C')]
t = Table(rows=rows, names=('id', 'value', 'name'))

# List of dictionaries
rows = [{'id': 1, 'value': 10.5}, {'id': 2, 'value': 11.2}]
t = Table(rows)

From NumPy Arrays

# Structured array
arr = np.array([(1, 2.0, 'x'), (4, 5.0, 'y')],
               dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'U10')])
t = Table(arr)

# 2D array with column names
data = np.random.random((100, 3))
t = Table(data, names=['col1', 'col2', 'col3'])

From Pandas DataFrame

import pandas as pd

df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
t = Table.from_pandas(df)

Accessing Table Data

Basic Access

# Column access
ra_col = t['ra']           # Returns Column object
dec_col = t['dec']

# Row access
first_row = t[0]           # Returns Row object
row_slice = t[10:20]       # Returns new Table

# Cell access
value = t['ra'][5]         # 6th value in 'ra' column
value = t[5]['ra']         # Same thing

# Multiple columns
subset = t['ra', 'dec', 'mag']

Table Properties

len(t)              # Number of rows
t.colnames          # List of column names
t.dtype             # Column data types
t.info              # Detailed information
t.meta              # Metadata dictionary

Iteration

# Iterate over rows
for row in t:
    print(row['ra'], row['dec'])

# Iterate over columns
for colname in t.colnames:
    print(t[colname])

Modifying Tables

Adding Columns

# Add new column
t['new_col'] = [1, 2, 3, 4, 5]
t['calc'] = t['a'] + t['b']  # Calculated column

# Add column with units
t['velocity'] = [10, 20, 30] * u.km / u.s

# Add empty column
from astropy.table import Column
t['empty'] = Column(length=len(t), dtype=float)

# Insert at specific position
t.add_column([7, 8, 9], name='inserted', index=2)

Removing Columns

# Remove single column
t.remove_column('old_col')

# Remove multiple columns
t.remove_columns(['col1', 'col2'])

# Delete syntax
del t['col_name']

# Keep only specific columns
t.keep_columns(['ra', 'dec', 'mag'])

Renaming Columns

t.rename_column('old_name', 'new_name')

# Rename multiple
t.rename_columns(['old1', 'old2'], ['new1', 'new2'])

Adding Rows

# Add single row
t.add_row([1, 2.5, 'new'])

# Add row as dict
t.add_row({'ra': 10.5, 'dec': 41.2, 'mag': 18.5})

# Note: Adding rows one at a time is slow!
# Better to collect rows and create table at once

Modifying Data

# Modify column values
t['flux'] = t['flux'] * gain
t['mag'][t['mag'] < 0] = np.nan

# Modify single cell
t['ra'][5] = 10.5

# Modify entire row
t[0] = [new_id, new_ra, new_dec]

Sorting and Filtering

Sorting

# Sort by single column
t.sort('mag')

# Sort descending
t.sort('mag', reverse=True)

# Sort by multiple columns
t.sort(['priority', 'mag'])

# Get sorted indices without modifying table
indices = t.argsort('mag')
sorted_table = t[indices]

Filtering

# Boolean indexing
bright = t[t['mag'] < 18]
nearby = t[t['distance'] < 100*u.pc]

# Multiple conditions
selected = t[(t['mag'] < 18) & (t['dec'] > 0)]

# Using numpy functions
high_snr = t[np.abs(t['flux'] / t['error']) > 5]

Reading and Writing Files

Supported Formats

FITS, HDF5, ASCII (CSV, ECSV, IPAC, etc.), VOTable, Parquet, ASDF

Reading Files

# Automatic format detection
t = Table.read('catalog.fits')
t = Table.read('data.csv')
t = Table.read('table.vot')

# Specify format explicitly
t = Table.read('data.txt', format='ascii')
t = Table.read('catalog.hdf5', path='/dataset/table')

# Read specific HDU from FITS
t = Table.read('file.fits', hdu=2)

Writing Files

# Automatic format from extension
t.write('output.fits')
t.write('output.csv')

# Specify format
t.write('output.txt', format='ascii.csv')
t.write('output.hdf5', path='/data/table', serialize_meta=True)

# Overwrite existing file
t.write('output.fits', overwrite=True)

ASCII Format Options

# CSV with custom delimiter
t.write('output.csv', format='ascii.csv', delimiter='|')

# Fixed-width format
t.write('output.txt', format='ascii.fixed_width')

# IPAC format
t.write('output.tbl', format='ascii.ipac')

# LaTeX table
t.write('table.tex', format='ascii.latex')

Table Operations

Stacking Tables (Vertical)

from astropy.table import vstack

# Concatenate tables vertically
t1 = Table([[1, 2], [3, 4]], names=('a', 'b'))
t2 = Table([[5, 6], [7, 8]], names=('a', 'b'))
t_combined = vstack([t1, t2])

Joining Tables (Horizontal)

from astropy.table import hstack

# Concatenate tables horizontally
t1 = Table([[1, 2]], names=['a'])
t2 = Table([[3, 4]], names=['b'])
t_combined = hstack([t1, t2])

Database-Style Joins

from astropy.table import join

# Inner join on common column
t1 = Table([[1, 2, 3], ['a', 'b', 'c']], names=('id', 'data1'))
t2 = Table([[1, 2, 4], ['x', 'y', 'z']], names=('id', 'data2'))
t_joined = join(t1, t2, keys='id')

# Left/right/outer joins
t_joined = join(t1, t2, join_type='left')
t_joined = join(t1, t2, join_type='outer')

Grouping and Aggregating

# Group by column
g = t.group_by('filter')

# Aggregate groups
means = g.groups.aggregate(np.mean)

# Iterate over groups
for group in g.groups:
    print(f"Filter: {group['filter'][0]}")
    print(f"Mean mag: {np.mean(group['mag'])}")

Unique Rows

# Get unique rows
t_unique = t.unique('id')

# Multiple columns
t_unique = t.unique(['ra', 'dec'])

Units and Quantities

Use QTable for unit-aware operations:

from astropy.table import QTable

# Create table with units
t = QTable()
t['flux'] = [1.2, 2.3, 3.4] * u.Jy
t['wavelength'] = [500, 600, 700] * u.nm

# Unit conversions
t['flux'].to(u.mJy)
t['wavelength'].to(u.angstrom)

# Calculations preserve units
t['freq'] = t['wavelength'].to(u.Hz, equivalencies=u.spectral())

Masking Missing Data

from astropy.table import MaskedColumn
import numpy as np

# Create masked column
flux = MaskedColumn([1.2, np.nan, 3.4], mask=[False, True, False])
t = Table([flux], names=['flux'])

# Operations automatically handle masks
mean_flux = np.ma.mean(t['flux'])

# Fill masked values
t['flux'].filled(0)  # Replace masked with 0

Indexing for Fast Lookup

Create indices for fast row retrieval:

# Add index on column
t.add_index('id')

# Fast lookup by index
row = t.loc[12345]  # Find row where id=12345

# Range queries
subset = t.loc[100:200]

Table Metadata

# Set table-level metadata
t.meta['TELESCOPE'] = 'HST'
t.meta['FILTER'] = 'F814W'
t.meta['EXPTIME'] = 300.0

# Set column-level metadata
t['ra'].meta['unit'] = 'deg'
t['ra'].meta['description'] = 'Right Ascension'
t['ra'].description = 'Right Ascension'  # Shortcut

Performance Tips

Fast Table Construction

# SLOW: Adding rows one at a time
t = Table(names=['a', 'b'])
for i in range(1000):
    t.add_row([i, i**2])

# FAST: Build from lists
rows = [(i, i**2) for i in range(1000)]
t = Table(rows=rows, names=['a', 'b'])

Memory-Mapped FITS Tables

# Don't load entire table into memory
t = Table.read('huge_catalog.fits', memmap=True)

# Only loads data when accessed
subset = t[10000:10100]  # Efficient

Copy vs. View

# Create view (shares data, fast)
t_view = t['ra', 'dec']

# Create copy (independent data)
t_copy = t['ra', 'dec'].copy()

Displaying Tables

# Print to console
print(t)

# Show in interactive browser
t.show_in_browser()
t.show_in_browser(jsviewer=True)  # Interactive sorting/filtering

# Paginated viewing
t.more()

# Custom formatting
t['flux'].format = '%.3f'
t['ra'].format = '{:.6f}'

Converting to Other Formats

# To NumPy array
arr = np.array(t)

# To Pandas DataFrame
df = t.to_pandas()

# To dictionary
d = {name: t[name] for name in t.colnames}

Common Use Cases

Cross-Matching Catalogs

from astropy.coordinates import SkyCoord, match_coordinates_sky

# Create coordinate objects from table columns
coords1 = SkyCoord(t1['ra'], t1['dec'], unit='deg')
coords2 = SkyCoord(t2['ra'], t2['dec'], unit='deg')

# Find matches
idx, sep, _ = coords1.match_to_catalog_sky(coords2)

# Filter by separation
max_sep = 1 * u.arcsec
matches = sep < max_sep
t1_matched = t1[matches]
t2_matched = t2[idx[matches]]

Binning Data

from astropy.table import Table
import numpy as np

# Bin by magnitude
mag_bins = np.arange(10, 20, 0.5)
binned = t.group_by(np.digitize(t['mag'], mag_bins))
counts = binned.groups.aggregate(len)

9.2 KiB Raw Permalink Blame History