Initial commit

2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions
--- a/skills/astropy/references/tables.md
+++ b/skills/astropy/references/tables.md
@@ -0,0 +1,489 @@
+# Table Operations (astropy.table)
+
+The `astropy.table` module provides flexible tools for working with tabular data, with support for units, masked values, and various file formats.
+
+## Creating Tables
+
+### Basic Table Creation
+
+```python
+from astropy.table import Table, QTable
+import astropy.units as u
+import numpy as np
+
+# From column arrays
+a = [1, 4, 5]
+b = [2.0, 5.0, 8.2]
+c = ['x', 'y', 'z']
+
+t = Table([a, b, c], names=('id', 'flux', 'name'))
+
+# With units (use QTable)
+flux = [1.2, 2.3, 3.4] * u.Jy
+wavelength = [500, 600, 700] * u.nm
+t = QTable([flux, wavelength], names=('flux', 'wavelength'))
+```
+
+### From Lists of Rows
+
+```python
+# List of tuples
+rows = [(1, 10.5, 'A'), (2, 11.2, 'B'), (3, 12.3, 'C')]
+t = Table(rows=rows, names=('id', 'value', 'name'))
+
+# List of dictionaries
+rows = [{'id': 1, 'value': 10.5}, {'id': 2, 'value': 11.2}]
+t = Table(rows)
+```
+
+### From NumPy Arrays
+
+```python
+# Structured array
+arr = np.array([(1, 2.0, 'x'), (4, 5.0, 'y')],
+               dtype=[('a', 'i4'), ('b', 'f8'), ('c', 'U10')])
+t = Table(arr)
+
+# 2D array with column names
+data = np.random.random((100, 3))
+t = Table(data, names=['col1', 'col2', 'col3'])
+```
+
+### From Pandas DataFrame
+
+```python
+import pandas as pd
+
+df = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
+t = Table.from_pandas(df)
+```
+
+## Accessing Table Data
+
+### Basic Access
+
+```python
+# Column access
+ra_col = t['ra']           # Returns Column object
+dec_col = t['dec']
+
+# Row access
+first_row = t[0]           # Returns Row object
+row_slice = t[10:20]       # Returns new Table
+
+# Cell access
+value = t['ra'][5]         # 6th value in 'ra' column
+value = t[5]['ra']         # Same thing
+
+# Multiple columns
+subset = t['ra', 'dec', 'mag']
+```
+
+### Table Properties
+
+```python
+len(t)              # Number of rows
+t.colnames          # List of column names
+t.dtype             # Column data types
+t.info              # Detailed information
+t.meta              # Metadata dictionary
+```
+
+### Iteration
+
+```python
+# Iterate over rows
+for row in t:
+    print(row['ra'], row['dec'])
+
+# Iterate over columns
+for colname in t.colnames:
+    print(t[colname])
+```
+
+## Modifying Tables
+
+### Adding Columns
+
+```python
+# Add new column
+t['new_col'] = [1, 2, 3, 4, 5]
+t['calc'] = t['a'] + t['b']  # Calculated column
+
+# Add column with units
+t['velocity'] = [10, 20, 30] * u.km / u.s
+
+# Add empty column
+from astropy.table import Column
+t['empty'] = Column(length=len(t), dtype=float)
+
+# Insert at specific position
+t.add_column([7, 8, 9], name='inserted', index=2)
+```
+
+### Removing Columns
+
+```python
+# Remove single column
+t.remove_column('old_col')
+
+# Remove multiple columns
+t.remove_columns(['col1', 'col2'])
+
+# Delete syntax
+del t['col_name']
+
+# Keep only specific columns
+t.keep_columns(['ra', 'dec', 'mag'])
+```
+
+### Renaming Columns
+
+```python
+t.rename_column('old_name', 'new_name')
+
+# Rename multiple
+t.rename_columns(['old1', 'old2'], ['new1', 'new2'])
+```
+
+### Adding Rows
+
+```python
+# Add single row
+t.add_row([1, 2.5, 'new'])
+
+# Add row as dict
+t.add_row({'ra': 10.5, 'dec': 41.2, 'mag': 18.5})
+
+# Note: Adding rows one at a time is slow!
+# Better to collect rows and create table at once
+```
+
+### Modifying Data
+
+```python
+# Modify column values
+t['flux'] = t['flux'] * gain
+t['mag'][t['mag'] < 0] = np.nan
+
+# Modify single cell
+t['ra'][5] = 10.5
+
+# Modify entire row
+t[0] = [new_id, new_ra, new_dec]
+```
+
+## Sorting and Filtering
+
+### Sorting
+
+```python
+# Sort by single column
+t.sort('mag')
+
+# Sort descending
+t.sort('mag', reverse=True)
+
+# Sort by multiple columns
+t.sort(['priority', 'mag'])
+
+# Get sorted indices without modifying table
+indices = t.argsort('mag')
+sorted_table = t[indices]
+```
+
+### Filtering
+
+```python
+# Boolean indexing
+bright = t[t['mag'] < 18]
+nearby = t[t['distance'] < 100*u.pc]
+
+# Multiple conditions
+selected = t[(t['mag'] < 18) & (t['dec'] > 0)]
+
+# Using numpy functions
+high_snr = t[np.abs(t['flux'] / t['error']) > 5]
+```
+
+## Reading and Writing Files
+
+### Supported Formats
+
+FITS, HDF5, ASCII (CSV, ECSV, IPAC, etc.), VOTable, Parquet, ASDF
+
+### Reading Files
+
+```python
+# Automatic format detection
+t = Table.read('catalog.fits')
+t = Table.read('data.csv')
+t = Table.read('table.vot')
+
+# Specify format explicitly
+t = Table.read('data.txt', format='ascii')
+t = Table.read('catalog.hdf5', path='/dataset/table')
+
+# Read specific HDU from FITS
+t = Table.read('file.fits', hdu=2)
+```
+
+### Writing Files
+
+```python
+# Automatic format from extension
+t.write('output.fits')
+t.write('output.csv')
+
+# Specify format
+t.write('output.txt', format='ascii.csv')
+t.write('output.hdf5', path='/data/table', serialize_meta=True)
+
+# Overwrite existing file
+t.write('output.fits', overwrite=True)
+```
+
+### ASCII Format Options
+
+```python
+# CSV with custom delimiter
+t.write('output.csv', format='ascii.csv', delimiter='|')
+
+# Fixed-width format
+t.write('output.txt', format='ascii.fixed_width')
+
+# IPAC format
+t.write('output.tbl', format='ascii.ipac')
+
+# LaTeX table
+t.write('table.tex', format='ascii.latex')
+```
+
+## Table Operations
+
+### Stacking Tables (Vertical)
+
+```python
+from astropy.table import vstack
+
+# Concatenate tables vertically
+t1 = Table([[1, 2], [3, 4]], names=('a', 'b'))
+t2 = Table([[5, 6], [7, 8]], names=('a', 'b'))
+t_combined = vstack([t1, t2])
+```
+
+### Joining Tables (Horizontal)
+
+```python
+from astropy.table import hstack
+
+# Concatenate tables horizontally
+t1 = Table([[1, 2]], names=['a'])
+t2 = Table([[3, 4]], names=['b'])
+t_combined = hstack([t1, t2])
+```
+
+### Database-Style Joins
+
+```python
+from astropy.table import join
+
+# Inner join on common column
+t1 = Table([[1, 2, 3], ['a', 'b', 'c']], names=('id', 'data1'))
+t2 = Table([[1, 2, 4], ['x', 'y', 'z']], names=('id', 'data2'))
+t_joined = join(t1, t2, keys='id')
+
+# Left/right/outer joins
+t_joined = join(t1, t2, join_type='left')
+t_joined = join(t1, t2, join_type='outer')
+```
+
+### Grouping and Aggregating
+
+```python
+# Group by column
+g = t.group_by('filter')
+
+# Aggregate groups
+means = g.groups.aggregate(np.mean)
+
+# Iterate over groups
+for group in g.groups:
+    print(f"Filter: {group['filter'][0]}")
+    print(f"Mean mag: {np.mean(group['mag'])}")
+```
+
+### Unique Rows
+
+```python
+# Get unique rows
+t_unique = t.unique('id')
+
+# Multiple columns
+t_unique = t.unique(['ra', 'dec'])
+```
+
+## Units and Quantities
+
+Use QTable for unit-aware operations:
+
+```python
+from astropy.table import QTable
+
+# Create table with units
+t = QTable()
+t['flux'] = [1.2, 2.3, 3.4] * u.Jy
+t['wavelength'] = [500, 600, 700] * u.nm
+
+# Unit conversions
+t['flux'].to(u.mJy)
+t['wavelength'].to(u.angstrom)
+
+# Calculations preserve units
+t['freq'] = t['wavelength'].to(u.Hz, equivalencies=u.spectral())
+```
+
+## Masking Missing Data
+
+```python
+from astropy.table import MaskedColumn
+import numpy as np
+
+# Create masked column
+flux = MaskedColumn([1.2, np.nan, 3.4], mask=[False, True, False])
+t = Table([flux], names=['flux'])
+
+# Operations automatically handle masks
+mean_flux = np.ma.mean(t['flux'])
+
+# Fill masked values
+t['flux'].filled(0)  # Replace masked with 0
+```
+
+## Indexing for Fast Lookup
+
+Create indices for fast row retrieval:
+
+```python
+# Add index on column
+t.add_index('id')
+
+# Fast lookup by index
+row = t.loc[12345]  # Find row where id=12345
+
+# Range queries
+subset = t.loc[100:200]
+```
+
+## Table Metadata
+
+```python
+# Set table-level metadata
+t.meta['TELESCOPE'] = 'HST'
+t.meta['FILTER'] = 'F814W'
+t.meta['EXPTIME'] = 300.0
+
+# Set column-level metadata
+t['ra'].meta['unit'] = 'deg'
+t['ra'].meta['description'] = 'Right Ascension'
+t['ra'].description = 'Right Ascension'  # Shortcut
+```
+
+## Performance Tips
+
+### Fast Table Construction
+
+```python
+# SLOW: Adding rows one at a time
+t = Table(names=['a', 'b'])
+for i in range(1000):
+    t.add_row([i, i**2])
+
+# FAST: Build from lists
+rows = [(i, i**2) for i in range(1000)]
+t = Table(rows=rows, names=['a', 'b'])
+```
+
+### Memory-Mapped FITS Tables
+
+```python
+# Don't load entire table into memory
+t = Table.read('huge_catalog.fits', memmap=True)
+
+# Only loads data when accessed
+subset = t[10000:10100]  # Efficient
+```
+
+### Copy vs. View
+
+```python
+# Create view (shares data, fast)
+t_view = t['ra', 'dec']
+
+# Create copy (independent data)
+t_copy = t['ra', 'dec'].copy()
+```
+
+## Displaying Tables
+
+```python
+# Print to console
+print(t)
+
+# Show in interactive browser
+t.show_in_browser()
+t.show_in_browser(jsviewer=True)  # Interactive sorting/filtering
+
+# Paginated viewing
+t.more()
+
+# Custom formatting
+t['flux'].format = '%.3f'
+t['ra'].format = '{:.6f}'
+```
+
+## Converting to Other Formats
+
+```python
+# To NumPy array
+arr = np.array(t)
+
+# To Pandas DataFrame
+df = t.to_pandas()
+
+# To dictionary
+d = {name: t[name] for name in t.colnames}
+```
+
+## Common Use Cases
+
+### Cross-Matching Catalogs
+
+```python
+from astropy.coordinates import SkyCoord, match_coordinates_sky
+
+# Create coordinate objects from table columns
+coords1 = SkyCoord(t1['ra'], t1['dec'], unit='deg')
+coords2 = SkyCoord(t2['ra'], t2['dec'], unit='deg')
+
+# Find matches
+idx, sep, _ = coords1.match_to_catalog_sky(coords2)
+
+# Filter by separation
+max_sep = 1 * u.arcsec
+matches = sep < max_sep
+t1_matched = t1[matches]
+t2_matched = t2[idx[matches]]
+```
+
+### Binning Data
+
+```python
+from astropy.table import Table
+import numpy as np
+
+# Bin by magnitude
+mag_bins = np.arange(10, 20, 0.5)
+binned = t.group_by(np.digitize(t['mag'], mag_bins))
+counts = binned.groups.aggregate(len)
+```