Initial commit

2025-11-30 08:30:10 +08:00
commit f0bd18fb4e
824 changed files with 331919 additions and 0 deletions
--- a/skills/omero-integration/references/tables.md
+++ b/skills/omero-integration/references/tables.md
@@ -0,0 +1,532 @@
+# OMERO Tables
+
+This reference covers creating and managing structured tabular data in OMERO using OMERO.tables.
+
+## OMERO.tables Overview
+
+OMERO.tables provides a way to store structured tabular data associated with OMERO objects. Tables are stored as HDF5 files and can be queried efficiently. Common use cases include:
+
+- Storing quantitative measurements from images
+- Recording analysis results
+- Tracking experimental metadata
+- Linking measurements to specific images or ROIs
+
+## Column Types
+
+OMERO.tables supports several column types, including:
+
+- **LongColumn**: Integer values (64-bit)
+- **DoubleColumn**: Floating-point values
+- **StringColumn**: Text data (fixed max length)
+- **BoolColumn**: Boolean values
+- **LongArrayColumn**: Arrays of integers
+- **DoubleArrayColumn**: Arrays of floats
+- **FileColumn**: References to OMERO files
+- **ImageColumn**: References to OMERO images
+- **RoiColumn**: References to OMERO ROIs
+- **WellColumn**: References to OMERO wells
+
+## Creating Tables
+
+### Basic Table Creation
+
+```python
+from random import random
+import omero.grid
+
+# Create unique table name
+table_name = f"MyAnalysisTable_{random()}"
+
+# Define columns (empty data for initialization)
+col1 = omero.grid.LongColumn('ImageID', 'Image identifier', [])
+col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel intensity', [])
+col3 = omero.grid.StringColumn('Category', 'Classification', 64, [])
+
+columns = [col1, col2, col3]
+
+# Get resources and create table
+resources = conn.c.sf.sharedResources()
+repository_id = resources.repositories().descriptions[0].getId().getValue()
+table = resources.newTable(repository_id, table_name)
+
+# Initialize table with column definitions
+table.initialize(columns)
+```
+
+### Add Data to Table
+
+```python
+# Prepare data
+image_ids = [1, 2, 3, 4, 5]
+intensities = [123.4, 145.2, 98.7, 156.3, 132.8]
+categories = ["Good", "Good", "Poor", "Excellent", "Good"]
+
+# Create data columns
+data_col1 = omero.grid.LongColumn('ImageID', 'Image identifier', image_ids)
+data_col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel intensity', intensities)
+data_col3 = omero.grid.StringColumn('Category', 'Classification', 64, categories)
+
+data = [data_col1, data_col2, data_col3]
+
+# Add data to table
+table.addData(data)
+
+# Get file reference
+orig_file = table.getOriginalFile()
+table.close()  # Always close table when done
+```
+
+### Link Table to Dataset
+
+```python
+# Create file annotation from table
+orig_file_id = orig_file.id.val
+file_ann = omero.model.FileAnnotationI()
+file_ann.setFile(omero.model.OriginalFileI(orig_file_id, False))
+file_ann = conn.getUpdateService().saveAndReturnObject(file_ann)
+
+# Link to dataset
+link = omero.model.DatasetAnnotationLinkI()
+link.setParent(omero.model.DatasetI(dataset_id, False))
+link.setChild(omero.model.FileAnnotationI(file_ann.getId().getValue(), False))
+conn.getUpdateService().saveAndReturnObject(link)
+
+print(f"Linked table to dataset {dataset_id}")
+```
+
+## Column Types in Detail
+
+### Long Column (Integers)
+
+```python
+# Column for integer values
+image_ids = [101, 102, 103, 104, 105]
+col = omero.grid.LongColumn('ImageID', 'Image identifier', image_ids)
+```
+
+### Double Column (Floats)
+
+```python
+# Column for floating-point values
+measurements = [12.34, 56.78, 90.12, 34.56, 78.90]
+col = omero.grid.DoubleColumn('Measurement', 'Value in microns', measurements)
+```
+
+### String Column (Text)
+
+```python
+# Column for text (max length required)
+labels = ["Control", "Treatment A", "Treatment B", "Control", "Treatment A"]
+col = omero.grid.StringColumn('Condition', 'Experimental condition', 64, labels)
+```
+
+### Boolean Column
+
+```python
+# Column for boolean values
+flags = [True, False, True, True, False]
+col = omero.grid.BoolColumn('QualityPass', 'Passes quality control', flags)
+```
+
+### Image Column (References to Images)
+
+```python
+# Column linking to OMERO images
+image_ids = [101, 102, 103, 104, 105]
+col = omero.grid.ImageColumn('Image', 'Source image', image_ids)
+```
+
+### ROI Column (References to ROIs)
+
+```python
+# Column linking to OMERO ROIs
+roi_ids = [201, 202, 203, 204, 205]
+col = omero.grid.RoiColumn('ROI', 'Associated ROI', roi_ids)
+```
+
+### Array Columns
+
+```python
+# Column for arrays of doubles (array length per row is required, like StringColumn's max length)
+histogram_data = [
+    [10, 20, 30, 40],
+    [15, 25, 35, 45],
+    [12, 22, 32, 42]
+]
+col = omero.grid.DoubleArrayColumn('Histogram', 'Intensity histogram', 4, histogram_data)
+
+# Column for arrays of longs (array length per row is required)
+bin_counts = [[5, 10, 15], [8, 12, 16], [6, 11, 14]]
+col = omero.grid.LongArrayColumn('Bins', 'Histogram bins', 3, bin_counts)
+```
+
+## Reading Table Data
+
+### Open Existing Table
+
+```python
+# Get table file by name
+orig_table_file = conn.getObject("OriginalFile",
+                                 attributes={'name': table_name})
+
+# Open table
+resources = conn.c.sf.sharedResources()
+table = resources.openTable(orig_table_file._obj)
+
+print(f"Opened table: {table.getOriginalFile().getName().getValue()}")
+print(f"Number of rows: {table.getNumberOfRows()}")
+```
+
+### Read All Data
+
+```python
+# Get column headers
+print("Columns:")
+for col in table.getHeaders():
+    print(f"  {col.name}: {col.description}")
+
+# Read all data
+row_count = table.getNumberOfRows()
+data = table.readCoordinates(range(row_count))
+
+# Display data
+for col in data.columns:
+    print(f"\nColumn: {col.name}")
+    for value in col.values:
+        print(f"  {value}")
+
+table.close()
+```
+
+### Read Specific Rows
+
+```python
+# Read rows 10-19 (start is inclusive, stop is exclusive)
+start = 10
+stop = 20
+data = table.read(list(range(len(table.getHeaders()))), start, stop)
+
+for col in data.columns:
+    print(f"Column: {col.name}")
+    for value in col.values:
+        print(f"  {value}")
+```
+
+### Read Specific Columns
+
+```python
+# Read only columns 0 and 2
+column_indices = [0, 2]
+start = 0
+stop = table.getNumberOfRows()
+
+data = table.read(column_indices, start, stop)
+
+for col in data.columns:
+    print(f"Column: {col.name}")
+    print(f"Values: {col.values}")
+```
+
+## Querying Tables
+
+### Query with Conditions
+
+```python
+# Query rows where MeanIntensity > 100
+row_count = table.getNumberOfRows()
+
+query_rows = table.getWhereList(
+    "(MeanIntensity > 100)",
+    variables={},
+    start=0,
+    stop=row_count,
+    step=0
+)
+
+print(f"Found {len(query_rows)} matching rows")
+
+# Read matching rows
+data = table.readCoordinates(query_rows)
+
+for col in data.columns:
+    print(f"\n{col.name}:")
+    for value in col.values:
+        print(f"  {value}")
+```
+
+### Complex Queries
+
+```python
+# Multiple conditions with AND
+query_rows = table.getWhereList(
+    "(MeanIntensity > 100) & (MeanIntensity < 150)",
+    variables={},
+    start=0,
+    stop=row_count,
+    step=0
+)
+
+# Multiple conditions with OR
+query_rows = table.getWhereList(
+    "(Category == 'Good') | (Category == 'Excellent')",
+    variables={},
+    start=0,
+    stop=row_count,
+    step=0
+)
+
+# String matching
+query_rows = table.getWhereList(
+    "(Category == 'Good')",
+    variables={},
+    start=0,
+    stop=row_count,
+    step=0
+)
+```
+
+## Complete Example: Image Analysis Results
+
+```python
+from omero.gateway import BlitzGateway
+import omero.grid
+import omero.model
+import numpy as np
+
+HOST = 'omero.example.com'
+PORT = 4064
+USERNAME = 'user'
+PASSWORD = 'pass'
+
+with BlitzGateway(USERNAME, PASSWORD, host=HOST, port=PORT) as conn:
+    dataset_id = 1  # Replace with the ID of the dataset to analyze
+    dataset = conn.getObject("Dataset", dataset_id)
+    print(f"Analyzing dataset: {dataset.getName()}")
+
+    # Collect measurements from images
+    image_ids = []
+    mean_intensities = []
+    max_intensities = []
+    cell_counts = []
+
+    for image in dataset.listChildren():
+        image_ids.append(image.getId())
+
+        # Get pixel data
+        pixels = image.getPrimaryPixels()
+        plane = pixels.getPlane(0, 0, 0)  # Z=0, C=0, T=0
+
+        # Calculate statistics
+        mean_intensities.append(float(np.mean(plane)))
+        max_intensities.append(float(np.max(plane)))
+
+        # Simulate cell count (would be from actual analysis)
+        cell_counts.append(int(np.random.randint(50, 200)))
+
+    # Create table
+    table_name = f"Analysis_Results_{dataset.getId()}"
+
+    # Define columns
+    col1 = omero.grid.ImageColumn('Image', 'Source image', [])
+    col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel value', [])
+    col3 = omero.grid.DoubleColumn('MaxIntensity', 'Maximum pixel value', [])
+    col4 = omero.grid.LongColumn('CellCount', 'Number of cells detected', [])
+
+    # Initialize table
+    resources = conn.c.sf.sharedResources()
+    repository_id = resources.repositories().descriptions[0].getId().getValue()
+    table = resources.newTable(repository_id, table_name)
+    table.initialize([col1, col2, col3, col4])
+
+    # Add data
+    data_col1 = omero.grid.ImageColumn('Image', 'Source image', image_ids)
+    data_col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel value',
+                                        mean_intensities)
+    data_col3 = omero.grid.DoubleColumn('MaxIntensity', 'Maximum pixel value',
+                                        max_intensities)
+    data_col4 = omero.grid.LongColumn('CellCount', 'Number of cells detected',
+                                      cell_counts)
+
+    table.addData([data_col1, data_col2, data_col3, data_col4])
+
+    # Get file and close table
+    orig_file = table.getOriginalFile()
+    table.close()
+
+    # Link to dataset
+    orig_file_id = orig_file.id.val
+    file_ann = omero.model.FileAnnotationI()
+    file_ann.setFile(omero.model.OriginalFileI(orig_file_id, False))
+    file_ann = conn.getUpdateService().saveAndReturnObject(file_ann)
+
+    link = omero.model.DatasetAnnotationLinkI()
+    link.setParent(omero.model.DatasetI(dataset_id, False))
+    link.setChild(omero.model.FileAnnotationI(file_ann.getId().getValue(), False))
+    conn.getUpdateService().saveAndReturnObject(link)
+
+    print(f"Created and linked table with {len(image_ids)} rows")
+
+    # Query results
+    table = resources.openTable(orig_file)
+
+    high_cell_count_rows = table.getWhereList(
+        "(CellCount > 100)",
+        variables={},
+        start=0,
+        stop=table.getNumberOfRows(),
+        step=0
+    )
+
+    print(f"Images with >100 cells: {len(high_cell_count_rows)}")
+
+    # Read those rows
+    data = table.readCoordinates(high_cell_count_rows)
+    for i in range(len(high_cell_count_rows)):
+        img_id = data.columns[0].values[i]
+        count = data.columns[3].values[i]
+        print(f"  Image {img_id}: {count} cells")
+
+    table.close()
+```
+
+## Retrieve Tables from Objects
+
+### Find Tables Attached to Dataset
+
+```python
+# Get dataset
+dataset = conn.getObject("Dataset", dataset_id)
+
+# List file annotations
+for ann in dataset.listAnnotations():
+    if isinstance(ann, omero.gateway.FileAnnotationWrapper):
+        file_obj = ann.getFile()
+        file_name = file_obj.getName()
+
+        # OMERO.tables files are stored with the "OMERO.tables" mimetype
+        if file_obj.getMimetype() == "OMERO.tables":
+            print(f"Found table: {file_name} (ID: {file_obj.getId()})")
+
+            # Open and inspect
+            resources = conn.c.sf.sharedResources()
+            table = resources.openTable(file_obj._obj)
+
+            print(f"  Rows: {table.getNumberOfRows()}")
+            print(f"  Columns:")
+            for col in table.getHeaders():
+                print(f"    {col.name}")
+
+            table.close()
+```
+
+## Updating Tables
+
+### Append Rows
+
+```python
+# Open existing table (orig_file is the model object from table.getOriginalFile())
+resources = conn.c.sf.sharedResources()
+table = resources.openTable(orig_file)
+
+# Prepare new data
+new_image_ids = [106, 107]
+new_intensities = [88.9, 92.3]
+new_categories = ["Good", "Excellent"]
+
+# Create data columns
+data_col1 = omero.grid.LongColumn('ImageID', '', new_image_ids)
+data_col2 = omero.grid.DoubleColumn('MeanIntensity', '', new_intensities)
+data_col3 = omero.grid.StringColumn('Category', '', 64, new_categories)
+
+# Append data
+table.addData([data_col1, data_col2, data_col3])
+
+print(f"New row count: {table.getNumberOfRows()}")
+table.close()
+```
+
+## Deleting Tables
+
+### Delete Table File
+
+```python
+# Get file object
+orig_file = conn.getObject("OriginalFile", file_id)
+
+# Delete file (also deletes table)
+conn.deleteObjects("OriginalFile", [file_id], wait=True)
+print(f"Deleted table file {file_id}")
+```
+
+### Unlink Table from Object
+
+```python
+# Find annotation links
+dataset = conn.getObject("Dataset", dataset_id)
+
+for ann in dataset.listAnnotations():
+    if isinstance(ann, omero.gateway.FileAnnotationWrapper):
+        if ann.getFile().getMimetype() == "OMERO.tables":
+            # Delete link (keeps table, removes association)
+            conn.deleteObjects("DatasetAnnotationLink",
+                             [ann.link.getId()],
+                             wait=True)
+            print(f"Unlinked table from dataset")
+```
+
+## Best Practices
+
+1. **Descriptive Names**: Use meaningful table and column names
+2. **Close Tables**: Always close tables after use, ideally in a `try`/`finally` block so they are closed even if an error occurs
+3. **String Length**: Set appropriate max length for string columns
+4. **Link to Objects**: Attach tables to relevant datasets or projects
+5. **Use References**: Use ImageColumn, RoiColumn for object references
+6. **Query Efficiently**: Use getWhereList() instead of reading all data
+7. **Document**: Add descriptions to columns
+8. **Version Control**: Include version info in table name or metadata
+9. **Batch Operations**: Add data in batches for better performance
+10. **Error Handling**: Check for None returns (e.g. from `conn.getObject()` when an object is not found) and handle exceptions
+
+## Common Patterns
+
+### ROI Measurements Table
+
+```python
+# Table structure for ROI measurements
+columns = [
+    omero.grid.ImageColumn('Image', 'Source image', []),
+    omero.grid.RoiColumn('ROI', 'Measured ROI', []),
+    omero.grid.LongColumn('ChannelIndex', 'Channel number', []),
+    omero.grid.DoubleColumn('Area', 'ROI area in pixels', []),
+    omero.grid.DoubleColumn('MeanIntensity', 'Mean intensity', []),
+    omero.grid.DoubleColumn('IntegratedDensity', 'Sum of intensities', []),
+    omero.grid.StringColumn('CellType', 'Cell classification', 32, [])
+]
+```
+
+### Time Series Data Table
+
+```python
+# Table structure for time series measurements
+columns = [
+    omero.grid.ImageColumn('Image', 'Time series image', []),
+    omero.grid.LongColumn('Timepoint', 'Time index', []),
+    omero.grid.DoubleColumn('Timestamp', 'Time in seconds', []),
+    omero.grid.DoubleColumn('Value', 'Measured value', []),
+    omero.grid.StringColumn('Measurement', 'Type of measurement', 64, [])
+]
+```
+
+### Screening Results Table
+
+```python
+# Table structure for screening plate analysis
+columns = [
+    omero.grid.WellColumn('Well', 'Plate well', []),
+    omero.grid.LongColumn('FieldIndex', 'Field number', []),
+    omero.grid.DoubleColumn('CellCount', 'Number of cells', []),
+    omero.grid.DoubleColumn('Viability', 'Percent viable', []),
+    omero.grid.StringColumn('Phenotype', 'Observed phenotype', 128, []),
+    omero.grid.BoolColumn('Hit', 'Hit in screen', [])
+]
+```