Initial commit
This commit is contained in:
532
skills/omero-integration/references/tables.md
Normal file
532
skills/omero-integration/references/tables.md
Normal file
@@ -0,0 +1,532 @@
|
||||
# OMERO Tables
|
||||
|
||||
This reference covers creating and managing structured tabular data in OMERO using OMERO.tables.
|
||||
|
||||
## OMERO.tables Overview
|
||||
|
||||
OMERO.tables provides a way to store structured tabular data associated with OMERO objects. Tables are stored as HDF5 files and can be queried efficiently. Common use cases include:
|
||||
|
||||
- Storing quantitative measurements from images
|
||||
- Recording analysis results
|
||||
- Tracking experimental metadata
|
||||
- Linking measurements to specific images or ROIs
|
||||
|
||||
## Column Types
|
||||
|
||||
OMERO.tables supports various column types:
|
||||
|
||||
- **LongColumn**: Integer values (64-bit)
|
||||
- **DoubleColumn**: Floating-point values
|
||||
- **StringColumn**: Text data (fixed max length)
|
||||
- **BoolColumn**: Boolean values
|
||||
- **LongArrayColumn**: Arrays of integers (fixed array length per row)
|
||||
- **DoubleArrayColumn**: Arrays of floats (fixed array length per row)
|
||||
- **FileColumn**: References to OMERO files
|
||||
- **ImageColumn**: References to OMERO images
|
||||
- **RoiColumn**: References to OMERO ROIs
|
||||
- **WellColumn**: References to OMERO wells
|
||||
|
||||
## Creating Tables
|
||||
|
||||
### Basic Table Creation
|
||||
|
||||
```python
|
||||
from random import random
|
||||
import omero.grid
|
||||
|
||||
# Create unique table name
|
||||
table_name = f"MyAnalysisTable_{random()}"
|
||||
|
||||
# Define columns (empty data for initialization)
|
||||
col1 = omero.grid.LongColumn('ImageID', 'Image identifier', [])
|
||||
col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel intensity', [])
|
||||
col3 = omero.grid.StringColumn('Category', 'Classification', 64, [])
|
||||
|
||||
columns = [col1, col2, col3]
|
||||
|
||||
# Get resources and create table
|
||||
resources = conn.c.sf.sharedResources()
|
||||
repository_id = resources.repositories().descriptions[0].getId().getValue()
|
||||
table = resources.newTable(repository_id, table_name)
|
||||
|
||||
# Initialize table with column definitions
|
||||
table.initialize(columns)
|
||||
```
|
||||
|
||||
### Add Data to Table
|
||||
|
||||
```python
|
||||
# Prepare data
|
||||
image_ids = [1, 2, 3, 4, 5]
|
||||
intensities = [123.4, 145.2, 98.7, 156.3, 132.8]
|
||||
categories = ["Good", "Good", "Poor", "Excellent", "Good"]
|
||||
|
||||
# Create data columns
|
||||
data_col1 = omero.grid.LongColumn('ImageID', 'Image identifier', image_ids)
|
||||
data_col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel intensity', intensities)
|
||||
data_col3 = omero.grid.StringColumn('Category', 'Classification', 64, categories)
|
||||
|
||||
data = [data_col1, data_col2, data_col3]
|
||||
|
||||
# Add data to table
|
||||
table.addData(data)
|
||||
|
||||
# Get file reference
|
||||
orig_file = table.getOriginalFile()
|
||||
table.close() # Always close table when done
|
||||
```
|
||||
|
||||
### Link Table to Dataset
|
||||
|
||||
```python
|
||||
# Create file annotation from table
|
||||
orig_file_id = orig_file.id.val
|
||||
file_ann = omero.model.FileAnnotationI()
|
||||
file_ann.setFile(omero.model.OriginalFileI(orig_file_id, False))
|
||||
file_ann = conn.getUpdateService().saveAndReturnObject(file_ann)
|
||||
|
||||
# Link to dataset
|
||||
link = omero.model.DatasetAnnotationLinkI()
|
||||
link.setParent(omero.model.DatasetI(dataset_id, False))
|
||||
link.setChild(omero.model.FileAnnotationI(file_ann.getId().getValue(), False))
|
||||
conn.getUpdateService().saveAndReturnObject(link)
|
||||
|
||||
print(f"Linked table to dataset {dataset_id}")
|
||||
```
|
||||
|
||||
## Column Types in Detail
|
||||
|
||||
### Long Column (Integers)
|
||||
|
||||
```python
|
||||
# Column for integer values
|
||||
image_ids = [101, 102, 103, 104, 105]
|
||||
col = omero.grid.LongColumn('ImageID', 'Image identifier', image_ids)
|
||||
```
|
||||
|
||||
### Double Column (Floats)
|
||||
|
||||
```python
|
||||
# Column for floating-point values
|
||||
measurements = [12.34, 56.78, 90.12, 34.56, 78.90]
|
||||
col = omero.grid.DoubleColumn('Measurement', 'Value in microns', measurements)
|
||||
```
|
||||
|
||||
### String Column (Text)
|
||||
|
||||
```python
|
||||
# Column for text (max length required)
|
||||
labels = ["Control", "Treatment A", "Treatment B", "Control", "Treatment A"]
|
||||
col = omero.grid.StringColumn('Condition', 'Experimental condition', 64, labels)
|
||||
```
|
||||
|
||||
### Boolean Column
|
||||
|
||||
```python
|
||||
# Column for boolean values
|
||||
flags = [True, False, True, True, False]
|
||||
col = omero.grid.BoolColumn('QualityPass', 'Passes quality control', flags)
|
||||
```
|
||||
|
||||
### Image Column (References to Images)
|
||||
|
||||
```python
|
||||
# Column linking to OMERO images
|
||||
image_ids = [101, 102, 103, 104, 105]
|
||||
col = omero.grid.ImageColumn('Image', 'Source image', image_ids)
|
||||
```
|
||||
|
||||
### ROI Column (References to ROIs)
|
||||
|
||||
```python
|
||||
# Column linking to OMERO ROIs
|
||||
roi_ids = [201, 202, 203, 204, 205]
|
||||
col = omero.grid.RoiColumn('ROI', 'Associated ROI', roi_ids)
|
||||
```
|
||||
|
||||
### Array Columns
|
||||
|
||||
```python
|
||||
# Column for arrays of doubles
|
||||
histogram_data = [
|
||||
[10, 20, 30, 40],
|
||||
[15, 25, 35, 45],
|
||||
[12, 22, 32, 42]
|
||||
]
|
||||
col = omero.grid.DoubleArrayColumn('Histogram', 'Intensity histogram', 4, histogram_data)  # 4 = array length of each row
|
||||
|
||||
# Column for arrays of longs
|
||||
bin_counts = [[5, 10, 15], [8, 12, 16], [6, 11, 14]]
|
||||
col = omero.grid.LongArrayColumn('Bins', 'Histogram bins', 3, bin_counts)  # 3 = array length of each row
|
||||
```
|
||||
|
||||
## Reading Table Data
|
||||
|
||||
### Open Existing Table
|
||||
|
||||
```python
|
||||
# Get table file by name
|
||||
orig_table_file = conn.getObject("OriginalFile",
|
||||
attributes={'name': table_name})
|
||||
|
||||
# Open table
|
||||
resources = conn.c.sf.sharedResources()
|
||||
table = resources.openTable(orig_table_file._obj)
|
||||
|
||||
print(f"Opened table: {table.getOriginalFile().getName().getValue()}")
|
||||
print(f"Number of rows: {table.getNumberOfRows()}")
|
||||
```
|
||||
|
||||
### Read All Data
|
||||
|
||||
```python
|
||||
# Get column headers
|
||||
print("Columns:")
|
||||
for col in table.getHeaders():
|
||||
print(f" {col.name}: {col.description}")
|
||||
|
||||
# Read all data
|
||||
row_count = table.getNumberOfRows()
|
||||
data = table.readCoordinates(range(row_count))
|
||||
|
||||
# Display data
|
||||
for col in data.columns:
|
||||
print(f"\nColumn: {col.name}")
|
||||
for value in col.values:
|
||||
print(f" {value}")
|
||||
|
||||
table.close()
|
||||
```
|
||||
|
||||
### Read Specific Rows
|
||||
|
||||
```python
|
||||
# Read rows 10-19 (start is inclusive, stop is exclusive)
|
||||
start = 10
|
||||
stop = 20
|
||||
data = table.read(list(range(len(table.getHeaders()))), start, stop)
|
||||
|
||||
for col in data.columns:
|
||||
print(f"Column: {col.name}")
|
||||
for value in col.values:
|
||||
print(f" {value}")
|
||||
```
|
||||
|
||||
### Read Specific Columns
|
||||
|
||||
```python
|
||||
# Read only columns 0 and 2
|
||||
column_indices = [0, 2]
|
||||
start = 0
|
||||
stop = table.getNumberOfRows()
|
||||
|
||||
data = table.read(column_indices, start, stop)
|
||||
|
||||
for col in data.columns:
|
||||
print(f"Column: {col.name}")
|
||||
print(f"Values: {col.values}")
|
||||
```
|
||||
|
||||
## Querying Tables
|
||||
|
||||
### Query with Conditions
|
||||
|
||||
```python
|
||||
# Query rows where MeanIntensity > 100
|
||||
row_count = table.getNumberOfRows()
|
||||
|
||||
query_rows = table.getWhereList(
|
||||
"(MeanIntensity > 100)",
|
||||
variables={},
|
||||
start=0,
|
||||
stop=row_count,
|
||||
step=0
|
||||
)
|
||||
|
||||
print(f"Found {len(query_rows)} matching rows")
|
||||
|
||||
# Read matching rows
|
||||
data = table.readCoordinates(query_rows)
|
||||
|
||||
for col in data.columns:
|
||||
print(f"\n{col.name}:")
|
||||
for value in col.values:
|
||||
print(f" {value}")
|
||||
```
|
||||
|
||||
### Complex Queries
|
||||
|
||||
```python
|
||||
# Multiple conditions with AND
|
||||
query_rows = table.getWhereList(
|
||||
"(MeanIntensity > 100) & (MeanIntensity < 150)",
|
||||
variables={},
|
||||
start=0,
|
||||
stop=row_count,
|
||||
step=0
|
||||
)
|
||||
|
||||
# Multiple conditions with OR
|
||||
query_rows = table.getWhereList(
|
||||
"(Category == 'Good') | (Category == 'Excellent')",
|
||||
variables={},
|
||||
start=0,
|
||||
stop=row_count,
|
||||
step=0
|
||||
)
|
||||
|
||||
# String matching
|
||||
query_rows = table.getWhereList(
|
||||
"(Category == 'Good')",
|
||||
variables={},
|
||||
start=0,
|
||||
stop=row_count,
|
||||
step=0
|
||||
)
|
||||
```
|
||||
|
||||
## Complete Example: Image Analysis Results
|
||||
|
||||
```python
|
||||
from omero.gateway import BlitzGateway
|
||||
import omero.grid
|
||||
import omero.model
|
||||
import numpy as np
|
||||
|
||||
HOST = 'omero.example.com'
|
||||
PORT = 4064
|
||||
USERNAME = 'user'
|
||||
PASSWORD = 'pass'
|
||||
|
||||
with BlitzGateway(USERNAME, PASSWORD, host=HOST, port=PORT) as conn:
|
||||
# Get dataset (dataset_id must be set beforehand to the ID of an existing Dataset)
|
||||
dataset = conn.getObject("Dataset", dataset_id)
|
||||
print(f"Analyzing dataset: {dataset.getName()}")
|
||||
|
||||
# Collect measurements from images
|
||||
image_ids = []
|
||||
mean_intensities = []
|
||||
max_intensities = []
|
||||
cell_counts = []
|
||||
|
||||
for image in dataset.listChildren():
|
||||
image_ids.append(image.getId())
|
||||
|
||||
# Get pixel data
|
||||
pixels = image.getPrimaryPixels()
|
||||
plane = pixels.getPlane(0, 0, 0) # Z=0, C=0, T=0
|
||||
|
||||
# Calculate statistics
|
||||
mean_intensities.append(float(np.mean(plane)))
|
||||
max_intensities.append(float(np.max(plane)))
|
||||
|
||||
# Simulate cell count (would be from actual analysis)
|
||||
cell_counts.append(int(np.random.randint(50, 200)))  # cast NumPy integer to a plain int for LongColumn
|
||||
|
||||
# Create table
|
||||
table_name = f"Analysis_Results_{dataset.getId()}"
|
||||
|
||||
# Define columns
|
||||
col1 = omero.grid.ImageColumn('Image', 'Source image', [])
|
||||
col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel value', [])
|
||||
col3 = omero.grid.DoubleColumn('MaxIntensity', 'Maximum pixel value', [])
|
||||
col4 = omero.grid.LongColumn('CellCount', 'Number of cells detected', [])
|
||||
|
||||
# Initialize table
|
||||
resources = conn.c.sf.sharedResources()
|
||||
repository_id = resources.repositories().descriptions[0].getId().getValue()
|
||||
table = resources.newTable(repository_id, table_name)
|
||||
table.initialize([col1, col2, col3, col4])
|
||||
|
||||
# Add data
|
||||
data_col1 = omero.grid.ImageColumn('Image', 'Source image', image_ids)
|
||||
data_col2 = omero.grid.DoubleColumn('MeanIntensity', 'Mean pixel value',
|
||||
mean_intensities)
|
||||
data_col3 = omero.grid.DoubleColumn('MaxIntensity', 'Maximum pixel value',
|
||||
max_intensities)
|
||||
data_col4 = omero.grid.LongColumn('CellCount', 'Number of cells detected',
|
||||
cell_counts)
|
||||
|
||||
table.addData([data_col1, data_col2, data_col3, data_col4])
|
||||
|
||||
# Get file and close table
|
||||
orig_file = table.getOriginalFile()
|
||||
table.close()
|
||||
|
||||
# Link to dataset
|
||||
orig_file_id = orig_file.id.val
|
||||
file_ann = omero.model.FileAnnotationI()
|
||||
file_ann.setFile(omero.model.OriginalFileI(orig_file_id, False))
|
||||
file_ann = conn.getUpdateService().saveAndReturnObject(file_ann)
|
||||
|
||||
link = omero.model.DatasetAnnotationLinkI()
|
||||
link.setParent(omero.model.DatasetI(dataset_id, False))
|
||||
link.setChild(omero.model.FileAnnotationI(file_ann.getId().getValue(), False))
|
||||
conn.getUpdateService().saveAndReturnObject(link)
|
||||
|
||||
print(f"Created and linked table with {len(image_ids)} rows")
|
||||
|
||||
# Query results
|
||||
table = resources.openTable(orig_file)
|
||||
|
||||
high_cell_count_rows = table.getWhereList(
|
||||
"(CellCount > 100)",
|
||||
variables={},
|
||||
start=0,
|
||||
stop=table.getNumberOfRows(),
|
||||
step=0
|
||||
)
|
||||
|
||||
print(f"Images with >100 cells: {len(high_cell_count_rows)}")
|
||||
|
||||
# Read those rows
|
||||
data = table.readCoordinates(high_cell_count_rows)
|
||||
for i in range(len(high_cell_count_rows)):
|
||||
img_id = data.columns[0].values[i]
|
||||
count = data.columns[3].values[i]
|
||||
print(f" Image {img_id}: {count} cells")
|
||||
|
||||
table.close()
|
||||
```
|
||||
|
||||
## Retrieve Tables from Objects
|
||||
|
||||
### Find Tables Attached to Dataset
|
||||
|
||||
```python
|
||||
# Get dataset
|
||||
dataset = conn.getObject("Dataset", dataset_id)
|
||||
|
||||
# List file annotations
|
||||
for ann in dataset.listAnnotations():
|
||||
if isinstance(ann, omero.gateway.FileAnnotationWrapper):
|
||||
file_obj = ann.getFile()
|
||||
file_name = file_obj.getName()
|
||||
|
||||
# Heuristic check by file name; adjust to your naming scheme (OMERO.tables files can also be recognized by the "OMERO.tables" mimetype)
|
||||
if "Table" in file_name or file_name.endswith(".h5"):
|
||||
print(f"Found table: {file_name} (ID: {file_obj.getId()})")
|
||||
|
||||
# Open and inspect
|
||||
resources = conn.c.sf.sharedResources()
|
||||
table = resources.openTable(file_obj._obj)
|
||||
|
||||
print(f" Rows: {table.getNumberOfRows()}")
|
||||
print(f" Columns:")
|
||||
for col in table.getHeaders():
|
||||
print(f" {col.name}")
|
||||
|
||||
table.close()
|
||||
```
|
||||
|
||||
## Updating Tables
|
||||
|
||||
### Append Rows
|
||||
|
||||
```python
|
||||
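# Open existing table (orig_file is a gateway wrapper, e.g. from conn.getObject("OriginalFile", file_id); ._obj is the underlying model object)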
|
||||
resources = conn.c.sf.sharedResources()
|
||||
table = resources.openTable(orig_file._obj)
|
||||
|
||||
# Prepare new data
|
||||
new_image_ids = [106, 107]
|
||||
new_intensities = [88.9, 92.3]
|
||||
new_categories = ["Good", "Excellent"]
|
||||
|
||||
# Create data columns
|
||||
data_col1 = omero.grid.LongColumn('ImageID', '', new_image_ids)
|
||||
data_col2 = omero.grid.DoubleColumn('MeanIntensity', '', new_intensities)
|
||||
data_col3 = omero.grid.StringColumn('Category', '', 64, new_categories)
|
||||
|
||||
# Append data
|
||||
table.addData([data_col1, data_col2, data_col3])
|
||||
|
||||
print(f"New row count: {table.getNumberOfRows()}")
|
||||
table.close()
|
||||
```
|
||||
|
||||
## Deleting Tables
|
||||
|
||||
### Delete Table File
|
||||
|
||||
```python
|
||||
# Get file object
|
||||
orig_file = conn.getObject("OriginalFile", file_id)
|
||||
|
||||
# Delete file (also deletes table)
|
||||
conn.deleteObjects("OriginalFile", [file_id], wait=True)
|
||||
print(f"Deleted table file {file_id}")
|
||||
```
|
||||
|
||||
### Unlink Table from Object
|
||||
|
||||
```python
|
||||
# Find annotation links
|
||||
dataset = conn.getObject("Dataset", dataset_id)
|
||||
|
||||
for ann in dataset.listAnnotations():
|
||||
if isinstance(ann, omero.gateway.FileAnnotationWrapper):
|
||||
if "Table" in ann.getFile().getName():
|
||||
# Delete link (keeps table, removes association)
|
||||
conn.deleteObjects("DatasetAnnotationLink",
|
||||
[ann.link.getId()],
|
||||
wait=True)
|
||||
print(f"Unlinked table from dataset")
|
||||
```
|
||||
|
||||
## Best Practices
|
||||
|
||||
1. **Descriptive Names**: Use meaningful table and column names
|
||||
2. **Close Tables**: Always close tables after use (ideally in a `try`/`finally` block) so server-side resources are released
|
||||
3. **String Length**: Set appropriate max length for string columns
|
||||
4. **Link to Objects**: Attach tables to relevant datasets or projects
|
||||
5. **Use References**: Use ImageColumn, RoiColumn for object references
|
||||
6. **Query Efficiently**: Use getWhereList() instead of reading all data
|
||||
7. **Document**: Add descriptions to columns
|
||||
8. **Version Control**: Include version info in table name or metadata
|
||||
9. **Batch Operations**: Add data in batches for better performance
|
||||
10. **Error Handling**: Check for None returns and handle exceptions
|
||||
|
||||
## Common Patterns
|
||||
|
||||
### ROI Measurements Table
|
||||
|
||||
```python
|
||||
# Table structure for ROI measurements
|
||||
columns = [
|
||||
omero.grid.ImageColumn('Image', 'Source image', []),
|
||||
omero.grid.RoiColumn('ROI', 'Measured ROI', []),
|
||||
omero.grid.LongColumn('ChannelIndex', 'Channel number', []),
|
||||
omero.grid.DoubleColumn('Area', 'ROI area in pixels', []),
|
||||
omero.grid.DoubleColumn('MeanIntensity', 'Mean intensity', []),
|
||||
omero.grid.DoubleColumn('IntegratedDensity', 'Sum of intensities', []),
|
||||
omero.grid.StringColumn('CellType', 'Cell classification', 32, [])
|
||||
]
|
||||
```
|
||||
|
||||
### Time Series Data Table
|
||||
|
||||
```python
|
||||
# Table structure for time series measurements
|
||||
columns = [
|
||||
omero.grid.ImageColumn('Image', 'Time series image', []),
|
||||
omero.grid.LongColumn('Timepoint', 'Time index', []),
|
||||
omero.grid.DoubleColumn('Timestamp', 'Time in seconds', []),
|
||||
omero.grid.DoubleColumn('Value', 'Measured value', []),
|
||||
omero.grid.StringColumn('Measurement', 'Type of measurement', 64, [])
|
||||
]
|
||||
```
|
||||
|
||||
### Screening Results Table
|
||||
|
||||
```python
|
||||
# Table structure for screening plate analysis
|
||||
columns = [
|
||||
omero.grid.WellColumn('Well', 'Plate well', []),
|
||||
omero.grid.LongColumn('FieldIndex', 'Field number', []),
|
||||
omero.grid.DoubleColumn('CellCount', 'Number of cells', []),
|
||||
omero.grid.DoubleColumn('Viability', 'Percent viable', []),
|
||||
omero.grid.StringColumn('Phenotype', 'Observed phenotype', 128, []),
|
||||
omero.grid.BoolColumn('Hit', 'Hit in screen', [])
|
||||
]
|
||||
```
|
||||
Reference in New Issue
Block a user