Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 17:51:42 +08:00
commit 24486941f6
7 changed files with 682 additions and 0 deletions

View File

@@ -0,0 +1,12 @@
{
"name": "editing-obo-ontologies",
"description": "Skills and tools for editing OBO format ontologies",
"version": "0.0.0-2025.11.28",
"author": {
"name": "Chris Mungall",
"email": "cjmungall@lbl.gov"
},
"skills": [
"./skills/editing-obo-ontologies"
]
}

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
# editing-obo-ontologies
Skills and tools for editing OBO format ontologies

56
plugin.lock.json Normal file
View File

@@ -0,0 +1,56 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:ai4curation/curation-skills:editing-obo-ontologies",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "2e46cb48c132c3e0073b5a32902b1a75911d251b",
"treeHash": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc",
"generatedAt": "2025-11-28T10:13:06.237881Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "editing-obo-ontologies",
"description": "Skills and tools for editing OBO format ontologies"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "0037bd2663121bc876aa389182b4f78ba7ba0bafab3b2c63ac7971a67637c44c"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "87335cfd3ce8c657dac9511fa97142d9791b2e79bd2e35c3a6251e3cd17f19bb"
},
{
"path": "skills/editing-obo-ontologies/obo-grep.pl",
"sha256": "7383994a9f6be84943dd2a82544dee33469f096d7d4fb8d0eebae84e544e881a"
},
{
"path": "skills/editing-obo-ontologies/SKILL.md",
"sha256": "77aab4f9875dcbaf0b04376073cbb2cd6ad50d63f9371095434f36329ba5747e"
},
{
"path": "skills/editing-obo-ontologies/obo-checkin.pl",
"sha256": "7c386bdd2f7ade4a139dce22935472818ba336574227d4d57ad9bdce756c358c"
},
{
"path": "skills/editing-obo-ontologies/obo-checkout.pl",
"sha256": "de1c16c2b96ea393d576667dc3eee8954bd6c92bd8d0a2680a2aa267c741628c"
}
],
"dirSha256": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

View File

@@ -0,0 +1,203 @@
---
name: editing-obo-ontologies
description: Skills and tools for editing OBO format ontologies, including querying terms, checking out/checking in individual terms, and following OBO format conventions. Do not use this if the source for the ontology you are editing is not in obo format (e.g. ofn)
---
# OBO Ontology Editing Guide
This skill provides guidance and tools for editing ontologies in OBO format.
## Project Layout Conventions
Most OBO ontologies follow a similar structure:
- Main development file is typically `src/ontology/{ontology}-edit.obo`
- Individual terms can be checked out to `terms/` directory for editing
- Some projects may have different layouts - check the project's documentation
## Querying Ontology Terms
Use the `obo-grep.pl` script for searching OBO files:
- Look at a specific term by ID:
- `obo-grep.pl --noheader -r 'id: ONTO:0004177' src/ontology/{ontology}-edit.obo`
- All mentions of an ID:
- `obo-grep.pl --noheader -r 'ONTO:0004177' src/ontology/{ontology}-edit.obo`
- Search by regex (e.g., all mentions of hand or foot):
- `obo-grep.pl --noheader -r '(hand|foot)' src/ontology/{ontology}-edit.obo`
- Search is much faster than full file reads
- ONLY search the main edit file (usually `src/ontology/{ontology}-edit.obo`)
- DO NOT do manual greps or read entire files unless necessary
## Before Making Edits
- Read the request carefully and make a plan, especially if there is nuance
- If a PMID is mentioned, try to read it using: `aurelian fulltext PMID:NNNNNN`
- This also works for DOIs and URLs for scientific papers (if accessible)
- ALWAYS check proposed parent terms for consistency
- Check project-specific guidelines if available
## Editing Workflow
### IMPORTANT: Use Checkout/Checkin for Large Files
- Do not edit large ontology files directly
- Use the checkout/checkin workflow for individual terms
- Check out a term: `obo-checkout.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
- This creates a single stanza file: `terms/ONTO_1234567.obo` (the term ID with its colon replaced by an underscore, plus the `.obo` extension)
- Edit the small file in the `terms/` folder
- Check back in: `obo-checkin.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
- Checking in updates the edit file and removes the file from `terms/`
- You can edit multiple terms in one batch file if needed
### Scripts Available
This skill includes three essential scripts:
1. `obo-grep.pl` - Fast searching of OBO files
2. `obo-checkout.pl` - Extract terms to individual files for editing
3. `obo-checkin.pl` - Merge edited terms back into main file
All scripts are available in your PATH when this skill is loaded.
## OBO Format Guidelines
### Basic Structure
- Term ID format: `ONTO:NNNNNNN` (check project conventions for number of digits)
- Each term requires:
- `id:` - unique identifier
- `name:` - human-readable label
- `namespace:` - ontology namespace
- `def:` - definition with references in square brackets
- Use standard relationship types: `is_a`, `part_of`, `has_part`, etc.
- Follow existing term patterns for consistency
### Handling New Term Requests (NTRs)
- Check project conventions for temporary ID ranges
- Example: Some projects use ranges like `ONTO:777xxxx` for new terms
- Always check for ID clashes: `obo-grep.pl --noheader -r 'id: ONTO:777' src/ontology/{ontology}-edit.obo`
- NEVER guess ontology IDs - use search tools to find actual terms
- NEVER guess PMIDs for references - do web searches if needed
### Citations and References
- Cite publications appropriately: `def: "..." [PMID:nnnn, doi:mmmm]`
- Fetch full text when needed: `aurelian fulltext <PMID:nnn>` (also works with DOIs and URLs)
- All synonyms should include proper citations
- Never use empty brackets `[]` without a source
### Synonyms
Synonyms should include proper attribution:
**Correct:**
```
synonym: "alternative name" EXACT [PMID:12345678]
synonym: "abbrev" EXACT ABBREVIATION [PMID:12345678]
```
### Relationships and Logical Definitions
- All terms should have at least one `is_a` parent
- Logical definitions follow genus-differentia form
- Text definitions should mirror logical definitions
- Include source attribution for relationships when based on literature
### Logical Definitions (intersection_of)
Example of proper intersection_of usage:
```
[Term]
id: ONTO:0000715
name: specific disease
def: "A general disease that involves specific location." [PMID:12345678]
is_a: ONTO:0001082 ! general disease
intersection_of: ONTO:0001082 ! general disease
intersection_of: disease_has_location UBERON:0000029 ! specific location
```
Note that in OWL this corresponds to: `'specific disease' EquivalentTo 'general disease' and 'disease has location' some 'specific location'`
## Obsoleting Terms
- Obsolete terms should have NO logical axioms (`is_a`, `relationship`, `intersection_of`)
- Obsolete terms may have one `replaced_by` tag (exact replacement)
- Or multiple `consider` tags (suggested alternatives)
- Always include obsolescence reason and tracker reference
Example of simple obsolescence:
```
[Term]
id: ONTO:0100334
name: obsolete term name
property_value: IAO:0000231 OMO:0001000
property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
is_obsolete: true
replaced_by: ONTO:0100321
```
Example with considerations instead of replacement:
```
[Term]
id: ONTO:0100229
name: obsolete term name
def: "OBSOLETE. Original definition here." [original references]
property_value: IAO:0000231 OMO:0001000
property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
is_obsolete: true
consider: ONTO:0100259
consider: ONTO:0100260
```
### Important Notes on Obsolescence
- Synonyms and xrefs can be migrated to replacement terms judiciously
- Never do complete merges with `alt_id` - use obsolescence with replacement instead
- No relationships should point to an obsolete term
- When obsoleting, you may need to rewire other terms to "skip" the obsoleted term
## Metadata Best Practices
- Link to issue trackers: `property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI`
- Sign new terms (don't tag pre-existing terms):
```
property_value: http://purl.org/dc/terms/creator https://orcid.org/0000-0001-2345-6789
```
- All terms should have definitions with at least one reference (preferably PMID)
- Dates are typically auto-generated by build processes
## Syntax Checking
Validate OBO syntax using ROBOT:
```bash
robot convert --catalog src/ontology/catalog-v001.xml \
-i src/ontology/{ontology}-edit.obo \
-f obo \
-o {ontology}-edit.TMP.obo
```
Use `-vvv` flag for full stack trace if there are errors.
## Design Patterns
Many OBO ontologies use DOSDP (Dead Simple Ontology Design Patterns):
- Check `src/patterns/dosdp-patterns/*.yaml` for project-specific patterns
- Follow existing patterns when creating similar terms
- Common patterns include:
- Location-based disease patterns
- Gene-related disease patterns
- Part-of hierarchies
- Abnormality patterns
## Important Reminders
- NEVER guess identifiers of any kind
- If you include an identifier not provided by the user, you MUST verify it
- PMIDs can be checked with `aurelian` or web search
- Always follow project-specific conventions and check existing examples
- When in doubt, ask for clarification rather than making assumptions

View File

@@ -0,0 +1,201 @@
#!/usr/bin/perl -w
use strict;
use FileHandle;
# obo-checkin.pl: merge edited stanzas back into an OBO file.
#
# Invocation: [-d TERM-DIR] [-n] [-p] OBO-FILE ID-OR-FILE ...
#
# Every argument after OBO-FILE is either
#   - an id, whose stanza file is located with get_path(), or
#   - the path of an existing file holding one or more stanzas separated by
#     blank lines.
# All stanzas of OBO-FILE are read, the checked-in stanzas replace (or are
# added to) them by id, and the result is written to OBO-FILE.tmp: blocks
# without an id (the header) first, then every stanza sorted with Typedef
# stanzas last and ids in string order.  Unless --dry-run is given, the .tmp
# file then replaces OBO-FILE and the checked-in files are deleted (they are
# kept with --preserve-files).
use File::Path qw(make_path);

my $outdir = "terms";
my $dry_run = 0;
my $preserve_files = 0;

# Options are only recognised before the OBO-FILE argument.
while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        @ARGV or die "$opt requires a directory argument\n";
        $outdir = shift @ARGV;
    }
    elsif ($opt eq '-n' || $opt eq '--dry-run') {
        $dry_run = 1;
    }
    elsif ($opt eq '-p' || $opt eq '--preserve-files') {
        $preserve_files = 1;
    }
    else {
        # Unknown options were silently dropped before; keep going but say so.
        warn "ignoring unknown option $opt\n";
    }
}

my $fn = shift @ARGV;
defined $fn or die "no OBO file given\n" . usage();

# Create the term directory without going through a shell.
make_path($outdir);

# Return the type named by the first "[Type]" header line of a stanza,
# defaulting to "Term" when the stanza has no such line.
sub stanza_type {
    my ($text) = @_;
    return $text =~ /^\[(\w+)\]/m ? $1 : "Term";
}

# ensure ids are sorted
my @ids = sort @ARGV;

# id => replacement stanza text, collected from the checked-in files
my %new_stanza_map = ();
foreach my $id (@ids) {
    if ($id =~ m@[\./]@ && -e $id) {
        # The argument is an existing file: it may hold several stanzas.
        open(my $in, '<', $id) or die "cannot read $id: $!";
        my $uber_stanza = do { local $/; <$in> };
        close($in);
        $uber_stanza = "" unless defined $uber_stanza;
        foreach my $stanza (split(/\n\n/, $uber_stanza)) {
            # trim trailing whitespace; skip blocks that were only whitespace
            $stanza =~ s/\s+$//;
            next unless length($stanza);
            # the id tag must start a line, so alt_id: etc. cannot match
            if ($stanza =~ /^id:\s+(\S+)/m) {
                $new_stanza_map{$1} = "$stanza\n\n";
            }
            else {
                die "no id found in $stanza";
            }
        }
    }
    else {
        # The argument is an id: read its single stanza file.
        my $path = get_path($id);
        open(my $in, '<', $path) or die "cannot read $path: $!";
        my $stanza = "";
        while (my $line = <$in>) {
            chomp $line;
            $stanza .= "$line\n";    # guarantees a trailing newline
        }
        close($in);
        if ($stanza =~ /^id: (\S+)/m) {
            my $found_id = $1;
            # Compare with the colon replaced by an underscore on both sides
            # so that the local-name form (GO_0000001) is accepted as well.
            (my $found_key = $found_id) =~ s@:@_@;
            (my $wanted_key = $id) =~ s@:@_@;
            if ($found_key ne $wanted_key) {
                die "id mismatch $found_id ne $id";
            }
            # Key by the id written in the stanza so it replaces the
            # existing stanza rather than creating a second entry.
            $new_stanza_map{$found_id} = $stanza;
        }
        else {
            die "no id found in $path";
        }
    }
}

open(my $out, '>', "$fn.tmp") or die "cannot write to $fn.tmp: $!";
my %stanza_map = ();
my %stanza_type_map = ();    # id => stanza type (Term, Typedef, ...)
{
    # Read OBO-FILE one blank-line-separated block at a time.
    local $/ = "\n\n";
    open(my $in, '<', $fn) or die "cannot open $fn: $!";
    while (my $chunk = <$in>) {
        if ($chunk =~ /^id: (\S+)/m) {
            $stanza_map{$1} = $chunk;
            $stanza_type_map{$1} = stanza_type($chunk);
        }
        else {
            # Blocks without an id (the header) are copied through as read.
            print {$out} $chunk;
        }
    }
    close($in);
}

# combine old and new stanzas; checked-in text wins
foreach my $id (sort keys %new_stanza_map) {
    $stanza_map{$id} = $new_stanza_map{$id};
    $stanza_type_map{$id} = stanza_type($new_stanza_map{$id});
}

# Typedef stanzas go after everything else; ids in string order within
# each group.
my @sorted_ids = sort {
    my $a_is_typedef = $stanza_type_map{$a} eq "Typedef" ? 1 : 0;
    my $b_is_typedef = $stanza_type_map{$b} eq "Typedef" ? 1 : 0;
    ($a_is_typedef <=> $b_is_typedef) || ($a cmp $b)
} keys %stanza_map;

foreach my $id (@sorted_ids) {
    my $s = $stanza_map{$id};
    # End every stanza with exactly one blank line.  Matching zero or more
    # newlines also covers a stanza that had no trailing newline at all
    # (the last one in a file), which would otherwise run into the next.
    $s =~ s@[\r\n]*\z@\n\n@;
    print {$out} $s;
}
close($out) or die "cannot finish writing $fn.tmp: $!";

if ($dry_run) {
    # $fn.tmp is left in place; $fn and the term files are not touched.
    print "dry run, no changes made\n";
}
else {
    rename("$fn.tmp", $fn) or die "cannot replace $fn with $fn.tmp: $!";
    # clear out the checked-in files
    unless ($preserve_files) {
        foreach my $id (@ids) {
            unlink(get_path($id));
        }
    }
}
# Get the path of the file that holds the stanza(s) for a check-in argument.
# The argument should be either:
# - an ontology curie, e.g. GO:0000001, in which case the path is $outdir/GO_0000001.obo
# - an OWL local name, e.g. GO_0000001, in which case the path is $outdir/GO_0000001.obo
# - a file name, e.g. terms/my_terms.obo, in which case the path is terms/my_terms.obo
# ($outdir is the file-level term directory setting.)
sub get_path {
    my ($id) = @_;
    # An argument containing "." or "/" that names an existing file is used
    # verbatim.  This is tested before the colon is replaced so that the
    # result agrees with the file the main loop actually reads.
    if ($id =~ m@[\./]@ && -e $id) {
        return "$id";
    }
    my $fn = "$id";
    $fn =~ s@:@_@;    # only the first colon (the prefix separator) is replaced
    # Same test on the converted name, as before.
    if ($fn =~ m@[\./]@ && -e $fn) {
        return $fn;
    }
    return "$outdir/$fn.obo";
}
# Write a stanza to the file that get_path() gives for its id.
#   $id     - id (or file name) passed to get_path()
#   $stanza - text to write; any existing file content is replaced
# Dies if the file cannot be opened or fully written.  Returns 1.
sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    # three-argument open: the path is never parsed for a mode prefix
    open(my $fh, '>', $path) or die "cannot write $path: $!";
    print {$fh} $stanza;
    # buffered write errors only show up at close
    close($fh) or die "cannot finish writing $path: $!";
    return 1;
}
# Return the last "/"-separated component of $0, i.e. the name this script
# was invoked under without its directory part.
sub scriptname {
    my @components = split(m{/}, $0);
    return $components[-1];
}
# Build the help text printed for -h/--help.
# Returns the text as one string; the program name in it comes from
# scriptname().  The synopsis lists the options before OBO-FILE because the
# option loop stops at the first argument that does not start with "-".
sub usage {
    my $sn = scriptname();
    return <<EOM;
$sn [-d TERM-DIR] [-n] [-p] OBO-FILE TERM1 TERM2 ...
Checks in obo files from TERM-DIR into the OBO-FILE
Options (must be given before OBO-FILE):
  -d, --outdir TERM-DIR    directory holding the checked-out term files (default: terms)
  -n, --dry-run            write OBO-FILE.tmp but leave OBO-FILE and the term files untouched
  -p, --preserve-files     do not delete the term files after checking them in
  -h, --help               print this message
Example:
$sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081
This will check in the FOO:0000087 and FOO:0000081 terms from the terms directory
into the foo-edit.obo file.
EOM
}

View File

@@ -0,0 +1,94 @@
#!/usr/bin/perl -w
use strict;
use FileHandle;
# obo-checkout.pl: copy selected stanzas of an OBO file into one file each.
#
# Invocation: [-d TERM-DIR] OBO-FILE ID ...
#
# OBO-FILE is read line by line.  A line starting with "[" begins a new
# stanza; the stanza's id is taken from its "id: " line.  Every stanza whose
# id was requested is handed to w(), which writes it under TERM-DIR.
use File::Path qw(make_path);

my $outdir = "terms";

# Options are only recognised before the OBO-FILE argument.
while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        @ARGV or die "$opt requires a directory argument\n";
        $outdir = shift @ARGV;
    }
    else {
        # Unknown options were silently dropped before; keep going but say so.
        warn "ignoring unknown option $opt\n";
    }
}

my $fn = shift @ARGV;
defined $fn or die "no OBO file given\n" . usage();

# Create the term directory without going through a shell.
make_path($outdir);

my @ids = @ARGV;
my %idmap = map { $_ => 1 } @ids;
my %checked_out = ();    # ids that were actually written

my $id = "";             # id of the stanza being accumulated
my $stanza = "";         # text of the stanza being accumulated

# Write out the accumulated stanza if its id was requested.
my $flush = sub {
    if ($id && $idmap{$id}) {
        w($id, $stanza);
        $checked_out{$id} = 1;
    }
};

print "Reading $fn\n";
open(my $in, '<', $fn) or die "cannot read $fn: $!";
while (my $line = <$in>) {
    if ($line =~ m@^\[@) {
        # a new stanza starts: emit the previous one and reset
        $flush->();
        $stanza = "";
        $id = "";
    }
    if ($line =~ m@^id: (\S+)@) {
        $id = $1;
    }
    $stanza .= $line;
}
close($in);

# The final stanza has no following "[" line to trigger the flush, so emit
# it here; otherwise the last stanza of the file could never be checked out.
$flush->();

# Report requested ids that never appeared instead of failing silently.
foreach my $wanted (@ids) {
    warn "$wanted not found in $fn\n" unless $checked_out{$wanted};
}
# Build the file name a term is checked out to: the id with its first ":"
# turned into "_", plus ".obo", inside the file-level $outdir directory.
sub get_path {
    my ($id) = @_;
    (my $basename = "$id") =~ s{:}{_};
    return $outdir . "/" . $basename . ".obo";
}
# Write one checked-out stanza to the file that get_path() gives for its id,
# announcing the destination on standard output first.
#   $id     - id of the stanza
#   $stanza - text to write; any existing file content is replaced
# Dies if the file cannot be opened or fully written.  Returns 1.
sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    print "Checking out $id to $path\n";
    # three-argument open: the path is never parsed for a mode prefix
    open(my $fh, '>', $path) or die "cannot write $path: $!";
    print {$fh} $stanza;
    # buffered write errors only show up at close
    close($fh) or die "cannot finish writing $path: $!";
    return 1;
}
# Name of the running script: whatever follows the final "/" in $0.
sub scriptname {
    my @pieces = split(m@/@, $0);
    my $last = pop(@pieces);
    return $last;
}
# Build the help text printed for -h/--help.
# Returns the text as one string; the program name in it comes from
# scriptname().  The synopsis lists the options before OBO-FILE because the
# option loop stops at the first argument that does not start with "-".
sub usage {
    my $sn = scriptname();
    return <<EOM;
$sn [-d TERM-DIR] OBO-FILE TERM1 TERM2 ...
Checks out obo files into TERM-DIR from the OBO-FILE
Options (must be given before OBO-FILE):
  -d, --outdir TERM-DIR    directory to write the term files to (default: terms)
  -h, --help               print this message
Example:
$sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081
This will extract the FOO:0000087 and FOO:0000081 terms from the foo-edit.obo file
and write them to the terms directory, as files:
terms/FOO_0000087.obo
terms/FOO_0000081.obo
EOM
}

View File

@@ -0,0 +1,113 @@
#!/usr/bin/perl -w
use strict;
# obo-grep.pl: print the stanzas of OBO files that match a regular expression.
#
# Each input is read in blank-line-separated blocks.  The first block of a
# file is treated as the header when it does not start with "[" and is
# printed unless --noheader or --count is given.  Every other block is
# printed when it matches the pattern (or, with -v/--neg, when it does not).
# With -c/--count only the number of selected blocks is printed.
# A file name of "-" reads standard input.
my $regexp = '';
my $noheader;
my $negate;
my $count;

# Options are only recognised before the first file argument; a bare "-" is
# a file name, not an option.
while (@ARGV && $ARGV[0] =~ /^\-.+/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-r' || $opt eq '--regexp') {
        @ARGV or die "$opt requires a regular expression\n";
        $regexp = shift @ARGV;
    }
    elsif ($opt eq '--regexp-file') {
        # one alternative per line; select stanzas whose id tag matches any
        my $f = shift @ARGV;
        defined $f or die "$opt requires a file name\n";
        open(my $in, '<', $f) or die "cannot open $f: $!";
        my @or = ();
        while (my $line = <$in>) {
            chomp $line;
            push(@or, $line);
        }
        close($in);
        $regexp = sprintf('id: (%s)\n', join('|', @or));
    }
    elsif ($opt eq '-c' || $opt eq '--count') {
        $count = 1;
    }
    elsif ($opt eq '--noheader') {
        $noheader = 1;
    }
    elsif ($opt eq '--idfile') {
        # first whitespace-delimited field of each line is an id to select
        my $idfile = shift @ARGV;
        defined $idfile or die "$opt requires a file name\n";
        open(my $in, '<', $idfile) or die "cannot open $idfile: $!";
        my @ids = ();
        while (my $line = <$in>) {
            chomp $line;
            $line =~ s@\s.*@@;
            push(@ids, $line);
        }
        close($in);
        $regexp = "id: (" . join("|", @ids) . ")\n";
    }
    elsif ($opt eq '-v' || $opt eq '--neg') {
        $negate = 1;
    }
    else {
        # Unknown options were silently dropped before; keep going but say so.
        warn "ignoring unknown option $opt\n";
    }
}

# Compile once.  This also avoids Perl's empty-pattern rule: matching
# against an interpolated empty string reuses the last successful pattern,
# so running without -r used to match against an unrelated regex.  A
# compiled empty pattern simply matches every block.
my $re = qr/$regexp/;

# From here on, read input one blank-line-separated block at a time.
$/ = "\n\n";
my $n = 0;
while (@ARGV) {
    # take files in the order given (they used to be taken last-first)
    my $f = shift @ARGV;
    my $in;
    if ($f eq '-') {
        $in = \*STDIN;
    }
    else {
        open($in, '<', $f) or die "cannot open $f: $!";
    }
    my $hdr = 0;    # set once this file's header block has been seen
    while (my $chunk = <$in>) {
        if (!$hdr && $chunk !~ /^\[/) {
            print $chunk unless $noheader || $count;
            $hdr = 1;
            next;
        }
        my $matched = ($chunk =~ $re) ? 1 : 0;
        if ($negate ? !$matched : $matched) {
            $n++;
            print $chunk unless $count;
        }
    }
    close($in) unless $f eq '-';
}
if ($count) {
    print "$n\n";
}
exit 0;
# Strip any leading directories from $0 and return what is left.
sub scriptname {
    my @segments = split(/\//, $0);
    my $name = $segments[$#segments];
    return $name;
}
# Build the help text printed for -h/--help.
# Returns the text as one string; the program name in it comes from
# scriptname().  The option names listed are the ones the option loop
# accepts ("-r"/"--regexp", not "--r").
sub usage {
    my $sn = scriptname();
    return <<EOM;
$sn [--noheader] [-v|--neg] [-c|--count] [-r REGULAR-EXPRESSION] [--regexp-file FILE] [--idfile FILE] OBO-FILE
filters out stanzas from obo files
Options (must be given before OBO-FILE):
  -r, --regexp REGEXP    select stanzas matching REGEXP
  --regexp-file FILE     select stanzas whose id matches any pattern listed in FILE (one per line)
  --idfile FILE          select stanzas whose id is listed in FILE (first field of each line)
  -v, --neg              select the stanzas that do NOT match
  -c, --count            print only the number of selected stanzas
  --noheader             do not print the header
  -h, --help             print this message
Example:
$sn -r 'def:.*transcript' go.obo
EOM
}