commit 24486941f6ec378c7c2af5e3aa1e5dbe7a4a7e14 Author: Zhongwei Li Date: Sat Nov 29 17:51:42 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..38fb410 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "editing-obo-ontologies", + "description": "Skills and tools for editing OBO format ontologies", + "version": "0.0.0-2025.11.28", + "author": { + "name": "Chris Mungall", + "email": "cjmungall@lbl.gov" + }, + "skills": [ + "./skills/editing-obo-ontologies" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..7a8dd4b --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# editing-obo-ontologies + +Skills and tools for editing OBO format ontologies diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..108f499 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,56 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:ai4curation/curation-skills:editing-obo-ontologies", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "2e46cb48c132c3e0073b5a32902b1a75911d251b", + "treeHash": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc", + "generatedAt": "2025-11-28T10:13:06.237881Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "editing-obo-ontologies", + "description": "Skills and tools for editing OBO format ontologies" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "0037bd2663121bc876aa389182b4f78ba7ba0bafab3b2c63ac7971a67637c44c" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "87335cfd3ce8c657dac9511fa97142d9791b2e79bd2e35c3a6251e3cd17f19bb" + 
}, + { + "path": "skills/editing-obo-ontologies/obo-grep.pl", + "sha256": "7383994a9f6be84943dd2a82544dee33469f096d7d4fb8d0eebae84e544e881a" + }, + { + "path": "skills/editing-obo-ontologies/SKILL.md", + "sha256": "77aab4f9875dcbaf0b04376073cbb2cd6ad50d63f9371095434f36329ba5747e" + }, + { + "path": "skills/editing-obo-ontologies/obo-checkin.pl", + "sha256": "7c386bdd2f7ade4a139dce22935472818ba336574227d4d57ad9bdce756c358c" + }, + { + "path": "skills/editing-obo-ontologies/obo-checkout.pl", + "sha256": "de1c16c2b96ea393d576667dc3eee8954bd6c92bd8d0a2680a2aa267c741628c" + } + ], + "dirSha256": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/editing-obo-ontologies/SKILL.md b/skills/editing-obo-ontologies/SKILL.md new file mode 100644 index 0000000..859aa62 --- /dev/null +++ b/skills/editing-obo-ontologies/SKILL.md @@ -0,0 +1,203 @@ +--- +name: editing-obo-ontologies +description: Skills and tools for editing OBO format ontologies, including querying terms, checking out/checking in individual terms, and following OBO format conventions. Do not use this if the source for the ontology you are editing is not in obo format (e.g. ofn) +--- + +# OBO Ontology Editing Guide + +This skill provides guidance and tools for editing ontologies in OBO format. 
+ +## Project Layout Conventions + +Most OBO ontologies follow a similar structure: +- Main development file is typically `src/ontology/{ontology}-edit.obo` +- Individual terms can be checked out to `terms/` directory for editing +- Some projects may have different layouts - check the project's documentation + +## Querying Ontology Terms + +Use the `obo-grep.pl` script for searching OBO files: + +- Look at a specific term by ID: + - `obo-grep.pl --noheader -r 'id: ONTO:0004177' src/ontology/{ontology}-edit.obo` +- All mentions of an ID: + - `obo-grep.pl --noheader -r 'ONTO:0004177' src/ontology/{ontology}-edit.obo` +- Search by regex (e.g., all mentions of hand or foot): + - `obo-grep.pl --noheader -r '(hand|foot)' src/ontology/{ontology}-edit.obo` +- Search is much faster than full file reads +- ONLY search the main edit file (usually `src/ontology/{ontology}-edit.obo`) +- DO NOT do manual greps or read entire files unless necessary + +## Before Making Edits + +- Read the request carefully and make a plan, especially if there is nuance +- If a PMID is mentioned, try to read it using: `aurelian fulltext PMID:NNNNNN` +- This also works for DOIs and URLs for scientific papers (if accessible) +- ALWAYS check proposed parent terms for consistency +- Check project-specific guidelines if available + +## Editing Workflow + +### IMPORTANT: Use Checkout/Checkin for Large Files + +- Do not edit large ontology files directly +- Use the checkout/checkin workflow for individual terms +- Check out a term: `obo-checkout.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]` +- This creates a single stanza file: `terms/{ontology}_1234567.obo` (note: colon replaced with underscore) +- Edit the small file in the `terms/` folder +- Check back in: `obo-checkin.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]` +- Checking in updates the edit file and removes the file from `terms/` +- You can edit multiple terms in one batch file if needed + +### Scripts Available + 
+This skill includes three essential scripts: +1. `obo-grep.pl` - Fast searching of OBO files +2. `obo-checkout.pl` - Extract terms to individual files for editing +3. `obo-checkin.pl` - Merge edited terms back into main file + +All scripts are available in your PATH when this skill is loaded. + +## OBO Format Guidelines + +### Basic Structure + +- Term ID format: `ONTO:NNNNNNN` (check project conventions for number of digits) +- Each term requires: + - `id:` - unique identifier + - `name:` - human-readable label + - `namespace:` - ontology namespace + - `def:` - definition with references in square brackets +- Use standard relationship types: `is_a`, `part_of`, `has_part`, etc. +- Follow existing term patterns for consistency + +### Handling New Term Requests (NTRs) + +- Check project conventions for temporary ID ranges +- Example: Some projects use ranges like `ONTO:777xxxx` for new terms +- Always check for ID clashes: `grep 'id: ONTO:777' src/ontology/{ontology}-edit.obo` +- NEVER guess ontology IDs - use search tools to find actual terms +- NEVER guess PMIDs for references - do web searches if needed + +### Citations and References + +- Cite publications appropriately: `def: "..." 
[PMID:nnnn, doi:mmmm]` +- Fetch full text when needed: `aurelian fulltext PMID:NNNNNN` (also works with DOIs and URLs) +- All synonyms should include proper citations +- Never use empty brackets `[]` without a source + +### Synonyms + +Synonyms should include proper attribution: + +**Correct:** +``` +synonym: "alternative name" EXACT [PMID:12345678] +synonym: "abbrev" EXACT ABBREVIATION [PMID:12345678] +``` + +### Relationships and Logical Definitions + +- All terms should have at least one `is_a` parent +- Logical definitions follow genus-differentia form +- Text definitions should mirror logical definitions +- Include source attribution for relationships when based on literature, e.g. `is_a: ONTO:0001082 {source="PMID:12345678"} ! general disease` + +### Logical Definitions (intersection_of) + +Example of proper intersection_of usage: + +``` +[Term] +id: ONTO:0000715 +name: specific disease +def: "A general disease that involves specific location." [PMID:12345678] +is_a: ONTO:0001082 ! general disease +intersection_of: ONTO:0001082 ! general disease +intersection_of: disease_has_location UBERON:0000029 ! specific location +``` + +Note that in OWL this corresponds to: `'specific disease' EquivalentTo 'general disease' and 'disease has location' some 'specific location'` + +## Obsoleting Terms + +- Obsolete terms should have NO logical axioms (`is_a`, `relationship`, `intersection_of`) +- Obsolete terms may have one `replaced_by` tag (exact replacement) +- Or multiple `consider` tags (suggested alternatives) +- Always include obsolescence reason and tracker reference + +Example of simple obsolescence: + +``` +[Term] +id: ONTO:0100334 +name: obsolete term name +property_value: IAO:0000231 OMO:0001000 +property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI +is_obsolete: true +replaced_by: ONTO:0100321 +``` + +Example with considerations instead of replacement: + +``` +[Term] +id: ONTO:0100229 +name: obsolete term name +def: "OBSOLETE. Original definition here." 
[original references] +property_value: IAO:0000231 OMO:0001000 +property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI +is_obsolete: true +consider: ONTO:0100259 +consider: ONTO:0100260 +``` + +### Important Notes on Obsolescence + +- Synonyms and xrefs can be migrated to replacement terms judiciously +- Never do complete merges with `alt_id` - use obsolescence with replacement instead +- No relationships should point to an obsolete term +- When obsoleting, you may need to rewire other terms to "skip" the obsoleted term + +## Metadata Best Practices + +- Link to issue trackers: `property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI` +- Sign new terms (don't tag pre-existing terms): + ``` + property_value: http://purl.org/dc/terms/creator https://orcid.org/0000-0001-2345-6789 + ``` +- All terms should have definitions with at least one reference (preferably PMID) +- Dates are typically auto-generated by build processes + +## Syntax Checking + +Validate OBO syntax using ROBOT: + +```bash +robot convert --catalog src/ontology/catalog-v001.xml \ + -i src/ontology/{ontology}-edit.obo \ + -f obo \ + -o {ontology}-edit.TMP.obo +``` + +Use `-vvv` flag for full stack trace if there are errors. 
+ +## Design Patterns + +Many OBO ontologies use DOSDP (Dead Simple Ontology Design Patterns): +- Check `src/patterns/dosdp-patterns/*.yaml` for project-specific patterns +- Follow existing patterns when creating similar terms +- Common patterns include: + - Location-based disease patterns + - Gene-related disease patterns + - Part-of hierarchies + - Abnormality patterns + + +## Important Reminders + +- NEVER guess identifiers of any kind +- If you include an identifier not provided by the user, you MUST verify it +- PMIDs can be checked with `aurelian` or web search +- Always follow project-specific conventions and check existing examples +- When in doubt, ask for clarification rather than making assumptions diff --git a/skills/editing-obo-ontologies/obo-checkin.pl b/skills/editing-obo-ontologies/obo-checkin.pl new file mode 100755 index 0000000..0fcc473 --- /dev/null +++ b/skills/editing-obo-ontologies/obo-checkin.pl @@ -0,0 +1,201 @@ +#!/usr/bin/perl -w + +use strict; +use FileHandle; +my $outdir = "terms"; +my $cmd; +my $dry_run = 0; +my $preserve_files = 0; +while ($ARGV[0] =~ /^\-/) { + my $opt = shift @ARGV; + if ($opt eq '-h' || $opt eq '--help') { + print usage(); + exit 0; + } + if ($opt eq '-d' || $opt eq '--outdir') { + $outdir = shift @ARGV; + } + if ($opt eq '-n' || $opt eq '--dry-run') { + $dry_run = 1; + } + if ($opt eq '-p' || $opt eq '--preserve-files') { + $preserve_files = 1; + } +} +`mkdir -p $outdir`; +my $id; +my $stanza = ""; +my @alt_ids = (); +my $fn = shift @ARGV; +# ensure ids are sorted +my @ids = sort @ARGV; + +my %new_stanza_map = (); + +foreach my $id (@ids) { + my $path = get_path($id); + # check if $id is a path to a file that exists + if ($id =~ m@[\./]@ && -e $id) { + open(F, $id) || die "no such file $id"; + my @lines = ; + close(F); + my $uber_stanza = join("", @lines); + my @stanzas_in_block = split(/\n\n/, $uber_stanza); + foreach my $stanza (@stanzas_in_block) { + # trim whitespace + $stanza =~ s/\s+$//; + if 
(!length($stanza)) { + next; + } + # check if stanza has id (note that stanza is multi-line) + if ($stanza =~ /id:\s+(\S+)/) { + my $stanza_id = $1; + $new_stanza_map{$stanza_id} = "$stanza\n\n"; + } + else { + die "no id found in $stanza"; + } + } + } + else { + open(F, $path) || die "no such file $path"; + my $stanza = ""; + while() { + chomp; + $stanza .= "$_\n"; + } + close(F); + if ($stanza =~ /id: (\S+)/) { + # check id matches + if ($1 ne $id) { + die "id mismatch $1 ne $id"; + } + } + else { + die "no id found in $path"; + } + $new_stanza_map{$id} = $stanza; + } +} + +open(W, ">$fn.tmp") || die "cannot write tp $fn.tmp"; + +my %stanza_map = (); +my %stanza_type_map = (); # To track stanza type (Term or Typedef) +$/ = "\n\n"; +open(F, $fn) || die "cannot open $fn"; +while() { + if ($_ =~ /id: (\S+)/) { + my $id = $1; + $stanza_map{$id} = $_; + + # Determine stanza type + if ($_ =~ /\[(\w+)\]/) { + $stanza_type_map{$id} = $1; + } + else { + # Default to Term if type not specified + $stanza_type_map{$id} = "Term"; + } + } + else { + print W $_; + } +} +close(F); + +# combine old and new stanzas +foreach my $id (sort keys %new_stanza_map) { + $stanza_map{$id} = $new_stanza_map{$id}; + + # Update stanza type for new stanzas + if ($new_stanza_map{$id} =~ /\[(\w+)\]/) { + my $s = $1; + $stanza_type_map{$id} = $s; + } + else { + # Default to Term if type not specified + $stanza_type_map{$id} = "Term"; + } +} + +# Sort ids by stanza type (Term first, then Typedef) and then alphabetically within each type +my @sorted_ids = sort { + # First compare stanza types (Term comes before Typedef) + my $type_compare = ($stanza_type_map{$a} eq "Typedef") <=> ($stanza_type_map{$b} eq "Typedef"); + + # If same type, sort alphabetically by ID + return $type_compare || $a cmp $b; +} keys %stanza_map; + +foreach my $id (@sorted_ids) { + my $s = $stanza_map{$id}; + # normalize line endings to strip trailing whitespace + $s =~ s@[\r\n]+$@\n\n@; + print W $s; +} +close(W); + +if 
($dry_run) { + print "dry run, no changes made\n"; +} +else { + `mv $fn.tmp $fn`; + # clear out @ids from $outdir + foreach my $id (@ids) { + my $path = get_path($id); + if (!$preserve_files) { + unlink $path; + } + } +} + +# get the path for an id +# the ID should be either: +# - an ontology curie, e.g. GO:0000001, in which case the path is terms/GO_0000001.obo +# - an OWL local name, e.g. GO_0000001, in which case the path is terms/GO_0000001.obo +# - a file name, e.g. terms/my_terms.obo, in which case the path is terms/my_terms.obo +sub get_path { + my ($id) = @_; + my $fn = "$id"; + $fn =~ s@:@_@; + # if the id has : or / in it and is a path to a file that exists, return it + if ($fn =~ m@[\./]@ && -e $fn) { + return $fn; + } + return "$outdir/$fn.obo" + +} + +sub w { + my ($id, $stanza) = @_; + my $path = get_path($id); + open(F, ">$path") || die($path); + print F $stanza; + close(F) +} + +sub scriptname { + my @p = split(/\//,$0); + pop @p; +} + + +sub usage { + my $sn = scriptname(); + + < 1} @ids; +my $num_ids = scalar(@ids); + +my $n = 0; +print "Reading $fn\n"; +open(F, $fn) || die "no such file $fn"; +while() { + if (m@^\[@) { + $n++; + if ($id) { + # check if id is in %idmap + if ($idmap{$id}) { + w($id, $stanza); + } + } + $stanza = ""; + $id = ""; + } + if (m@^id: (\S+)@) { + $id = $1; + } + if (m@^alt_id: (\S+)@) { + push(@alt_ids, $1); + } + $stanza .= $_; +} +close(F); +#print "n: $n\n"; +sub get_path { + my ($id) = @_; + my $fn = "$id"; + $fn =~ s@:@_@; + return "$outdir/$fn.obo" + +} + +sub w { + my ($id, $stanza) = @_; + my $path = get_path($id); + print "Checking out $id to $path\n"; + open(W, ">$path") || die($path); + print W $stanza; + close(W) +} + +sub scriptname { + my @p = split(/\//,$0); + pop @p; +} + + +sub usage { + my $sn = scriptname(); + + <) { + chomp; + push(@or,$_); + } + close(F); + $regexp = sprintf('id: (%s)\n', join('|',@or)); + } + if ($opt eq '-c' || $opt eq '--count') { + $count = 1; + } + if ($opt eq '--noheader') { + 
$noheader = 1; + } + if ($opt eq '--idfile') { + my $idfile = shift; + open(F,$idfile) || die $idfile; + my @ids = (); + while() { + chomp; + s@\s.*@@; + push(@ids, $_); + } + close(F); + $regexp = "id: (" . join("|", @ids) . ")\n"; + } + if ($opt eq '-v' || $opt eq '--neg') { + $negate = 1; + } +} + + +$/ = "\n\n"; + +my $n = 0; +while (@ARGV) { + my $f = pop @ARGV; + if ($f eq '-') { + *F=*STDIN; + } + else { + open(F,$f) || die "cannot open $f"; + } + my $hdr = 0; + while() { + if (!$hdr && $_ !~ /^\[/) { + print unless $noheader || $count; + $hdr = 1; + } + else { + if ($negate) { + if ($_ !~ /$regexp/) { + $n++; + print unless $count; + } + } + else { + if (/$regexp/) { + $n++; + print unless $count; + } + } + } + } +} +if ($count) { + print "$n\n"; +} + +exit 0; + +sub scriptname { + my @p = split(/\//,$0); + pop @p; +} + + +sub usage { + my $sn = scriptname(); + + <