Initial commit

2025-11-29 17:51:42 +08:00
commit 24486941f6
7 changed files with 682 additions and 0 deletions
--- a/.claude-plugin/plugin.json
+++ b/.claude-plugin/plugin.json
@@ -0,0 +1,12 @@
 {
  "name": "editing-obo-ontologies",
  "description": "Skills and tools for editing OBO format ontologies",
  "version": "0.0.0-2025.11.28",
  "author": {
    "name": "Chris Mungall",
    "email": "cjmungall@lbl.gov"
  },
  "skills": [
    "./skills/editing-obo-ontologies"
  ]
 }
--- a/README.md
+++ b/README.md
@@ -0,0 +1,3 @@
 # editing-obo-ontologies
 Skills and tools for editing OBO format ontologies
--- a/plugin.lock.json
+++ b/plugin.lock.json
@@ -0,0 +1,56 @@
 {
  "$schema": "internal://schemas/plugin.lock.v1.json",
  "pluginId": "gh:ai4curation/curation-skills:editing-obo-ontologies",
  "normalized": {
    "repo": null,
    "ref": "refs/tags/v20251128.0",
    "commit": "2e46cb48c132c3e0073b5a32902b1a75911d251b",
    "treeHash": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc",
    "generatedAt": "2025-11-28T10:13:06.237881Z",
    "toolVersion": "publish_plugins.py@0.2.0"
  },
  "origin": {
    "remote": "git@github.com:zhongweili/42plugin-data.git",
    "branch": "master",
    "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
    "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
  },
  "manifest": {
    "name": "editing-obo-ontologies",
    "description": "Skills and tools for editing OBO format ontologies"
  },
  "content": {
    "files": [
      {
        "path": "README.md",
        "sha256": "0037bd2663121bc876aa389182b4f78ba7ba0bafab3b2c63ac7971a67637c44c"
      },
      {
        "path": ".claude-plugin/plugin.json",
        "sha256": "87335cfd3ce8c657dac9511fa97142d9791b2e79bd2e35c3a6251e3cd17f19bb"
      },
      {
        "path": "skills/editing-obo-ontologies/obo-grep.pl",
        "sha256": "7383994a9f6be84943dd2a82544dee33469f096d7d4fb8d0eebae84e544e881a"
      },
      {
        "path": "skills/editing-obo-ontologies/SKILL.md",
        "sha256": "77aab4f9875dcbaf0b04376073cbb2cd6ad50d63f9371095434f36329ba5747e"
      },
      {
        "path": "skills/editing-obo-ontologies/obo-checkin.pl",
        "sha256": "7c386bdd2f7ade4a139dce22935472818ba336574227d4d57ad9bdce756c358c"
      },
      {
        "path": "skills/editing-obo-ontologies/obo-checkout.pl",
        "sha256": "de1c16c2b96ea393d576667dc3eee8954bd6c92bd8d0a2680a2aa267c741628c"
      }
    ],
    "dirSha256": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc"
  },
  "security": {
    "scannedAt": null,
    "scannerVersion": null,
    "flags": []
  }
 }
--- a/skills/editing-obo-ontologies/SKILL.md
+++ b/skills/editing-obo-ontologies/SKILL.md
@@ -0,0 +1,203 @@
 ---
 name: editing-obo-ontologies
 description: Skills and tools for editing OBO format ontologies, including querying terms, checking out/checking in individual terms, and following OBO format conventions. Do not use this if the source for the ontology you are editing is not in obo format (e.g. ofn)
 ---
 # OBO Ontology Editing Guide
 This skill provides guidance and tools for editing ontologies in OBO format.
 ## Project Layout Conventions
 Most OBO ontologies follow a similar structure:
 - Main development file is typically `src/ontology/{ontology}-edit.obo`
 - Individual terms can be checked out to `terms/` directory for editing
 - Some projects may have different layouts - check the project's documentation
 ## Querying Ontology Terms
 Use the `obo-grep.pl` script for searching OBO files:
 - Look at a specific term by ID:
    - `obo-grep.pl --noheader -r 'id: ONTO:0004177' src/ontology/{ontology}-edit.obo`
 - All mentions of an ID:
    - `obo-grep.pl --noheader -r 'ONTO:0004177' src/ontology/{ontology}-edit.obo`
 - Search by regex (e.g., all mentions of hand or foot):
    - `obo-grep.pl --noheader -r '(hand|foot)' src/ontology/{ontology}-edit.obo`
 - Search is much faster than full file reads
 - ONLY search the main edit file (usually `src/ontology/{ontology}-edit.obo`)
 - DO NOT do manual greps or read entire files unless necessary
 ## Before Making Edits
 - Read the request carefully and make a plan, especially if there is nuance
 - If a PMID is mentioned, try to read it using: `aurelian fulltext PMID:NNNNNN`
 - This also works for DOIs and URLs for scientific papers (if accessible)
 - ALWAYS check proposed parent terms for consistency
 - Check project-specific guidelines if available
 ## Editing Workflow
 ### IMPORTANT: Use Checkout/Checkin for Large Files
 - Do not edit large ontology files directly
 - Use the checkout/checkin workflow for individual terms
 - Check out a term: `obo-checkout.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
 - This creates one single-stanza file per ID, e.g. `terms/ONTO_1234567.obo` (note: colon replaced with underscore)
 - Edit the small file in the `terms/` folder
 - Check back in: `obo-checkin.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
 - Checking in updates the edit file and removes the file from `terms/`
 - You can edit multiple terms in one batch file if needed
 ### Scripts Available
 This skill includes three essential scripts:
 1. `obo-grep.pl` - Fast searching of OBO files
 2. `obo-checkout.pl` - Extract terms to individual files for editing
 3. `obo-checkin.pl` - Merge edited terms back into main file
 All scripts are available in your PATH when this skill is loaded.
 ## OBO Format Guidelines
 ### Basic Structure
 - Term ID format: `ONTO:NNNNNNN` (check project conventions for number of digits)
 - Each term requires:
  - `id:` - unique identifier
  - `name:` - human-readable label
  - `namespace:` - ontology namespace
  - `def:` - definition with references in square brackets
 - Use standard relationship types: `is_a`, `part_of`, `has_part`, etc.
 - Follow existing term patterns for consistency
 ### Handling New Term Requests (NTRs)
 - Check project conventions for temporary ID ranges
 - Example: Some projects use ranges like `ONTO:777xxxx` for new terms
 - Always check for ID clashes: `obo-grep.pl --noheader -r 'id: ONTO:777' src/ontology/{ontology}-edit.obo`
 - NEVER guess ontology IDs - use search tools to find actual terms
 - NEVER guess PMIDs for references - do web searches if needed
 ### Citations and References
 - Cite publications appropriately: `def: "..." [PMID:nnnn, doi:mmmm]`
 - Fetch full text when needed: `aurelian fulltext <PMID:nnn>` (also works with DOIs and URLs)
 - All synonyms should include proper citations
 - Never use empty brackets `[]` without a source
 ### Synonyms
 Synonyms should include proper attribution:
 **Correct:**
 ```
 synonym: "alternative name" EXACT [PMID:12345678]
 synonym: "abbrev" EXACT ABBREVIATION [PMID:12345678]
 ```
 ### Relationships and Logical Definitions
 - All terms should have at least one `is_a` parent
 - Logical definitions follow genus-differentia form
 - Text definitions should mirror logical definitions
 - Include source attribution for relationships when based on literature, e.g. `is_a: ONTO:0001082 {source="PMID:12345678"} ! general disease`
 ### Logical Definitions (intersection_of)
 Example of proper intersection_of usage:
 ```
 [Term]
 id: ONTO:0000715
 name: specific disease
 def: "A general disease that involves specific location." [PMID:12345678]
 is_a: ONTO:0001082 ! general disease
 intersection_of: ONTO:0001082 ! general disease
 intersection_of: disease_has_location UBERON:0000029 ! specific location
 ```
 Note that in OWL this corresponds to: `'specific disease' EquivalentTo 'general disease' and 'disease has location' some 'specific location'`
 ## Obsoleting Terms
 - Obsolete terms should have NO logical axioms (`is_a`, `relationship`, `intersection_of`)
 - Obsolete terms may have one `replaced_by` tag (exact replacement)
 - Or multiple `consider` tags (suggested alternatives)
 - Always include obsolescence reason and tracker reference
 Example of simple obsolescence:
 ```
 [Term]
 id: ONTO:0100334
 name: obsolete term name
 property_value: IAO:0000231 OMO:0001000
 property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
 is_obsolete: true
 replaced_by: ONTO:0100321
 ```
 Example with considerations instead of replacement:
 ```
 [Term]
 id: ONTO:0100229
 name: obsolete term name
 def: "OBSOLETE. Original definition here." [original references]
 property_value: IAO:0000231 OMO:0001000
 property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
 is_obsolete: true
 consider: ONTO:0100259
 consider: ONTO:0100260
 ```
 ### Important Notes on Obsolescence
 - Synonyms and xrefs can be migrated to replacement terms judiciously
 - Never do complete merges with `alt_id` - use obsolescence with replacement instead
 - No relationships should point to an obsolete term
 - When obsoleting, you may need to rewire other terms to "skip" the obsoleted term
 ## Metadata Best Practices
 - Link to issue trackers: `property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI`
 - Sign new terms (don't tag pre-existing terms):
  ```
  property_value: http://purl.org/dc/terms/creator https://orcid.org/0000-0001-2345-6789
  ```
 - All terms should have definitions with at least one reference (preferably PMID)
 - Dates are typically auto-generated by build processes
 ## Syntax Checking
 Validate OBO syntax using ROBOT:
 ```bash
 robot convert --catalog src/ontology/catalog-v001.xml \
  -i src/ontology/{ontology}-edit.obo \
  -f obo \
  -o {ontology}-edit.TMP.obo
 ```
 Use `-vvv` flag for full stack trace if there are errors.
 ## Design Patterns
 Many OBO ontologies use DOSDP (Dead Simple Ontology Design Patterns):
 - Check `src/patterns/dosdp-patterns/*.yaml` for project-specific patterns
 - Follow existing patterns when creating similar terms
 - Common patterns include:
  - Location-based disease patterns
  - Gene-related disease patterns
  - Part-of hierarchies
  - Abnormality patterns
 ## Important Reminders
 - NEVER guess identifiers of any kind
 - If you include an identifier not provided by the user, you MUST verify it
 - PMIDs can be checked with `aurelian` or web search
 - Always follow project-specific conventions and check existing examples
 - When in doubt, ask for clarification rather than making assumptions
--- a/skills/editing-obo-ontologies/obo-checkin.pl
+++ b/skills/editing-obo-ontologies/obo-checkin.pl
@@ -0,0 +1,201 @@
 #!/usr/bin/perl -w
 use strict;
 use FileHandle;
 # Main body of obo-checkin: merge edited stanzas from per-term files (or
 # batch files) back into the main OBO file, then remove the term files.
 #
 # Directory holding checked-out term files; override with -d/--outdir.
 my $outdir = "terms";
 my $dry_run = 0;
 my $preserve_files = 0;
 # Only leading options are consumed; parsing stops at the first argument
 # that does not start with '-'. The @ARGV guard avoids matching against
 # undef when the script is run with no (further) arguments.
 while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        $outdir = shift @ARGV;
        die "missing value for $opt\n" unless defined $outdir;
    }
    elsif ($opt eq '-n' || $opt eq '--dry-run') {
        $dry_run = 1;
    }
    elsif ($opt eq '-p' || $opt eq '--preserve-files') {
        $preserve_files = 1;
    }
 }
 # List-form system() bypasses the shell, so directory names containing
 # spaces or metacharacters are safe. Failure is not fatal here, matching
 # the previous behaviour: a missing term file is reported when it is opened.
 system('mkdir', '-p', $outdir);
 my $fn = shift @ARGV;
 die usage() unless defined $fn;
 # ensure ids are sorted
 my @ids = sort @ARGV;
 # Maps stanza id => replacement stanza text read from the term files.
 my %new_stanza_map = ();
 foreach my $id (@ids) {
    # An argument containing '.' or '/' that names an existing file is a
    # batch file holding one or more blank-line-separated stanzas.
    if ($id =~ m@[\./]@ && -e $id) {
        open(my $in, '<', $id) or die "no such file $id";
        my $uber_stanza = join("", <$in>);
        close($in);
        foreach my $stanza (split(/\n\n/, $uber_stanza)) {
            # trim trailing whitespace
            $stanza =~ s/\s+$//;
            next if !length($stanza);
            # The stanza is multi-line; anchor to a line start so that only
            # a real "id:" tag matches, never "alt_id:" or text inside a value.
            if ($stanza =~ /^id:\s+(\S+)/m) {
                $new_stanza_map{$1} = "$stanza\n\n";
            }
            else {
                die "no id found in $stanza";
            }
        }
    }
    else {
        my $path = get_path($id);
        open(my $in, '<', $path) or die "no such file $path";
        my $stanza = "";
        # Re-appending "\n" after chomp guarantees a trailing newline even
        # when the last line of the term file lacks one.
        while (my $line = <$in>) {
            chomp $line;
            $stanza .= "$line\n";
        }
        close($in);
        if ($stanza =~ /^id: (\S+)/m) {
            # The id inside the file must match the id given on the command line.
            if ($1 ne $id) {
                die "id mismatch $1 ne $id";
            }
        }
        else {
            die "no id found in $path";
        }
        $new_stanza_map{$id} = $stanza;
    }
 }
 # Open the source before creating the temp file so that a bad OBO-FILE
 # argument does not leave a stray "$fn.tmp" behind.
 open(my $src, '<', $fn) or die "cannot open $fn";
 open(my $out, '>', "$fn.tmp") or die "cannot write to $fn.tmp";
 my %stanza_map = ();
 my %stanza_type_map = (); # To track stanza type (Term or Typedef)
 {
    # Read blank-line-separated blocks; localized so the record separator
    # is restored once the source file has been read.
    local $/ = "\n\n";
    while (my $block = <$src>) {
        if ($block =~ /^id: (\S+)/m) {
            my $id = $1;
            $stanza_map{$id} = $block;
            # Determine stanza type; default to Term if type not specified
            $stanza_type_map{$id} = ($block =~ /^\[(\w+)\]/m) ? $1 : "Term";
        }
        else {
            # Blocks without an id (e.g. the header) are copied through
            # immediately, so they precede all stanzas in the output.
            print {$out} $block;
        }
    }
 }
 close($src);
 # combine old and new stanzas; checked-in stanzas replace existing ones
 foreach my $id (sort keys %new_stanza_map) {
    my $text = $new_stanza_map{$id};
    $stanza_map{$id} = $text;
    # Update stanza type for new stanzas; default to Term if type not specified
    $stanza_type_map{$id} = ($text =~ /^\[(\w+)\]/m) ? $1 : "Term";
 }
 # Sort ids by stanza type (Typedef last, everything else first) and then
 # alphabetically by ID within each group.
 my @sorted_ids = sort {
    (($stanza_type_map{$a} eq "Typedef") <=> ($stanza_type_map{$b} eq "Typedef"))
        || ($a cmp $b)
 } keys %stanza_map;
 foreach my $id (@sorted_ids) {
    my $s = $stanza_map{$id};
    # normalize the stanza terminator to exactly one blank line
    $s =~ s@[\r\n]+$@\n\n@;
    print {$out} $s;
 }
 close($out) or die "cannot close $fn.tmp: $!";
 if ($dry_run) {
    # NOTE(review): "$fn.tmp" is left in place on a dry run, as before;
    # presumably so the result can be inspected - confirm this is intended.
    print "dry run, no changes made\n";
 }
 else {
    # rename() replaces the original without going through the shell.
    rename("$fn.tmp", $fn) or die "cannot rename $fn.tmp to $fn: $!";
    # clear out @ids from $outdir
    unless ($preserve_files) {
        foreach my $id (@ids) {
            unlink get_path($id);
        }
    }
 }
 # get the path for an id
 # the ID should be either:
 # - an ontology curie, e.g. GO:0000001, in which case the path is terms/GO_0000001.obo
 # - an OWL local name, e.g. GO_0000001, in which case the path is terms/GO_0000001.obo
 # - a file name, e.g. terms/my_terms.obo, in which case the path is terms/my_terms.obo
 # Returns the path as a string; nothing is created or opened here.
 sub get_path {
    my ($id) = @_;
    # If the raw argument has . or / in it and is a path to a file that
    # exists, return it unchanged. This is the same test the main loop uses
    # to recognise batch files, so the file that was read is the file that
    # gets cleaned up, even when its path contains a colon.
    if ($id =~ m@[\./]@ && -e $id) {
        return $id;
    }
    my $fn = "$id";
    # Only the first colon is rewritten (GO:0000001 -> GO_0000001).
    $fn =~ s@:@_@;
    # Retained from the previous behaviour: the rewritten name may itself
    # be an existing file path.
    if ($fn =~ m@[\./]@ && -e $fn) {
        return $fn;
    }
    return "$outdir/$fn.obo";
 }
 # Write a stanza to the term file for $id (path resolved via get_path),
 # overwriting any existing file. Dies if the file cannot be written.
 sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    # Three-arg open with a lexical handle: the path is never interpreted
    # as a mode string, and the handle cannot clash with other globals.
    open(my $out, '>', $path) or die "cannot write $path: $!";
    print {$out} $stanza;
    # Buffered write errors only surface at close.
    close($out) or die "cannot close $path: $!";
 }
 # Return the last '/'-separated component of $0, i.e. the script's file name.
 sub scriptname {
    my @segments = split(m{/}, $0);
    return $segments[-1];
 }
 # Return the help text for this script as a single string.
 # Options are listed before OBO-FILE because the option parser only
 # consumes arguments that precede the first non-option argument.
 sub usage {
    my $sn = scriptname();
    return <<EOM;
 $sn [-d TERM-DIR] [-n] [-p] OBO-FILE TERM1 TERM2 ...
 Checks in obo files from TERM-DIR into the OBO-FILE
 Options (must precede OBO-FILE):
 -d, --outdir TERM-DIR   directory holding the term files (default: terms)
 -n, --dry-run           write OBO-FILE.tmp but do not replace OBO-FILE
 -p, --preserve-files    do not delete the term files after check-in
 -h, --help              show this message
 Example:
 $sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081
 This will check in the FOO:0000087 and FOO:0000081 terms from the terms directory
 into the foo-edit.obo file.
 EOM
 }
--- a/skills/editing-obo-ontologies/obo-checkout.pl
+++ b/skills/editing-obo-ontologies/obo-checkout.pl
@@ -0,0 +1,94 @@
 #!/usr/bin/perl -w
 use strict;
 use FileHandle;
 # Main body of obo-checkout: scan the OBO file line by line and write each
 # requested stanza to its own file under $outdir.
 #
 # Directory receiving the checked-out term files; override with -d/--outdir.
 my $outdir = "terms";
 # Only leading options are consumed. The @ARGV guard avoids matching
 # against undef when the script is run with no (further) arguments.
 while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        $outdir = shift @ARGV;
        die "missing value for $opt\n" unless defined $outdir;
    }
 }
 # List-form system() bypasses the shell, so directory names containing
 # spaces or metacharacters are safe.
 system('mkdir', '-p', $outdir) == 0 or die "cannot create directory $outdir";
 my $fn = shift @ARGV;
 die usage() unless defined $fn;
 my @ids = @ARGV;
 my %idmap = map {$_ => 1} @ids;
 # Requested ids for which a stanza was actually written.
 my %found = ();
 my $id = "";
 my $stanza = "";
 # Write out the stanza accumulated so far if its id was requested.
 my $flush = sub {
    if ($id && $idmap{$id}) {
        w($id, $stanza);
        $found{$id} = 1;
    }
 };
 print "Reading $fn\n";
 open(my $in, '<', $fn) or die "no such file $fn";
 while (my $line = <$in>) {
    # A line starting with '[' opens a new stanza, which closes the
    # previous one.
    if ($line =~ m@^\[@) {
        $flush->();
        $stanza = "";
        $id = "";
    }
    if ($line =~ m@^id: (\S+)@) {
        $id = $1;
    }
    $stanza .= $line;
 }
 close($in);
 # The final stanza has no following '[' line to trigger a flush, so it
 # must be written here; otherwise the last term in the file could never
 # be checked out.
 $flush->();
 # Report requested ids that were not present instead of silently
 # producing no file for them.
 foreach my $missing (grep { !$found{$_} } @ids) {
    warn "No stanza with id $missing found in $fn\n";
 }
 # Map a term id to its file under $outdir: the first ':' becomes '_' and
 # '.obo' is appended (e.g. FOO:0000087 -> $outdir/FOO_0000087.obo).
 sub get_path {
    my ($id) = @_;
    (my $basename = "$id") =~ s@:@_@;
    return "$outdir/$basename.obo";
 }
 # Write a stanza to the term file for $id (path resolved via get_path),
 # overwriting any existing file, and report the checkout on STDOUT.
 # Dies if the file cannot be written.
 sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    print "Checking out $id to $path\n";
    # Three-arg open with a lexical handle: the path is never interpreted
    # as a mode string, and the handle cannot clash with other globals.
    open(my $out, '>', $path) or die "cannot write $path: $!";
    print {$out} $stanza;
    # Buffered write errors only surface at close.
    close($out) or die "cannot close $path: $!";
 }
 # Return the last '/'-separated component of $0, i.e. the script's file name.
 sub scriptname {
    my @parts = split(m{/}, $0);
    my $last = $parts[-1];
    return $last;
 }
 # Return the help text for this script as a single string.
 # Options are listed before OBO-FILE because the option parser only
 # consumes arguments that precede the first non-option argument.
 sub usage {
    my $sn = scriptname();
    return <<EOM;
 $sn [-d TERM-DIR] OBO-FILE TERM1 TERM2 ...
 Checks out obo files into TERM-DIR from the OBO-FILE
 Options (must precede OBO-FILE):
 -d, --outdir TERM-DIR   directory to write the term files to (default: terms)
 -h, --help              show this message
 Example:
 $sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081
 This will extract the FOO:0000087 and FOO:0000081 terms from the foo-edit.obo file
 and write them to the terms directory, as files:
 terms/FOO_0000087.obo
 terms/FOO_0000081.obo
 EOM
 }
--- a/skills/editing-obo-ontologies/obo-grep.pl
+++ b/skills/editing-obo-ontologies/obo-grep.pl
@@ -0,0 +1,113 @@
 #!/usr/bin/perl -w
 use strict;
 # Main body of obo-grep: read OBO files as blank-line-separated blocks and
 # print (or count) the blocks that match a regular expression.
 my $regexp = '';
 my $noheader;
 my $negate;
 my $count;
 # Only leading options are consumed; a bare '-' is not an option (it means
 # STDIN as an input file). The @ARGV guard avoids matching against undef
 # when no arguments remain.
 while (@ARGV && $ARGV[0] =~ /^\-.+/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-r' || $opt eq '--regexp') {
        $regexp = shift @ARGV;
        die "missing value for $opt\n" unless defined $regexp;
    }
    elsif ($opt eq '--regexp-file') {
        # Each line of the file is one alternative for the stanza id.
        my $f = shift @ARGV;
        die "missing value for $opt\n" unless defined $f;
        open(my $rfh, '<', $f) or die "cannot open $f";
        chomp(my @or = <$rfh>);
        close($rfh);
        # Single-quoted on purpose: the regex engine interprets '\n'.
        $regexp = sprintf('id: (%s)\n', join('|',@or));
    }
    elsif ($opt eq '-c' || $opt eq '--count') {
        $count = 1;
    }
    elsif ($opt eq '--noheader') {
        $noheader = 1;
    }
    elsif ($opt eq '--idfile') {
        # Each line of the file starts with an id; anything after the
        # first whitespace on the line is ignored.
        my $idfile = shift @ARGV;
        die "missing value for $opt\n" unless defined $idfile;
        open(my $ifh, '<', $idfile) or die $idfile;
        my @ids = ();
        while (my $line = <$ifh>) {
            chomp $line;
            $line =~ s@\s.*@@;
            push(@ids, $line);
        }
        close($ifh);
        $regexp = "id: (" .  join("|", @ids) . ")\n";
    }
    elsif ($opt eq '-v' || $opt eq '--neg') {
        $negate = 1;
    }
 }
 # Compile the pattern once. Besides hoisting the compilation out of the
 # loop, this avoids Perl's empty-pattern rule: a match whose pattern
 # interpolates to '' reuses the last successful pattern, so running
 # without -r used to match against an unrelated regex. A compiled empty
 # qr// matches every block.
 my $re = qr/$regexp/;
 my $n = 0;
 {
    # Read blank-line-separated blocks (header, then one stanza per block).
    local $/ = "\n\n";
    # NOTE(review): files are taken from the END of the argument list, so
    # multiple files are processed in reverse order. Kept as-is for
    # compatibility - confirm whether that order is intended.
    while (@ARGV) {
        my $f = pop @ARGV;
        my $in;
        if ($f eq '-') {
            $in = \*STDIN;
        }
        else {
            open($in, '<', $f) or die "cannot open $f";
        }
        my $hdr = 0;
        while (my $block = <$in>) {
            # The first block that does not start with '[' is treated as
            # the file header and is passed through without filtering.
            if (!$hdr && $block !~ /^\[/) {
                print $block unless $noheader || $count;
                $hdr = 1;
            }
            elsif ($negate ? $block !~ $re : $block =~ $re) {
                $n++;
                print $block unless $count;
            }
        }
        close($in) unless $f eq '-';
    }
 }
 if ($count) {
    print "$n\n";
 }
 exit 0;
 # Return the last '/'-separated component of $0, i.e. the script's file name.
 sub scriptname {
    my @components = split(m{/}, $0);
    return $components[$#components];
 }
 # Return the help text for this script as a single string.
 # The synopsis lists the option spellings the parser actually accepts.
 sub usage {
    my $sn = scriptname();
    return <<EOM;
 $sn [--noheader] [-c] [-v|--neg] [-r REGULAR-EXPRESSION] [--regexp-file FILE] [--idfile FILE] OBO-FILE
 filters out stanzas from obo files
 Options (must precede OBO-FILE):
 -r, --regexp REGEX    print stanzas matching REGEX
 --regexp-file FILE    match stanzas whose id matches any regex listed in FILE
 --idfile FILE         match stanzas whose id is listed in FILE (one per line)
 -v, --neg             print stanzas that do NOT match
 -c, --count           print only the number of matching stanzas
 --noheader            do not print the file header
 -h, --help            show this message
 Use - as OBO-FILE to read from standard input.
 Example:
 $sn -r 'def:.*transcript' go.obo
 EOM
 }
		`@@ -0,0 +1,3 @@`
							`# editing-obo-ontologies`

							`Skills and tools for editing OBO format ontologies`