Initial commit
This commit is contained in:
12
.claude-plugin/plugin.json
Normal file
12
.claude-plugin/plugin.json
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
"name": "editing-obo-ontologies",
|
||||||
|
"description": "Skills and tools for editing OBO format ontologies",
|
||||||
|
"version": "0.0.0-2025.11.28",
|
||||||
|
"author": {
|
||||||
|
"name": "Chris Mungall",
|
||||||
|
"email": "cjmungall@lbl.gov"
|
||||||
|
},
|
||||||
|
"skills": [
|
||||||
|
"./skills/editing-obo-ontologies"
|
||||||
|
]
|
||||||
|
}
|
||||||
3
README.md
Normal file
3
README.md
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
# editing-obo-ontologies
|
||||||
|
|
||||||
|
Skills and tools for editing OBO format ontologies
|
||||||
56
plugin.lock.json
Normal file
56
plugin.lock.json
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
{
|
||||||
|
"$schema": "internal://schemas/plugin.lock.v1.json",
|
||||||
|
"pluginId": "gh:ai4curation/curation-skills:editing-obo-ontologies",
|
||||||
|
"normalized": {
|
||||||
|
"repo": null,
|
||||||
|
"ref": "refs/tags/v20251128.0",
|
||||||
|
"commit": "2e46cb48c132c3e0073b5a32902b1a75911d251b",
|
||||||
|
"treeHash": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc",
|
||||||
|
"generatedAt": "2025-11-28T10:13:06.237881Z",
|
||||||
|
"toolVersion": "publish_plugins.py@0.2.0"
|
||||||
|
},
|
||||||
|
"origin": {
|
||||||
|
"remote": "git@github.com:zhongweili/42plugin-data.git",
|
||||||
|
"branch": "master",
|
||||||
|
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
|
||||||
|
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
|
||||||
|
},
|
||||||
|
"manifest": {
|
||||||
|
"name": "editing-obo-ontologies",
|
||||||
|
"description": "Skills and tools for editing OBO format ontologies"
|
||||||
|
},
|
||||||
|
"content": {
|
||||||
|
"files": [
|
||||||
|
{
|
||||||
|
"path": "README.md",
|
||||||
|
"sha256": "0037bd2663121bc876aa389182b4f78ba7ba0bafab3b2c63ac7971a67637c44c"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": ".claude-plugin/plugin.json",
|
||||||
|
"sha256": "87335cfd3ce8c657dac9511fa97142d9791b2e79bd2e35c3a6251e3cd17f19bb"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/editing-obo-ontologies/obo-grep.pl",
|
||||||
|
"sha256": "7383994a9f6be84943dd2a82544dee33469f096d7d4fb8d0eebae84e544e881a"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/editing-obo-ontologies/SKILL.md",
|
||||||
|
"sha256": "77aab4f9875dcbaf0b04376073cbb2cd6ad50d63f9371095434f36329ba5747e"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/editing-obo-ontologies/obo-checkin.pl",
|
||||||
|
"sha256": "7c386bdd2f7ade4a139dce22935472818ba336574227d4d57ad9bdce756c358c"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"path": "skills/editing-obo-ontologies/obo-checkout.pl",
|
||||||
|
"sha256": "de1c16c2b96ea393d576667dc3eee8954bd6c92bd8d0a2680a2aa267c741628c"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"dirSha256": "8e04abba47a740d345735dd6c73d389dde13ec45d3117b23d4868f0345ee27cc"
|
||||||
|
},
|
||||||
|
"security": {
|
||||||
|
"scannedAt": null,
|
||||||
|
"scannerVersion": null,
|
||||||
|
"flags": []
|
||||||
|
}
|
||||||
|
}
|
||||||
203
skills/editing-obo-ontologies/SKILL.md
Normal file
203
skills/editing-obo-ontologies/SKILL.md
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
---
|
||||||
|
name: editing-obo-ontologies
|
||||||
|
description: Skills and tools for editing OBO format ontologies, including querying terms, checking out/checking in individual terms, and following OBO format conventions. Do not use this if the source for the ontology you are editing is not in obo format (e.g. ofn)
|
||||||
|
---
|
||||||
|
|
||||||
|
# OBO Ontology Editing Guide
|
||||||
|
|
||||||
|
This skill provides guidance and tools for editing ontologies in OBO format.
|
||||||
|
|
||||||
|
## Project Layout Conventions
|
||||||
|
|
||||||
|
Most OBO ontologies follow a similar structure:
|
||||||
|
- Main development file is typically `src/ontology/{ontology}-edit.obo`
|
||||||
|
- Individual terms can be checked out to `terms/` directory for editing
|
||||||
|
- Some projects may have different layouts - check the project's documentation
|
||||||
|
|
||||||
|
## Querying Ontology Terms
|
||||||
|
|
||||||
|
Use the `obo-grep.pl` script for searching OBO files:
|
||||||
|
|
||||||
|
- Look at a specific term by ID:
|
||||||
|
- `obo-grep.pl --noheader -r 'id: ONTO:0004177' src/ontology/{ontology}-edit.obo`
|
||||||
|
- All mentions of an ID:
|
||||||
|
- `obo-grep.pl --noheader -r 'ONTO:0004177' src/ontology/{ontology}-edit.obo`
|
||||||
|
- Search by regex (e.g., all mentions of hand or foot):
|
||||||
|
- `obo-grep.pl --noheader -r '(hand|foot)' src/ontology/{ontology}-edit.obo`
|
||||||
|
- Search is much faster than full file reads
|
||||||
|
- ONLY search the main edit file (usually `src/ontology/{ontology}-edit.obo`)
|
||||||
|
- DO NOT do manual greps or read entire files unless necessary
|
||||||
|
|
||||||
|
## Before Making Edits
|
||||||
|
|
||||||
|
- Read the request carefully and make a plan, especially if there is nuance
|
||||||
|
- If a PMID is mentioned, try to read it using: `aurelian fulltext PMID:NNNNNN`
|
||||||
|
- This also works for DOIs and URLs for scientific papers (if accessible)
|
||||||
|
- ALWAYS check proposed parent terms for consistency
|
||||||
|
- Check project-specific guidelines if available
|
||||||
|
|
||||||
|
## Editing Workflow
|
||||||
|
|
||||||
|
### IMPORTANT: Use Checkout/Checkin for Large Files
|
||||||
|
|
||||||
|
- Do not edit large ontology files directly
|
||||||
|
- Use the checkout/checkin workflow for individual terms
|
||||||
|
- Check out a term: `obo-checkout.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
|
||||||
|
- This creates a single stanza file named after the term ID, e.g. `terms/ONTO_1234567.obo` (note: the colon in the ID is replaced with an underscore)
|
||||||
|
- Edit the small file in the `terms/` folder
|
||||||
|
- Check back in: `obo-checkin.pl src/ontology/{ontology}-edit.obo ONTO:1234567 [OTHER_IDS]`
|
||||||
|
- Checking in updates the edit file and removes the file from `terms/`
|
||||||
|
- You can edit multiple terms in one batch file if needed
|
||||||
|
|
||||||
|
### Scripts Available
|
||||||
|
|
||||||
|
This skill includes three essential scripts:
|
||||||
|
1. `obo-grep.pl` - Fast searching of OBO files
|
||||||
|
2. `obo-checkout.pl` - Extract terms to individual files for editing
|
||||||
|
3. `obo-checkin.pl` - Merge edited terms back into main file
|
||||||
|
|
||||||
|
All scripts are available in your PATH when this skill is loaded.
|
||||||
|
|
||||||
|
## OBO Format Guidelines
|
||||||
|
|
||||||
|
### Basic Structure
|
||||||
|
|
||||||
|
- Term ID format: `ONTO:NNNNNNN` (check project conventions for number of digits)
|
||||||
|
- Each term requires:
|
||||||
|
- `id:` - unique identifier
|
||||||
|
- `name:` - human-readable label
|
||||||
|
- `namespace:` - ontology namespace
|
||||||
|
- `def:` - definition with references in square brackets
|
||||||
|
- Use standard relationship types: `is_a`, `part_of`, `has_part`, etc.
|
||||||
|
- Follow existing term patterns for consistency
|
||||||
|
|
||||||
|
### Handling New Term Requests (NTRs)
|
||||||
|
|
||||||
|
- Check project conventions for temporary ID ranges
|
||||||
|
- Example: Some projects use ranges like `ONTO:777xxxx` for new terms
|
||||||
|
- Always check for ID clashes: `grep 'id: ONTO:777' src/ontology/{ontology}-edit.obo`
|
||||||
|
- NEVER guess ontology IDs - use search tools to find actual terms
|
||||||
|
- NEVER guess PMIDs for references - do web searches if needed
|
||||||
|
|
||||||
|
### Citations and References
|
||||||
|
|
||||||
|
- Cite publications appropriately: `def: "..." [PMID:nnnn, doi:mmmm]`
|
||||||
|
- Fetch full text when needed: `aurelian fulltext <PMID:nnn>` (also works with DOIs and URLs)
|
||||||
|
- All synonyms should include proper citations
|
||||||
|
- Never use empty brackets `[]` without a source
|
||||||
|
|
||||||
|
### Synonyms
|
||||||
|
|
||||||
|
Synonyms should include proper attribution:
|
||||||
|
|
||||||
|
**Correct:**
|
||||||
|
```
|
||||||
|
synonym: "alternative name" EXACT [PMID:12345678]
|
||||||
|
synonym: "abbrev" EXACT ABBREVIATION [PMID:12345678]
|
||||||
|
```
|
||||||
|
|
||||||
|
### Relationships and Logical Definitions
|
||||||
|
|
||||||
|
- All terms should have at least one `is_a` parent
|
||||||
|
- Logical definitions follow genus-differentia form
|
||||||
|
- Text definitions should mirror logical definitions
|
||||||
|
- Include source attribution for relationships when based on literature, e.g. `relationship: part_of ONTO:0001082 {source="PMID:12345678"} ! general disease`
|
||||||
|
|
||||||
|
### Logical Definitions (intersection_of)
|
||||||
|
|
||||||
|
Example of proper intersection_of usage:
|
||||||
|
|
||||||
|
```
|
||||||
|
[Term]
|
||||||
|
id: ONTO:0000715
|
||||||
|
name: specific disease
|
||||||
|
def: "A general disease that involves specific location." [PMID:12345678]
|
||||||
|
is_a: ONTO:0001082 ! general disease
|
||||||
|
intersection_of: ONTO:0001082 ! general disease
|
||||||
|
intersection_of: disease_has_location UBERON:0000029 ! specific location
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that in OWL this corresponds to: `'specific disease' EquivalentTo 'general disease' and 'disease has location' some 'specific location'`
|
||||||
|
|
||||||
|
## Obsoleting Terms
|
||||||
|
|
||||||
|
- Obsolete terms should have NO logical axioms (`is_a`, `relationship`, `intersection_of`)
|
||||||
|
- Obsolete terms may have one `replaced_by` tag (exact replacement)
|
||||||
|
- Or multiple `consider` tags (suggested alternatives)
|
||||||
|
- Always include obsolescence reason and tracker reference
|
||||||
|
|
||||||
|
Example of simple obsolescence:
|
||||||
|
|
||||||
|
```
|
||||||
|
[Term]
|
||||||
|
id: ONTO:0100334
|
||||||
|
name: obsolete term name
|
||||||
|
property_value: IAO:0000231 OMO:0001000
|
||||||
|
property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
|
||||||
|
is_obsolete: true
|
||||||
|
replaced_by: ONTO:0100321
|
||||||
|
```
|
||||||
|
|
||||||
|
Example with considerations instead of replacement:
|
||||||
|
|
||||||
|
```
|
||||||
|
[Term]
|
||||||
|
id: ONTO:0100229
|
||||||
|
name: obsolete term name
|
||||||
|
def: "OBSOLETE. Original definition here." [original references]
|
||||||
|
property_value: IAO:0000231 OMO:0001000
|
||||||
|
property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI
|
||||||
|
is_obsolete: true
|
||||||
|
consider: ONTO:0100259
|
||||||
|
consider: ONTO:0100260
|
||||||
|
```
|
||||||
|
|
||||||
|
### Important Notes on Obsolescence
|
||||||
|
|
||||||
|
- Synonyms and xrefs can be migrated to replacement terms judiciously
|
||||||
|
- Never do complete merges with `alt_id` - use obsolescence with replacement instead
|
||||||
|
- No relationships should point to an obsolete term
|
||||||
|
- When obsoleting, you may need to rewire other terms to "skip" the obsoleted term
|
||||||
|
|
||||||
|
## Metadata Best Practices
|
||||||
|
|
||||||
|
- Link to issue trackers: `property_value: IAO:0000233 "https://github.com/{project}/issues/XXXX" xsd:anyURI`
|
||||||
|
- Sign new terms (don't tag pre-existing terms):
|
||||||
|
```
|
||||||
|
property_value: http://purl.org/dc/terms/creator https://orcid.org/0000-0001-2345-6789
|
||||||
|
```
|
||||||
|
- All terms should have definitions with at least one reference (preferably PMID)
|
||||||
|
- Dates are typically auto-generated by build processes
|
||||||
|
|
||||||
|
## Syntax Checking
|
||||||
|
|
||||||
|
Validate OBO syntax using ROBOT:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
robot convert --catalog src/ontology/catalog-v001.xml \
|
||||||
|
-i src/ontology/{ontology}-edit.obo \
|
||||||
|
-f obo \
|
||||||
|
-o {ontology}-edit.TMP.obo
|
||||||
|
```
|
||||||
|
|
||||||
|
Use `-vvv` flag for full stack trace if there are errors.
|
||||||
|
|
||||||
|
## Design Patterns
|
||||||
|
|
||||||
|
Many OBO ontologies use DOSDP (Dead Simple Ontology Design Patterns):
|
||||||
|
- Check `src/patterns/dosdp-patterns/*.yaml` for project-specific patterns
|
||||||
|
- Follow existing patterns when creating similar terms
|
||||||
|
- Common patterns include:
|
||||||
|
- Location-based disease patterns
|
||||||
|
- Gene-related disease patterns
|
||||||
|
- Part-of hierarchies
|
||||||
|
- Abnormality patterns
|
||||||
|
|
||||||
|
|
||||||
|
## Important Reminders
|
||||||
|
|
||||||
|
- NEVER guess identifiers of any kind
|
||||||
|
- If you include an identifier not provided by the user, you MUST verify it
|
||||||
|
- PMIDs can be checked with `aurelian` or web search
|
||||||
|
- Always follow project-specific conventions and check existing examples
|
||||||
|
- When in doubt, ask for clarification rather than making assumptions
|
||||||
201
skills/editing-obo-ontologies/obo-checkin.pl
Executable file
201
skills/editing-obo-ontologies/obo-checkin.pl
Executable file
@@ -0,0 +1,201 @@
|
|||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use FileHandle;
|
||||||
|
# ---------------------------------------------------------------------------
# Command-line options. All options must appear BEFORE the OBO file argument,
# because parsing stops at the first argument that does not start with '-'.
# ---------------------------------------------------------------------------
my $outdir = "terms";       # directory holding the checked-out term files
my $dry_run = 0;            # -n: build OBO-FILE.tmp but do not replace OBO-FILE
my $preserve_files = 0;     # -p: keep the term files after checking them in

# The @ARGV guard avoids matching against an undefined value when the script
# is run with no arguments at all.
while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        $outdir = shift @ARGV;
        die "option $opt requires a directory argument\n" . usage()
            unless defined $outdir && length $outdir;
    }
    elsif ($opt eq '-n' || $opt eq '--dry-run') {
        $dry_run = 1;
    }
    elsif ($opt eq '-p' || $opt eq '--preserve-files') {
        $preserve_files = 1;
    }
    else {
        # Refuse unknown options: silently ignoring a mistyped --dry-run
        # would otherwise modify the ontology file for real.
        die "unknown option: $opt\n" . usage();
    }
}

# Make sure the term directory exists. The list form of system() bypasses the
# shell, so directory names containing spaces or metacharacters are safe.
system('mkdir', '-p', '--', $outdir) == 0
    or die "cannot create directory $outdir\n";
|
||||||
|
# ---------------------------------------------------------------------------
# Collect the replacement stanzas named on the command line.
# ---------------------------------------------------------------------------
my $fn = shift @ARGV;
die "no OBO file specified\n" . usage() unless defined $fn;

# ensure ids are sorted
my @ids = sort @ARGV;

# stanza id => stanza text to be written into the main file
my %new_stanza_map = ();

foreach my $id (@ids) {
    # An argument that contains '.' or '/' and names an existing file is
    # treated as a batch file holding one or more blank-line-separated stanzas.
    if ($id =~ m@[\./]@ && -e $id) {
        open(my $in, '<', $id) or die "cannot open $id: $!";
        my $uber_stanza = do { local $/; <$in> };
        close($in);
        $uber_stanza = "" unless defined $uber_stanza;    # empty file

        foreach my $stanza (split(/\n{2,}/, $uber_stanza)) {
            # trim trailing whitespace and skip empty chunks
            $stanza =~ s/\s+$//;
            next unless length($stanza);

            # The id tag is looked up at the start of a line so that tags
            # such as alt_id: can never be mistaken for it.
            if ($stanza =~ /^id:\s+(\S+)/m) {
                $new_stanza_map{$1} = "$stanza\n\n";
            }
            else {
                die "no id found in $stanza";
            }
        }
    }
    else {
        # Otherwise the argument identifies a single checked-out term file.
        my $path = get_path($id);
        open(my $in, '<', $path) or die "cannot open $path: $!";
        my $stanza = "";
        while (my $line = <$in>) {
            chomp $line;
            $stanza .= "$line\n";
        }
        close($in);

        if ($stanza =~ /^id: (\S+)/m) {
            my $stanza_id = $1;
            # Compare with the first ':' mapped to '_' on both sides so the
            # OWL local-name form (FOO_0000001) is accepted for FOO:0000001.
            (my $wanted = $id) =~ s/:/_/;
            (my $actual = $stanza_id) =~ s/:/_/;
            if ($actual ne $wanted) {
                die "id mismatch $stanza_id ne $id";
            }
            # Key by the id found in the stanza, so that it replaces the
            # existing stanza rather than being added under a second key.
            $new_stanza_map{$stanza_id} = $stanza;
        }
        else {
            die "no id found in $path";
        }
    }
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Merge the replacement stanzas into the main file.
#
# The main file is read in blank-line-separated records. Records without an
# id tag (in practice the header) are copied to OBO-FILE.tmp as they are read;
# all stanzas are then written after them, Typedefs last and otherwise sorted
# by id. Unless this is a dry run, OBO-FILE.tmp then replaces OBO-FILE and the
# checked-in term files are removed.
# ---------------------------------------------------------------------------
my %stanza_map = ();
my %stanza_type_map = (); # To track stanza type (Term or Typedef)

# Stanza type is the bracketed tag that opens the stanza, e.g. [Term];
# stanzas without one are treated as Term.
my $stanza_type_of = sub {
    my ($text) = @_;
    return $text =~ /^\[(\w+)\]/m ? $1 : "Term";
};

open(my $in, '<', $fn) or die "cannot open $fn: $!";
open(my $out, '>', "$fn.tmp") or die "cannot write to $fn.tmp: $!";
{
    # Read blank-line-separated records; localized so that the record
    # separator is restored once the file has been read.
    local $/ = "\n\n";
    while (my $record = <$in>) {
        if ($record =~ /^id: (\S+)/m) {
            my $id = $1;
            $stanza_map{$id} = $record;
            $stanza_type_map{$id} = $stanza_type_of->($record);
        }
        else {
            print $out $record;
        }
    }
}
close($in);

# combine old and new stanzas (new ones win)
foreach my $id (keys %new_stanza_map) {
    $stanza_map{$id} = $new_stanza_map{$id};
    $stanza_type_map{$id} = $stanza_type_of->($new_stanza_map{$id});
}

# Sort ids by stanza type (Typedef last) and then alphabetically within each group
my @sorted_ids = sort {
    ($stanza_type_map{$a} eq "Typedef") <=> ($stanza_type_map{$b} eq "Typedef")
        || $a cmp $b
} keys %stanza_map;

foreach my $id (@sorted_ids) {
    my $s = $stanza_map{$id};
    # End every stanza with exactly one blank line, including a final stanza
    # that had no trailing newline at all.
    $s =~ s/[\r\n]*\z/\n\n/;
    print $out $s;
}
# Buffered write errors only surface at close, so check it.
close($out) or die "error writing $fn.tmp: $!";

if ($dry_run) {
    # $fn.tmp is left in place; the main file and the term files are untouched.
    print "dry run, no changes made\n";
}
else {
    # rename() avoids passing file names through the shell; the temporary
    # file lives next to the target, so this is a same-filesystem replace.
    rename("$fn.tmp", $fn) or die "cannot replace $fn with $fn.tmp: $!";

    # clear out @ids from $outdir
    unless ($preserve_files) {
        foreach my $id (@ids) {
            my $path = get_path($id);
            unlink($path) or warn "could not remove $path: $!\n";
        }
    }
}
|
||||||
|
|
||||||
|
# get the path for an id
# the ID should be either:
# - an ontology curie, e.g. GO:0000001, in which case the path is terms/GO_0000001.obo
# - an OWL local name, e.g. GO_0000001, in which case the path is terms/GO_0000001.obo
# - a file name, e.g. terms/my_terms.obo, in which case the path is terms/my_terms.obo
#
# Argument: the id or file name as given on the command line.
# Returns:  the path of the corresponding file; for ids this is inside $outdir.
sub get_path {
    my ($id) = @_;

    # If the argument has '.' or '/' in it and names a file that exists,
    # return it unchanged. The argument is tested exactly as given, which is
    # the same test the main loop uses to decide that it is a batch file.
    if ($id =~ m@[\./]@ && -e $id) {
        return $id;
    }

    # Map the first ':' to '_' to turn a curie into a file name.
    my $fn = "$id";
    $fn =~ s@:@_@;

    # Also accept an existing file under the mapped name.
    if ($fn =~ m@[\./]@ && -e $fn) {
        return $fn;
    }
    return "$outdir/$fn.obo";
}
|
||||||
|
|
||||||
|
# Writes a stanza to the file that get_path() returns for the given id,
# replacing any existing content.
#
# Arguments: the id (or file name) and the stanza text.
# Dies if the file cannot be opened or if closing it reports an error.
sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    # Three-argument open with a lexical handle: the mode can never be
    # influenced by the file name and the handle cannot leak into other code.
    open(my $fh, '>', $path) or die "cannot write $path: $!";
    print $fh $stanza;
    close($fh) or die "error closing $path: $!";
}
|
||||||
|
|
||||||
|
# Returns the last '/'-separated component of $0, i.e. the name the script
# was invoked under without any leading directories.
sub scriptname {
    my @segments = split(m{/}, $0);
    return $segments[-1];
}
|
||||||
|
|
||||||
|
|
||||||
|
# Returns the help text for this script as a single string.
# The synopsis lists options before OBO-FILE because the option parser only
# looks at leading arguments.
sub usage {
    my $sn = scriptname();

    return <<EOM;
$sn [-d TERM-DIR] [-n] [-p] OBO-FILE TERM1 TERM2 ...

Checks in obo files from TERM-DIR into the OBO-FILE

Options (must be given before OBO-FILE):

  -d, --outdir TERM-DIR   directory containing the checked-out terms (default: terms)
  -n, --dry-run           write OBO-FILE.tmp but leave OBO-FILE and the term files untouched
  -p, --preserve-files    do not delete the term files after checking them in
  -h, --help              print this message

Each TERM is either a term ID (e.g. FOO:0000087) or the path of an existing
file containing one or more stanzas.

Example:

$sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081

This will check in the FOO:0000087 and FOO:0000081 terms from the terms directory
into the foo-edit.obo file.

EOM
}
|
||||||
|
|
||||||
94
skills/editing-obo-ontologies/obo-checkout.pl
Executable file
94
skills/editing-obo-ontologies/obo-checkout.pl
Executable file
@@ -0,0 +1,94 @@
|
|||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use FileHandle;
|
||||||
|
# ---------------------------------------------------------------------------
# Command-line options. All options must appear BEFORE the OBO file argument,
# because parsing stops at the first argument that does not start with '-'.
# ---------------------------------------------------------------------------
my $outdir = "terms";       # directory the term files are written to

# The @ARGV guard avoids matching against an undefined value when the script
# is run with no arguments at all.
while (@ARGV && $ARGV[0] =~ /^\-/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-d' || $opt eq '--outdir') {
        $outdir = shift @ARGV;
        die "option $opt requires a directory argument\n" . usage()
            unless defined $outdir && length $outdir;
    }
    else {
        die "unknown option: $opt\n" . usage();
    }
}

# Make sure the term directory exists. The list form of system() bypasses the
# shell, so directory names containing spaces or metacharacters are safe.
system('mkdir', '-p', '--', $outdir) == 0
    or die "cannot create directory $outdir\n";

my $fn = shift @ARGV;
die "no OBO file specified\n" . usage() unless defined $fn;

my @ids = @ARGV;
my %idmap = map {$_ => 1} @ids;     # ids requested on the command line
my %found = ();                     # requested ids that were written out

# ---------------------------------------------------------------------------
# Scan the file line by line. A line starting with '[' opens a new stanza, at
# which point the stanza collected so far is written out if it was requested.
# ---------------------------------------------------------------------------
my $id;
my $stanza = "";

print "Reading $fn\n";
open(my $in, '<', $fn) or die "no such file $fn: $!";
while (my $line = <$in>) {
    if ($line =~ m@^\[@) {
        if ($id && $idmap{$id}) {
            w($id, $stanza);
            $found{$id} = 1;
        }
        $stanza = "";
        $id = "";
    }
    if ($line =~ m@^id: (\S+)@) {
        $id = $1;
    }
    $stanza .= $line;
}
close($in);

# The final stanza has no following '[' line to trigger the write above, so
# flush it here; otherwise the last term in the file could never be checked out.
if ($id && $idmap{$id}) {
    w($id, $stanza);
    $found{$id} = 1;
}

# Tell the user about requested ids that do not occur in the file.
foreach my $wanted (@ids) {
    warn "$wanted not found in $fn\n" unless $found{$wanted};
}
|
||||||
|
# Builds the path of the term file for an id: the first ':' in the id is
# replaced by '_' and the result is placed in $outdir with an .obo extension.
sub get_path {
    my ($id) = @_;
    (my $basename = "$id") =~ s{:}{_};
    return $outdir . "/" . $basename . ".obo";
}
|
||||||
|
|
||||||
|
# Writes a stanza to the term file for the given id, replacing any existing
# content, and reports the action on standard output.
#
# Arguments: the term id and the stanza text.
# Dies if the file cannot be opened or if closing it reports an error.
sub w {
    my ($id, $stanza) = @_;
    my $path = get_path($id);
    print "Checking out $id to $path\n";
    # Three-argument open with a lexical handle: the mode can never be
    # influenced by the file name and the handle cannot leak into other code.
    open(my $fh, '>', $path) or die "cannot write $path: $!";
    print $fh $stanza;
    close($fh) or die "error closing $path: $!";
}
|
||||||
|
|
||||||
|
# Returns the last '/'-separated component of $0, i.e. the name the script
# was invoked under without any leading directories.
sub scriptname {
    my @segments = split(m{/}, $0);
    return $segments[-1];
}
|
||||||
|
|
||||||
|
|
||||||
|
# Returns the help text for this script as a single string.
# The synopsis lists options before OBO-FILE because the option parser only
# looks at leading arguments.
sub usage {
    my $sn = scriptname();

    return <<EOM;
$sn [-d TERM-DIR] OBO-FILE TERM1 TERM2 ...

Checks out obo files into TERM-DIR from the OBO-FILE

Options (must be given before OBO-FILE):

  -d, --outdir TERM-DIR   directory to write the terms to (default: terms)
  -h, --help              print this message

Example:

$sn src/ontology/foo-edit.obo FOO:0000087 FOO:0000081

This will extract the FOO:0000087 and FOO:0000081 terms from the foo-edit.obo file
and write them to the terms directory, as files:

terms/FOO_0000087.obo
terms/FOO_0000081.obo

EOM
}
|
||||||
|
|
||||||
113
skills/editing-obo-ontologies/obo-grep.pl
Executable file
113
skills/editing-obo-ontologies/obo-grep.pl
Executable file
@@ -0,0 +1,113 @@
|
|||||||
|
#!/usr/bin/perl -w
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
# ---------------------------------------------------------------------------
# Command-line options. All options must appear BEFORE the file arguments,
# because parsing stops at the first argument that is not of the form -X.
# A lone '-' is not an option; it names standard input.
# ---------------------------------------------------------------------------
my $regexp = '';    # pattern a stanza must match (or must not match with -v)
my $noheader;       # --noheader: do not print the header of each file
my $negate;         # -v: select the stanzas that do NOT match
my $count;          # -c: print only the number of selected stanzas

# Reads a file and returns its lines without the trailing newline.
my $read_lines = sub {
    my ($path) = @_;
    open(my $in, '<', $path) or die "cannot open $path: $!";
    my @lines = <$in>;
    close($in);
    chomp @lines;
    return @lines;
};

# The @ARGV guard avoids matching against an undefined value when the script
# is run with no arguments at all.
while (@ARGV && $ARGV[0] =~ /^\-.+/) {
    my $opt = shift @ARGV;
    if ($opt eq '-h' || $opt eq '--help') {
        print usage();
        exit 0;
    }
    elsif ($opt eq '-r' || $opt eq '--regexp') {
        $regexp = shift @ARGV;
        die "option $opt requires a regular expression\n" . usage()
            unless defined $regexp;
    }
    elsif ($opt eq '--regexp-file') {
        my $f = shift @ARGV;
        die "option $opt requires a file argument\n" . usage() unless defined $f;
        # Each line of the file is one alternative for the value of the id tag.
        my @or = $read_lines->($f);
        $regexp = sprintf('id: (%s)\n', join('|', @or));
    }
    elsif ($opt eq '-c' || $opt eq '--count') {
        $count = 1;
    }
    elsif ($opt eq '--noheader') {
        $noheader = 1;
    }
    elsif ($opt eq '--idfile') {
        my $idfile = shift @ARGV;
        die "option $opt requires a file argument\n" . usage() unless defined $idfile;
        # Only the first whitespace-delimited column of each line is used.
        my @ids = map { (my $first = $_) =~ s@\s.*@@; $first } $read_lines->($idfile);
        $regexp = "id: (" . join("|", @ids) . ")\n";
    }
    elsif ($opt eq '-v' || $opt eq '--neg') {
        $negate = 1;
    }
    else {
        die "unknown option: $opt\n" . usage();
    }
}

# Compile the pattern once. An empty pattern is spelled (?:) because Perl
# treats a match against an empty pattern as "reuse the last successful
# pattern" rather than "match everything".
my $matcher = length($regexp) ? qr/$regexp/ : qr/(?:)/;

my $n = 0;
# Files are processed in the order given on the command line.
foreach my $f (@ARGV) {
    my $in;
    if ($f eq '-') {
        $in = \*STDIN;
    }
    else {
        open($in, '<', $f) or die "cannot open $f: $!";
    }

    # Read blank-line-separated records.
    local $/ = "\n\n";

    my $first_record = 1;
    while (my $record = <$in>) {
        # Only the very first record of a file can be the header, and only
        # if it does not open a stanza. It is printed unfiltered and is
        # never counted.
        if ($first_record) {
            $first_record = 0;
            if ($record !~ /^\[/) {
                print $record unless $noheader || $count;
                next;
            }
        }

        my $selected = ($record =~ $matcher) ? 1 : 0;
        $selected = !$selected if $negate;
        if ($selected) {
            $n++;
            print $record unless $count;
        }
    }
    close($in) unless $f eq '-';
}
if ($count) {
    print "$n\n";
}

exit 0;
|
||||||
|
|
||||||
|
# Returns the last '/'-separated component of $0, i.e. the name the script
# was invoked under without any leading directories.
sub scriptname {
    my @segments = split(m{/}, $0);
    return $segments[-1];
}
|
||||||
|
|
||||||
|
|
||||||
|
# Returns the help text for this script as a single string.
sub usage {
    my $sn = scriptname();

    return <<EOM;
$sn [--noheader] [-v|--neg] [-c|--count] [-r REGULAR-EXPRESSION] [--regexp-file FILE] [--idfile FILE] OBO-FILE

filters out stanzas from obo files

Options (must be given before OBO-FILE):

  -r, --regexp REGULAR-EXPRESSION   select stanzas matching the expression
      --regexp-file FILE            select stanzas whose id matches one of the
                                    expressions listed in FILE, one per line
      --idfile FILE                 select stanzas whose id is listed in the
                                    first column of FILE
  -v, --neg                         select the stanzas that do NOT match
  -c, --count                       print only the number of selected stanzas
      --noheader                    do not print the file header
  -h, --help                        print this message

Use - as OBO-FILE to read from standard input.

Example:

$sn -r 'def:.*transcript' go.obo

EOM
}
|
||||||
|
|
||||||
Reference in New Issue
Block a user