Initial commit
skills/context-memory/utils/init_vault.py (new file, 358 lines)
@@ -0,0 +1,358 @@
#!/usr/bin/env python3
"""
Initialize PRISM Context Memory Obsidian Vault

Creates the folder structure and initial index files for the Obsidian vault.
"""

import os
import sys
from datetime import datetime
from pathlib import Path

# Add to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder, find_git_root


def init_vault():
    """Initialize Obsidian vault structure."""
    print("Initializing PRISM Context Memory Obsidian Vault")
    print("=" * 60)

    vault = get_vault_path()
    folders = get_folder_paths()

    print(f"\nVault location: {vault}")

    # Create all folders
    print("\nCreating folder structure...")
    for name, path in folders.items():
        ensure_folder(path)
        print(f"  [OK] {name}: {path.relative_to(vault)}")

    # Create README.md
    readme_path = vault / "PRISM-Memory" / "Index" / "README.md"
    if not readme_path.exists():
        print("\nCreating README...")
        readme_content = """---
type: index
created_at: """ + datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + """
---

# PRISM Context Memory

Welcome to your PRISM knowledge vault! This vault stores context captured by PRISM skills during development.

## Vault Structure

- **[[File Index|Files]]** - Analysis of code files
- **[[Pattern Index|Patterns]]** - Reusable code patterns and conventions
- **[[Decision Log|Decisions]]** - Architectural decisions and reasoning
- **Commits** - Git commit context and history
- **Interactions** - Agent learnings and outcomes
- **Learnings** - Story completion learnings and consolidations
- **Preferences** - Learned preferences and coding style

## How It Works

1. **Automatic Capture:** Hooks capture context as you code
2. **Intelligent Storage:** Claude analyzes and stores it as structured markdown
3. **Easy Retrieval:** Search and link notes in Obsidian
4. **Knowledge Graph:** Visualize connections between files, patterns, and decisions

## Getting Started

### View Recent Activity

Check the [[File Index]] to see recently analyzed files.

### Explore Patterns

Browse the [[Pattern Index]] to discover reusable patterns in your codebase.

### Review Decisions

Read the [[Decision Log]] to understand architectural choices.

### Search

Use Obsidian's search (Cmd/Ctrl+Shift+F) to find specific context:
- Search by file name, pattern, or concept
- Use tags like #authentication, #testing, #architecture
- Follow links to explore related notes

## Obsidian Features

### Graph View

Open the graph view (Cmd/Ctrl+G) to visualize your knowledge network.

### Tags

Filter by tags:
- `#python`, `#typescript`, `#javascript` - Languages
- `#architecture`, `#testing`, `#security` - Categories
- `#simple`, `#moderate`, `#complex` - Complexity

### Daily Notes

Link PRISM context to your daily notes to keep a project journal.

### Dataview (Optional)

If you have the Dataview plugin installed, the index pages include dynamic queries.

## Tips

1. **Add Context Manually:** Create notes in any folder to add custom context
2. **Link Liberally:** Use `[[wikilinks]]` to connect related concepts
3. **Tag Consistently:** Use consistent tags for better filtering
4. **Review Regularly:** Browse recent changes to stay aware of system evolution
5. **Customize Structure:** Reorganize folders to match your mental model

---

**Last Updated:** """ + datetime.now().strftime("%Y-%m-%d") + """
"""
        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(readme_content)
        print("  [OK] Created README")

    # Create File Index
    file_index_path = vault / "PRISM-Memory" / "Index" / "File Index.md"
    if not file_index_path.exists():
        print("Creating File Index...")
        file_index_content = """---
type: index
category: files
---

# File Index

Map of Contents (MOC) for analyzed code files.

## Recent Analyses

<!-- 20 most recently analyzed files -->

## By Language

### Python
<!-- All Python files -->

### TypeScript
<!-- All TypeScript files -->

### JavaScript
<!-- All JavaScript files -->

## By Complexity

### Simple
<!-- Simple files -->

### Moderate
<!-- Moderate complexity files -->

### Complex
<!-- Complex files -->

## Search Files

Use Obsidian search with:
- `path:<search>` - Search by file path
- `tag:#<language>` - Filter by language
- `tag:#<complexity>` - Filter by complexity

---

**Tip:** If you have the Dataview plugin installed, this query renders automatically:

```dataview
TABLE file_path, language, complexity, analyzed_at
FROM "PRISM-Memory/Files"
WHERE type = "file-analysis"
SORT analyzed_at DESC
LIMIT 20
```
"""
        with open(file_index_path, 'w', encoding='utf-8') as f:
            f.write(file_index_content)
        print("  [OK] Created File Index")

    # Create Pattern Index
    pattern_index_path = vault / "PRISM-Memory" / "Index" / "Pattern Index.md"
    if not pattern_index_path.exists():
        print("Creating Pattern Index...")
        pattern_index_content = """---
type: index
category: patterns
---

# Pattern Index

Map of Contents (MOC) for code patterns and conventions.

## By Category

### Architecture
<!-- Architectural patterns -->

### Testing
<!-- Testing patterns -->

### Security
<!-- Security patterns -->

### Performance
<!-- Performance patterns -->

## Most Used Patterns

<!-- Patterns sorted by usage_count -->

## Search Patterns

Use Obsidian search with:
- `tag:#<category>` - Filter by category
- Full-text search for pattern descriptions

---

**Tip:** If you have the Dataview plugin installed, this query renders automatically:

```dataview
TABLE category, usage_count, updated_at
FROM "PRISM-Memory/Patterns"
WHERE type = "pattern"
SORT usage_count DESC
```
"""
        with open(pattern_index_path, 'w', encoding='utf-8') as f:
            f.write(pattern_index_content)
        print("  [OK] Created Pattern Index")

    # Create Decision Log
    decision_log_path = vault / "PRISM-Memory" / "Index" / "Decision Log.md"
    if not decision_log_path.exists():
        print("Creating Decision Log...")
        decision_log_content = """---
type: index
category: decisions
---

# Decision Log

Chronological log of architectural decisions.

## Recent Decisions

<!-- 20 most recent decisions -->

## By Impact

### High Impact
<!-- High impact decisions -->

### Medium Impact
<!-- Medium impact decisions -->

### Low Impact
<!-- Low impact decisions -->

## By Status

### Accepted
<!-- Active decisions -->

### Superseded
<!-- Decisions that have been replaced -->

## Search Decisions

Use Obsidian search with:
- `tag:#<topic>` - Filter by topic
- Date-based search: `YYYY-MM-DD`

---

**Tip:** If you have the Dataview plugin installed, this query renders automatically:

```dataview
TABLE decision_date, status, impact
FROM "PRISM-Memory/Decisions"
WHERE type = "decision"
SORT decision_date DESC
LIMIT 20
```
"""
        with open(decision_log_path, 'w', encoding='utf-8') as f:
            f.write(decision_log_content)
        print("  [OK] Created Decision Log")

    # Create .gitignore in vault to ignore Obsidian config
    gitignore_path = vault / ".gitignore"
    if not gitignore_path.exists():
        print("\nCreating .gitignore...")
        gitignore_content = """# Obsidian configuration (workspace-specific)
.obsidian/workspace.json
.obsidian/workspace-mobile.json

# Obsidian cache
.obsidian/cache/

# Personal settings
.obsidian/app.json
.obsidian/appearance.json
.obsidian/hotkeys.json

# Keep these for consistency across users:
# .obsidian/core-plugins.json
# .obsidian/community-plugins.json
"""
        with open(gitignore_path, 'w', encoding='utf-8') as f:
            f.write(gitignore_content)
        print("  [OK] Created .gitignore")

    # Update project .gitignore to exclude vault data
    git_root = Path(find_git_root() or '.')
    project_gitignore = git_root / ".gitignore"

    if project_gitignore.exists():
        print("\nUpdating project .gitignore...")
        with open(project_gitignore, 'r', encoding='utf-8') as f:
            content = f.read()

        vault_relative = vault.relative_to(git_root) if vault.is_relative_to(git_root) else vault
        ignore_line = f"\n# PRISM Context Memory Obsidian Vault\n{vault_relative}/\n"

        if str(vault_relative) not in content:
            with open(project_gitignore, 'a', encoding='utf-8') as f:
                f.write(ignore_line)
            print("  [OK] Added vault to project .gitignore")
        else:
            print("  [OK] Vault already in .gitignore")
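    # Illustrative only (not part of the original script): with a hypothetical
    # vault_relative of "docs/prism-vault", the block appended to the project
    # .gitignore would be:
    #
    #   # PRISM Context Memory Obsidian Vault
    #   docs/prism-vault/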

    print("\n" + "=" * 60)
    print("[SUCCESS] Vault initialization complete!")
    print("\nNext steps:")
    print("1. Open the vault in Obsidian")
    print(f"   File > Open vault > {vault}")
    print("2. Install recommended plugins (optional):")
    print("   - Dataview - For dynamic queries")
    print("   - Templater - For note templates")
    print("   - Graph Analysis - For knowledge graph insights")
    print("3. Enable PRISM hooks to capture context automatically")
    print("   (See reference/quickstart.md for hook configuration)")
    print("\nVault location:")
    print(f"  {vault}")


if __name__ == "__main__":
    try:
        init_vault()
    except Exception as e:
        print(f"\n[ERROR] {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
skills/context-memory/utils/memory_intelligence.py (new file, 574 lines)
@@ -0,0 +1,574 @@
#!/usr/bin/env python3
"""
PRISM Context Memory - Intelligence Layer

Implements memory decay, self-evaluation, and learning over time.
Based on research in persistent memory systems with confidence scoring.

Key Concepts:
- Memory Decay: Confidence scores decay along an Ebbinghaus-style curve unless reinforced
- Self-Evaluation: Track retrieval success and relevance
- Upsert Logic: Update existing knowledge rather than duplicating it
- Confidence Scoring: Increases with successful usage, decays over time
"""

import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import math
import re

sys.path.insert(0, str(Path(__file__).parent))

try:
    import frontmatter
except ImportError:
    print("[ERROR] python-frontmatter not installed. Install it with: pip install python-frontmatter")
    sys.exit(1)

# Lazy import to avoid circular dependency
# storage_obsidian imports from this file, so we can't import it at module level
_storage_obsidian = None


def _get_storage():
    """Lazy load storage_obsidian to avoid circular import."""
    global _storage_obsidian
    if _storage_obsidian is None:
        from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder
        _storage_obsidian = {
            'get_vault_path': get_vault_path,
            'get_folder_paths': get_folder_paths,
            'ensure_folder': ensure_folder
        }
    return _storage_obsidian


# ============================================================================
# MEMORY DECAY & CONFIDENCE SCORING
# ============================================================================

def calculate_decay(
    confidence: float,
    last_accessed: datetime,
    half_life_days: int = 30
) -> float:
    """
    Calculate memory decay using exponential decay model (Ebbinghaus curve).

    Confidence decays unless memory is reinforced through successful retrieval.

    Args:
        confidence: Current confidence score (0-1)
        last_accessed: When memory was last accessed
        half_life_days: Days for confidence to decay to 50%

    Returns:
        Decayed confidence score
    """
    days_since_access = (datetime.now() - last_accessed).days

    if days_since_access == 0:
        return confidence

    # Exponential decay: C(t) = C₀ * (0.5)^(t/h)
    # where h is the half-life
    decay_factor = math.pow(0.5, days_since_access / half_life_days)
    decayed_confidence = confidence * decay_factor

    # Don't decay below minimum threshold
    return max(decayed_confidence, 0.1)
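
# Worked example (illustrative comment, not part of the original module): with
# confidence=0.8, half_life_days=30, and a memory last touched 45 days ago,
# decay_factor = 0.5 ** (45 / 30) ≈ 0.354, so calculate_decay(...) ≈ 0.8 * 0.354 ≈ 0.283.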


def reinforce_confidence(
    current_confidence: float,
    retrieval_success: bool,
    learning_rate: float = 0.1
) -> float:
    """
    Reinforce or weaken confidence based on retrieval outcome.

    Successful retrievals increase confidence; failures decrease it.

    Args:
        current_confidence: Current score (0-1)
        retrieval_success: Whether retrieval was successful/relevant
        learning_rate: How quickly confidence adjusts (0-1)

    Returns:
        Updated confidence score
    """
    if retrieval_success:
        # Increase confidence, with diminishing returns as it approaches 1
        delta = learning_rate * (1 - current_confidence)
        return min(current_confidence + delta, 1.0)
    else:
        # Decrease confidence
        delta = learning_rate * current_confidence
        return max(current_confidence - delta, 0.1)
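
# Worked example (illustrative comment, not part of the original module):
# reinforce_confidence(0.283, True) with the default learning_rate of 0.1 adds
# 0.1 * (1 - 0.283) ≈ 0.072, giving ≈ 0.355; a failed retrieval would instead
# subtract 0.1 * 0.283 ≈ 0.028, giving ≈ 0.255.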


def calculate_relevance_score(
    access_count: int,
    last_accessed: datetime,
    confidence: float,
    recency_weight: float = 0.3,
    frequency_weight: float = 0.3,
    confidence_weight: float = 0.4
) -> float:
    """
    Calculate overall relevance score combining multiple factors.

    Args:
        access_count: Number of times accessed
        last_accessed: Most recent access time
        confidence: Current confidence score
        recency_weight: Weight for recency (default 0.3)
        frequency_weight: Weight for frequency (default 0.3)
        confidence_weight: Weight for confidence (default 0.4)

    Returns:
        Relevance score (0-1)
    """
    # Recency score (exponential decay with a 30-day time constant)
    days_since = (datetime.now() - last_accessed).days
    recency = math.exp(-days_since / 30)

    # Frequency score (logarithmic scaling; reaches 1.0 at 100 accesses)
    frequency = math.log(1 + access_count) / math.log(101)

    # Weighted combination
    relevance = (
        recency * recency_weight +
        frequency * frequency_weight +
        confidence * confidence_weight
    )

    return min(relevance, 1.0)
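
# Worked example (illustrative comment, not part of the original module): for a
# note accessed 10 times, last touched 5 days ago, with confidence 0.7:
# recency = exp(-5/30) ≈ 0.85, frequency = ln(11)/ln(101) ≈ 0.52, so
# relevance ≈ 0.85*0.3 + 0.52*0.3 + 0.7*0.4 ≈ 0.69.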


# ============================================================================
# INTELLIGENT TAGGING
# ============================================================================

def extract_tags_from_content(content: str, existing_tags: Optional[List[str]] = None) -> List[str]:
    """
    Extract intelligent tags from content.

    Generates:
    - Concept tags (from domain terms)
    - Entity tags (specific technologies)

    Args:
        content: Note content
        existing_tags: Tags already assigned

    Returns:
        List of extracted tags
    """
    existing_tags = existing_tags or []
    extracted = set(existing_tags)

    content_lower = content.lower()

    # Common concept tags
    concept_map = {
        'authentication': ['auth', 'login', 'oauth', 'jwt', 'token'],
        'database': ['sql', 'query', 'schema', 'migration', 'postgresql', 'mongodb'],
        'testing': ['test', 'spec', 'assert', 'mock', 'fixture'],
        'api': ['endpoint', 'route', 'request', 'response', 'rest'],
        'security': ['encrypt', 'hash', 'secure', 'vulnerable', 'xss', 'csrf'],
        'performance': ['optimize', 'cache', 'latency', 'throughput'],
        'architecture': ['pattern', 'design', 'structure', 'component'],
    }

    for concept, keywords in concept_map.items():
        if any(kw in content_lower for kw in keywords):
            extracted.add(concept)

    # Technology entity tags
    tech_patterns = [
        r'\b(react|vue|angular|svelte)\b',
        r'\b(python|javascript|typescript|java|go|rust)\b',
        r'\b(postgres|mysql|mongodb|redis|elasticsearch)\b',
        r'\b(docker|kubernetes|aws|azure|gcp)\b',
        r'\b(jwt|oauth|saml|ldap)\b',
    ]

    for pattern in tech_patterns:
        matches = re.findall(pattern, content_lower, re.IGNORECASE)
        extracted.update(matches)

    return sorted(extracted)
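
# Worked example (illustrative comment, not part of the original module):
# extract_tags_from_content("Implement JWT authentication using jsonwebtoken library for secure API access")
# returns ['authentication', 'jwt', 'security']: 'auth' and 'jwt' trigger the
# authentication concept, 'secure' triggers security, and the \b(jwt|oauth|saml|ldap)\b
# pattern adds the 'jwt' entity tag.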


def generate_tag_hierarchy(tags: List[str]) -> Dict[str, List[str]]:
    """
    Organize tags into hierarchical structure.

    Returns:
        Dict mapping parent categories to child tags
    """
    hierarchy = {
        'technology': [],
        'concept': [],
        'domain': [],
        'pattern': []
    }

    # Categorize tags
    tech_keywords = ['python', 'javascript', 'typescript', 'react', 'postgres', 'docker']
    concept_keywords = ['authentication', 'testing', 'security', 'performance']
    pattern_keywords = ['repository', 'service', 'factory', 'singleton']

    for tag in tags:
        tag_lower = tag.lower()
        if any(tech in tag_lower for tech in tech_keywords):
            hierarchy['technology'].append(tag)
        elif any(concept in tag_lower for concept in concept_keywords):
            hierarchy['concept'].append(tag)
        elif any(pattern in tag_lower for pattern in pattern_keywords):
            hierarchy['pattern'].append(tag)
        else:
            hierarchy['domain'].append(tag)

    # Remove empty categories
    return {k: v for k, v in hierarchy.items() if v}


# ============================================================================
# UPSERT LOGIC - UPDATE EXISTING KNOWLEDGE
# ============================================================================

def find_similar_notes(
    title: str,
    content: str,
    note_type: str,
    threshold: float = 0.7
) -> List[Tuple[Path, float]]:
    """
    Find existing notes that might be duplicates or updates.

    Uses title similarity and content overlap to identify candidates.

    Args:
        title: Note title
        content: Note content
        note_type: Type of note (file-analysis, pattern, decision)
        threshold: Similarity threshold (0-1)

    Returns:
        List of (path, similarity_score) tuples
    """
    storage = _get_storage()
    folders = storage['get_folder_paths']()
    vault = storage['get_vault_path']()

    # Map note type to folder
    folder_map = {
        'file-analysis': folders['files'],
        'pattern': folders['patterns'],
        'decision': folders['decisions'],
        'interaction': folders['interactions']
    }

    search_folder = folder_map.get(note_type)
    if not search_folder or not search_folder.exists():
        return []

    candidates = []
    title_lower = title.lower()
    content_words = set(re.findall(r'\w+', content.lower()))

    for note_file in search_folder.rglob("*.md"):
        try:
            # Check title similarity
            note_title = note_file.stem.lower()
            title_similarity = compute_string_similarity(title_lower, note_title)

            if title_similarity < 0.5:
                continue

            # Check content overlap
            post = frontmatter.load(note_file)
            note_content_words = set(re.findall(r'\w+', post.content.lower()))

            # Jaccard similarity
            intersection = len(content_words & note_content_words)
            union = len(content_words | note_content_words)
            content_similarity = intersection / union if union > 0 else 0

            # Combined score (weighted average)
            overall_similarity = (title_similarity * 0.6 + content_similarity * 0.4)

            if overall_similarity >= threshold:
                candidates.append((note_file, overall_similarity))

        except Exception:
            continue

    # Sort by similarity (highest first)
    candidates.sort(key=lambda x: x[1], reverse=True)
    return candidates
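
# Worked example (illustrative comment, not part of the original module): a candidate
# whose title similarity is 0.8 and whose content Jaccard overlap is 0.5 scores
# 0.8*0.6 + 0.5*0.4 = 0.68, just below the default 0.7 threshold, so it is skipped.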


def compute_string_similarity(s1: str, s2: str) -> float:
    """
    Compute similarity between two strings using word-overlap (Jaccard) similarity.

    Returns:
        Similarity score (0-1)
    """
    # Simple word overlap method
    words1 = set(s1.split())
    words2 = set(s2.split())

    if not words1 or not words2:
        return 0.0

    intersection = len(words1 & words2)
    union = len(words1 | words2)

    return intersection / union if union > 0 else 0.0
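
# Worked example (illustrative comment, not part of the original module):
# compute_string_similarity("user auth service", "auth service tests") shares
# {"auth", "service"} out of 4 distinct words, so the score is 2/4 = 0.5.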


def should_update_existing(
    existing_path: Path,
    new_content: str,
    similarity_score: float
) -> bool:
    """
    Decide whether to update existing note or create new one.

    Args:
        existing_path: Path to existing note
        new_content: New content to potentially add
        similarity_score: How similar notes are (0-1)

    Returns:
        True if should update, False if should create new
    """
    # High similarity -> update existing
    if similarity_score >= 0.85:
        return True

    # Medium similarity -> check if new content adds value
    if similarity_score >= 0.7:
        post = frontmatter.load(existing_path)
        existing_length = len(post.content)
        new_length = len(new_content)

        # If new content is substantially different/longer, keep separate
        if new_length > existing_length * 1.5:
            return False

        return True

    # Low similarity -> create new
    return False


def merge_note_content(
    existing_content: str,
    new_content: str,
    merge_strategy: str = "append"
) -> str:
    """
    Intelligently merge new content into existing note.

    Args:
        existing_content: Current note content
        new_content: New information to add
        merge_strategy: How to merge ("append", "replace", "sections")

    Returns:
        Merged content
    """
    if merge_strategy == "replace":
        return new_content

    elif merge_strategy == "append":
        # Add new content at end with separator
        return f"{existing_content}\n\n## Updated Information\n\n{new_content}"

    elif merge_strategy == "sections":
        # Merge by sections (smarter merging)
        # For now, append with date
        timestamp = datetime.now().strftime("%Y-%m-%d")
        return f"{existing_content}\n\n## Update - {timestamp}\n\n{new_content}"

    return existing_content
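
# Worked example (illustrative comment, not part of the original module):
# merge_note_content("old body", "new detail") with the default "append" strategy
# returns "old body\n\n## Updated Information\n\nnew detail"; the "sections"
# strategy adds a dated "## Update - YYYY-MM-DD" heading instead.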


# ============================================================================
# SELF-EVALUATION & MAINTENANCE
# ============================================================================

def evaluate_memory_health(vault_path: Optional[Path] = None) -> Dict:
    """
    Evaluate overall memory system health.

    Checks:
    - Low-confidence memories
    - Stale memories (not accessed recently)
    - Duplicate candidates
    - Tag consistency

    Returns:
        Health report dictionary
    """
    if vault_path is None:
        storage = _get_storage()
        vault_path = storage['get_vault_path']()

    report = {
        'total_notes': 0,
        'low_confidence': [],
        'stale_memories': [],
        'duplicate_candidates': [],
        'tag_issues': [],
        'avg_confidence': 0.0,
        'avg_relevance': 0.0
    }

    confidences = []
    relevances = []

    for note_file in vault_path.rglob("*.md"):
        try:
            post = frontmatter.load(note_file)

            if post.get('type') not in ['file-analysis', 'pattern', 'decision']:
                continue

            report['total_notes'] += 1

            # Check confidence
            confidence = post.get('confidence_score', 0.5)
            confidences.append(confidence)

            if confidence < 0.3:
                report['low_confidence'].append(str(note_file.relative_to(vault_path)))

            # Check staleness
            last_accessed_str = post.get('last_accessed')
            if last_accessed_str:
                last_accessed = datetime.fromisoformat(last_accessed_str)
                days_stale = (datetime.now() - last_accessed).days

                if days_stale > 90:
                    report['stale_memories'].append({
                        'path': str(note_file.relative_to(vault_path)),
                        'days_stale': days_stale
                    })

                # Track relevance for notes with access metadata
                # (an 'access_count' frontmatter field is assumed; defaults to 0 if absent)
                relevances.append(calculate_relevance_score(
                    access_count=post.get('access_count', 0),
                    last_accessed=last_accessed,
                    confidence=confidence
                ))

            # Check tags
            tags = post.get('tags', [])
            if not tags:
                report['tag_issues'].append(str(note_file.relative_to(vault_path)))

        except Exception:
            continue

    if confidences:
        report['avg_confidence'] = sum(confidences) / len(confidences)
    if relevances:
        report['avg_relevance'] = sum(relevances) / len(relevances)

    return report
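
# Illustrative shape of the report returned above (paths and values are made up):
# {'total_notes': 42, 'low_confidence': ['Files/foo.md'],
#  'stale_memories': [{'path': 'Patterns/bar.md', 'days_stale': 120}],
#  'duplicate_candidates': [], 'tag_issues': [], 'avg_confidence': 0.64, 'avg_relevance': 0.58}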


def consolidate_duplicates(
    duplicate_candidates: List[Tuple[Path, Path, float]],
    auto_merge_threshold: float = 0.95
) -> List[Dict]:
    """
    Consolidate duplicate or near-duplicate memories.

    Args:
        duplicate_candidates: List of (path1, path2, similarity) tuples
        auto_merge_threshold: Automatically merge if similarity is above this

    Returns:
        List of consolidation actions taken
    """
    actions = []

    for path1, path2, similarity in duplicate_candidates:
        if similarity >= auto_merge_threshold:
            # Auto-merge high-similarity duplicates
            try:
                post1 = frontmatter.load(path1)
                post2 = frontmatter.load(path2)

                # Keep the one with higher confidence
                conf1 = post1.get('confidence_score', 0.5)
                conf2 = post2.get('confidence_score', 0.5)

                if conf1 >= conf2:
                    primary, secondary = path1, path2
                    post_primary, post_secondary = post1, post2
                else:
                    primary, secondary = path2, path1
                    post_primary, post_secondary = post2, post1

                # Merge content
                merged_content = merge_note_content(
                    post_primary.content,
                    post_secondary.content,
                    "sections"
                )

                # Update primary
                post_primary.content = merged_content

                with open(primary, 'w', encoding='utf-8') as f:
                    f.write(frontmatter.dumps(post_primary))

                # Delete the secondary note (its content now lives in the primary)
                secondary.unlink()

                actions.append({
                    'action': 'merged',
                    'primary': str(primary),
                    'secondary': str(secondary),
                    'similarity': similarity
                })

            except Exception as e:
                actions.append({
                    'action': 'error',
                    'files': [str(path1), str(path2)],
                    'error': str(e)
                })

    return actions


if __name__ == "__main__":
    print("Memory Intelligence System")
    print("=" * 60)

    # Test decay calculation
    confidence = 0.8
    last_access = datetime.now() - timedelta(days=45)
    decayed = calculate_decay(confidence, last_access)
    print("\nDecay Test:")
    print(f"  Initial confidence: {confidence}")
    print("  Days since access: 45")
    print(f"  Decayed confidence: {decayed:.3f}")

    # Test reinforcement
    reinforced = reinforce_confidence(decayed, True)
    print("\nReinforcement Test:")
    print(f"  After successful retrieval: {reinforced:.3f}")

    # Test tag extraction
    sample = "Implement JWT authentication using jsonwebtoken library for secure API access"
    tags = extract_tags_from_content(sample)
    print("\nTag Extraction Test:")
    print(f"  Content: {sample}")
    print(f"  Tags: {tags}")

    print("\n[OK] Memory intelligence layer operational")
skills/context-memory/utils/storage_obsidian.py (new file, 1418 lines)
File diff suppressed because it is too large