Initial commit
574
skills/context-memory/utils/memory_intelligence.py
Normal file
@@ -0,0 +1,574 @@
#!/usr/bin/env python3
"""
PRISM Context Memory - Intelligence Layer

Implements memory decay, self-evaluation, and learning over time.
Based on research in persistent memory systems with confidence scoring.

Key Concepts:
- Memory Decay: Confidence scores decay following the Ebbinghaus curve unless reinforced
- Self-Evaluation: Track retrieval success and relevance
- Upsert Logic: Update existing knowledge rather than duplicate it
- Confidence Scoring: Increases with successful usage, decays over time
"""

import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import math
import re

sys.path.insert(0, str(Path(__file__).parent))

try:
    import frontmatter
except ImportError:
    print("[ERROR] python-frontmatter not installed (pip install python-frontmatter)")
    sys.exit(1)

# Lazy import to avoid circular dependency:
# storage_obsidian imports from this file, so we can't import it at module level.
_storage_obsidian = None


def _get_storage():
    """Lazy load storage_obsidian to avoid circular import."""
    global _storage_obsidian
    if _storage_obsidian is None:
        from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder
        _storage_obsidian = {
            'get_vault_path': get_vault_path,
            'get_folder_paths': get_folder_paths,
            'ensure_folder': ensure_folder
        }
    return _storage_obsidian


# ============================================================================
# MEMORY DECAY & CONFIDENCE SCORING
# ============================================================================

def calculate_decay(
    confidence: float,
    last_accessed: datetime,
    half_life_days: int = 30
) -> float:
    """
    Calculate memory decay using an exponential decay model (Ebbinghaus curve).

    Confidence decays unless the memory is reinforced through successful retrieval.

    Args:
        confidence: Current confidence score (0-1)
        last_accessed: When memory was last accessed
        half_life_days: Days for confidence to decay to 50%

    Returns:
        Decayed confidence score
    """
    days_since_access = (datetime.now() - last_accessed).days

    if days_since_access == 0:
        return confidence

    # Exponential decay: C(t) = C₀ * (0.5)^(t/h)
    # where h is the half-life
    decay_factor = math.pow(0.5, days_since_access / half_life_days)
    decayed_confidence = confidence * decay_factor

    # Don't decay below minimum threshold
    return max(decayed_confidence, 0.1)
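
# Worked example (illustrative, matching the self-test in __main__ below): with the
# default 30-day half-life, a memory at confidence 0.8 that has not been accessed
# for 45 days decays to 0.8 * 0.5 ** (45 / 30) ≈ 0.283.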


def reinforce_confidence(
    current_confidence: float,
    retrieval_success: bool,
    learning_rate: float = 0.1
) -> float:
    """
    Reinforce or weaken confidence based on retrieval outcome.

    Successful retrievals increase confidence; failures decrease it.

    Args:
        current_confidence: Current score (0-1)
        retrieval_success: Whether retrieval was successful/relevant
        learning_rate: How quickly confidence adjusts (0-1)

    Returns:
        Updated confidence score
    """
    if retrieval_success:
        # Increase confidence, with diminishing returns as it approaches 1
        delta = learning_rate * (1 - current_confidence)
        return min(current_confidence + delta, 1.0)
    else:
        # Decrease confidence
        delta = learning_rate * current_confidence
        return max(current_confidence - delta, 0.1)
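
# Worked example (illustrative): a successful retrieval at confidence 0.283 with the
# default learning_rate of 0.1 moves it to 0.283 + 0.1 * (1 - 0.283) ≈ 0.355.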


def calculate_relevance_score(
    access_count: int,
    last_accessed: datetime,
    confidence: float,
    recency_weight: float = 0.3,
    frequency_weight: float = 0.3,
    confidence_weight: float = 0.4
) -> float:
    """
    Calculate overall relevance score combining multiple factors.

    Args:
        access_count: Number of times accessed
        last_accessed: Most recent access time
        confidence: Current confidence score
        recency_weight: Weight for recency (default 0.3)
        frequency_weight: Weight for frequency (default 0.3)
        confidence_weight: Weight for confidence (default 0.4)

    Returns:
        Relevance score (0-1)
    """
    # Recency score (exponential decay with a 30-day time constant)
    days_since = (datetime.now() - last_accessed).days
    recency = math.exp(-days_since / 30)

    # Frequency score (logarithmic scaling; reaches ~1.0 at 100 accesses)
    frequency = math.log(1 + access_count) / math.log(101)

    # Weighted combination
    relevance = (
        recency * recency_weight +
        frequency * frequency_weight +
        confidence * confidence_weight
    )

    return min(relevance, 1.0)
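
# Worked example (illustrative): a note accessed 10 times, last touched 15 days ago,
# with confidence 0.8 scores roughly
#   recency ≈ exp(-15/30) ≈ 0.607, frequency ≈ ln(11)/ln(101) ≈ 0.520
#   relevance ≈ 0.607 * 0.3 + 0.520 * 0.3 + 0.8 * 0.4 ≈ 0.658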


# ============================================================================
# INTELLIGENT TAGGING
# ============================================================================

def extract_tags_from_content(content: str, existing_tags: Optional[List[str]] = None) -> List[str]:
    """
    Extract intelligent tags from content.

    Generates:
    - Concept tags (from domain terms)
    - Entity tags (specific technologies)
    - Action tags (verbs describing operations)

    Args:
        content: Note content
        existing_tags: Tags already assigned

    Returns:
        List of extracted tags
    """
    existing_tags = existing_tags or []
    extracted = set(existing_tags)

    content_lower = content.lower()

    # Common concept tags
    concept_map = {
        'authentication': ['auth', 'login', 'oauth', 'jwt', 'token'],
        'database': ['sql', 'query', 'schema', 'migration', 'postgresql', 'mongodb'],
        'testing': ['test', 'spec', 'assert', 'mock', 'fixture'],
        'api': ['endpoint', 'route', 'request', 'response', 'rest'],
        'security': ['encrypt', 'hash', 'secure', 'vulnerable', 'xss', 'csrf'],
        'performance': ['optimize', 'cache', 'latency', 'throughput'],
        'architecture': ['pattern', 'design', 'structure', 'component'],
    }

    for concept, keywords in concept_map.items():
        if any(kw in content_lower for kw in keywords):
            extracted.add(concept)

    # Technology entity tags
    tech_patterns = [
        r'\b(react|vue|angular|svelte)\b',
        r'\b(python|javascript|typescript|java|go|rust)\b',
        r'\b(postgres|mysql|mongodb|redis|elasticsearch)\b',
        r'\b(docker|kubernetes|aws|azure|gcp)\b',
        r'\b(jwt|oauth|saml|ldap)\b',
    ]

    for pattern in tech_patterns:
        matches = re.findall(pattern, content_lower, re.IGNORECASE)
        extracted.update(matches)

    return sorted(list(extracted))


def generate_tag_hierarchy(tags: List[str]) -> Dict[str, List[str]]:
    """
    Organize tags into a hierarchical structure.

    Returns:
        Dict mapping parent categories to child tags
    """
    hierarchy = {
        'technology': [],
        'concept': [],
        'domain': [],
        'pattern': []
    }

    # Categorize tags
    tech_keywords = ['python', 'javascript', 'typescript', 'react', 'postgres', 'docker']
    concept_keywords = ['authentication', 'testing', 'security', 'performance']
    pattern_keywords = ['repository', 'service', 'factory', 'singleton']

    for tag in tags:
        tag_lower = tag.lower()
        if any(tech in tag_lower for tech in tech_keywords):
            hierarchy['technology'].append(tag)
        elif any(concept in tag_lower for concept in concept_keywords):
            hierarchy['concept'].append(tag)
        elif any(pattern in tag_lower for pattern in pattern_keywords):
            hierarchy['pattern'].append(tag)
        else:
            hierarchy['domain'].append(tag)

    # Remove empty categories
    return {k: v for k, v in hierarchy.items() if v}
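
# Illustrative example: generate_tag_hierarchy(['python', 'authentication', 'jwt'])
# returns {'technology': ['python'], 'concept': ['authentication'], 'domain': ['jwt']}.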


# ============================================================================
# UPSERT LOGIC - UPDATE EXISTING KNOWLEDGE
# ============================================================================

def find_similar_notes(
    title: str,
    content: str,
    note_type: str,
    threshold: float = 0.7
) -> List[Tuple[Path, float]]:
    """
    Find existing notes that might be duplicates or updates.

    Uses title similarity and content overlap to identify candidates.

    Args:
        title: Note title
        content: Note content
        note_type: Type of note (file-analysis, pattern, decision, interaction)
        threshold: Similarity threshold (0-1)

    Returns:
        List of (path, similarity_score) tuples
    """
    storage = _get_storage()
    folders = storage['get_folder_paths']()
    vault = storage['get_vault_path']()

    # Map note type to folder
    folder_map = {
        'file-analysis': folders['files'],
        'pattern': folders['patterns'],
        'decision': folders['decisions'],
        'interaction': folders['interactions']
    }

    search_folder = folder_map.get(note_type)
    if not search_folder or not search_folder.exists():
        return []

    candidates = []
    title_lower = title.lower()
    content_words = set(re.findall(r'\w+', content.lower()))

    for note_file in search_folder.rglob("*.md"):
        try:
            # Check title similarity
            note_title = note_file.stem.lower()
            title_similarity = compute_string_similarity(title_lower, note_title)

            if title_similarity < 0.5:
                continue

            # Check content overlap
            post = frontmatter.load(note_file)
            note_content_words = set(re.findall(r'\w+', post.content.lower()))

            # Jaccard similarity
            intersection = len(content_words & note_content_words)
            union = len(content_words | note_content_words)
            content_similarity = intersection / union if union > 0 else 0

            # Combined score (weighted average)
            overall_similarity = (title_similarity * 0.6 + content_similarity * 0.4)

            if overall_similarity >= threshold:
                candidates.append((note_file, overall_similarity))

        except Exception:
            continue

    # Sort by similarity (highest first)
    candidates.sort(key=lambda x: x[1], reverse=True)
    return candidates


def compute_string_similarity(s1: str, s2: str) -> float:
    """
    Compute similarity between two strings using word-overlap (Jaccard) similarity.

    Returns:
        Similarity score (0-1)
    """
    # Simple word overlap method
    words1 = set(s1.split())
    words2 = set(s2.split())

    if not words1 or not words2:
        return 0.0

    intersection = len(words1 & words2)
    union = len(words1 | words2)

    return intersection / union if union > 0 else 0.0


def should_update_existing(
    existing_path: Path,
    new_content: str,
    similarity_score: float
) -> bool:
    """
    Decide whether to update existing note or create new one.

    Args:
        existing_path: Path to existing note
        new_content: New content to potentially add
        similarity_score: How similar notes are (0-1)

    Returns:
        True if should update, False if should create new
    """
    # High similarity -> update existing
    if similarity_score >= 0.85:
        return True

    # Medium similarity -> check if new content adds value
    if similarity_score >= 0.7:
        post = frontmatter.load(existing_path)
        existing_length = len(post.content)
        new_length = len(new_content)

        # If new content is substantially different/longer, keep separate
        if new_length > existing_length * 1.5:
            return False

        return True

    # Low similarity -> create new
    return False


def merge_note_content(
    existing_content: str,
    new_content: str,
    merge_strategy: str = "append"
) -> str:
    """
    Intelligently merge new content into existing note.

    Args:
        existing_content: Current note content
        new_content: New information to add
        merge_strategy: How to merge ("append", "replace", "sections")

    Returns:
        Merged content
    """
    if merge_strategy == "replace":
        return new_content

    elif merge_strategy == "append":
        # Add new content at end with separator
        return f"{existing_content}\n\n## Updated Information\n\n{new_content}"

    elif merge_strategy == "sections":
        # Merge by sections (smarter merging)
        # For now, append with date
        timestamp = datetime.now().strftime("%Y-%m-%d")
        return f"{existing_content}\n\n## Update - {timestamp}\n\n{new_content}"

    return existing_content
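
# Illustrative example: merge_note_content("Old body", "New details") with the default
# "append" strategy returns the existing text followed by a "## Updated Information"
# heading and then "New details".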


# ============================================================================
# SELF-EVALUATION & MAINTENANCE
# ============================================================================

def evaluate_memory_health(vault_path: Path = None) -> Dict:
    """
    Evaluate overall memory system health.

    Checks:
    - Low-confidence memories
    - Stale memories (not accessed recently)
    - Duplicate candidates
    - Tag consistency

    Returns:
        Health report dictionary
    """
    if vault_path is None:
        storage = _get_storage()
        vault_path = storage['get_vault_path']()

    report = {
        'total_notes': 0,
        'low_confidence': [],
        'stale_memories': [],
        'duplicate_candidates': [],
        'tag_issues': [],
        'avg_confidence': 0.0,
        'avg_relevance': 0.0
    }

    confidences = []
    relevances = []

    for note_file in vault_path.rglob("*.md"):
        try:
            post = frontmatter.load(note_file)

            if post.get('type') not in ['file-analysis', 'pattern', 'decision']:
                continue

            report['total_notes'] += 1

            # Check confidence
            confidence = post.get('confidence_score', 0.5)
            confidences.append(confidence)

            if confidence < 0.3:
                report['low_confidence'].append(str(note_file.relative_to(vault_path)))

            # Check staleness
            last_accessed_str = post.get('last_accessed')
            if last_accessed_str:
                last_accessed = datetime.fromisoformat(last_accessed_str)
                days_stale = (datetime.now() - last_accessed).days

                if days_stale > 90:
                    report['stale_memories'].append({
                        'path': str(note_file.relative_to(vault_path)),
                        'days_stale': days_stale
                    })

                # Track relevance ('access_count' frontmatter key assumed; defaults to 0 if absent)
                relevances.append(calculate_relevance_score(
                    post.get('access_count', 0),
                    last_accessed,
                    confidence
                ))

            # Check tags
            tags = post.get('tags', [])
            if not tags:
                report['tag_issues'].append(str(note_file.relative_to(vault_path)))

        except Exception:
            continue

    if confidences:
        report['avg_confidence'] = sum(confidences) / len(confidences)
    if relevances:
        report['avg_relevance'] = sum(relevances) / len(relevances)

    return report


def consolidate_duplicates(
    duplicate_candidates: List[Tuple[Path, Path, float]],
    auto_merge_threshold: float = 0.95
) -> List[Dict]:
    """
    Consolidate duplicate or near-duplicate memories.

    Args:
        duplicate_candidates: List of (path1, path2, similarity) tuples
        auto_merge_threshold: Automatically merge if similarity is above this

    Returns:
        List of consolidation actions taken
    """
    actions = []

    for path1, path2, similarity in duplicate_candidates:
        if similarity >= auto_merge_threshold:
            # Auto-merge high-similarity duplicates
            try:
                post1 = frontmatter.load(path1)
                post2 = frontmatter.load(path2)

                # Keep the note with the higher confidence as the primary
                conf1 = post1.get('confidence_score', 0.5)
                conf2 = post2.get('confidence_score', 0.5)

                if conf1 >= conf2:
                    primary, secondary = path1, path2
                    post_primary, post_secondary = post1, post2
                else:
                    primary, secondary = path2, path1
                    post_primary, post_secondary = post2, post1

                # Merge content (reusing the already-loaded posts)
                merged_content = merge_note_content(
                    post_primary.content,
                    post_secondary.content,
                    "sections"
                )

                # Update primary
                post_primary.content = merged_content

                with open(primary, 'w', encoding='utf-8') as f:
                    f.write(frontmatter.dumps(post_primary))

                # Delete the secondary note (it is removed, not archived)
                secondary.unlink()

                actions.append({
                    'action': 'merged',
                    'primary': str(primary),
                    'secondary': str(secondary),
                    'similarity': similarity
                })

            except Exception as e:
                actions.append({
                    'action': 'error',
                    'files': [str(path1), str(path2)],
                    'error': str(e)
                })

    return actions


if __name__ == "__main__":
    print("Memory Intelligence System")
    print("=" * 60)

    # Test decay calculation
    confidence = 0.8
    last_access = datetime.now() - timedelta(days=45)
    decayed = calculate_decay(confidence, last_access)
    print("\nDecay Test:")
    print(f"  Initial confidence: {confidence}")
    print("  Days since access: 45")
    print(f"  Decayed confidence: {decayed:.3f}")

    # Test reinforcement
    reinforced = reinforce_confidence(decayed, True)
    print("\nReinforcement Test:")
    print(f"  After successful retrieval: {reinforced:.3f}")

    # Test tag extraction
    sample = "Implement JWT authentication using jsonwebtoken library for secure API access"
    tags = extract_tags_from_content(sample)
    print("\nTag Extraction Test:")
    print(f"  Content: {sample}")
    print(f"  Tags: {tags}")
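
    # Additional illustrative checks (not part of the original self-test); the inputs
    # below are made-up sample values.
    relevance = calculate_relevance_score(
        access_count=10,
        last_accessed=datetime.now() - timedelta(days=15),
        confidence=0.8
    )
    print("\nRelevance Test:")
    print(f"  10 accesses, 15 days old, confidence 0.8: {relevance:.3f}")

    similarity = compute_string_similarity("jwt token auth", "jwt auth middleware")
    print("\nSimilarity Test:")
    print(f"  Word-overlap similarity: {similarity:.3f}")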

    print("\n[OK] Memory intelligence layer operational")