#!/usr/bin/env python3
"""
PRISM Context Memory - Intelligence Layer

Implements memory decay, self-evaluation, and learning over time.
Based on research in persistent memory systems with confidence scoring.

Key Concepts:
- Memory Decay: Confidence scores decay following the Ebbinghaus curve unless reinforced
- Self-Evaluation: Track retrieval success and relevance
- Upsert Logic: Update existing knowledge rather than duplicate it
- Confidence Scoring: Increases with successful usage, decays over time
"""

import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import math
import re

sys.path.insert(0, str(Path(__file__).parent))

try:
    import frontmatter
except ImportError:
    print("[ERROR] python-frontmatter not installed")
    sys.exit(1)

# Lazy import to avoid circular dependency:
# storage_obsidian imports from this file, so we can't import it at module level.
_storage_obsidian = None


def _get_storage():
    """Lazy load storage_obsidian to avoid circular import."""
    global _storage_obsidian
    if _storage_obsidian is None:
        from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder
        _storage_obsidian = {
            'get_vault_path': get_vault_path,
            'get_folder_paths': get_folder_paths,
            'ensure_folder': ensure_folder
        }
    return _storage_obsidian


# ============================================================================
# MEMORY DECAY & CONFIDENCE SCORING
# ============================================================================

def calculate_decay(
    confidence: float,
    last_accessed: datetime,
    half_life_days: int = 30
) -> float:
    """
    Calculate memory decay using an exponential decay model (Ebbinghaus curve).

    Confidence decays unless the memory is reinforced through successful retrieval.

    Args:
        confidence: Current confidence score (0-1)
        last_accessed: When memory was last accessed
        half_life_days: Days for confidence to decay to 50%

    Returns:
        Decayed confidence score
    """
    days_since_access = (datetime.now() - last_accessed).days
    if days_since_access == 0:
        return confidence

    # Exponential decay: C(t) = C₀ * (0.5)^(t/h)
    # where h is the half-life
    decay_factor = math.pow(0.5, days_since_access / half_life_days)
    decayed_confidence = confidence * decay_factor

    # Don't decay below minimum threshold
    return max(decayed_confidence, 0.1)


def reinforce_confidence(
    current_confidence: float,
    retrieval_success: bool,
    learning_rate: float = 0.1
) -> float:
    """
    Reinforce or weaken confidence based on retrieval outcome.

    Successful retrievals increase confidence; failures decrease it.

    Args:
        current_confidence: Current score (0-1)
        retrieval_success: Whether retrieval was successful/relevant
        learning_rate: How quickly confidence adjusts (0-1)

    Returns:
        Updated confidence score
    """
    if retrieval_success:
        # Increase confidence, with diminishing returns as it approaches 1
        delta = learning_rate * (1 - current_confidence)
        return min(current_confidence + delta, 1.0)
    else:
        # Decrease confidence
        delta = learning_rate * current_confidence
        return max(current_confidence - delta, 0.1)
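
# Worked example for the two functions above (illustrative numbers only; they follow
# directly from the formulas and defaults):
#
#     calculate_decay(0.8, datetime.now() - timedelta(days=30))   # 0.8 * 0.5**(30/30) = 0.40
#     reinforce_confidence(0.4, retrieval_success=True)           # 0.4 + 0.1 * (1 - 0.4) = 0.46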


def calculate_relevance_score(
    access_count: int,
    last_accessed: datetime,
    confidence: float,
    recency_weight: float = 0.3,
    frequency_weight: float = 0.3,
    confidence_weight: float = 0.4
) -> float:
    """
    Calculate overall relevance score combining multiple factors.

    Args:
        access_count: Number of times accessed
        last_accessed: Most recent access time
        confidence: Current confidence score
        recency_weight: Weight for recency (default 0.3)
        frequency_weight: Weight for frequency (default 0.3)
        confidence_weight: Weight for confidence (default 0.4)

    Returns:
        Relevance score (0-1)
    """
    # Recency score (exponential decay with a 30-day time constant)
    days_since = (datetime.now() - last_accessed).days
    recency = math.exp(-days_since / 30)

    # Frequency score (logarithmic scaling; reaches 1.0 at 100 accesses)
    frequency = math.log(1 + access_count) / math.log(101)

    # Weighted combination
    relevance = (
        recency * recency_weight +
        frequency * frequency_weight +
        confidence * confidence_weight
    )

    return min(relevance, 1.0)


# ============================================================================
# INTELLIGENT TAGGING
# ============================================================================

def extract_tags_from_content(content: str, existing_tags: List[str] = None) -> List[str]:
    """
    Extract intelligent tags from content.

    Generates:
    - Concept tags (from domain terms)
    - Entity tags (specific technologies)
    - Action tags (verbs describing operations)

    Args:
        content: Note content
        existing_tags: Tags already assigned

    Returns:
        List of extracted tags
    """
    existing_tags = existing_tags or []
    extracted = set(existing_tags)
    content_lower = content.lower()

    # Common concept tags
    concept_map = {
        'authentication': ['auth', 'login', 'oauth', 'jwt', 'token'],
        'database': ['sql', 'query', 'schema', 'migration', 'postgresql', 'mongodb'],
        'testing': ['test', 'spec', 'assert', 'mock', 'fixture'],
        'api': ['endpoint', 'route', 'request', 'response', 'rest'],
        'security': ['encrypt', 'hash', 'secure', 'vulnerable', 'xss', 'csrf'],
        'performance': ['optimize', 'cache', 'latency', 'throughput'],
        'architecture': ['pattern', 'design', 'structure', 'component'],
    }

    for concept, keywords in concept_map.items():
        if any(kw in content_lower for kw in keywords):
            extracted.add(concept)

    # Technology entity tags
    tech_patterns = [
        r'\b(react|vue|angular|svelte)\b',
        r'\b(python|javascript|typescript|java|go|rust)\b',
        r'\b(postgres|mysql|mongodb|redis|elasticsearch)\b',
        r'\b(docker|kubernetes|aws|azure|gcp)\b',
        r'\b(jwt|oauth|saml|ldap)\b',
    ]

    for pattern in tech_patterns:
        matches = re.findall(pattern, content_lower, re.IGNORECASE)
        extracted.update(matches)

    return sorted(list(extracted))
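
# Illustrative example (output follows from the keyword map and regex patterns above):
#
#     extract_tags_from_content("Add Redis cache to cut API latency")
#     # -> ['performance', 'redis']  (substring match on 'cache', regex match on 'redis')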


def generate_tag_hierarchy(tags: List[str]) -> Dict[str, List[str]]:
    """
    Organize tags into a hierarchical structure.

    Returns:
        Dict mapping parent categories to child tags
    """
    hierarchy = {
        'technology': [],
        'concept': [],
        'domain': [],
        'pattern': []
    }

    # Categorize tags
    tech_keywords = ['python', 'javascript', 'typescript', 'react', 'postgres', 'docker']
    concept_keywords = ['authentication', 'testing', 'security', 'performance']
    pattern_keywords = ['repository', 'service', 'factory', 'singleton']

    for tag in tags:
        tag_lower = tag.lower()
        if any(tech in tag_lower for tech in tech_keywords):
            hierarchy['technology'].append(tag)
        elif any(concept in tag_lower for concept in concept_keywords):
            hierarchy['concept'].append(tag)
        elif any(pattern in tag_lower for pattern in pattern_keywords):
            hierarchy['pattern'].append(tag)
        else:
            hierarchy['domain'].append(tag)

    # Remove empty categories
    return {k: v for k, v in hierarchy.items() if v}


# ============================================================================
# UPSERT LOGIC - UPDATE EXISTING KNOWLEDGE
# ============================================================================

def find_similar_notes(
    title: str,
    content: str,
    note_type: str,
    threshold: float = 0.7
) -> List[Tuple[Path, float]]:
    """
    Find existing notes that might be duplicates or updates.

    Uses title similarity and content overlap to identify candidates.

    Args:
        title: Note title
        content: Note content
        note_type: Type of note (file-analysis, pattern, decision)
        threshold: Similarity threshold (0-1)

    Returns:
        List of (path, similarity_score) tuples
    """
    storage = _get_storage()
    folders = storage['get_folder_paths']()

    # Map note type to folder
    folder_map = {
        'file-analysis': folders['files'],
        'pattern': folders['patterns'],
        'decision': folders['decisions'],
        'interaction': folders['interactions']
    }

    search_folder = folder_map.get(note_type)
    if not search_folder or not search_folder.exists():
        return []

    candidates = []
    title_lower = title.lower()
    content_words = set(re.findall(r'\w+', content.lower()))

    for note_file in search_folder.rglob("*.md"):
        try:
            # Check title similarity
            note_title = note_file.stem.lower()
            title_similarity = compute_string_similarity(title_lower, note_title)

            if title_similarity < 0.5:
                continue

            # Check content overlap
            post = frontmatter.load(note_file)
            note_content_words = set(re.findall(r'\w+', post.content.lower()))

            # Jaccard similarity
            intersection = len(content_words & note_content_words)
            union = len(content_words | note_content_words)
            content_similarity = intersection / union if union > 0 else 0

            # Combined score (weighted average)
            overall_similarity = (title_similarity * 0.6 + content_similarity * 0.4)

            if overall_similarity >= threshold:
                candidates.append((note_file, overall_similarity))

        except Exception:
            continue

    # Sort by similarity (highest first)
    candidates.sort(key=lambda x: x[1], reverse=True)
    return candidates


def compute_string_similarity(s1: str, s2: str) -> float:
    """
    Compute similarity between two strings using word-overlap (Jaccard) similarity.

    Returns:
        Similarity score (0-1)
    """
    words1 = set(s1.split())
    words2 = set(s2.split())

    if not words1 or not words2:
        return 0.0

    intersection = len(words1 & words2)
    union = len(words1 | words2)

    return intersection / union if union > 0 else 0.0
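
# Illustrative example (values follow from the Jaccard formula above):
#
#     compute_string_similarity("jwt auth middleware", "jwt authentication middleware")
#     # -> intersection {'jwt', 'middleware'} = 2, union = 4, similarity = 0.5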


def should_update_existing(
    existing_path: Path,
    new_content: str,
    similarity_score: float
) -> bool:
    """
    Decide whether to update an existing note or create a new one.

    Args:
        existing_path: Path to existing note
        new_content: New content to potentially add
        similarity_score: How similar the notes are (0-1)

    Returns:
        True if should update, False if should create new
    """
    # High similarity -> update existing
    if similarity_score >= 0.85:
        return True

    # Medium similarity -> check if new content adds value
    if similarity_score >= 0.7:
        post = frontmatter.load(existing_path)
        existing_length = len(post.content)
        new_length = len(new_content)

        # If new content is substantially different/longer, keep separate
        if new_length > existing_length * 1.5:
            return False
        return True

    # Low similarity -> create new
    return False


def merge_note_content(
    existing_content: str,
    new_content: str,
    merge_strategy: str = "append"
) -> str:
    """
    Intelligently merge new content into an existing note.

    Args:
        existing_content: Current note content
        new_content: New information to add
        merge_strategy: How to merge ("append", "replace", "sections")

    Returns:
        Merged content
    """
    if merge_strategy == "replace":
        return new_content

    elif merge_strategy == "append":
        # Add new content at the end with a separator
        return f"{existing_content}\n\n## Updated Information\n\n{new_content}"

    elif merge_strategy == "sections":
        # Merge by sections (smarter merging)
        # For now, append under a dated heading
        timestamp = datetime.now().strftime("%Y-%m-%d")
        return f"{existing_content}\n\n## Update - {timestamp}\n\n{new_content}"

    return existing_content


# ============================================================================
# SELF-EVALUATION & MAINTENANCE
# ============================================================================

def evaluate_memory_health(vault_path: Path = None) -> Dict:
    """
    Evaluate overall memory system health.

    Checks:
    - Low-confidence memories
    - Stale memories (not accessed recently)
    - Duplicate candidates
    - Tag consistency

    Returns:
        Health report dictionary
    """
    if vault_path is None:
        storage = _get_storage()
        vault_path = storage['get_vault_path']()

    report = {
        'total_notes': 0,
        'low_confidence': [],
        'stale_memories': [],
        'duplicate_candidates': [],
        'tag_issues': [],
        'avg_confidence': 0.0,
        'avg_relevance': 0.0
    }

    confidences = []
    relevances = []

    for note_file in vault_path.rglob("*.md"):
        try:
            post = frontmatter.load(note_file)

            if post.get('type') not in ['file-analysis', 'pattern', 'decision']:
                continue

            report['total_notes'] += 1

            # Check confidence
            confidence = post.get('confidence_score', 0.5)
            confidences.append(confidence)

            if confidence < 0.3:
                report['low_confidence'].append(str(note_file.relative_to(vault_path)))

            # Check staleness and collect relevance
            last_accessed_str = post.get('last_accessed')
            if last_accessed_str:
                last_accessed = datetime.fromisoformat(last_accessed_str)
                days_stale = (datetime.now() - last_accessed).days

                if days_stale > 90:
                    report['stale_memories'].append({
                        'path': str(note_file.relative_to(vault_path)),
                        'days_stale': days_stale
                    })

                # Relevance uses the 'access_count' frontmatter field if present
                # (assumed optional; defaults to 0 when absent)
                relevances.append(calculate_relevance_score(
                    post.get('access_count', 0),
                    last_accessed,
                    confidence
                ))

            # Check tags
            tags = post.get('tags', [])
            if not tags:
                report['tag_issues'].append(str(note_file.relative_to(vault_path)))

        except Exception:
            continue

    if confidences:
        report['avg_confidence'] = sum(confidences) / len(confidences)
    if relevances:
        report['avg_relevance'] = sum(relevances) / len(relevances)

    return report
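
# Illustrative upsert flow (hypothetical variables; thresholds as defined above):
#
#     candidates = find_similar_notes("JWT Auth", new_text, "pattern")
#     if candidates and should_update_existing(candidates[0][0], new_text, candidates[0][1]):
#         # similarity >= 0.85, or 0.7-0.85 with comparable length -> merge into existing note
#         merged = merge_note_content(existing_text, new_text, "sections")
#     # otherwise the caller creates a new note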


def consolidate_duplicates(
    duplicate_candidates: List[Tuple[Path, Path, float]],
    auto_merge_threshold: float = 0.95
) -> List[Dict]:
    """
    Consolidate duplicate or near-duplicate memories.

    Args:
        duplicate_candidates: List of (path1, path2, similarity) tuples
        auto_merge_threshold: Automatically merge if similarity is above this

    Returns:
        List of consolidation actions taken
    """
    actions = []

    for path1, path2, similarity in duplicate_candidates:
        if similarity >= auto_merge_threshold:
            # Auto-merge high-similarity duplicates
            try:
                post1 = frontmatter.load(path1)
                post2 = frontmatter.load(path2)

                # Keep the note with higher confidence as the primary
                conf1 = post1.get('confidence_score', 0.5)
                conf2 = post2.get('confidence_score', 0.5)

                if conf1 >= conf2:
                    primary, secondary = path1, path2
                    post_primary, post_secondary = post1, post2
                else:
                    primary, secondary = path2, path1
                    post_primary, post_secondary = post2, post1

                # Merge the secondary's content into the primary
                post_primary.content = merge_note_content(
                    post_primary.content,
                    post_secondary.content,
                    "sections"
                )

                # Update primary
                with open(primary, 'w', encoding='utf-8') as f:
                    f.write(frontmatter.dumps(post_primary))

                # Delete the secondary note (its content now lives in the primary)
                secondary.unlink()

                actions.append({
                    'action': 'merged',
                    'primary': str(primary),
                    'secondary': str(secondary),
                    'similarity': similarity
                })

            except Exception as e:
                actions.append({
                    'action': 'error',
                    'files': [str(path1), str(path2)],
                    'error': str(e)
                })

    return actions


if __name__ == "__main__":
    print("Memory Intelligence System")
    print("=" * 60)

    # Test decay calculation
    confidence = 0.8
    last_access = datetime.now() - timedelta(days=45)
    decayed = calculate_decay(confidence, last_access)
    print("\nDecay Test:")
    print(f"  Initial confidence: {confidence}")
    print("  Days since access: 45")
    print(f"  Decayed confidence: {decayed:.3f}")

    # Test reinforcement
    reinforced = reinforce_confidence(decayed, True)
    print("\nReinforcement Test:")
    print(f"  After successful retrieval: {reinforced:.3f}")

    # Test tag extraction
    sample = "Implement JWT authentication using jsonwebtoken library for secure API access"
    tags = extract_tags_from_content(sample)
    print("\nTag Extraction Test:")
    print(f"  Content: {sample}")
    print(f"  Tags: {tags}")

    print("\n[OK] Memory intelligence layer operational")
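
# Maintenance sketch (assumed usage; the note paths below are hypothetical):
#
#     report = evaluate_memory_health()
#     actions = consolidate_duplicates(
#         [(Path("Patterns/jwt-auth.md"), Path("Patterns/jwt-authentication.md"), 0.97)]
#     )
#     # -> [{'action': 'merged', 'primary': ..., 'secondary': ..., 'similarity': 0.97}]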