Initial commit

Zhongwei Li
2025-11-30 08:51:34 +08:00
commit acde81dcfe
59 changed files with 22282 additions and 0 deletions


@@ -0,0 +1,358 @@
#!/usr/bin/env python3
"""
Initialize PRISM Context Memory Obsidian Vault
Creates the folder structure and initial index files for the Obsidian vault.
"""
import os
import sys
from datetime import datetime, timezone
from pathlib import Path

# Add to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder, find_git_root


def init_vault():
    """Initialize Obsidian vault structure."""
    print("Initializing PRISM Context Memory Obsidian Vault")
    print("=" * 60)

    vault = get_vault_path()
    folders = get_folder_paths()
    print(f"\nVault location: {vault}")

    # Create all folders
    print("\nCreating folder structure...")
    for name, path in folders.items():
        ensure_folder(path)
        print(f" [OK] {name}: {path.relative_to(vault)}")

    # Create README.md
    readme_path = vault / "PRISM-Memory" / "Index" / "README.md"
    if not readme_path.exists():
        print("\nCreating README...")
        readme_content = """---
type: index
created_at: """ + __import__('datetime').datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ") + """
---
# PRISM Context Memory
Welcome to your PRISM knowledge vault! This vault stores context captured by PRISM skills during development.
## Vault Structure
- **[[File Index|Files]]** - Analysis of code files
- **[[Pattern Index|Patterns]]** - Reusable code patterns and conventions
- **[[Decision Log|Decisions]]** - Architectural decisions and reasoning
- **Commits** - Git commit context and history
- **Interactions** - Agent learnings and outcomes
- **Learnings** - Story completion learnings and consolidations
- **Preferences** - Learned preferences and coding style
## How It Works
1. **Automatic Capture:** Hooks capture context as you code
2. **Intelligent Storage:** Claude analyzes and stores as structured markdown
3. **Easy Retrieval:** Search and link notes in Obsidian
4. **Knowledge Graph:** Visualize connections between files, patterns, and decisions
## Getting Started
### View Recent Activity
Check the [[File Index]] to see recently analyzed files.
### Explore Patterns
Browse [[Pattern Index]] to discover reusable patterns in your codebase.
### Review Decisions
Read the [[Decision Log]] to understand architectural choices.
### Search
Use Obsidian's search (Cmd/Ctrl+Shift+F) to find specific context:
- Search by file name, pattern, or concept
- Use tags like #authentication, #testing, #architecture
- Follow links to explore related notes
## Obsidian Features
### Graph View
Open the graph view (Cmd/Ctrl+G) to visualize your knowledge network.
### Tags
Filter by tags:
- `#python`, `#typescript`, `#javascript` - Languages
- `#architecture`, `#testing`, `#security` - Categories
- `#simple`, `#moderate`, `#complex` - Complexity
### Daily Notes
Link PRISM context to your daily notes to keep a project journal.
### Dataview (Optional)
If you have the Dataview plugin installed, the index pages can show dynamic queries.
## Tips
1. **Add Context Manually:** Create notes in any folder to add custom context
2. **Link Liberally:** Use `[[wikilinks]]` to connect related concepts
3. **Tag Consistently:** Use consistent tags for better filtering
4. **Review Regularly:** Browse recent changes to stay aware of system evolution
5. **Customize Structure:** Reorganize folders to match your mental model
---
**Last Updated:** """ + __import__('datetime').datetime.now().strftime("%Y-%m-%d") + """
"""
        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(readme_content)
        print(f" [OK] Created README")

    # Create File Index
    file_index_path = vault / "PRISM-Memory" / "Index" / "File Index.md"
    if not file_index_path.exists():
        print("Creating File Index...")
        file_index_content = """---
type: index
category: files
---
# File Index
Map of Contents (MOC) for analyzed code files.
## Recent Analyses
<!-- 20 most recently analyzed files -->
## By Language
### Python
<!-- All Python files -->
### TypeScript
<!-- All TypeScript files -->
### JavaScript
<!-- All JavaScript files -->
## By Complexity
### Simple
<!-- Simple files -->
### Moderate
<!-- Moderate complexity files -->
### Complex
<!-- Complex files -->
## Search Files
Use Obsidian search with:
- `path:<search>` - Search by file path
- `tag:#<language>` - Filter by language
- `tag:#<complexity>` - Filter by complexity
---
**Tip:** If you have the Dataview plugin installed, this query renders automatically:
```dataview
TABLE file_path, language, complexity, analyzed_at
FROM "PRISM-Memory/Files"
WHERE type = "file-analysis"
SORT analyzed_at DESC
LIMIT 20
```
"""
        with open(file_index_path, 'w', encoding='utf-8') as f:
            f.write(file_index_content)
        print(f" [OK] Created File Index")

    # Create Pattern Index
    pattern_index_path = vault / "PRISM-Memory" / "Index" / "Pattern Index.md"
    if not pattern_index_path.exists():
        print("Creating Pattern Index...")
        pattern_index_content = """---
type: index
category: patterns
---
# Pattern Index
Map of Contents (MOC) for code patterns and conventions.
## By Category
### Architecture
<!-- Architectural patterns -->
### Testing
<!-- Testing patterns -->
### Security
<!-- Security patterns -->
### Performance
<!-- Performance patterns -->
## Most Used Patterns
<!-- Patterns sorted by usage_count -->
## Search Patterns
Use Obsidian search with:
- `tag:#<category>` - Filter by category
- Full-text search for pattern descriptions
---
**Tip:** If you have the Dataview plugin installed, this query renders automatically:
```dataview
TABLE category, usage_count, updated_at
FROM "PRISM-Memory/Patterns"
WHERE type = "pattern"
SORT usage_count DESC
```
"""
        with open(pattern_index_path, 'w', encoding='utf-8') as f:
            f.write(pattern_index_content)
        print(f" [OK] Created Pattern Index")

    # Create Decision Log
    decision_log_path = vault / "PRISM-Memory" / "Index" / "Decision Log.md"
    if not decision_log_path.exists():
        print("Creating Decision Log...")
        decision_log_content = """---
type: index
category: decisions
---
# Decision Log
Chronological log of architectural decisions.
## Recent Decisions
<!-- 20 most recent decisions -->
## By Impact
### High Impact
<!-- High impact decisions -->
### Medium Impact
<!-- Medium impact decisions -->
### Low Impact
<!-- Low impact decisions -->
## By Status
### Accepted
<!-- Active decisions -->
### Superseded
<!-- Decisions that have been replaced -->
## Search Decisions
Use Obsidian search with:
- `tag:#<topic>` - Filter by topic
- Date-based search: `YYYY-MM-DD`
---
**Tip:** If you have the Dataview plugin installed, this query renders automatically:
```dataview
TABLE decision_date, status, impact
FROM "PRISM-Memory/Decisions"
WHERE type = "decision"
SORT decision_date DESC
LIMIT 20
```
"""
        with open(decision_log_path, 'w', encoding='utf-8') as f:
            f.write(decision_log_content)
        print(f" [OK] Created Decision Log")

    # Create .gitignore in vault to ignore Obsidian config
    gitignore_path = vault / ".gitignore"
    if not gitignore_path.exists():
        print("\nCreating .gitignore...")
        gitignore_content = """# Obsidian configuration (workspace-specific)
.obsidian/workspace.json
.obsidian/workspace-mobile.json
# Obsidian cache
.obsidian/cache/
# Personal settings
.obsidian/app.json
.obsidian/appearance.json
.obsidian/hotkeys.json
# Keep these for consistency across users:
# .obsidian/core-plugins.json
# .obsidian/community-plugins.json
"""
        with open(gitignore_path, 'w', encoding='utf-8') as f:
            f.write(gitignore_content)
        print(f" [OK] Created .gitignore")

    # Update project .gitignore to exclude vault data
    git_root = Path(find_git_root() or '.')
    project_gitignore = git_root / ".gitignore"
    if project_gitignore.exists():
        print("\nUpdating project .gitignore...")
        with open(project_gitignore, 'r', encoding='utf-8') as f:
            content = f.read()
        vault_relative = vault.relative_to(git_root) if vault.is_relative_to(git_root) else vault
        ignore_line = f"\n# PRISM Context Memory Obsidian Vault\n{vault_relative}/\n"
        if str(vault_relative) not in content:
            with open(project_gitignore, 'a', encoding='utf-8') as f:
                f.write(ignore_line)
            print(f" [OK] Added vault to project .gitignore")
        else:
            print(f" [OK] Vault already in .gitignore")

    print("\n" + "=" * 60)
    print("[SUCCESS] Vault initialization complete!")
    print("\nNext steps:")
    print("1. Open the vault in Obsidian")
    print(f" File > Open vault > {vault}")
    print("2. Install recommended plugins (optional):")
    print(" - Dataview - For dynamic queries")
    print(" - Templater - For note templates")
    print(" - Graph Analysis - For knowledge graph insights")
    print("3. Enable PRISM hooks to capture context automatically")
    print(" (See reference/quickstart.md for hook configuration)")
    print("\nVault location:")
    print(f" {vault}")


if __name__ == "__main__":
    try:
        init_vault()
    except Exception as e:
        print(f"\n[ERROR] Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


@@ -0,0 +1,574 @@
#!/usr/bin/env python3
"""
PRISM Context Memory - Intelligence Layer
Implements memory decay, self-evaluation, and learning over time.
Based on research in persistent memory systems with confidence scoring.
Key Concepts:
- Memory Decay: Confidence scores decay following Ebbinghaus curve unless reinforced
- Self-Evaluation: Track retrieval success and relevance
- Upsert Logic: Update existing knowledge rather than duplicate
- Confidence Scoring: Increases with successful usage, decays over time
"""
import os
import sys
from pathlib import Path
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import math
import re
sys.path.insert(0, str(Path(__file__).parent))

try:
    import frontmatter
except ImportError:
    print("[ERROR] python-frontmatter not installed")
    sys.exit(1)

# Lazy import to avoid circular dependency
# storage_obsidian imports from this file, so we can't import it at module level
_storage_obsidian = None


def _get_storage():
    """Lazy load storage_obsidian to avoid circular import."""
    global _storage_obsidian
    if _storage_obsidian is None:
        from storage_obsidian import get_vault_path, get_folder_paths, ensure_folder
        _storage_obsidian = {
            'get_vault_path': get_vault_path,
            'get_folder_paths': get_folder_paths,
            'ensure_folder': ensure_folder
        }
    return _storage_obsidian

# ============================================================================
# MEMORY DECAY & CONFIDENCE SCORING
# ============================================================================

def calculate_decay(
    confidence: float,
    last_accessed: datetime,
    half_life_days: int = 30
) -> float:
    """
    Calculate memory decay using exponential decay model (Ebbinghaus curve).
    Confidence decays unless memory is reinforced through successful retrieval.

    Args:
        confidence: Current confidence score (0-1)
        last_accessed: When memory was last accessed
        half_life_days: Days for confidence to decay to 50%

    Returns:
        Decayed confidence score
    """
    days_since_access = (datetime.now() - last_accessed).days
    if days_since_access == 0:
        return confidence

    # Exponential decay: C(t) = C₀ * (0.5)^(t/h)
    # where h is half-life
    decay_factor = math.pow(0.5, days_since_access / half_life_days)
    decayed_confidence = confidence * decay_factor

    # Don't decay below minimum threshold
    return max(decayed_confidence, 0.1)

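
# Illustrative worked example for calculate_decay (values are approximate and
# not part of the original module): with confidence 0.8, a 45-day gap, and the
# default 30-day half-life, the decay factor is 0.5 ** (45 / 30) ≈ 0.354, so
# the decayed confidence is 0.8 * 0.354 ≈ 0.283 (still above the 0.1 floor).
#
#     calculate_decay(0.8, datetime.now() - timedelta(days=45))  # -> ~0.283
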

def reinforce_confidence(
    current_confidence: float,
    retrieval_success: bool,
    learning_rate: float = 0.1
) -> float:
    """
    Reinforce or weaken confidence based on retrieval outcome.
    Successful retrievals increase confidence; failures decrease it.

    Args:
        current_confidence: Current score (0-1)
        retrieval_success: Whether retrieval was successful/relevant
        learning_rate: How quickly confidence adjusts (0-1)

    Returns:
        Updated confidence score
    """
    if retrieval_success:
        # Increase confidence, with diminishing returns as it approaches 1
        delta = learning_rate * (1 - current_confidence)
        return min(current_confidence + delta, 1.0)
    else:
        # Decrease confidence
        delta = learning_rate * current_confidence
        return max(current_confidence - delta, 0.1)

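
# Illustrative only (not in the original module): a successful retrieval moves
# confidence a fraction of the way toward 1.0, a failure moves it toward the
# 0.1 floor. With the default learning_rate of 0.1:
#
#     reinforce_confidence(0.283, True)   # 0.283 + 0.1 * (1 - 0.283) ≈ 0.355
#     reinforce_confidence(0.283, False)  # 0.283 - 0.1 * 0.283       ≈ 0.255
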

def calculate_relevance_score(
    access_count: int,
    last_accessed: datetime,
    confidence: float,
    recency_weight: float = 0.3,
    frequency_weight: float = 0.3,
    confidence_weight: float = 0.4
) -> float:
    """
    Calculate overall relevance score combining multiple factors.

    Args:
        access_count: Number of times accessed
        last_accessed: Most recent access time
        confidence: Current confidence score
        recency_weight: Weight for recency (default 0.3)
        frequency_weight: Weight for frequency (default 0.3)
        confidence_weight: Weight for confidence (default 0.4)

    Returns:
        Relevance score (0-1)
    """
    # Recency score (exponential decay with a 30-day time constant)
    days_since = (datetime.now() - last_accessed).days
    recency = math.exp(-days_since / 30)

    # Frequency score (logarithmic scaling; reaches 1.0 at 100 accesses)
    frequency = math.log(1 + access_count) / math.log(101)

    # Weighted combination
    relevance = (
        recency * recency_weight +
        frequency * frequency_weight +
        confidence * confidence_weight
    )
    return min(relevance, 1.0)

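
# Illustrative worked example (approximate values, not part of the original
# module): a note accessed 10 times, last touched 5 days ago, with confidence 0.7:
#   recency   = exp(-5 / 30)                       ≈ 0.846
#   frequency = ln(11) / ln(101)                   ≈ 0.520
#   relevance = 0.846*0.3 + 0.520*0.3 + 0.7*0.4    ≈ 0.690
#
#     calculate_relevance_score(10, datetime.now() - timedelta(days=5), 0.7)
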
# ============================================================================
# INTELLIGENT TAGGING
# ============================================================================

def extract_tags_from_content(content: str, existing_tags: Optional[List[str]] = None) -> List[str]:
    """
    Extract intelligent tags from content.
    Generates:
    - Concept tags (from domain terms)
    - Entity tags (specific technologies)
    - Action tags (verbs describing operations)

    Args:
        content: Note content
        existing_tags: Tags already assigned

    Returns:
        List of extracted tags
    """
    existing_tags = existing_tags or []
    extracted = set(existing_tags)
    content_lower = content.lower()

    # Common concept tags
    concept_map = {
        'authentication': ['auth', 'login', 'oauth', 'jwt', 'token'],
        'database': ['sql', 'query', 'schema', 'migration', 'postgresql', 'mongodb'],
        'testing': ['test', 'spec', 'assert', 'mock', 'fixture'],
        'api': ['endpoint', 'route', 'request', 'response', 'rest'],
        'security': ['encrypt', 'hash', 'secure', 'vulnerable', 'xss', 'csrf'],
        'performance': ['optimize', 'cache', 'latency', 'throughput'],
        'architecture': ['pattern', 'design', 'structure', 'component'],
    }
    for concept, keywords in concept_map.items():
        if any(kw in content_lower for kw in keywords):
            extracted.add(concept)

    # Technology entity tags
    tech_patterns = [
        r'\b(react|vue|angular|svelte)\b',
        r'\b(python|javascript|typescript|java|go|rust)\b',
        r'\b(postgres|mysql|mongodb|redis|elasticsearch)\b',
        r'\b(docker|kubernetes|aws|azure|gcp)\b',
        r'\b(jwt|oauth|saml|ldap)\b',
    ]
    for pattern in tech_patterns:
        matches = re.findall(pattern, content_lower, re.IGNORECASE)
        extracted.update(matches)

    return sorted(list(extracted))

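
# For reference, the sample sentence used in the self-test at the bottom of this
# file ("Implement JWT authentication using jsonwebtoken library for secure API
# access") should yield ['authentication', 'jwt', 'security'] from this function.
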

def generate_tag_hierarchy(tags: List[str]) -> Dict[str, List[str]]:
    """
    Organize tags into hierarchical structure.

    Returns:
        Dict mapping parent categories to child tags
    """
    hierarchy = {
        'technology': [],
        'concept': [],
        'domain': [],
        'pattern': []
    }

    # Categorize tags
    tech_keywords = ['python', 'javascript', 'typescript', 'react', 'postgres', 'docker']
    concept_keywords = ['authentication', 'testing', 'security', 'performance']
    pattern_keywords = ['repository', 'service', 'factory', 'singleton']

    for tag in tags:
        tag_lower = tag.lower()
        if any(tech in tag_lower for tech in tech_keywords):
            hierarchy['technology'].append(tag)
        elif any(concept in tag_lower for concept in concept_keywords):
            hierarchy['concept'].append(tag)
        elif any(pattern in tag_lower for pattern in pattern_keywords):
            hierarchy['pattern'].append(tag)
        else:
            hierarchy['domain'].append(tag)

    # Remove empty categories
    return {k: v for k, v in hierarchy.items() if v}

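
# Illustrative example (the tags here are chosen for the example, not taken
# from the original module):
#
#     generate_tag_hierarchy(['python', 'authentication', 'jwt', 'repository'])
#     # -> {'technology': ['python'], 'concept': ['authentication'],
#     #     'domain': ['jwt'], 'pattern': ['repository']}
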
# ============================================================================
# UPSERT LOGIC - UPDATE EXISTING KNOWLEDGE
# ============================================================================

def find_similar_notes(
    title: str,
    content: str,
    note_type: str,
    threshold: float = 0.7
) -> List[Tuple[Path, float]]:
    """
    Find existing notes that might be duplicates or updates.
    Uses title similarity and content overlap to identify candidates.

    Args:
        title: Note title
        content: Note content
        note_type: Type of note (file-analysis, pattern, decision)
        threshold: Similarity threshold (0-1)

    Returns:
        List of (path, similarity_score) tuples
    """
    storage = _get_storage()
    folders = storage['get_folder_paths']()
    vault = storage['get_vault_path']()

    # Map note type to folder
    folder_map = {
        'file-analysis': folders['files'],
        'pattern': folders['patterns'],
        'decision': folders['decisions'],
        'interaction': folders['interactions']
    }
    search_folder = folder_map.get(note_type)
    if not search_folder or not search_folder.exists():
        return []

    candidates = []
    title_lower = title.lower()
    content_words = set(re.findall(r'\w+', content.lower()))

    for note_file in search_folder.rglob("*.md"):
        try:
            # Check title similarity
            note_title = note_file.stem.lower()
            title_similarity = compute_string_similarity(title_lower, note_title)
            if title_similarity < 0.5:
                continue

            # Check content overlap
            post = frontmatter.load(note_file)
            note_content_words = set(re.findall(r'\w+', post.content.lower()))

            # Jaccard similarity
            intersection = len(content_words & note_content_words)
            union = len(content_words | note_content_words)
            content_similarity = intersection / union if union > 0 else 0

            # Combined score (weighted average)
            overall_similarity = (title_similarity * 0.6 + content_similarity * 0.4)
            if overall_similarity >= threshold:
                candidates.append((note_file, overall_similarity))
        except Exception:
            continue

    # Sort by similarity (highest first)
    candidates.sort(key=lambda x: x[1], reverse=True)
    return candidates


def compute_string_similarity(s1: str, s2: str) -> float:
    """
    Compute similarity between two strings using word-overlap (Jaccard) similarity.

    Returns:
        Similarity score (0-1)
    """
    # Simple word overlap method
    words1 = set(s1.split())
    words2 = set(s2.split())
    if not words1 or not words2:
        return 0.0
    intersection = len(words1 & words2)
    union = len(words1 | words2)
    return intersection / union if union > 0 else 0.0

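
# Illustrative example (strings chosen here, not from the original module):
# compute_string_similarity("auth service module", "auth service") splits both
# into word sets, giving an intersection of 2 and a union of 3, so the score
# is 2 / 3 ≈ 0.67.
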

def should_update_existing(
    existing_path: Path,
    new_content: str,
    similarity_score: float
) -> bool:
    """
    Decide whether to update existing note or create new one.

    Args:
        existing_path: Path to existing note
        new_content: New content to potentially add
        similarity_score: How similar notes are (0-1)

    Returns:
        True if should update, False if should create new
    """
    # High similarity -> update existing
    if similarity_score >= 0.85:
        return True

    # Medium similarity -> check if new content adds value
    if similarity_score >= 0.7:
        post = frontmatter.load(existing_path)
        existing_length = len(post.content)
        new_length = len(new_content)
        # If new content is substantially different/longer, keep separate
        if new_length > existing_length * 1.5:
            return False
        return True

    # Low similarity -> create new
    return False


def merge_note_content(
    existing_content: str,
    new_content: str,
    merge_strategy: str = "append"
) -> str:
    """
    Intelligently merge new content into existing note.

    Args:
        existing_content: Current note content
        new_content: New information to add
        merge_strategy: How to merge ("append", "replace", "sections")

    Returns:
        Merged content
    """
    if merge_strategy == "replace":
        return new_content
    elif merge_strategy == "append":
        # Add new content at end with separator
        return f"{existing_content}\n\n## Updated Information\n\n{new_content}"
    elif merge_strategy == "sections":
        # Merge by sections (smarter merging)
        # For now, append with date
        timestamp = datetime.now().strftime("%Y-%m-%d")
        return f"{existing_content}\n\n## Update - {timestamp}\n\n{new_content}"
    return existing_content

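
# Sketch of how the upsert helpers above can be combined by a caller. The note
# title, content, and "pattern" type here are made-up examples, and this flow
# is not itself part of the module:
#
#     candidates = find_similar_notes("Auth Middleware", new_text, "pattern")
#     if candidates and should_update_existing(candidates[0][0], new_text, candidates[0][1]):
#         post = frontmatter.load(candidates[0][0])
#         post.content = merge_note_content(post.content, new_text, "sections")
#         with open(candidates[0][0], 'w', encoding='utf-8') as f:
#             f.write(frontmatter.dumps(post))
#     else:
#         pass  # create a new note instead
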
# ============================================================================
# SELF-EVALUATION & MAINTENANCE
# ============================================================================

def evaluate_memory_health(vault_path: Optional[Path] = None) -> Dict:
    """
    Evaluate overall memory system health.
    Checks:
    - Low-confidence memories
    - Stale memories (not accessed recently)
    - Duplicate candidates
    - Tag consistency

    Returns:
        Health report dictionary
    """
    if vault_path is None:
        storage = _get_storage()
        vault_path = storage['get_vault_path']()

    report = {
        'total_notes': 0,
        'low_confidence': [],
        'stale_memories': [],
        'duplicate_candidates': [],
        'tag_issues': [],
        'avg_confidence': 0.0,
        'avg_relevance': 0.0
    }
    confidences = []
    relevances = []

    for note_file in vault_path.rglob("*.md"):
        try:
            post = frontmatter.load(note_file)
            if post.get('type') not in ['file-analysis', 'pattern', 'decision']:
                continue
            report['total_notes'] += 1

            # Check confidence
            confidence = post.get('confidence_score', 0.5)
            confidences.append(confidence)
            if confidence < 0.3:
                report['low_confidence'].append(str(note_file.relative_to(vault_path)))

            # Check staleness
            last_accessed_str = post.get('last_accessed')
            if last_accessed_str:
                last_accessed = datetime.fromisoformat(last_accessed_str)
                days_stale = (datetime.now() - last_accessed).days
                if days_stale > 90:
                    report['stale_memories'].append({
                        'path': str(note_file.relative_to(vault_path)),
                        'days_stale': days_stale
                    })

            # Check tags
            tags = post.get('tags', [])
            if not tags:
                report['tag_issues'].append(str(note_file.relative_to(vault_path)))
        except Exception:
            continue

    if confidences:
        report['avg_confidence'] = sum(confidences) / len(confidences)
    return report


def consolidate_duplicates(
    duplicate_candidates: List[Tuple[Path, Path, float]],
    auto_merge_threshold: float = 0.95
) -> List[Dict]:
    """
    Consolidate duplicate or near-duplicate memories.

    Args:
        duplicate_candidates: List of (path1, path2, similarity) tuples
        auto_merge_threshold: Automatically merge if similarity above this

    Returns:
        List of consolidation actions taken
    """
    actions = []
    for path1, path2, similarity in duplicate_candidates:
        if similarity >= auto_merge_threshold:
            # Auto-merge high-similarity duplicates
            try:
                post1 = frontmatter.load(path1)
                post2 = frontmatter.load(path2)

                # Keep the one with higher confidence
                conf1 = post1.get('confidence_score', 0.5)
                conf2 = post2.get('confidence_score', 0.5)
                if conf1 >= conf2:
                    primary, secondary = path1, path2
                else:
                    primary, secondary = path2, path1

                # Merge content
                post_primary = frontmatter.load(primary)
                post_secondary = frontmatter.load(secondary)
                merged_content = merge_note_content(
                    post_primary.content,
                    post_secondary.content,
                    "sections"
                )

                # Update primary
                post_primary.content = merged_content
                with open(primary, 'w', encoding='utf-8') as f:
                    f.write(frontmatter.dumps(post_primary))

                # Delete secondary (its content now lives in primary)
                secondary.unlink()

                actions.append({
                    'action': 'merged',
                    'primary': str(primary),
                    'secondary': str(secondary),
                    'similarity': similarity
                })
            except Exception as e:
                actions.append({
                    'action': 'error',
                    'files': [str(path1), str(path2)],
                    'error': str(e)
                })
    return actions

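
# Illustrative call (the duplicate pairs would be produced by a separate scan,
# e.g. by running find_similar_notes over existing notes; the paths here are
# made-up examples):
#
#     consolidate_duplicates([(Path("a.md"), Path("b.md"), 0.97)])
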
if __name__ == "__main__":
print("Memory Intelligence System")
print("=" * 60)
# Test decay calculation
confidence = 0.8
last_access = datetime.now() - timedelta(days=45)
decayed = calculate_decay(confidence, last_access)
print(f"\nDecay Test:")
print(f" Initial confidence: {confidence}")
print(f" Days since access: 45")
print(f" Decayed confidence: {decayed:.3f}")
# Test reinforcement
reinforced = reinforce_confidence(decayed, True)
print(f"\nReinforcement Test:")
print(f" After successful retrieval: {reinforced:.3f}")
# Test tag extraction
sample = "Implement JWT authentication using jsonwebtoken library for secure API access"
tags = extract_tags_from_content(sample)
print(f"\nTag Extraction Test:")
print(f" Content: {sample}")
print(f" Tags: {tags}")
print("\n[OK] Memory intelligence layer operational")

File diff suppressed because it is too large