Initial commit

This commit is contained in:
Zhongwei Li
2025-11-30 08:35:59 +08:00
commit 90883a4d25
287 changed files with 75058 additions and 0 deletions

context-loader.js

@@ -0,0 +1,117 @@
#!/usr/bin/env bun
/**
* Domain Context Loader
*
* Loads relevant user context from preferences based on detected research domain.
*
 * Usage: bun context-loader.js <domain or query>
 *
 * Domains: travel, work_education, shopping, ai_coding, consulting
 * (free-text queries are keyword-matched to a domain; anything
 * unmatched falls back to 'general')
*/
import fs from 'fs';
import path from 'path';
const PREFERENCES_PATH = path.join(
process.env.HOME,
'Library/Mobile Documents/com~apple~CloudDocs/Geoffrey/knowledge/preferences.json'
);
function loadPreferences() {
const content = fs.readFileSync(PREFERENCES_PATH, 'utf-8');
return JSON.parse(content);
}
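// Resolve a dotted key path against a nested object, e.g.
// getNestedValue({ travel: { home_airport: 'SEA' } }, 'travel.home_airport') → 'SEA'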
function getNestedValue(obj, keyPath) {
return keyPath.split('.').reduce((current, key) => {
return current && current[key] !== undefined ? current[key] : null;
}, obj);
}
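// Naive substring matching: the first domain whose keyword list hits the
// query wins, so the insertion order of domainPatterns below doubles as
// the priority order.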
function detectDomain(query) {
const queryLower = query.toLowerCase();
const domainPatterns = {
travel: ['trip', 'vacation', 'hotel', 'flight', 'points', 'miles', 'ski', 'travel', 'resort', 'airline', 'marriott', 'alaska'],
work_education: ['school', 'district', 'education', 'udl', 'learning', 'student', 'teacher', 'psd', 'k-12', 'iste', 'cosn'],
shopping: ['buy', 'purchase', 'product', 'review', 'price', 'deal', 'compare', 'best', 'recommend'],
ai_coding: ['code', 'programming', 'api', 'framework', 'library', 'github', 'ai', 'llm', 'model', 'development'],
consulting: ['strategy', 'client', 'engagement', 'project', 'consulting', 'advisory']
};
for (const [domain, patterns] of Object.entries(domainPatterns)) {
if (patterns.some(p => queryLower.includes(p))) {
return domain;
}
}
return 'general';
}
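/**
 * Shape assumed for preferences.json — a sketch inferred from the reads
 * below, not a documented schema; the keys inside "travel" are
 * illustrative only:
 *
 * {
 *   "preferences": {
 *     "research_domains": {
 *       "travel": {
 *         "context_keys": ["travel.loyalty_programs", "travel.home_airport"],
 *         "research_patterns": ["check award availability before cash rates"],
 *         "fetch_dynamic": ["points_balances"]
 *       }
 *     },
 *     "travel": {
 *       "loyalty_programs": ["Marriott Bonvoy", "Alaska Mileage Plan"],
 *       "home_airport": "SEA"
 *     }
 *   }
 * }
 */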
function loadDomainContext(domain, preferences) {
const domainConfig = preferences.preferences.research_domains?.[domain];
if (!domainConfig) {
return {
domain,
context: {},
patterns: [],
dynamic_data: []
};
}
// Load context from specified keys
const context = {};
for (const keyPath of domainConfig.context_keys || []) {
    const value = getNestedValue(preferences.preferences, keyPath);
    // getNestedValue returns null for missing paths; compare against null
    // explicitly so legitimate falsy values (false, 0, '') still get loaded.
    if (value !== null) {
      context[keyPath] = value;
    }
}
return {
domain,
context,
patterns: domainConfig.research_patterns || [],
dynamic_data: domainConfig.fetch_dynamic || []
};
}
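// Example: `bun context-loader.js "ski trip on Marriott points"` matches
// the travel keyword list and prints { domain: 'travel', context: {...},
// patterns: [...], dynamic_data: [...] } as pretty-printed JSON.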
async function main() {
const args = process.argv.slice(2);
const domainOrQuery = args.join(' ');
if (!domainOrQuery) {
console.error(JSON.stringify({
error: 'Missing domain or query',
usage: 'bun context-loader.js <domain or query>',
domains: ['travel', 'work_education', 'shopping', 'ai_coding', 'consulting']
}));
process.exit(1);
}
try {
const preferences = loadPreferences();
// Check if it's a known domain name or a query to detect from
const knownDomains = ['travel', 'work_education', 'shopping', 'ai_coding', 'consulting'];
const domain = knownDomains.includes(domainOrQuery.toLowerCase())
? domainOrQuery.toLowerCase()
: detectDomain(domainOrQuery);
const result = loadDomainContext(domain, preferences);
console.log(JSON.stringify(result, null, 2));
} catch (error) {
console.error(JSON.stringify({
error: error.message
}));
process.exit(1);
}
}
// Run the CLI only when executed directly; orchestrator.js imports this
// module, and an unguarded main() would run (and possibly exit) on import.
// import.meta.main is Bun's equivalent of the require.main === module check.
if (import.meta.main) {
  main();
}
export { loadPreferences, detectDomain, loadDomainContext };

orchestrator.js

@@ -0,0 +1,263 @@
#!/usr/bin/env bun
/**
* Deep Research Orchestrator
*
* Architecture based on current best practices:
* - Single agent with iterative deepening
* - Search → Summarize → Compress → Report
* - Specialized models for different steps
* - Cross-validation and reflection
*
* Sources:
* - https://arxiv.org/abs/2506.18096 (Deep Research Agents survey)
* - https://github.com/langchain-ai/open_deep_research
* - https://openai.com/index/introducing-deep-research/
*
* Usage: bun orchestrator.js --query "Research topic" [--domain travel]
*/
import fs from 'fs';
import path from 'path';
import { loadDomainContext, detectDomain, loadPreferences } from './context-loader.js';
const SECRETS_PATH = path.join(
process.env.HOME,
'Library/Mobile Documents/com~apple~CloudDocs/Geoffrey/secrets/.env'
);
// Load environment variables
function loadEnv() {
if (fs.existsSync(SECRETS_PATH)) {
const content = fs.readFileSync(SECRETS_PATH, 'utf-8');
    for (const line of content.split('\n')) {
      const trimmed = line.trim();
      // Skip blank lines and # comments so they don't become env keys
      if (!trimmed || trimmed.startsWith('#')) continue;
      const [key, ...valueParts] = trimmed.split('=');
      if (key && valueParts.length) {
        process.env[key.trim()] = valueParts.join('=').trim();
      }
    }
}
}
loadEnv();
/**
* Phase 1: Query Decomposition
 * Break the research topic into sub-questions from different angles
 * (eight fixed angles below, within the usual 5-10 range)
*/
async function decomposeQuery(topic, context) {
  // A full implementation would ask an LLM to decompose the topic; fixed
  // angles keep this step deterministic and dependency-free for now.
const angles = [
'factual/definitional',
'comparative/alternatives',
'current state/latest developments',
'expert opinions/professional sources',
'user experiences/reviews',
'common problems/gotchas',
'optimization/best practices',
'multimedia/visual resources'
];
const subQueries = angles.map(angle => ({
angle,
query: `${topic} - ${angle}`,
priority: angle.includes('user') || angle.includes('current') ? 'high' : 'normal'
}));
return {
mainTopic: topic,
context,
subQueries,
timestamp: new Date().toISOString()
};
}
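// Example element of subQueries for the topic "best ski resorts":
// { angle: 'user experiences/reviews',
//   query: 'best ski resorts - user experiences/reviews',
//   priority: 'high' }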
/**
* Phase 2: Parallel Search
* Execute multiple searches simultaneously
*/
async function executeSearches(decomposition) {
const results = [];
// In full implementation, this would:
// 1. Call WebSearch for each sub-query
// 2. Use multiple search engines
// 3. Try different query formulations
// 4. Launch browser-control for JS-heavy sites
for (const sq of decomposition.subQueries) {
results.push({
angle: sq.angle,
query: sq.query,
sources: [], // Would be populated by actual searches
status: 'pending'
});
}
return results;
}
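// Sketch of what actual parallel execution could look like once a real
// search function exists. Not wired in anywhere; `searchFn` is an assumed
// callback (query => Promise<sources>), not part of this codebase.
async function executeSearchesParallel(decomposition, searchFn) {
  return Promise.all(
    decomposition.subQueries.map(async sq => ({
      angle: sq.angle,
      query: sq.query,
      sources: await searchFn(sq.query),
      status: 'done'
    }))
  );
}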
/**
* Phase 3: Deep Scraping
* Use browser-control to get full content from promising sources
*/
async function deepScrape(searchResults) {
// Would call browser-control scripts for:
// - Reddit threads (full comments)
// - FlyerTalk forum posts
// - Sites that block scrapers
// - Authenticated content
return {
scraped: [],
skipped: [],
errors: []
};
}
/**
* Phase 4: Summarization
* Compress each source into key findings
*/
async function summarizeFindings(sources) {
// Would use a smaller model (gpt-4o-mini) to summarize each source
// Extract: key claims, supporting evidence, credibility signals
return {
summaries: [],
totalSources: sources.length
};
}
/**
* Phase 5: Cross-Validation
* Check claims across sources, identify consensus vs conflicts
*/
async function crossValidate(summaries) {
// Compare claims across sources
// Identify: consensus, conflicts, unique findings
return {
consensus: [],
conflicts: [],
unique: []
};
}
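// Sketch (assumption, not wired in): one simple bucketing strategy, once
// summaries carry normalized claims shaped like
// { text, sourceId, stance: 'supports' | 'disputes' }. Claims seen once
// are unique; multi-source claims with one stance are consensus; mixed
// stances are conflicts.
function bucketClaims(claims) {
  const byText = new Map();
  for (const c of claims) {
    if (!byText.has(c.text)) byText.set(c.text, []);
    byText.get(c.text).push(c);
  }
  const consensus = [], conflicts = [], unique = [];
  for (const [text, group] of byText) {
    if (group.length === 1) unique.push(text);
    else if (new Set(group.map(c => c.stance)).size === 1) consensus.push(text);
    else conflicts.push(text);
  }
  return { consensus, conflicts, unique };
}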
/**
* Phase 6: Report Generation
* Synthesize everything into final report
*/
async function generateReport(topic, context, validated) {
const report = {
title: `Research: ${topic}`,
generatedAt: new Date().toISOString(),
context: {
domain: context.domain,
loaded: Object.keys(context.context),
patterns: context.patterns
},
executiveSummary: '',
detailedFindings: [],
multimediaResources: [],
discoveries: [], // "What I discovered you might not know"
recommendations: [],
confidence: {
high: [],
needsVerification: [],
conflicting: []
},
sources: []
};
return report;
}
/**
* Main Orchestration Loop
*/
async function runDeepResearch(topic, options = {}) {
console.error('🔍 Starting deep research...');
console.error(`📋 Topic: ${topic}`);
  // Load user context for the detected domain (context-loader.js is
  // imported at the top of the file)
const preferences = loadPreferences();
const domain = options.domain || detectDomain(topic);
const context = loadDomainContext(domain, preferences);
console.error(`🏷️ Detected domain: ${domain}`);
console.error(`📦 Context loaded: ${Object.keys(context.context).length} keys`);
// Phase 1: Decompose
console.error('\n📝 Phase 1: Decomposing query...');
const decomposition = await decomposeQuery(topic, context);
console.error(` Generated ${decomposition.subQueries.length} sub-queries`);
// Phase 2: Search
console.error('\n🔎 Phase 2: Executing parallel searches...');
const searchResults = await executeSearches(decomposition);
console.error(` Searched ${searchResults.length} angles`);
// Phase 3: Deep Scrape
console.error('\n🌐 Phase 3: Deep scraping promising sources...');
const scraped = await deepScrape(searchResults);
console.error(` Scraped ${scraped.scraped.length} sources`);
// Phase 4: Summarize
console.error('\n📋 Phase 4: Summarizing findings...');
const summaries = await summarizeFindings([...searchResults, ...scraped.scraped]);
console.error(` Summarized ${summaries.totalSources} sources`);
// Phase 5: Cross-validate
console.error('\n✅ Phase 5: Cross-validating claims...');
const validated = await crossValidate(summaries);
console.error(` Found ${validated.consensus.length} consensus, ${validated.conflicts.length} conflicts`);
// Phase 6: Generate report
console.error('\n📄 Phase 6: Generating report...');
const report = await generateReport(topic, context, validated);
return report;
}
// CLI interface
async function main() {
const args = process.argv.slice(2);
let topic = '';
let domain = null;
for (let i = 0; i < args.length; i++) {
if (args[i] === '--query') {
topic = args[++i];
} else if (args[i] === '--domain') {
domain = args[++i];
} else if (!args[i].startsWith('--')) {
topic = args.slice(i).join(' ');
break;
}
}
if (!topic) {
console.error(JSON.stringify({
error: 'Missing research topic',
usage: 'bun orchestrator.js --query "Research topic" [--domain travel]'
}));
process.exit(1);
}
try {
const report = await runDeepResearch(topic, { domain });
console.log(JSON.stringify(report, null, 2));
} catch (error) {
console.error(JSON.stringify({
error: error.message,
stack: error.stack
}));
process.exit(1);
}
}
// Run the CLI only when executed directly (import.meta.main is Bun's
// equivalent of require.main === module), and use ESM exports to match
// the import statements above.
if (import.meta.main) {
  main();
}
export { runDeepResearch, decomposeQuery };