#!/usr/bin/env bun
/**
 * Deep Research Orchestrator
 *
 * Architecture based on current best practices:
 * - Single agent with iterative deepening
 * - Search → Summarize → Compress → Report
 * - Specialized models for different steps
 * - Cross-validation and reflection
 *
 * Sources:
 * - https://arxiv.org/abs/2506.18096 (Deep Research Agents survey)
 * - https://github.com/langchain-ai/open_deep_research
 * - https://openai.com/index/introducing-deep-research/
 *
 * Usage: bun orchestrator.js --query "Research topic" [--domain travel]
 */

import fs from 'fs';
import path from 'path';

const SECRETS_PATH = path.join(
  process.env.HOME,
  'Library/Mobile Documents/com~apple~CloudDocs/Geoffrey/secrets/.env'
);

// Load environment variables
function loadEnv() {
  if (fs.existsSync(SECRETS_PATH)) {
    const content = fs.readFileSync(SECRETS_PATH, 'utf-8');
    for (const line of content.split('\n')) {
      const [key, ...valueParts] = line.split('=');
      if (key && valueParts.length) {
        process.env[key.trim()] = valueParts.join('=').trim();
      }
    }
  }
}

loadEnv();

/**
 * Phase 1: Query Decomposition
 * Break research topic into 5-10 sub-questions from different angles
 */
async function decomposeQuery(topic, context) {
  // This would ideally call an LLM, but we can structure it deterministically
  const angles = [
    'factual/definitional',
    'comparative/alternatives',
    'current state/latest developments',
    'expert opinions/professional sources',
    'user experiences/reviews',
    'common problems/gotchas',
    'optimization/best practices',
    'multimedia/visual resources'
  ];

  const subQueries = angles.map(angle => ({
    angle,
    query: `${topic} - ${angle}`,
    priority: angle.includes('user') || angle.includes('current') ? 'high' : 'normal'
  }));

  return {
    mainTopic: topic,
    context,
    subQueries,
    timestamp: new Date().toISOString()
  };
}
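/**
 * Illustrative sketch only (not wired into the pipeline): what an LLM-backed
 * decomposition could look like via the OpenAI chat completions API.
 * Assumptions: an OPENAI_API_KEY entry exists in the secrets .env loaded
 * above, and gpt-4o-mini is sufficient for this lightweight step. The model
 * is asked for a JSON array of sub-questions; parsing may need hardening.
 */
async function decomposeQueryWithLLM(topic) {
  const res = await fetch('https://api.openai.com/v1/chat/completions', {
    method: 'POST',
    headers: {
      'Authorization': `Bearer ${process.env.OPENAI_API_KEY}`,
      'Content-Type': 'application/json'
    },
    body: JSON.stringify({
      model: 'gpt-4o-mini',
      messages: [
        {
          role: 'system',
          content: 'Break the research topic into 5-10 sub-questions covering different angles. Reply with a JSON array of strings only.'
        },
        { role: 'user', content: topic }
      ]
    })
  });
  const data = await res.json();
  // Assumes the model complied with "JSON array only"; a robust version would validate
  return JSON.parse(data.choices[0].message.content);
}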
/**
 * Phase 2: Parallel Search
 * Execute multiple searches simultaneously
 */
async function executeSearches(decomposition) {
  const results = [];

  // In full implementation, this would:
  // 1. Call WebSearch for each sub-query
  // 2. Use multiple search engines
  // 3. Try different query formulations
  // 4. Launch browser-control for JS-heavy sites

  for (const sq of decomposition.subQueries) {
    results.push({
      angle: sq.angle,
      query: sq.query,
      sources: [], // Would be populated by actual searches
      status: 'pending'
    });
  }

  return results;
}

/**
 * Phase 3: Deep Scraping
 * Use browser-control to get full content from promising sources
 */
async function deepScrape(searchResults) {
  // Would call browser-control scripts for:
  // - Reddit threads (full comments)
  // - FlyerTalk forum posts
  // - Sites that block scrapers
  // - Authenticated content

  return {
    scraped: [],
    skipped: [],
    errors: []
  };
}

/**
 * Phase 4: Summarization
 * Compress each source into key findings
 */
async function summarizeFindings(sources) {
  // Would use a smaller model (gpt-4o-mini) to summarize each source
  // Extract: key claims, supporting evidence, credibility signals

  return {
    summaries: [],
    totalSources: sources.length
  };
}

/**
 * Phase 5: Cross-Validation
 * Check claims across sources, identify consensus vs conflicts
 */
async function crossValidate(summaries) {
  // Compare claims across sources
  // Identify: consensus, conflicts, unique findings

  return {
    consensus: [],
    conflicts: [],
    unique: []
  };
}

/**
 * Phase 6: Report Generation
 * Synthesize everything into final report
 */
async function generateReport(topic, context, validated) {
  const report = {
    title: `Research: ${topic}`,
    generatedAt: new Date().toISOString(),
    context: {
      domain: context.domain,
      loaded: Object.keys(context.context),
      patterns: context.patterns
    },
    executiveSummary: '',
    detailedFindings: [],
    multimediaResources: [],
    discoveries: [], // "What I discovered you might not know"
    recommendations: [],
    confidence: {
      high: [],
      needsVerification: [],
      conflicting: []
    },
    sources: []
  };

  return report;
}
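/**
 * Illustrative sketch (assumption, not called by the pipeline): one way
 * crossValidate() could bucket claims once summarizeFindings() returns
 * summaries shaped like { source: 'https://example.com', claims: ['...'] }.
 * Claims repeated by several sources land in `consensus`; single-source
 * claims in `unique`. Detecting genuine contradictions would still need an
 * LLM pass.
 */
function sketchCrossValidate(summaries) {
  const byClaim = new Map();
  for (const summary of summaries) {
    for (const claim of summary.claims || []) {
      const key = claim.toLowerCase().trim(); // naive normalization; real matching would be fuzzier
      if (!byClaim.has(key)) byClaim.set(key, { claim, sources: [] });
      byClaim.get(key).sources.push(summary.source);
    }
  }
  const consensus = [];
  const unique = [];
  for (const entry of byClaim.values()) {
    (entry.sources.length > 1 ? consensus : unique).push(entry);
  }
  return { consensus, conflicts: [], unique };
}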
/**
 * Main Orchestration Loop
 */
async function runDeepResearch(topic, options = {}) {
  console.error('šŸ” Starting deep research...');
  console.error(`šŸ“‹ Topic: ${topic}`);

  // Load context lazily (ESM dynamic import keeps startup cheap)
  const { loadDomainContext, detectDomain, loadPreferences } = await import('./context-loader.js');
  const preferences = loadPreferences();
  const domain = options.domain || detectDomain(topic);
  const context = loadDomainContext(domain, preferences);

  console.error(`šŸ·ļø Detected domain: ${domain}`);
  console.error(`šŸ“¦ Context loaded: ${Object.keys(context.context).length} keys`);

  // Phase 1: Decompose
  console.error('\nšŸ“ Phase 1: Decomposing query...');
  const decomposition = await decomposeQuery(topic, context);
  console.error(`   Generated ${decomposition.subQueries.length} sub-queries`);

  // Phase 2: Search
  console.error('\nšŸ”Ž Phase 2: Executing parallel searches...');
  const searchResults = await executeSearches(decomposition);
  console.error(`   Searched ${searchResults.length} angles`);

  // Phase 3: Deep Scrape
  console.error('\n🌐 Phase 3: Deep scraping promising sources...');
  const scraped = await deepScrape(searchResults);
  console.error(`   Scraped ${scraped.scraped.length} sources`);

  // Phase 4: Summarize
  console.error('\nšŸ“‹ Phase 4: Summarizing findings...');
  const summaries = await summarizeFindings([...searchResults, ...scraped.scraped]);
  console.error(`   Summarized ${summaries.totalSources} sources`);

  // Phase 5: Cross-validate
  console.error('\nāœ… Phase 5: Cross-validating claims...');
  const validated = await crossValidate(summaries);
  console.error(`   Found ${validated.consensus.length} consensus, ${validated.conflicts.length} conflicts`);

  // Phase 6: Generate report
  console.error('\nšŸ“„ Phase 6: Generating report...');
  const report = await generateReport(topic, context, validated);

  return report;
}

// CLI interface
async function main() {
  const args = process.argv.slice(2);
  let topic = '';
  let domain = null;

  for (let i = 0; i < args.length; i++) {
    if (args[i] === '--query') {
      topic = args[++i];
    } else if (args[i] === '--domain') {
      domain = args[++i];
    } else if (!args[i].startsWith('--')) {
      topic = args.slice(i).join(' ');
      break;
    }
  }

  if (!topic) {
    console.error(JSON.stringify({
      error: 'Missing research topic',
      usage: 'bun orchestrator.js --query "Research topic" [--domain travel]'
    }));
    process.exit(1);
  }

  try {
    const report = await runDeepResearch(topic, { domain });
    console.log(JSON.stringify(report, null, 2));
  } catch (error) {
    console.error(JSON.stringify({ error: error.message, stack: error.stack }));
    process.exit(1);
  }
}

// Only run the CLI when executed directly, not when imported as a module
if (import.meta.main) {
  main();
}

export { runDeepResearch, decomposeQuery };
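// Example invocation (illustrative):
//   bun orchestrator.js --query "example research topic" --domain travel
// Progress logs go to stderr and the final report JSON to stdout, so the
// output can be redirected cleanly:
//   bun orchestrator.js --query "example research topic" > report.json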