Initial commit

skills/deep-research/scripts/web-researcher.ts (new file, 280 lines added)

@@ -0,0 +1,280 @@
#!/usr/bin/env bun

/**
 * Web Researcher - Web-based research automation
 * Handles web search and content extraction for research tasks
 */

import { randomUUID } from 'node:crypto';

interface ResearchOptions {
  query: string;
  depth?: 'quick' | 'comprehensive' | 'recent';
  maxResults?: number;
  sources?: string[];
  excludeDomains?: string[];
}

interface SearchResult {
  title: string;
  snippet: string;
  url?: string;
  content?: string;
  source: string;
  relevanceScore: number;
  publishDate?: string;
}

/**
 * Web Researcher class for automating web-based research
 */
class WebResearcher {
  private readonly userAgent = 'Mozilla/5.0 (compatible; ResearchBot/1.0)';
  private readonly maxRetries = 3;
  private readonly requestDelay = 1000; // 1 second between requests

  /**
   * Performs web research based on the provided options
   *
   * @param options - Research configuration options
   * @returns Array of search results
   */
  async performResearch(options: ResearchOptions): Promise<SearchResult[]> {
    // Validate inputs before running the search
    this.validateOptions(options);

    try {
      // Simulate web search results
      const results = await this.simulateWebSearch(options);

      // Filter and rank results
      const filteredResults = this.filterResults(results, options);
      const rankedResults = this.rankResults(filteredResults);

      return rankedResults;
    } catch (error) {
      console.error('❌ Research failed:', error);
      throw error;
    }
  }

  /**
   * Simulates web search functionality
   * In a real implementation, this would use search APIs or web scraping
   * (a sketch of such a call follows this method)
   *
   * @param options - Research options
   * @returns Simulated search results
   */
  private async simulateWebSearch(options: ResearchOptions): Promise<SearchResult[]> {
    // Simulate API delay
    await this.delay(500);

    const mockResults: SearchResult[] = [
      {
        title: `${options.query} - Wikipedia`,
        snippet: `Comprehensive information about ${options.query} including history, operations, and key facts.`,
        url: `https://en.wikipedia.org/wiki/${encodeURIComponent(options.query)}`,
        source: 'Wikipedia',
        relevanceScore: 0.9,
        publishDate: new Date().toISOString(),
      },
      {
        title: `${options.query} Official Website`,
        snippet: `Official information about ${options.query}, including company profile, leadership, and recent news.`,
        url: `https://www.${options.query.toLowerCase().replace(/\s+/g, '')}.com`,
        source: 'Official Website',
        relevanceScore: 0.95,
        publishDate: new Date().toISOString(),
      },
      {
        title: `${options.query} - Crunchbase Profile`,
        snippet: `Funding information, investors, and company details for ${options.query}.`,
        url: `https://www.crunchbase.com/organization/${options.query.toLowerCase().replace(/\s+/g, '-')}`,
        source: 'Crunchbase',
        relevanceScore: 0.85,
        publishDate: new Date().toISOString(),
      },
      {
        title: `${options.query} Financial Performance`,
        snippet: `Latest financial results, revenue data, and market performance for ${options.query}.`,
        url: `https://finance.yahoo.com/quote/${options.query.toUpperCase()}`,
        source: 'Yahoo Finance',
        relevanceScore: 0.8,
        publishDate: new Date().toISOString(),
      },
      {
        title: `${options.query} Industry Analysis`,
        snippet: `Market position, competitors, and industry analysis for ${options.query}.`,
        url: `https://www.industry-analysis.com/${options.query.toLowerCase().replace(/\s+/g, '-')}`,
        source: 'Industry Analysis',
        relevanceScore: 0.75,
        publishDate: new Date().toISOString(),
      },
    ];

    // Return subset based on maxResults
    const maxResults = options.maxResults || 10;
    return mockResults.slice(0, Math.min(maxResults, mockResults.length));
  }
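
  /**
   * Sketch only: one possible shape for the real search call described above.
   * The endpoint URL and response shape below are hypothetical placeholders,
   * not a real API; a concrete search provider would need its own request and
   * result-mapping code.
   */
  private async searchViaHttp(options: ResearchOptions): Promise<SearchResult[]> {
    const url = `https://search.example.com/api?q=${encodeURIComponent(options.query)}`;
    const response = await fetch(url, {
      headers: { 'User-Agent': this.userAgent },
    });
    if (!response.ok) {
      throw new Error(`Search request failed with HTTP ${response.status}`);
    }
    // Assume the hypothetical endpoint already returns SearchResult-shaped items
    const data = (await response.json()) as SearchResult[];
    return data.slice(0, options.maxResults ?? 10);
  }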

  /**
   * Filters search results based on research options
   *
   * @param results - Raw search results
   * @param options - Research options
   * @returns Filtered results
   */
  private filterResults(results: SearchResult[], options: ResearchOptions): SearchResult[] {
    let filtered = [...results];

    // Filter by excluded domains
    if (options.excludeDomains && options.excludeDomains.length > 0) {
      filtered = filtered.filter(result => {
        const url = result.url || '';
        return !options.excludeDomains!.some(domain => url.includes(domain));
      });
    }

    // Filter by specific sources
    if (options.sources && options.sources.length > 0) {
      filtered = filtered.filter(result =>
        options.sources!.some(source =>
          result.source.toLowerCase().includes(source.toLowerCase())
        )
      );
    }

    // Filter by relevance score
    const minRelevance = this.getMinRelevanceScore(options.depth);
    filtered = filtered.filter(result => result.relevanceScore >= minRelevance);

    return filtered;
  }

  /**
   * Ranks search results by relevance and other factors
   *
   * @param results - Filtered search results
   * @returns Ranked search results
   */
  private rankResults(results: SearchResult[]): SearchResult[] {
    return results.sort((a, b) => {
      // Primary sort: relevance score
      if (b.relevanceScore !== a.relevanceScore) {
        return b.relevanceScore - a.relevanceScore;
      }

      // Secondary sort: publish date (more recent first)
      if (a.publishDate && b.publishDate) {
        return new Date(b.publishDate).getTime() - new Date(a.publishDate).getTime();
      }

      // Tertiary sort: source authority (more authoritative sources first)
      const sourceAuthority = this.getSourceAuthorityScore(b.source) - this.getSourceAuthorityScore(a.source);
      if (sourceAuthority !== 0) {
        return sourceAuthority;
      }

      return 0;
    });
  }

  /**
   * Gets the minimum relevance score based on research depth
   *
   * @param depth - Research depth level
   * @returns Minimum relevance score threshold
   */
  private getMinRelevanceScore(depth?: string): number {
    switch (depth) {
      case 'comprehensive':
        return 0.6;
      case 'recent':
        return 0.7;
      case 'quick':
      default:
        return 0.8;
    }
  }

  /**
   * Gets source authority score for ranking
   *
   * @param source - Source name
   * @returns Authority score
   */
  private getSourceAuthorityScore(source: string): number {
    const authorityScores: Record<string, number> = {
      'Official Website': 10,
      'Wikipedia': 9,
      'Reuters': 9,
      'Bloomberg': 9,
      'Yahoo Finance': 8,
      'Crunchbase': 8,
      'Industry Analysis': 7,
      'News': 6,
    };

    return authorityScores[source] || 5;
  }

  /**
   * Fetches full content from a URL
   * In a real implementation, this would perform HTTP requests and content extraction
   * (a sketch of such a fetch follows this method)
   *
   * @param url - URL to fetch content from
   * @returns Extracted content
   */
  async fetchContent(url: string): Promise<string> {
    // Simulate content fetching
    await this.delay(this.requestDelay);

    // Return mock content
    return `This is the extracted content from ${url}. In a real implementation, this would contain the actual web page content after processing and cleaning.`;
  }
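
  /**
   * Sketch only: one possible shape for the real HTTP fetch described above,
   * reusing the class's userAgent, maxRetries, and requestDelay fields.
   * Assumes a fetch-capable runtime (Bun or Node 18+); content extraction and
   * cleaning are omitted.
   */
  private async fetchWithRetries(url: string): Promise<string> {
    let lastError: unknown;
    for (let attempt = 1; attempt <= this.maxRetries; attempt++) {
      try {
        const response = await fetch(url, {
          headers: { 'User-Agent': this.userAgent },
        });
        if (!response.ok) {
          throw new Error(`HTTP ${response.status} for ${url}`);
        }
        return await response.text();
      } catch (error) {
        lastError = error;
        if (attempt < this.maxRetries) {
          // Simple linear backoff before the next attempt
          await this.delay(this.requestDelay * attempt);
        }
      }
    }
    throw lastError;
  }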

  /**
   * Closes the researcher and cleans up resources
   */
  close(): void {
    // No persistent connections or handles are held by this simulated implementation
  }

  /**
   * Utility function to create delays
   *
   * @param ms - Milliseconds to delay
   * @returns Promise that resolves after the delay
   */
  private delay(ms: number): Promise<void> {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Generates a unique research session ID
   *
   * @returns Unique session ID
   */
  generateSessionId(): string {
    return `research_${randomUUID()}`;
  }

  /**
   * Validates research options
   *
   * @param options - Research options to validate
   * @throws Error if options are invalid
   */
  private validateOptions(options: ResearchOptions): void {
    if (!options.query || options.query.trim().length === 0) {
      throw new Error('Query is required and cannot be empty');
    }

    if (options.maxResults !== undefined && (options.maxResults < 1 || options.maxResults > 100)) {
      throw new Error('Max results must be between 1 and 100');
    }

    if (options.depth && !['quick', 'comprehensive', 'recent'].includes(options.depth)) {
      throw new Error('Depth must be one of: quick, comprehensive, recent');
    }
  }
}

export { WebResearcher, type ResearchOptions, type SearchResult };
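
A minimal usage sketch, assuming a Bun (or Node 18+) runtime and an import path relative to this file; the query and option values are illustrative only:

import { WebResearcher } from './web-researcher';

const researcher = new WebResearcher();
const sessionId = researcher.generateSessionId();
const results = await researcher.performResearch({
  query: 'Example Corp',
  depth: 'comprehensive',
  maxResults: 5,
});

console.log(`Session ${sessionId}: ${results.length} results`);
for (const result of results) {
  console.log(`${result.relevanceScore.toFixed(2)}  ${result.title}`);
}
researcher.close();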