Initial commit
This commit is contained in:
887
hooks/handle-search-error.mjs
Normal file
887
hooks/handle-search-error.mjs
Normal file
@@ -0,0 +1,887 @@
|
||||
// hooks/handle-search-error.mjs
|
||||
import contentExtractor from './content-extractor.mjs';
|
||||
import { handleRateLimit } from './handle-rate-limit.mjs';
|
||||
|
||||
// ============================================================================
|
||||
// CONFIGURATION: Recovery strategy timeout
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Recovery strategy timeout in milliseconds
|
||||
* Environment variable: SEARCH_PLUS_RECOVERY_TIMEOUT_MS
|
||||
* Default: 5000ms (5 seconds) - based on project requirements for <5s average recovery
|
||||
*/
|
||||
const RECOVERY_TIMEOUT_MS = validateRecoveryTimeout(process.env.SEARCH_PLUS_RECOVERY_TIMEOUT_MS || '5000');
|
||||
|
||||
/**
|
||||
* Validates recovery timeout configuration value
|
||||
* @param {string} value - The timeout value to validate
|
||||
* @returns {number} Validated timeout in milliseconds
|
||||
*/
|
||||
function validateRecoveryTimeout(value) {
|
||||
const parsed = parseInt(value, 10);
|
||||
|
||||
// Check if value is a valid number
|
||||
if (isNaN(parsed)) {
|
||||
console.warn(`⚠️ Invalid SEARCH_PLUS_RECOVERY_TIMEOUT_MS: "${value}". Using default 5000ms.`);
|
||||
return 5000;
|
||||
}
|
||||
|
||||
// Check for reasonable bounds (100ms to 60s)
|
||||
if (parsed < 100) {
|
||||
console.warn(`⚠️ SEARCH_PLUS_RECOVERY_TIMEOUT_MS too low: ${parsed}ms. Minimum is 100ms. Using 100ms.`);
|
||||
return 100;
|
||||
}
|
||||
|
||||
if (parsed > 60000) {
|
||||
console.warn(`⚠️ SEARCH_PLUS_RECOVERY_TIMEOUT_MS too high: ${parsed}ms. Maximum is 60000ms. Using 60000ms.`);
|
||||
return 60000;
|
||||
}
|
||||
|
||||
return parsed;
|
||||
}
|
||||
|
||||
// Log configuration in development mode
|
||||
if (process.env.NODE_ENV === 'development') {
|
||||
console.log(`🔧 Search-Plus Recovery Timeout: ${RECOVERY_TIMEOUT_MS}ms`);
|
||||
}
|
||||
|
||||
/**
|
||||
* Standardized error response helper
|
||||
* @param {string} strategy - Name of the strategy that failed
|
||||
* @param {Error|string} error - The error that occurred
|
||||
* @param {number} startTime - Strategy start timestamp
|
||||
* @param {Object} additionalInfo - Additional context info
|
||||
* @returns {Object} Standardized error response
|
||||
*/
|
||||
function createStandardErrorResponse(strategy, error, startTime, additionalInfo = {}) {
|
||||
const responseTime = Date.now() - startTime;
|
||||
const errorMessage = error instanceof Error ? error.message : error;
|
||||
|
||||
return {
|
||||
success: false,
|
||||
error: errorMessage,
|
||||
strategy: strategy,
|
||||
responseTime: responseTime,
|
||||
timestamp: new Date().toISOString(),
|
||||
...additionalInfo
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Standardized success response helper
|
||||
* @param {string} strategy - Name of the strategy that succeeded
|
||||
* @param {*} data - The data returned by the strategy
|
||||
* @param {number} startTime - Strategy start timestamp
|
||||
* @param {Object} additionalInfo - Additional context info
|
||||
* @returns {Object} Standardized success response
|
||||
*/
|
||||
function createStandardSuccessResponse(strategy, data, startTime, additionalInfo = {}) {
|
||||
const responseTime = Date.now() - startTime;
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: data,
|
||||
strategy: strategy,
|
||||
responseTime: responseTime,
|
||||
timestamp: new Date().toISOString(),
|
||||
...additionalInfo
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles web search errors with advanced recovery strategies
|
||||
* @param {Object} error - The error object
|
||||
* @param {Object} options - Search options that caused the error
|
||||
* @returns {Object} Recovery results or final error
|
||||
*/
|
||||
export async function handleWebSearchError(error, options) {
|
||||
console.log('Handling search error:', error);
|
||||
|
||||
// Check error type and apply appropriate recovery strategy
|
||||
if (error.code === 403 || error.message.includes('403') || error.message.toLowerCase().includes('forbidden')) {
|
||||
return await handle403Error(error, options);
|
||||
}
|
||||
else if (error.code === 451 || error.message.includes('451') || error.message.toLowerCase().includes('securitycompromise') || error.message.toLowerCase().includes('blocked until')) {
|
||||
return await handle451SecurityError(error, options);
|
||||
}
|
||||
else if (error.code === 422 || error.message.includes('422') || is422SchemaError(error)) {
|
||||
return await handle422Error(error, options);
|
||||
}
|
||||
else if (error.code === 429 || error.message.includes('429') || error.message.toLowerCase().includes('rate limit')) {
|
||||
return await handleRateLimit(error, options);
|
||||
}
|
||||
else if (error.code === 'ECONNREFUSED' || error.message.toLowerCase().includes('connection refused')) {
|
||||
return await handleConnectionRefusedError(error, options);
|
||||
}
|
||||
else if (error.code === 'ETIMEDOUT' || error.message.toLowerCase().includes('timeout')) {
|
||||
return await handleTimeoutError(error, options);
|
||||
}
|
||||
else {
|
||||
// For other errors, return the original error
|
||||
return {
|
||||
error: true,
|
||||
message: `Search failed: ${error.message}`,
|
||||
code: error.code
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles 403 Forbidden errors
|
||||
* @param {Object} error - The 403 error
|
||||
* @param {Object} options - Search options
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handle403Error(error, options) {
|
||||
console.log('Handling 403 error - trying with different headers...');
|
||||
|
||||
try {
|
||||
// Try again with completely different headers
|
||||
const modifiedParams = {
|
||||
...options,
|
||||
headers: generateDiverseHeaders()
|
||||
};
|
||||
|
||||
// Add a delay before retrying
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
return {
|
||||
success: true,
|
||||
data: results,
|
||||
message: 'Successfully retrieved results after handling 403 error'
|
||||
};
|
||||
|
||||
} catch (retryError) {
|
||||
console.log('403 retry failed, trying alternative approach...');
|
||||
|
||||
// Try with a different search query formulation
|
||||
try {
|
||||
const reformulatedQuery = reformulateQuery(options.query);
|
||||
const results = await contentExtractor.tavily.search({ ...options, query: reformulatedQuery });
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: results,
|
||||
message: 'Successfully retrieved results with reformulated query after 403 error'
|
||||
};
|
||||
} catch (finalError) {
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to retrieve results after handling 403 error: ${finalError.message}`
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles 451 SecurityCompromiseError (domain blocked due to abuse)
|
||||
* Uses parallel execution with enhanced UX logging
|
||||
* @param {Object} error - The 451 error
|
||||
* @param {Object} options - Search options
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handle451SecurityError(error, options) {
|
||||
const blockedDomain = extractBlockedDomain(error.message);
|
||||
|
||||
// Simple mode for power users who want minimal output
|
||||
if (process.env.SEARCH_PLUS_451_SIMPLE_MODE === 'true') {
|
||||
return await handleSimple451Recovery(error, options, blockedDomain);
|
||||
}
|
||||
|
||||
// Enhanced UX logging by default
|
||||
console.log('🚫 451 SecurityCompromiseError detected');
|
||||
console.log(`📍 Blocked domain: ${blockedDomain || 'unknown'}`);
|
||||
console.log('🚀 Starting parallel recovery:');
|
||||
console.log(' 🛡️ Strategy 1: Domain exclusion');
|
||||
console.log(' 🔍 Strategy 2: Alternative sources');
|
||||
|
||||
// Optimized parallel execution using the two most effective strategies
|
||||
const strategies = [
|
||||
searchWithExcludedDomainUnified(options, blockedDomain, true),
|
||||
tryAlternativeSearchSources(options, true)
|
||||
];
|
||||
|
||||
try {
|
||||
const results = await Promise.any(strategies);
|
||||
console.log(`✅ Success! Used strategy: ${results.strategy} (${results.responseTime}ms)`);
|
||||
|
||||
// Provide actionable suggestions for future searches
|
||||
if (blockedDomain) {
|
||||
console.log(`💡 Next time, try: /search-plus "${options.query} -site:${blockedDomain}"`);
|
||||
}
|
||||
|
||||
return {
|
||||
success: true,
|
||||
data: results.data,
|
||||
message: `Successfully retrieved results using ${results.strategy} for blocked domain ${blockedDomain || 'unknown'}`,
|
||||
strategy: results.strategy,
|
||||
responseTime: results.responseTime,
|
||||
blockedDomain: blockedDomain
|
||||
};
|
||||
|
||||
} catch (aggregateError) {
|
||||
// Enhanced error classification and user guidance
|
||||
const failureType = classify451Failure(aggregateError, blockedDomain, options);
|
||||
console.log(`❌ All recovery strategies failed`);
|
||||
console.log(`🔍 Error type: ${failureType.type}`);
|
||||
|
||||
if (failureType.suggestions.length > 0) {
|
||||
console.log('💡 Suggestions:');
|
||||
failureType.suggestions.forEach((suggestion, i) => {
|
||||
console.log(` ${i + 1}. ${suggestion.description}`);
|
||||
});
|
||||
}
|
||||
|
||||
return generateEnhancedErrorResponse(failureType, blockedDomain, options);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles 451 errors in simple mode with minimal output
|
||||
* @param {Object} error - The 451 error
|
||||
* @param {Object} options - Search options
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handleSimple451Recovery(error, options, blockedDomain) {
|
||||
console.log('⚡ 451 error - attempting recovery...');
|
||||
|
||||
const strategies = [
|
||||
searchWithExcludedDomainUnified(options, blockedDomain, true),
|
||||
tryAlternativeSearchSources(options, true)
|
||||
];
|
||||
|
||||
try {
|
||||
const results = await Promise.any(strategies);
|
||||
console.log(`⚡ 451 recovered in ${results.responseTime}ms`);
|
||||
return results;
|
||||
} catch (aggregateError) {
|
||||
console.log('❌ 451 recovery failed');
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to recover from 451 error. Domain ${blockedDomain || 'unknown'} is blocked.`,
|
||||
blockedDomain: blockedDomain
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Classifies 451 failure types for enhanced error handling
|
||||
* @param {AggregateError} aggregateError - The combined error from failed strategies
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Failure classification with suggestions
|
||||
*/
|
||||
function classify451Failure(aggregateError, blockedDomain, options) {
|
||||
// Check for permanent block patterns
|
||||
if (aggregateError.errors.some(err => err.message.includes('blocked until'))) {
|
||||
return {
|
||||
type: 'permanent-block',
|
||||
suggestions: [
|
||||
{
|
||||
type: 'ready-to-run',
|
||||
command: `/search-plus "${options.query} -site:${blockedDomain}"`,
|
||||
description: 'Exclude blocked domain and search again'
|
||||
},
|
||||
{
|
||||
type: 'manual-search',
|
||||
url: `https://www.google.com/search?q=${encodeURIComponent(options.query)}`,
|
||||
description: 'Search manually in external browser'
|
||||
}
|
||||
],
|
||||
autoSuggestion: {
|
||||
message: 'For more predictable results, enable simple 451 handling?',
|
||||
command: 'export SEARCH_PLUS_451_SIMPLE_MODE=true',
|
||||
benefit: 'Provides clear guidance instead of complex automation'
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// Default classification
|
||||
return {
|
||||
type: 'recovery-failed',
|
||||
suggestions: [
|
||||
{
|
||||
type: 'ready-to-run',
|
||||
command: `/search-plus "${options.query} -site:${blockedDomain}"`,
|
||||
description: 'Try again excluding the blocked domain'
|
||||
}
|
||||
],
|
||||
autoSuggestion: {
|
||||
message: 'Want simpler error handling?',
|
||||
command: 'export SEARCH_PLUS_451_SIMPLE_MODE=true',
|
||||
benefit: 'Minimal output with focus on results'
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates enhanced error response with actionable suggestions
|
||||
* @param {Object} failureType - The classified failure type
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Enhanced error response
|
||||
*/
|
||||
function generateEnhancedErrorResponse(failureType, blockedDomain, options) {
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to retrieve results after handling 451 SecurityCompromiseError. Domain ${blockedDomain || 'unknown'} is blocked.`,
|
||||
blockedDomain: blockedDomain,
|
||||
failureType: failureType.type,
|
||||
suggestions: failureType.suggestions,
|
||||
autoSuggestion: failureType.autoSuggestion
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the blocked domain from error message
|
||||
* @param {string} errorMessage - The error message
|
||||
* @returns {string|null} The blocked domain or null if not found
|
||||
*/
|
||||
function extractBlockedDomain(errorMessage) {
|
||||
const domainMatch = errorMessage.match(/domain (\S+) blocked/i) ||
|
||||
errorMessage.match(/access to (\S+) blocked/i);
|
||||
return domainMatch ? domainMatch[1] : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Extracts the block expiration date from error message
|
||||
* @param {string} errorMessage - The error message
|
||||
* @returns {string|null} The block expiration date or null if not found
|
||||
*/
|
||||
function extractBlockUntilDate(errorMessage) {
|
||||
// Look for "blocked until" followed by a date, capturing until the next reason or end
|
||||
const dateMatch = errorMessage.match(/blocked until (.+?)(?:\s+due|$)/i);
|
||||
return dateMatch ? dateMatch[1].trim() : null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Alternative search sources with configurable optimization level
|
||||
* @param {Object} options - Original search options
|
||||
* @param {boolean} optimized - Whether to use optimized timeouts for parallel execution
|
||||
* @returns {Promise<Object>} Search results from alternative sources
|
||||
*/
|
||||
async function tryAlternativeSearchSources(options, optimized = false) {
|
||||
const startTime = Date.now();
|
||||
const strategyName = 'alternative-search-sources';
|
||||
const timeout = optimized ? 1500 : RECOVERY_TIMEOUT_MS;
|
||||
|
||||
try {
|
||||
console.log(optimized ? '🔍 Trying alternative search sources...' : 'Trying alternative search sources...');
|
||||
const blockedDomain = optimized ? (options.blockedDomain || null) : (options.error ? extractBlockedDomain(options.error.message || '') : null);
|
||||
const domainFilter = blockedDomain ? `-site:${blockedDomain}` : '';
|
||||
const modifiedQuery = `${options.query} ${domainFilter} alternative OR substitute OR replacement`.trim();
|
||||
const modifiedParams = {
|
||||
...options,
|
||||
query: modifiedQuery,
|
||||
include_answer: true,
|
||||
max_results: Math.min(options.max_results || 10, 8)
|
||||
};
|
||||
|
||||
if (optimized) {
|
||||
// Create AbortController for proper timeout cleanup in optimized mode
|
||||
const abortController = new AbortController();
|
||||
const timeoutId = setTimeout(() => {
|
||||
abortController.abort();
|
||||
}, timeout);
|
||||
|
||||
try {
|
||||
const searchPromise = contentExtractor.tavily.search({
|
||||
...modifiedParams,
|
||||
signal: abortController.signal
|
||||
});
|
||||
const results = await searchPromise;
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
return createStandardSuccessResponse(strategyName, results, startTime);
|
||||
} catch (searchError) {
|
||||
if (searchError.name === 'AbortError') {
|
||||
throw new Error('Strategy timeout');
|
||||
}
|
||||
throw searchError;
|
||||
}
|
||||
} else {
|
||||
// Standard mode with timeout promise
|
||||
const strategyPromise = contentExtractor.tavily.search(modifiedParams);
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => resolve(createStandardErrorResponse(strategyName, `Strategy timed out after ${timeout}ms`, startTime)), timeout);
|
||||
});
|
||||
|
||||
return await Promise.race([strategyPromise, timeoutPromise]);
|
||||
}
|
||||
} catch (error) {
|
||||
return createStandardErrorResponse(strategyName, error, startTime);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Domain exclusion search with configurable optimization level
|
||||
* @param {Object} options - Original search options
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @param {boolean} optimized - Whether to use optimized timeouts for parallel execution
|
||||
* @returns {Promise<Object>} Search results
|
||||
*/
|
||||
async function searchWithExcludedDomainUnified(options, blockedDomain, optimized = false) {
|
||||
const startTime = Date.now();
|
||||
const strategyName = 'excluded-domain-search';
|
||||
const timeout = optimized ? 1000 : RECOVERY_TIMEOUT_MS;
|
||||
|
||||
try {
|
||||
if (!blockedDomain) {
|
||||
return createStandardErrorResponse(strategyName, 'No blocked domain to exclude', startTime);
|
||||
}
|
||||
|
||||
console.log(optimized ? `🛡️ Excluding domain: ${blockedDomain}` : `Searching while excluding domain: ${blockedDomain}`);
|
||||
const exclusionQuery = `${options.query} -site:${blockedDomain}`;
|
||||
const modifiedParams = {
|
||||
...options,
|
||||
query: exclusionQuery,
|
||||
headers: generateDiverseHeaders()
|
||||
};
|
||||
|
||||
if (optimized) {
|
||||
// Create AbortController for proper timeout cleanup in optimized mode
|
||||
const abortController = new AbortController();
|
||||
const timeoutId = setTimeout(() => {
|
||||
abortController.abort();
|
||||
}, timeout);
|
||||
|
||||
try {
|
||||
const searchPromise = contentExtractor.tavily.search({
|
||||
...modifiedParams,
|
||||
signal: abortController.signal
|
||||
});
|
||||
const results = await searchPromise;
|
||||
clearTimeout(timeoutId);
|
||||
|
||||
return createStandardSuccessResponse(strategyName, results, startTime);
|
||||
} catch (searchError) {
|
||||
if (searchError.name === 'AbortError') {
|
||||
throw new Error('Strategy timeout');
|
||||
}
|
||||
throw searchError;
|
||||
}
|
||||
} else {
|
||||
// Standard mode with timeout promise and delay
|
||||
const strategyPromise = (async () => {
|
||||
await new Promise(resolve => setTimeout(resolve, 3000));
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
return createStandardSuccessResponse(strategyName, results, startTime);
|
||||
})();
|
||||
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => resolve(createStandardErrorResponse(strategyName, `Strategy timed out after ${timeout}ms`, startTime)), timeout);
|
||||
});
|
||||
|
||||
return await Promise.race([strategyPromise, timeoutPromise]);
|
||||
}
|
||||
} catch (error) {
|
||||
return createStandardErrorResponse(strategyName, error, startTime);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Reformulates query to avoid references to blocked domains
|
||||
* @param {Object} options - Original search options
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function reformulateQueryAvoidingBlockedDomain(options, blockedDomain) {
|
||||
const startTime = Date.now();
|
||||
const strategyName = 'reformulate-query';
|
||||
|
||||
const strategyPromise = (async () => {
|
||||
try {
|
||||
console.log('Reformulating query to avoid blocked domain references...');
|
||||
let reformulatedQuery = options.query;
|
||||
if (blockedDomain) {
|
||||
const domainMappings = {
|
||||
'httpbin.org': 'HTTP testing API endpoint service',
|
||||
'github.com': 'code repository platform',
|
||||
'stackoverflow.com': 'programming Q&A website',
|
||||
'medium.com': 'blogging platform'
|
||||
};
|
||||
const genericTerm = domainMappings[blockedDomain] || 'online service';
|
||||
reformulatedQuery = options.query.replace(new RegExp(blockedDomain, 'gi'), genericTerm);
|
||||
}
|
||||
const modifiedParams = { ...options, query: reformulatedQuery, search_depth: "basic" };
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 2500));
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
|
||||
return { success: true, data: results, strategy: strategyName, responseTime: Date.now() - startTime };
|
||||
} catch (error) {
|
||||
return { success: false, error: error.message, strategy: strategyName, responseTime: Date.now() - startTime };
|
||||
}
|
||||
})();
|
||||
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => resolve({
|
||||
success: false,
|
||||
error: `Strategy timed out after ${RECOVERY_TIMEOUT_MS}ms`,
|
||||
strategy: strategyName,
|
||||
responseTime: Date.now() - startTime
|
||||
}), RECOVERY_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
return Promise.race([strategyPromise, timeoutPromise]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to use cached or archived results for blocked content
|
||||
* @param {Object} options - Original search options
|
||||
* @param {string} blockedDomain - The blocked domain
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function useCachedOrArchiveResults(options, blockedDomain) {
|
||||
const startTime = Date.now();
|
||||
const strategyName = 'archive-search';
|
||||
|
||||
const strategyPromise = (async () => {
|
||||
try {
|
||||
console.log('Searching for archived or cached content...');
|
||||
const archiveQuery = blockedDomain
|
||||
? `${options.query} web archive OR wayback machine OR cached version "site:${blockedDomain}"`
|
||||
: `${options.query} archived OR cached OR mirror`;
|
||||
const modifiedParams = { ...options, query: archiveQuery, max_results: Math.min(options.max_results || 10, 5) };
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 4000));
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
|
||||
return { success: true, data: results, strategy: strategyName, responseTime: Date.now() - startTime };
|
||||
} catch (error) {
|
||||
return { success: false, error: error.message, strategy: strategyName, responseTime: Date.now() - startTime };
|
||||
}
|
||||
})();
|
||||
|
||||
const timeoutPromise = new Promise((resolve) => {
|
||||
setTimeout(() => resolve({
|
||||
success: false,
|
||||
error: `Strategy timed out after ${RECOVERY_TIMEOUT_MS}ms`,
|
||||
strategy: strategyName,
|
||||
responseTime: Date.now() - startTime
|
||||
}), RECOVERY_TIMEOUT_MS);
|
||||
});
|
||||
|
||||
return Promise.race([strategyPromise, timeoutPromise]);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles connection refused errors
|
||||
* @param {Object} error - The connection error
|
||||
* @param {Object} options - Search options
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handleConnectionRefusedError(error, options) {
|
||||
console.log('Handling connection refused error...');
|
||||
|
||||
try {
|
||||
// Sometimes waiting and retrying works
|
||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||
|
||||
// Try with different parameters
|
||||
const modifiedParams = {
|
||||
...options,
|
||||
headers: generateDiverseHeaders(),
|
||||
timeout: (options.timeout || 10000) + 5000 // Increase timeout
|
||||
};
|
||||
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
return {
|
||||
success: true,
|
||||
data: results,
|
||||
message: 'Successfully retrieved results after handling connection refused error'
|
||||
};
|
||||
} catch (retryError) {
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to retrieve results after handling connection refused error: ${retryError.message}`
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles timeout errors
|
||||
* @param {Object} error - The timeout error
|
||||
* @param {Object} options - Search options
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handleTimeoutError(error, options) {
|
||||
console.log('Handling timeout error...');
|
||||
|
||||
try {
|
||||
// Retry with increased timeout and different headers
|
||||
const modifiedParams = {
|
||||
...options,
|
||||
headers: generateDiverseHeaders(),
|
||||
timeout: Math.min((options.timeout || 10000) * 2, 30000) // Double timeout, max 30s
|
||||
};
|
||||
|
||||
const results = await contentExtractor.tavily.search(modifiedParams);
|
||||
return {
|
||||
success: true,
|
||||
data: results,
|
||||
message: 'Successfully retrieved results after handling timeout error'
|
||||
};
|
||||
} catch (retryError) {
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to retrieve results after handling timeout error: ${retryError.message}`
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate diverse headers to avoid detection
|
||||
* @returns {Object} Diverse headers object
|
||||
*/
|
||||
function generateDiverseHeaders() {
|
||||
const userAgents = [
|
||||
'Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
|
||||
'Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0 Mobile/15E148 Safari/604.1',
|
||||
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15',
|
||||
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
||||
];
|
||||
|
||||
const acceptLanguages = [
|
||||
'en-US,en;q=0.9',
|
||||
'en-GB,en;q=0.9',
|
||||
'en-CA,en;q=0.9',
|
||||
'en-AU,en;q=0.9'
|
||||
];
|
||||
|
||||
return {
|
||||
'User-Agent': userAgents[Math.floor(Math.random() * userAgents.length)],
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': acceptLanguages[Math.floor(Math.random() * acceptLanguages.length)],
|
||||
'Accept-Encoding': 'gzip, deflate, br',
|
||||
'DNT': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
'Sec-Fetch-Dest': 'document',
|
||||
'Sec-Fetch-Mode': 'navigate',
|
||||
'Sec-Fetch-Site': 'none',
|
||||
'Cache-Control': 'max-age=0'
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Detects if error is a 422 schema validation error
|
||||
* @param {Object} error - The error object
|
||||
* @returns {boolean} True if this is a 422 schema error
|
||||
*/
|
||||
function is422SchemaError(error) {
|
||||
const errorMessage = error.message || '';
|
||||
const errorString = JSON.stringify(error);
|
||||
|
||||
// Check for common 422 schema validation patterns
|
||||
const schemaErrorPatterns = [
|
||||
'missing',
|
||||
'input_schema',
|
||||
'Field required',
|
||||
'unprocessable entity',
|
||||
'validation error',
|
||||
'schema validation',
|
||||
'invalid request format'
|
||||
];
|
||||
|
||||
return schemaErrorPatterns.some(pattern =>
|
||||
errorMessage.toLowerCase().includes(pattern) ||
|
||||
errorString.toLowerCase().includes(pattern)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles 422 Unprocessable Entity errors (schema validation)
|
||||
* @param {Object} error - The 422 error
|
||||
* @param {Object} options - Search options
|
||||
* @returns {Object} Recovery results
|
||||
*/
|
||||
async function handle422Error(error, options) {
|
||||
console.log('Handling 422 schema validation error...');
|
||||
|
||||
// Try multiple recovery strategies
|
||||
const strategies = [
|
||||
() => repairSchemaAndRetry(options),
|
||||
() => simplifyQueryAndRetry(options),
|
||||
() => reformulateQueryForSchema(options),
|
||||
() => tryAlternativeAPIFormat(options)
|
||||
];
|
||||
|
||||
for (const strategy of strategies) {
|
||||
try {
|
||||
console.log('Attempting 422 error recovery strategy...');
|
||||
const results = await strategy();
|
||||
if (results && !results.error) {
|
||||
return {
|
||||
success: true,
|
||||
data: results,
|
||||
message: 'Successfully retrieved results after handling 422 schema error'
|
||||
};
|
||||
}
|
||||
} catch (strategyError) {
|
||||
console.log('422 recovery strategy failed:', strategyError.message);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return {
|
||||
error: true,
|
||||
message: `Failed to retrieve results after handling 422 schema error: ${error.message}`
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to repair schema issues and retry
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function repairSchemaAndRetry(options) {
|
||||
console.log('Attempting schema repair...');
|
||||
|
||||
// Add missing input_schema if this is the issue
|
||||
const repairedParams = {
|
||||
...options,
|
||||
input_schema: {
|
||||
type: "web_search_20250305",
|
||||
name: "web_search",
|
||||
max_uses: 8
|
||||
}
|
||||
};
|
||||
|
||||
// Add delay before retry
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
|
||||
return await contentExtractor.tavily.search(repairedParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simplifies the query to avoid schema validation issues
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function simplifyQueryAndRetry(options) {
|
||||
console.log('Simplifying query for schema compatibility...');
|
||||
|
||||
const simplifiedQuery = simplifyQueryForSchema(options.query);
|
||||
const simplifiedParams = {
|
||||
...options,
|
||||
query: simplifiedQuery,
|
||||
max_results: Math.min(options.max_results || 10, 5), // Reduce complexity
|
||||
search_depth: "basic" // Use simpler search mode
|
||||
};
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 1500));
|
||||
|
||||
return await contentExtractor.tavily.search(simplifiedParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reformulates query specifically for schema issues
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function reformulateQueryForSchema(options) {
|
||||
console.log('Reformulating query for schema compatibility...');
|
||||
|
||||
const reformulatedQuery = reformulateQueryForSchemaCompatibility(options.query);
|
||||
const reformulatedParams = {
|
||||
...options,
|
||||
query: reformulatedQuery,
|
||||
include_answer: false, // Simplify request
|
||||
include_raw_content: false
|
||||
};
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 2000));
|
||||
|
||||
return await contentExtractor.tavily.search(reformulatedParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tries alternative API format
|
||||
* @param {Object} options - Original search options
|
||||
* @returns {Object} Search results
|
||||
*/
|
||||
async function tryAlternativeAPIFormat(options) {
|
||||
console.log('Trying alternative API format...');
|
||||
|
||||
// Try with minimal parameters
|
||||
const minimalParams = {
|
||||
query: options.query,
|
||||
api_key: options.api_key,
|
||||
search_depth: "basic"
|
||||
};
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 3000));
|
||||
|
||||
return await contentExtractor.tavily.search(minimalParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Simplifies query for schema compatibility
|
||||
* @param {string} query - Original query
|
||||
* @returns {string} Simplified query
|
||||
*/
|
||||
function simplifyQueryForSchema(query) {
|
||||
return query
|
||||
.replace(/\s+/g, ' ') // Normalize whitespace
|
||||
.replace(/[^\w\s\-.,!?]/g, '') // Remove special characters except basic punctuation
|
||||
.substring(0, 200) // Limit length
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reformulates query specifically for schema compatibility issues
|
||||
* @param {string} query - Original query
|
||||
* @returns {string} Reformulated query
|
||||
*/
|
||||
function reformulateQueryForSchemaCompatibility(query) {
|
||||
// Break down complex queries into simpler components
|
||||
const words = query.split(' ').filter(word => word.length > 2);
|
||||
if (words.length > 8) {
|
||||
// If query is too long, use the most important terms
|
||||
return words.slice(0, 6).join(' ');
|
||||
}
|
||||
|
||||
// Replace problematic patterns
|
||||
return query
|
||||
.replace(/\d{4}/g, '') // Remove years
|
||||
.replace(/github|gitlab|bitbucket/gi, 'code repository') // Replace specific platforms
|
||||
.replace(/open source|open-source/gi, 'free software') // Simplify terminology
|
||||
.replace(/platform|boilerplate|framework/gi, 'software') // Generic terms
|
||||
.trim();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reformulates a query to potentially bypass filters
|
||||
* @param {string} query - Original query
|
||||
* @returns {string} Reformulated query
|
||||
*/
|
||||
function reformulateQuery(query) {
|
||||
// Simple reformulation - could be enhanced with more sophisticated NLP
|
||||
const synonyms = {
|
||||
'how to': 'guide for',
|
||||
'what is': 'information about',
|
||||
'why is': 'reason for',
|
||||
'when did': 'date of'
|
||||
};
|
||||
|
||||
let reformulated = query;
|
||||
for (const [original, replacement] of Object.entries(synonyms)) {
|
||||
reformulated = reformulated.replace(new RegExp(original, 'gi'), replacement);
|
||||
}
|
||||
|
||||
return reformulated;
|
||||
}
|
||||
|
||||
// Export additional functions for testing
|
||||
export {
|
||||
classify451Failure,
|
||||
validateRecoveryTimeout,
|
||||
createStandardErrorResponse,
|
||||
createStandardSuccessResponse
|
||||
};
|
||||
Reference in New Issue
Block a user