Initial commit
This commit is contained in:
335
skills/research-lookup/research_lookup.py
Normal file
335
skills/research-lookup/research_lookup.py
Normal file
@@ -0,0 +1,335 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Research Information Lookup Tool
|
||||
Uses Perplexity's Sonar Pro or Sonar Reasoning Pro models through OpenRouter.
|
||||
Automatically selects the appropriate model based on query complexity.
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import requests
|
||||
import time
|
||||
from datetime import datetime
|
||||
from typing import Dict, List, Optional, Any
|
||||
from urllib.parse import quote
|
||||
|
||||
|
||||
class ResearchLookup:
    """Research information lookup using Perplexity Sonar models via OpenRouter.

    Each query is sent to either the "pro" model or the "reasoning" model.
    Unless a model is forced at construction time, the choice is made by a
    keyword/structure heuristic (see ``_assess_query_complexity``).
    """

    # Complexity indicators for determining which model to use.
    # Matched case-insensitively at the start of a word (see
    # _assess_query_complexity), so "explain" also matches "explains".
    REASONING_KEYWORDS = [
        'compare', 'contrast', 'analyze', 'analysis', 'synthesis', 'meta-analysis',
        'systematic review', 'evaluate', 'critique', 'trade-off', 'tradeoff',
        'relationship', 'versus', 'vs', 'vs.', 'compared to',
        'mechanism', 'why', 'how does', 'how do', 'explain', 'theoretical framework',
        'implications', 'debate', 'controversy', 'conflicting', 'paradox',
        'reconcile', 'integrate', 'multifaceted', 'complex interaction',
        'causal relationship', 'underlying mechanism', 'interpret', 'reasoning',
        'pros and cons', 'advantages and disadvantages', 'critical analysis',
        'differences between', 'similarities', 'trade offs'
    ]

    def __init__(self, force_model: Optional[str] = None):
        """
        Initialize the research lookup tool.

        Args:
            force_model: Optional model override ('pro' or 'reasoning').
                If None, automatically selects based on query complexity.

        Raises:
            ValueError: If the OPENROUTER_API_KEY environment variable is
                unset or empty.
        """
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY environment variable not set")

        self.base_url = "https://openrouter.ai/api/v1"
        self.model_pro = "perplexity/sonar-pro"  # Fast, efficient lookup
        self.model_reasoning = "perplexity/sonar-reasoning-pro"  # Deep analysis
        self.force_model = force_model
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://scientific-writer.local",  # Replace with your domain
            "X-Title": "Scientific Writer Research Tool"
        }

    def _assess_query_complexity(self, query: str) -> str:
        """
        Assess query complexity to determine which model to use.

        The score is the weighted sum of:
          * reasoning keywords found (3 points each),
          * question marks (2 points each),
          * clause connectors such as ' and ' / ' but ' (1.5 points each),
          * 1 extra point when the query is longer than 150 characters.

        Keywords must start at a word boundary, so 'vs' no longer matches
        inside unrelated words such as "EVs", while inflected forms such as
        "explains" or "analyzed" still match.

        Args:
            query: The raw user query.

        Returns:
            'reasoning' for complex analytical queries, 'pro' for straightforward lookups
        """
        import re

        query_lower = query.lower()

        # Count reasoning keywords, anchored at the start of a word to avoid
        # false positives from substrings buried inside other words.
        reasoning_count = sum(
            1
            for keyword in self.REASONING_KEYWORDS
            if re.search(r'\b' + re.escape(keyword), query_lower)
        )

        # Count questions (multiple questions suggest complexity)
        question_count = query.count('?')

        # Check for multiple clauses (complexity indicators)
        clause_indicators = [' and ', ' or ', ' but ', ' however ', ' whereas ', ' although ']
        clause_count = sum(1 for indicator in clause_indicators if indicator in query_lower)

        # Complexity score
        complexity_score = (
            reasoning_count * 3 +      # Reasoning keywords heavily weighted
            question_count * 2 +       # Multiple questions indicate complexity
            clause_count * 1.5 +       # Multiple clauses suggest nuance
            (1 if len(query) > 150 else 0)  # Long queries often more complex
        )

        # Threshold of 3 means a single reasoning keyword is enough on its own.
        return 'reasoning' if complexity_score >= 3 else 'pro'

    def _select_model(self, query: str) -> str:
        """Select the appropriate model based on query complexity or force override.

        When ``force_model`` is set, 'reasoning' selects the reasoning model and
        any other truthy value selects the pro model; the query is not inspected.
        """
        if self.force_model:
            return self.model_reasoning if self.force_model == 'reasoning' else self.model_pro

        complexity_level = self._assess_query_complexity(query)
        return self.model_reasoning if complexity_level == 'reasoning' else self.model_pro

    def _make_request(self, messages: List[Dict[str, str]], model: str, **kwargs) -> Dict[str, Any]:
        """Make a request to the OpenRouter API.

        Args:
            messages: Chat messages to send as the ``messages`` payload field.
            model: Model identifier to send as the ``model`` payload field.
            **kwargs: Extra payload fields; merged last, so they override the
                defaults set here (max_tokens, temperature, provider, ...).

        Returns:
            The decoded JSON body of the response.

        Raises:
            Exception: If the HTTP request fails or returns an error status.
                The original ``requests`` exception is chained as the cause.
        """
        data = {
            "model": model,
            "messages": messages,
            "max_tokens": 4000,
            "temperature": 0.1,  # Low temperature for factual research
            "provider": {
                "order": ["Perplexity"],
                "allow_fallbacks": False
            },
            **kwargs
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=data,
                timeout=90  # Increased timeout for reasoning model
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            # Chain the cause so the underlying HTTP/connection error stays
            # visible in tracebacks.
            raise Exception(f"API request failed: {str(e)}") from e

    def _format_research_prompt(self, query: str) -> str:
        """Format the query for optimal research results.

        Returns the user-message text: the query embedded in a fixed set of
        instructions about sources, citations, length and response layout.
        """
        return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"

IMPORTANT INSTRUCTIONS:
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
2. Include RECENT information (prioritize 2020-2024 publications)
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
4. Structure your response with clear sections and proper attribution
5. Be comprehensive but concise - aim for 800-1200 words
6. Include key findings, methodologies, and implications when relevant
7. Note any controversies, limitations, or conflicting evidence

RESPONSE FORMAT:
- Start with a brief summary (2-3 sentences)
- Present key findings and studies in organized sections
- End with future directions or research gaps if applicable
- Include 5-8 high-quality citations at the end

Remember: This is for academic research purposes. Prioritize accuracy, completeness, and proper attribution."""

    def lookup(self, query: str) -> Dict[str, Any]:
        """Perform a research lookup for the given query.

        Never raises for API or response-format problems; failures are
        reported in the returned dict instead.

        Args:
            query: The research question to look up.

        Returns:
            On success, a dict with ``success=True`` plus ``query``,
            ``response``, ``citations``, ``timestamp``, ``model``,
            ``model_type`` and ``usage``. On failure, a dict with
            ``success=False`` plus ``query``, ``error``, ``timestamp``,
            ``model`` and ``model_type``.
        """
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Select the appropriate model based on query complexity
        selected_model = self._select_model(query)
        model_type = "reasoning" if "reasoning" in selected_model else "standard"

        print(f"[Research] Using {selected_model} (detected complexity: {model_type})")

        # Format the research prompt
        research_prompt = self._format_research_prompt(query)

        # Prepare messages for the API with system message for academic mode
        messages = [
            {
                "role": "system",
                "content": "You are an academic research assistant. Focus exclusively on scholarly sources: peer-reviewed journals, academic papers, research institutions, and reputable scientific publications. Prioritize recent academic literature (2020-2024) and provide complete citations with DOIs. Use academic/scholarly search mode."
            },
            {"role": "user", "content": research_prompt}
        ]

        try:
            # Make the API request with selected model
            response = self._make_request(messages, model=selected_model)

            choices = response.get("choices")
            if not choices:
                raise ValueError("No response choices received from API")

            message = choices[0].get("message") or {}
            content = message.get("content")
            # A missing or null content is a malformed response; report it as
            # such rather than letting the regex layer fail with a TypeError.
            if not isinstance(content, str):
                raise ValueError("Invalid response format from API")

            # Extract citations if present (basic regex extraction)
            citations = self._extract_citations(content)

            return {
                "success": True,
                "query": query,
                "response": content,
                "citations": citations,
                "timestamp": timestamp,
                "model": selected_model,
                "model_type": model_type,
                "usage": response.get("usage", {})
            }

        except Exception as e:
            # Boundary handler: callers receive an error record, not an exception.
            return {
                "success": False,
                "query": query,
                "error": str(e),
                "timestamp": timestamp,
                "model": selected_model,
                "model_type": model_type
            }

    def _extract_citations(self, text: str) -> List[Dict[str, str]]:
        """Extract potential citations from the response text.

        This is a simple regex-based extractor; it finds two kinds of items:
          * "Author (YYYY)" / "Author et al. (YYYY)" mentions, returned as
            ``{"authors", "year", "type": "extracted"}``;
          * "doi: <value>" mentions (case-insensitive), returned as
            ``{"doi", "type": "doi"}`` with trailing sentence punctuation
            removed from the value.

        Author matches come first, then DOI matches, each in text order.
        Duplicates are not removed.
        """
        import re

        citations = []

        # Pattern for author et al. year
        author_pattern = r'([A-Z][a-z]+(?:\s+[A-Z]\.)*(?:\s+et\s+al\.)?)\s*\((\d{4})\)'
        for author, year in re.findall(author_pattern, text):
            citations.append({
                "authors": author,
                "year": year,
                "type": "extracted"
            })

        # Look for DOI patterns
        doi_pattern = r'doi:\s*([^\s\)\]]+)'
        for doi in re.findall(doi_pattern, text, re.IGNORECASE):
            # The pattern runs to the next whitespace/bracket, so a DOI ending
            # a sentence or list item picks up the trailing '.', ',' or ';'.
            cleaned = doi.strip().rstrip('.,;')
            if cleaned:
                citations.append({
                    "doi": cleaned,
                    "type": "doi"
                })

        return citations

    def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List[Dict[str, Any]]:
        """Perform multiple research lookups with optional delay between requests.

        Args:
            queries: Queries to run, in order.
            delay: Seconds to sleep before every query except the first;
                values <= 0 disable the pause.

        Returns:
            One ``lookup`` result dict per query, in input order.
        """
        results = []

        for i, query in enumerate(queries):
            if i > 0 and delay > 0:
                time.sleep(delay)  # Rate limiting

            result = self.lookup(query)
            results.append(result)

            # Print progress
            print(f"[Research] Completed query {i+1}/{len(queries)}: {query[:50]}...")

        return results

    def get_model_info(self) -> Dict[str, Any]:
        """Get information about available models from OpenRouter.

        Returns:
            The decoded JSON body of ``GET {base_url}/models``, or
            ``{"error": <message>}`` if the request or decoding fails.
        """
        try:
            response = requests.get(
                f"{self.base_url}/models",
                headers=self.headers,
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}
|
||||
|
||||
|
||||
def main():
    """Command-line interface for testing the research lookup tool.

    Modes (first match wins):
      * ``--model-info``: list OpenRouter models whose id contains "perplexity".
      * ``--batch Q1 Q2 ...``: run several queries via ``batch_lookup``.
      * positional ``query``: run a single lookup.

    Returns:
        Process exit status: 0 on normal completion, 1 when the API key is
        missing, no query was given, the model listing failed, or an
        unexpected exception occurred. Individual failed lookups are printed
        but do not change the exit status.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Research Information Lookup Tool")
    parser.add_argument("query", nargs="?", help="Research query to look up")
    parser.add_argument("--model-info", action="store_true", help="Show available models")
    parser.add_argument("--batch", nargs="+", help="Run multiple queries")
    parser.add_argument("--force-model", choices=['pro', 'reasoning'],
                        help="Force use of specific model (pro=fast lookup, reasoning=deep analysis)")

    args = parser.parse_args()

    # Check for API key up front so the user gets setup instructions rather
    # than the bare ValueError raised by the ResearchLookup constructor.
    if not os.getenv("OPENROUTER_API_KEY"):
        print("Error: OPENROUTER_API_KEY environment variable not set")
        print("Please set it in your .env file or export it:")
        print("  export OPENROUTER_API_KEY='your_openrouter_api_key'")
        return 1

    try:
        research = ResearchLookup(force_model=args.force_model)

        if args.model_info:
            print("Available models from OpenRouter:")
            models = research.get_model_info()
            # get_model_info reports failures as {"error": ...}; surface them
            # instead of printing an empty list and exiting with success.
            if "error" in models:
                print(f"Error: {models['error']}")
                return 1
            for model in models.get("data", []):
                model_id = model.get("id", "")
                if "perplexity" in model_id.lower():
                    print(f"  - {model_id}: {model.get('name', 'N/A')}")
            return 0

        if not args.query and not args.batch:
            parser.print_help()
            return 1

        if args.batch:
            print(f"Running batch research for {len(args.batch)} queries...")
            results = research.batch_lookup(args.batch)
        else:
            print(f"Researching: {args.query}")
            results = [research.lookup(args.query)]

        # Display results
        for i, result in enumerate(results):
            if result["success"]:
                print(f"\n{'='*80}")
                print(f"Query {i+1}: {result['query']}")
                print(f"Timestamp: {result['timestamp']}")
                print(f"Model: {result['model']} ({result.get('model_type', 'unknown')})")
                print(f"{'='*80}")
                print(result["response"])

                if result["citations"]:
                    print(f"\nExtracted Citations ({len(result['citations'])}):")
                    for j, citation in enumerate(result["citations"]):
                        print(f"  {j+1}. {citation}")

                if result["usage"]:
                    print(f"\nUsage: {result['usage']}")
            else:
                print(f"\nError in query {i+1}: {result['error']}")

        return 0

    except Exception as e:
        # Top-level CLI boundary: report and convert to a non-zero exit status.
        print(f"Error: {str(e)}")
        return 1
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Raise SystemExit directly rather than calling exit(): that helper is
    # installed by the `site` module and is absent under `python -S` or in
    # embedded interpreters. main()'s return value becomes the exit status.
    raise SystemExit(main())
|
||||
Reference in New Issue
Block a user