Initial commit
261
skills/research-lookup/scripts/research_lookup.py
Executable file
@@ -0,0 +1,261 @@
#!/usr/bin/env python3
"""
Research Information Lookup Tool
Uses Perplexity's Sonar Pro model through OpenRouter for academic research queries.
"""

import os
import json
import requests
import time
from datetime import datetime
from typing import Dict, List, Optional, Any
from urllib.parse import quote


class ResearchLookup:
    """Research information lookup using Perplexity Sonar Pro via OpenRouter."""

    def __init__(self):
        """Initialize the research lookup tool."""
        self.api_key = os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError("OPENROUTER_API_KEY environment variable not set")

        self.base_url = "https://openrouter.ai/api/v1"
        self.model = "perplexity/sonar-reasoning-pro"  # Perplexity Sonar Reasoning Pro with online search
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://scientific-writer.local",  # Replace with your domain
            "X-Title": "Scientific Writer Research Tool"
        }

    def _make_request(self, messages: List[Dict[str, str]], **kwargs) -> Dict[str, Any]:
        """Make a request to the OpenRouter API."""
        data = {
            "model": self.model,
            "messages": messages,
            "max_tokens": 8000,
            "temperature": 0.1,  # Low temperature for factual research
            **kwargs
        }

        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=data,
                timeout=60
            )
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as e:
            raise Exception(f"API request failed: {str(e)}")

    def _format_research_prompt(self, query: str) -> str:
        """Format the query for optimal research results."""
        return f"""You are an expert research assistant. Please provide comprehensive, accurate research information for the following query: "{query}"

IMPORTANT INSTRUCTIONS:
1. Focus on ACADEMIC and SCIENTIFIC sources (peer-reviewed papers, reputable journals, institutional research)
2. Include RECENT information (prioritize 2020-2024 publications)
3. Provide COMPLETE citations with authors, title, journal/conference, year, and DOI when available
4. Structure your response with clear sections and proper attribution
5. Be comprehensive but concise - aim for 800-1200 words
6. Include key findings, methodologies, and implications when relevant
7. Note any controversies, limitations, or conflicting evidence

RESPONSE FORMAT:
- Start with a brief summary (2-3 sentences)
- Present key findings and studies in organized sections
- End with future directions or research gaps if applicable
- Include 5-8 high-quality citations at the end

Remember: This is for academic research purposes. Prioritize accuracy, completeness, and proper attribution."""

    def lookup(self, query: str) -> Dict[str, Any]:
        """Perform a research lookup for the given query."""
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Format the research prompt
        research_prompt = self._format_research_prompt(query)

        # Prepare messages for the API with system message for academic mode
        messages = [
            {
                "role": "system",
                "content": "You are an academic research assistant. Focus exclusively on scholarly sources: peer-reviewed journals, academic papers, research institutions, and reputable scientific publications. Prioritize recent academic literature (2020-2024) and provide complete citations with DOIs. Use academic/scholarly search mode."
            },
            {"role": "user", "content": research_prompt}
        ]

        try:
            # Make the API request
            response = self._make_request(messages)

            # Extract the response content
            if "choices" in response and len(response["choices"]) > 0:
                choice = response["choices"][0]
                if "message" in choice and "content" in choice["message"]:
                    content = choice["message"]["content"]

                    # Extract citations if present (basic regex extraction)
                    citations = self._extract_citations(content)

                    return {
                        "success": True,
                        "query": query,
                        "response": content,
                        "citations": citations,
                        "timestamp": timestamp,
                        "model": self.model,
                        "usage": response.get("usage", {})
                    }
                else:
                    raise Exception("Invalid response format from API")
            else:
                raise Exception("No response choices received from API")

        except Exception as e:
            return {
                "success": False,
                "query": query,
                "error": str(e),
                "timestamp": timestamp,
                "model": self.model
            }

    def _extract_citations(self, text: str) -> List[Dict[str, str]]:
        """Extract potential citations from the response text."""
        # This is a simple citation extractor - in practice, you might want
        # to use a more sophisticated approach or rely on the model's structured output

        citations = []

        # Look for common citation patterns
        import re

        # Pattern for author et al. year
        author_pattern = r'([A-Z][a-z]+(?:\s+[A-Z]\.)*(?:\s+et\s+al\.)?)\s*\((\d{4})\)'
        matches = re.findall(author_pattern, text)

        for author, year in matches:
            citations.append({
                "authors": author,
                "year": year,
                "type": "extracted"
            })

        # Look for DOI patterns
        doi_pattern = r'doi:\s*([^\s\)\]]+)'
        doi_matches = re.findall(doi_pattern, text, re.IGNORECASE)

        for doi in doi_matches:
            citations.append({
                "doi": doi.strip(),
                "type": "doi"
            })

        return citations

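    # Illustrative matches for the citation patterns above (hypothetical text, not real citations):
    #   "Smith et al. (2023) reported ..."            -> {"authors": "Smith et al.", "year": "2023", "type": "extracted"}
    #   "... available at doi: 10.1234/example.5678"  -> {"doi": "10.1234/example.5678", "type": "doi"}
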
    def batch_lookup(self, queries: List[str], delay: float = 1.0) -> List[Dict[str, Any]]:
        """Perform multiple research lookups with optional delay between requests."""
        results = []

        for i, query in enumerate(queries):
            if i > 0 and delay > 0:
                time.sleep(delay)  # Rate limiting

            result = self.lookup(query)
            results.append(result)

            # Print progress
            print(f"[Research] Completed query {i+1}/{len(queries)}: {query[:50]}...")

        return results

    def get_model_info(self) -> Dict[str, Any]:
        """Get information about available models from OpenRouter."""
        try:
            response = requests.get(
                f"{self.base_url}/models",
                headers=self.headers,
                timeout=30
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            return {"error": str(e)}


def main():
    """Command-line interface for testing the research lookup tool."""
    import argparse

    parser = argparse.ArgumentParser(description="Research Information Lookup Tool")
    parser.add_argument("query", nargs="?", help="Research query to look up")
    parser.add_argument("--model-info", action="store_true", help="Show available models")
    parser.add_argument("--batch", nargs="+", help="Run multiple queries")

    args = parser.parse_args()

    # Check for API key
    if not os.getenv("OPENROUTER_API_KEY"):
        print("Error: OPENROUTER_API_KEY environment variable not set")
        print("Please set it in your .env file or export it:")
        print("  export OPENROUTER_API_KEY='your_openrouter_api_key'")
        return 1

    try:
        research = ResearchLookup()

        if args.model_info:
            print("Available models from OpenRouter:")
            models = research.get_model_info()
            if "data" in models:
                for model in models["data"]:
                    if "perplexity" in model["id"].lower():
                        print(f"  - {model['id']}: {model.get('name', 'N/A')}")
            return 0

        if not args.query and not args.batch:
            print("Error: No query provided. Use --model-info to see available models.")
            return 1

        if args.batch:
            print(f"Running batch research for {len(args.batch)} queries...")
            results = research.batch_lookup(args.batch)
        else:
            print(f"Researching: {args.query}")
            results = [research.lookup(args.query)]

        # Display results
        for i, result in enumerate(results):
            if result["success"]:
                print(f"\n{'='*80}")
                print(f"Query {i+1}: {result['query']}")
                print(f"Timestamp: {result['timestamp']}")
                print(f"Model: {result['model']}")
                print(f"{'='*80}")
                print(result["response"])

                if result["citations"]:
                    print(f"\nExtracted Citations ({len(result['citations'])}):")
                    for j, citation in enumerate(result["citations"]):
                        print(f"  {j+1}. {citation}")

                if result["usage"]:
                    print(f"\nUsage: {result['usage']}")
            else:
                print(f"\nError in query {i+1}: {result['error']}")

        return 0

    except Exception as e:
        print(f"Error: {str(e)}")
        return 1


if __name__ == "__main__":
    exit(main())
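
Below is a minimal usage sketch for calling the class directly rather than through the CLI. It assumes this script is importable as research_lookup (for example, when run from the same directory), that OPENROUTER_API_KEY is set, and the query string is hypothetical.

from research_lookup import ResearchLookup

research = ResearchLookup()
result = research.lookup("recent advances in perovskite solar cell stability")
if result["success"]:
    print(result["response"])
    print(result["citations"])
else:
    print("Lookup failed:", result["error"])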