Initial commit

Zhongwei Li
2025-11-30 08:38:26 +08:00
commit 41d9f6b189
304 changed files with 98322 additions and 0 deletions


@@ -0,0 +1,159 @@
{
"name": "Research Claim Map Evaluator",
"description": "Evaluates claim verification for precise claim definition, evidence triangulation, source credibility assessment, and confidence calibration",
"criteria": [
{
"name": "Claim Precision and Testability",
"weight": 1.4,
"scale": {
"1": "Vague claim with undefined terms, no specific metrics, untestable",
"2": "Somewhat specific but missing key details (timeframe, scope, or metrics unclear)",
"3": "Specific claim with most terms defined, testable with some clarification needed",
"4": "Precise claim with numbers/dates/scope, clear terms, fully testable",
"5": "Exemplary: Reformulated from vague to specific, key terms defined explicitly, potential ambiguities addressed, claim decomposed into testable sub-claims if complex"
}
},
{
"name": "Evidence Triangulation Quality",
"weight": 1.5,
"scale": {
"1": "Single source or only sources agreeing with claim (no contradicting evidence sought)",
"2": "Multiple sources but all similar type/origin (not truly independent)",
"3": "2-3 independent sources for and against, basic triangulation",
"4": "3+ independent sources with different methodologies, both supporting and contradicting evidence gathered",
"5": "Exemplary: Systematic triangulation across source types (primary + secondary), methodologies (quantitative + qualitative), perspectives (proponent + skeptic), active search for disconfirming evidence, circular citations identified and excluded"
}
},
{
"name": "Source Credibility Assessment",
"weight": 1.4,
"scale": {
"1": "No credibility evaluation or accepted sources at face value",
"2": "Basic credibility check (author identified) but no depth (expertise, bias, track record ignored)",
"3": "Credibility assessed on 2-3 factors (e.g., expertise and independence) with brief reasoning",
"4": "All four factors assessed (expertise, independence, track record, methodology) for each major source with clear reasoning",
"5": "Exemplary: Systematic CRAAP test or equivalent, conflicts of interest identified, track record researched (retractions, corrections), methodology transparency evaluated, source bias explicitly noted and accounted for in weighting"
}
},
{
"name": "Evidence Quality Rating",
"weight": 1.3,
"scale": {
"1": "No distinction between evidence types (treats social media post = peer-reviewed study)",
"2": "Vague quality labels ('good source', 'reliable') without systematic rating",
"3": "Evidence categorized (primary/secondary/tertiary) but inconsistently or without justification",
"4": "Systematic rating using evidence hierarchy, each source classified with brief rationale",
"5": "Exemplary: Evidence rated on multiple dimensions (type, methodology, sample size, recency), quality justification detailed, limitations of even high-quality evidence acknowledged"
}
},
{
"name": "Limitations and Gaps Documentation",
"weight": 1.3,
"scale": {
"1": "No limitations noted or only minor caveats mentioned",
"2": "Generic limitations ('more research needed') without specifics",
"3": "Some limitations noted (missing data or assumptions stated) but incomplete",
"4": "Comprehensive limitations: gaps identified, assumptions stated, quality concerns noted, unknowns distinguished from known contradictions",
"5": "Exemplary: Systematic gap analysis (what evidence expected but not found), assumptions explicitly tested for sensitivity, distinction made between 'no evidence found' vs 'evidence of absence', suggestions for what would increase confidence"
}
},
{
"name": "Confidence Calibration and Reasoning",
"weight": 1.4,
"scale": {
"1": "No confidence level stated or vague ('probably', 'likely')",
"2": "Confidence stated but not justified or clearly miscalibrated (100% on weak evidence or 10% on strong)",
"3": "Numeric confidence (0-100%) stated with basic reasoning",
"4": "Well-calibrated confidence with clear reasoning linking to evidence quality, source credibility, and limitations",
"5": "Exemplary: Confidence range provided (not point estimate), reasoning traces from base rates through evidence to posterior, sensitivity analysis shown (confidence under different assumptions), explicit acknowledgment of uncertainty"
}
},
{
"name": "Bias Detection and Mitigation",
"weight": 1.2,
"scale": {
"1": "Clear bias (cherry-picked evidence, ignored contradictions, one-sided presentation)",
"2": "Unintentional bias (confirmation bias evident, didn't actively seek contradicting evidence)",
"3": "Some bias mitigation (acknowledged contradicting evidence, noted potential biases)",
"4": "Active bias mitigation (sought disconfirming evidence, considered alternative explanations, acknowledged own potential biases)",
"5": "Exemplary: Systematic bias checks (CRAAP test, conflict of interest disclosure), actively argued against own hypothesis, considered base rates to avoid availability/anchoring bias, source framing bias identified and corrected"
}
},
{
"name": "Actionable Recommendation",
"weight": 1.2,
"scale": {
"1": "No recommendation or action unclear ('interesting finding', 'needs more study')",
"2": "Vague action ('be cautious', 'consider this') without decision guidance",
"3": "Basic recommendation (believe/reject/uncertain) but not tied to decision context",
"4": "Clear actionable recommendation linked to confidence level and decision context (what to do given uncertainty)",
"5": "Exemplary: Recommendation with decision thresholds ('if you need 80%+ confidence to act, don't proceed; if 60% sufficient, proceed with mitigation X'), contingency plans for uncertainty, clear next steps to increase confidence if needed"
}
}
],
"guidance": {
"by_claim_type": {
"vendor_claims": {
"recommended_approach": "Seek independent verification (analysts, customer references, trials), avoid relying solely on vendor sources",
"evidence_priority": "Primary (customer data, trials) > Secondary (analyst reports) > Tertiary (vendor press releases)",
"red_flags": ["Only vendor sources", "Vague metrics ('up to X%')", "Cherry-picked case studies", "No independent verification"]
},
"news_claims": {
"recommended_approach": "Trace to primary source, check multiple outlets, verify context and framing",
"evidence_priority": "Primary sources (official statements, documents) > Secondary (news reports citing primary) > Tertiary (opinion pieces)",
"red_flags": ["Single source", "Anonymous claims without corroboration", "Circular reporting (outlets citing each other)", "Out-of-context quotes"]
},
"research_claims": {
"recommended_approach": "Check replication studies, meta-analyses, assess methodology rigor, look for conflicts of interest",
"evidence_priority": "RCTs/meta-analyses > Observational studies > Expert opinion",
"red_flags": ["Single study", "Small sample", "Conflicts of interest undisclosed", "P-hacking indicators", "Correlation claimed as causation"]
},
"statistical_claims": {
"recommended_approach": "Verify methodology, sample size, confidence intervals, base rates, check for denominators",
"evidence_priority": "Transparent methodology with raw data > Summary statistics > Infographics without sources",
"red_flags": ["Denominator unclear", "Cherry-picked timeframes", "Correlation ≠ causation", "No confidence intervals", "Misleading visualizations"]
}
}
},
"common_failure_modes": {
"confirmation_bias": {
"symptom": "Only evidence supporting claim found, contradictions ignored or dismissed",
"root_cause": "Want claim to be true (motivated reasoning), didn't actively search for disconfirmation",
"fix": "Actively search 'why this might be wrong', assign someone to argue against, seek base rates for skepticism"
},
"authority_bias": {
"symptom": "Accepted claim because prestigious source (Nobel Prize, Harvard, Fortune 500) without evaluating evidence",
"root_cause": "Heuristic: prestigious source = truth (often valid but not always)",
"fix": "Evaluate evidence quality independently, check if expert in this specific domain, verify claim not opinion"
},
"single_source_overconfidence": {
"symptom": "High confidence based on one source, even if high quality",
"root_cause": "Didn't triangulate, assumed quality source = truth",
"fix": "Require 2-3 independent sources for high confidence, check for replication/corroboration"
},
"vague_confidence": {
"symptom": "Used words ('probably', 'likely', 'seems') instead of numbers (60%, 75%)",
"root_cause": "Uncomfortable quantifying uncertainty, didn't calibrate",
"fix": "Force numeric confidence (0-100%), use calibration guides, test against base rates"
},
"missing_limitations": {
"symptom": "Conclusion presented without caveats, gaps, or unknowns acknowledged",
"root_cause": "Focused on what's known, didn't systematically check for what's unknown",
"fix": "Template section for limitations forces documentation, ask 'what would make me more confident?'"
}
},
"excellence_indicators": [
"Claim reformulated from vague to specific, testable assertion",
"Evidence triangulated: 3+ independent sources, multiple methodologies",
"Source credibility systematically assessed: expertise, independence, track record, methodology",
"Evidence rated using clear hierarchy: primary > secondary > tertiary",
"Contradicting evidence actively sought and fairly presented",
"Limitations and gaps comprehensively documented",
"Assumptions stated explicitly and tested for sensitivity",
"Confidence calibrated numerically (0-100%) with reasoning",
"Recommendation actionable and tied to decision context",
"Bias mitigation demonstrated (sought disconfirming evidence, checked conflicts of interest)",
"Distinction made between 'no evidence found' and 'evidence of absence'",
"Sources properly cited with links/references for verification"
]
}
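
The criterion weights above suggest a weighted aggregate, though the rubric itself does not prescribe an aggregation rule. A minimal Python sketch of one reasonable assumption, a weighted mean of 1-5 criterion scores:

```
# Hypothetical aggregation of per-criterion scores (1-5) using the rubric
# weights above. The rubric does not prescribe an aggregation rule; a
# weighted mean is one reasonable assumption.
weights = {
    "Claim Precision and Testability": 1.4,
    "Evidence Triangulation Quality": 1.5,
    "Source Credibility Assessment": 1.4,
    "Evidence Quality Rating": 1.3,
    "Limitations and Gaps Documentation": 1.3,
    "Confidence Calibration and Reasoning": 1.4,
    "Bias Detection and Mitigation": 1.2,
    "Actionable Recommendation": 1.2,
}

def overall_score(scores: dict[str, int]) -> float:
    """Weighted mean of 1-5 criterion scores, still on the 1-5 scale."""
    total_weight = sum(weights.values())
    return sum(weights[name] * scores[name] for name in weights) / total_weight

example = {name: 4 for name in weights}  # a uniform "4" evaluation
print(round(overall_score(example), 2))  # -> 4.0
```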


@@ -0,0 +1,328 @@
# Research Claim Map: Advanced Methodologies
## Table of Contents
1. [Triangulation Techniques](#1-triangulation-techniques)
2. [Source Verification Methods](#2-source-verification-methods)
3. [Evidence Synthesis Frameworks](#3-evidence-synthesis-frameworks)
4. [Bias Detection and Mitigation](#4-bias-detection-and-mitigation)
5. [Confidence Calibration Techniques](#5-confidence-calibration-techniques)
6. [Advanced Investigation Patterns](#6-advanced-investigation-patterns)
## 1. Triangulation Techniques
### Multi-Source Verification
**Independent corroboration**:
- **Minimum 3 independent sources** for high-confidence claims
- Sources are independent if: different authors, organizations, funding, data collection methods
- Example: Government report + Academic study + Industry analysis (all using different data)
**Detecting circular citations**:
- Trace back to original source - if A cites B, B cites C, C cites A → circular, invalid
- Check publication dates - later sources should cite earlier, not reverse
- Use citation indexes (Google Scholar, Web of Science) to map citation networks
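
Citation-network checks like this can be automated. A minimal Python sketch of cycle detection over a directed citation graph (the graph and the paper names are illustrative):

```
# Detect circular citation chains (A cites B, B cites C, C cites A)
# via depth-first search for a cycle in a directed citation graph.
def find_citation_cycle(citations: dict[str, list[str]]) -> list[str] | None:
    """Return one citation cycle if present, else None."""
    visiting, visited = set(), set()

    def dfs(node: str, path: list[str]) -> list[str] | None:
        visiting.add(node)
        path.append(node)
        for cited in citations.get(node, []):
            if cited in visiting:  # back-edge -> cycle found
                return path[path.index(cited):] + [cited]
            if cited not in visited:
                cycle = dfs(cited, path)
                if cycle:
                    return cycle
        visiting.discard(node)
        visited.add(node)
        path.pop()
        return None

    for source in citations:
        if source not in visited:
            cycle = dfs(source, [])
            if cycle:
                return cycle
    return None

# Illustrative example: A -> B -> C -> A is circular.
graph = {"A": ["B"], "B": ["C"], "C": ["A"]}
print(find_citation_cycle(graph))  # ['A', 'B', 'C', 'A']
```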
**Convergent evidence**:
- Different methodologies reaching same conclusion (surveys + experiments + observational)
- Different populations/contexts showing same pattern
- Example: Lab studies + field studies + meta-analyses all finding same effect
### Cross-Checking Strategies
**Fact-checking databases**:
- Snopes, FactCheck.org, PolitiFact for public claims
- Retraction Watch for scientific papers
- OpenSecrets for political funding claims
- SEC EDGAR for financial claims
**Domain-specific verification**:
- Medical: PubMed, Cochrane Reviews, FDA databases
- Technology: CVE databases, vendor security advisories, benchmark repositories
- Business: Crunchbase, SEC filings, earnings transcripts
- Historical: Primary source archives, digitized records
**Temporal consistency**:
- Check if claim was true at time stated (not just currently)
- Verify dates in citations match narrative
- Look for anachronisms (technology/events cited before they existed)
## 2. Source Verification Methods
### CRAAP Test (Currency, Relevance, Authority, Accuracy, Purpose)
**Currency**: When was it published/updated?
- High: Within last year for fast-changing topics, within 5 years for stable domains
- Medium: Dated but still applicable
- Low: Outdated, context has changed significantly
**Relevance**: Does it address your specific claim?
- High: Directly addresses claim with same scope/context
- Medium: Related but different scope (e.g., different population, timeframe)
- Low: Tangentially related, requires extrapolation
**Authority**: Who is the author/publisher?
- High: Recognized expert, peer-reviewed publication, established institution
- Medium: Knowledgeable but not top-tier, some editorial oversight
- Low: Unknown author, self-published, no credentials
**Accuracy**: Can it be verified?
- High: Data/methods shared, replicable, other sources corroborate
- Medium: Some verification possible, mostly consistent with known facts
- Low: Unverifiable claims, contradicts established knowledge
**Purpose**: Why was it created?
- High: Inform/educate, transparent about limitations
- Medium: Persuade but with evidence, some bias acknowledged
- Low: Sell/propagandize, misleading framing, undisclosed conflicts
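
A hedged sketch of how the five CRAAP judgments might be tallied into a coarse score, assuming a simple High=2 / Medium=1 / Low=0 mapping (the point values are an illustration, not part of the CRAAP test itself):

```
# Hypothetical CRAAP scorer: maps High/Medium/Low judgments on the five
# dimensions to 2/1/0 points and sums them. The point mapping is an
# assumption for illustration only.
POINTS = {"High": 2, "Medium": 1, "Low": 0}
DIMENSIONS = ("currency", "relevance", "authority", "accuracy", "purpose")

def craap_score(ratings: dict[str, str]) -> int:
    return sum(POINTS[ratings[d]] for d in DIMENSIONS)

source = {
    "currency": "High", "relevance": "Medium", "authority": "High",
    "accuracy": "Medium", "purpose": "High",
}
print(craap_score(source), "of", 2 * len(DIMENSIONS))  # 8 of 10
```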
### Domain Authority Assessment
**Academic sources**:
- Journal impact factor (a rough, imperfect proxy for peer-review rigor)
- H-index of authors (citation impact)
- Institutional affiliation (R1 research university > teaching-focused college)
- Funding source disclosure (NIH grant > pharmaceutical company funding for drug study)
**News sources**:
- Editorial standards (corrections policy, fact-checking team)
- Awards/recognition (Pulitzer, Peabody, investigative journalism awards)
- Ownership transparency (independent > owned by entity with vested interest)
- Track record (history of accurate reporting vs retractions)
**Technical sources**:
- Benchmark methodology disclosure (reproducible specs, public data)
- Vendor independence (third-party testing > vendor self-reporting)
- Community verification (open-source code, peer reproduction)
- Standards compliance (IEEE, NIST, OWASP standards)
## 3. Evidence Synthesis Frameworks
### GRADE System (Grading of Recommendations Assessment, Development and Evaluation)
**Start with evidence type**:
- Randomized controlled trials (RCTs): Start HIGH quality
- Observational studies: Start LOW quality
- Expert opinion: Start VERY LOW quality
**Downgrade for**:
- Risk of bias (methodology flaws, conflicts of interest)
- Inconsistency (conflicting results across studies)
- Indirectness (different population/intervention than claim)
- Imprecision (small sample, wide confidence intervals)
- Publication bias (only positive results published)
**Upgrade for**:
- Large effect size (strong signal)
- Dose-response gradient (more X → more Y)
- All plausible confounders would reduce effect (conservative estimate)
**Final quality rating**:
- **High**: Very confident true effect is close to estimate
- **Moderate**: Moderately confident, true effect likely close
- **Low**: Limited confidence, true effect may differ substantially
- **Very Low**: Very little confidence, true effect likely very different
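
A minimal sketch of the GRADE bookkeeping above, assuming each downgrade or upgrade factor moves the rating one level (real GRADE assessments may move one or two levels per factor):

```
# Simplified GRADE bookkeeping: start at a level set by evidence type,
# subtract one level per downgrade factor, add one per upgrade factor.
# (Real GRADE allows 1-2 levels per factor; this sketch assumes 1.)
LEVELS = ["Very Low", "Low", "Moderate", "High"]
START = {"rct": "High", "observational": "Low", "expert_opinion": "Very Low"}

def grade_quality(evidence_type: str,
                  downgrades: list[str],
                  upgrades: list[str]) -> str:
    level = LEVELS.index(START[evidence_type])
    level = level - len(downgrades) + len(upgrades)
    return LEVELS[max(0, min(level, len(LEVELS) - 1))]

# An RCT downgraded for imprecision and inconsistency -> Low
print(grade_quality("rct", ["imprecision", "inconsistency"], []))
```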
### Meta-Analysis Interpretation
**Effect size + confidence intervals**:
- Large effect + narrow CI = high confidence
- Small effect + narrow CI = real but modest effect
- Any effect + wide CI = uncertain
- Example: "10% improvement (95% CI: 5-15%)" vs "10% improvement (95% CI: -5-25%)"
**Heterogeneity (I² statistic)**:
- I² < 25%: Low heterogeneity, studies agree
- I² 25-75%: Moderate heterogeneity, some variation
- I² > 75%: High heterogeneity, studies conflict (be skeptical of pooled estimate)
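
I² is typically derived from Cochran's Q statistic as I² = max(0, (Q - df)/Q) × 100%, with df = number of studies - 1. A minimal computation (the Q value here is illustrative):

```
# I² from Cochran's Q: the share of variability across studies due to
# heterogeneity rather than chance. df = number of studies - 1.
def i_squared(q: float, num_studies: int) -> float:
    df = num_studies - 1
    return max(0.0, (q - df) / q) * 100 if q > 0 else 0.0

print(round(i_squared(q=24.0, num_studies=10), 1))  # 62.5 -> moderate
```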
**Publication bias detection**:
- Funnel plot asymmetry (missing small negative studies)
- File drawer problem (unpublished null results)
- Check trial registries (ClinicalTrials.gov) for unreported studies
## 4. Bias Detection and Mitigation
### Common Cognitive Biases in Claim Evaluation
**Confirmation bias**:
- **Symptom**: Finding only supporting evidence, ignoring contradictions
- **Mitigation**: Actively search for "why this might be wrong", assign someone to argue against
- **Example**: Believing vendor claim because you want product to work
**Availability bias**:
- **Symptom**: Overweighting vivid anecdotes vs dry statistics
- **Mitigation**: Prioritize data over stories, ask "how representative?"
- **Example**: Fearing plane crashes (vivid news) over car crashes (statistically riskier)
**Authority bias**:
- **Symptom**: Accepting claims because source is prestigious (Nobel Prize, Harvard, etc.)
- **Mitigation**: Evaluate evidence quality independently, check if expert in this specific domain
- **Example**: Believing a physicist's medical claims (outside their domain of expertise)
**Anchoring bias**:
- **Symptom**: First number heard becomes reference point
- **Mitigation**: Seek base rates, compare to industry benchmarks, gather range of estimates
- **Example**: Vendor says "saves 50%" → anchor on 50%, skeptical of analyst saying 10%
**Recency bias**:
- **Symptom**: Overweighting latest information, dismissing older evidence
- **Mitigation**: Consider full timeline, check if latest is outlier or trend
- **Example**: One bad quarter → ignoring 5 years of growth
### Source Bias Indicators
**Financial conflicts of interest**:
- Study funded by company whose product is being evaluated
- Author owns stock, serves on board, receives consulting fees
- Disclosure: Look for "Conflicts of Interest" section in papers, FDA disclosures
**Ideological bias**:
- Think tank with known political lean
- Advocacy organization with mission-driven agenda
- Framing: Watch for loaded language, cherry-picked comparisons
**Selection bias in studies**:
- Participants not representative of target population
- Dropout rate differs between groups
- Outcomes measured selectively (dropped endpoints with null results)
**Reporting bias**:
- Positive results published, negative results buried
- Outcomes changed after seeing data (HARKing: Hypothesizing After the Results are Known)
- Subsetting data until significance found (p-hacking)
## 5. Confidence Calibration Techniques
### Bayesian Updating
**Start with prior probability** (before seeing evidence):
- Base rate: How often is this type of claim true?
- Example: "New product will disrupt market" - base rate ~5% (most fail)
**Update with evidence** (likelihood ratio):
- How much more likely is this evidence if claim is true vs false?
- Strong evidence: Likelihood ratio >10 (evidence 10× more likely if claim true)
- Weak evidence: Likelihood ratio <3
**Calculate posterior probability** (after evidence):
- Use Bayes' theorem or intuitive updating
- Example: Prior 5%, strong evidence (LR=10) → Posterior ~35%
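
The same update in code: convert the prior probability to odds, multiply by the likelihood ratio, and convert back.

```
# Bayesian updating with a likelihood ratio: prior odds x LR = posterior odds.
def update(prior_prob: float, likelihood_ratio: float) -> float:
    prior_odds = prior_prob / (1 - prior_prob)
    posterior_odds = prior_odds * likelihood_ratio
    return posterior_odds / (1 + posterior_odds)

# Prior 5%, strong evidence (LR = 10) -> posterior ~34.5%, i.e. ~35%
print(round(update(0.05, 10), 3))  # 0.345
```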
### Fermi Estimation for Sanity Checks
**Decompose claim into estimable parts**:
- Claim: "Company has 10,000 paying customers"
- Decompose: Employees × customers per employee, or revenue ÷ price per customer
- Cross-check: Do the numbers add up?
**Example**:
- Claim: Startup has 1M users
- Check: Founded 2 years ago → 1,370 new users/day → 57/hour (24/7) or 171/hour (8hr workday)
- Reality check: Plausible for viral product? Need marketing spend estimate.
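
The same reality check in code, assuming two years of continuous operation (the figures mirror the example above):

```
# Fermi sanity check for "startup has 1M users after 2 years".
claimed_users = 1_000_000
days_live = 2 * 365
per_day = claimed_users / days_live          # ~1,370 signups/day
per_hour_24x7 = per_day / 24                 # ~57/hour around the clock
per_hour_workday = per_day / 8               # ~171/hour in an 8h workday
print(f"{per_day:.0f}/day, {per_hour_24x7:.0f}/hr (24/7), "
      f"{per_hour_workday:.0f}/hr (8h workday)")
```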
### Confidence Intervals and Ranges
**Avoid point estimates** ("70% confident"):
- Use ranges: "60-80% confident" acknowledges uncertainty
- Ask: What would make me 90% confident? What's missing?
**Sensitivity analysis**:
- Best case scenario (all assumptions optimistic) → upper bound confidence
- Worst case scenario (all assumptions pessimistic) → lower bound confidence
- Most likely scenario → central estimate
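
A minimal sketch of reporting confidence as a range built from these scenarios (the scenario values are placeholders):

```
# Sensitivity analysis sketch: express confidence as a range by re-running
# the estimate under pessimistic, central, and optimistic assumptions.
# The scenario values below are placeholders for illustration.
scenarios = {
    "worst_case": 0.55,   # all assumptions pessimistic -> lower bound
    "most_likely": 0.70,  # central estimate
    "best_case": 0.80,    # all assumptions optimistic -> upper bound
}
low, high = scenarios["worst_case"], scenarios["best_case"]
print(f"Confidence: {low:.0%}-{high:.0%} (central {scenarios['most_likely']:.0%})")
```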
## 6. Advanced Investigation Patterns
### Investigative Journalism Techniques
**Paper trail following**:
- Follow money: Who benefits financially from this claim being believed?
- Follow timeline: Who said what when? Any story changes over time?
- Follow power: Who has authority/incentive to suppress contradicting evidence?
**Source cultivation**:
- Insider sources (whistleblowers, former employees) for claims companies hide
- Expert sources (academics, consultants) for technical evaluation
- Documentary sources (contracts, emails, internal memos) for ground truth
**Red flags in interviews**:
- Vague answers to specific questions
- Defensiveness or hostility when questioned
- Inconsistencies between different tellings
- Refusal to provide documentation
### Legal Evidence Standards
**Burden of proof levels**:
- **Beyond a reasonable doubt** (criminal): 95%+ confidence
- **Clear and convincing** (civil high stakes): 75%+ confidence
- **Preponderance of evidence** (civil standard): 51%+ confidence (more likely than not)
**Hearsay rules**:
- Firsthand testimony > secondhand ("I saw X" > "Someone told me X")
- Exception: Business records, public records (trustworthy hearsay)
- Watch for: Anonymous sources, "people are saying", "experts claim"
**Chain of custody**:
- Document handling: Who collected, stored, analyzed evidence?
- Tampering risk: Could evidence have been altered?
- Authentication: How do we know this document/photo is genuine?
### Competitive Intelligence Validation
**HUMINT (Human Intelligence)**:
- Customer interviews: "Do you use competitor's product? How does it work?"
- Former employees: Glassdoor reviews, LinkedIn networking
- Conference presentations: Technical details revealed publicly
**OSINT (Open Source Intelligence)**:
- Public filings: SEC 10-K, patents, trademarks
- Job postings: What skills are they hiring for? (reveals technology stack, strategic priorities)
- Social media: Employee posts, company announcements
- Web archives: Wayback Machine to see claim history, website changes
**TECHINT (Technical Intelligence)**:
- Reverse engineering: Analyze product directly
- Benchmarking: Test performance claims yourself
- Network analysis: DNS records, API endpoints, infrastructure footprint
### Scientific Reproducibility Assessment
**Replication indicator**:
- Has anyone reproduced the finding? (Strong evidence)
- Did replication attempts fail? (Evidence against)
- Has no one tried to replicate? (Unknown, be cautious)
**Pre-registration check**:
- Was study pre-registered (ClinicalTrials.gov, OSF)? Reduces p-hacking risk
- Do results match pre-registered outcomes? If different, why?
**Data/code availability**:
- Can you access raw data to re-analyze?
- Is code available to reproduce analysis?
- Are materials specified to replicate experiment?
**Robustness checks**:
- Do findings hold with different analysis methods?
- Are results sensitive to outliers or specific assumptions?
- Do subsample analyses show consistent effects?
---
## Workflow Integration
**When to use advanced techniques**:
- **Triangulation** → Every claim (minimum requirement)
- **CRAAP Test** → When assessing unfamiliar sources
- **GRADE System** → Medical/health claims, policy decisions
- **Bayesian Updating** → When you have prior knowledge/base rates
- **Fermi Estimation** → Quantitative claims that seem implausible
- **Investigative Techniques** → High-stakes business decisions, fraud detection
- **Legal Standards** → Determining action thresholds (e.g., firing employee, lawsuit)
- **Reproducibility Assessment** → Scientific/technical claims
**Start simple, add complexity as needed**:
1. Quick verification: CRAAP test + Google fact-check
2. Moderate investigation: Triangulate 3 sources + basic bias check
3. Deep investigation: Full methodology above + expert consultation


@@ -0,0 +1,338 @@
# Research Claim Map Template
## Workflow
Copy this checklist and track your progress:
```
Research Claim Map Progress:
- [ ] Step 1: Define claim precisely
- [ ] Step 2: Gather evidence for and against
- [ ] Step 3: Rate evidence quality
- [ ] Step 4: Assess source credibility
- [ ] Step 5: Identify limitations
- [ ] Step 6: Synthesize conclusion
```
**Step 1: Define claim precisely**
Restate as specific, testable assertion with numbers, dates, clear terms. See [Claim Reformulation](#claim-reformulation-examples).
**Step 2: Gather evidence for and against**
Find sources supporting and contradicting claim. See [Evidence Categories](#evidence-categories).
**Step 3: Rate evidence quality**
Apply evidence hierarchy (primary > secondary > tertiary). See [Evidence Quality Rating](#evidence-quality-rating).
**Step 4: Assess source credibility**
Evaluate expertise, independence, track record, methodology. See [Credibility Assessment](#source-credibility-scoring).
**Step 5: Identify limitations**
Document gaps, assumptions, uncertainties. See [Limitations Documentation](#limitations-and-gaps).
**Step 6: Synthesize conclusion**
Determine confidence level (0-100%) and recommendation. See [Confidence Calibration](#confidence-level-calibration).
---
## Research Claim Map Template
### 1. Claim Statement
**Original claim**: [Quote exact claim as stated]
**Reformulated claim** (specific, testable): [Restate with precise terms, numbers, dates, scope]
**Why this claim matters**: [Decision impact, stakes, consequences if true/false]
**Key terms defined**:
- [Term 1]: [Definition to avoid ambiguity]
- [Term 2]: [Definition]
---
### 2. Evidence For
| Source | Evidence Type | Quality | Credibility | Summary |
|--------|--------------|---------|-------------|---------|
| [Source name/link] | [Primary/Secondary/Tertiary] | [H/M/L] | [H/M/L] | [What it says] |
| | | | | |
| | | | | |
**Strongest evidence for**:
1. [Most compelling evidence with explanation why it's strong]
2. [Second strongest]
---
### 3. Evidence Against
| Source | Evidence Type | Quality | Credibility | Summary |
|--------|--------------|---------|-------------|---------|
| [Source name/link] | [Primary/Secondary/Tertiary] | [H/M/L] | [H/M/L] | [What it says] |
| | | | | |
| | | | | |
**Strongest evidence against**:
1. [Most compelling counter-evidence with explanation]
2. [Second strongest]
---
### 4. Source Credibility Analysis
**For each major source, evaluate:**
**Source: [Name/Link]**
- **Expertise**: [H/M/L] - [Why: credentials, domain knowledge]
- **Independence**: [H/M/L] - [Conflicts of interest, bias, incentives]
- **Track Record**: [H/M/L] - [Prior accuracy, corrections, reputation]
- **Methodology**: [H/M/L] - [How they obtained information, transparency]
- **Overall credibility**: [H/M/L]
**Source: [Name/Link]**
- **Expertise**: [H/M/L] - [Why]
- **Independence**: [H/M/L] - [Why]
- **Track Record**: [H/M/L] - [Why]
- **Methodology**: [H/M/L] - [Why]
- **Overall credibility**: [H/M/L]
---
### 5. Limitations and Gaps
**What's unknown or uncertain**:
- [Gap 1: What evidence is missing]
- [Gap 2: What couldn't be verified]
- [Gap 3: What's ambiguous or unclear]
**Assumptions made**:
- [Assumption 1: What we're assuming to be true]
- [Assumption 2]
**Quality concerns**:
- [Concern 1: Weaknesses in evidence or methodology]
- [Concern 2]
**Further investigation needed**:
- [What additional evidence would increase confidence]
- [What questions remain unanswered]
---
### 6. Conclusion
**Confidence level**: [0-100%]
**Confidence reasoning**:
- [Why this confidence level based on evidence quality, source credibility, limitations]
**Assessment**: [Choose one]
- **Claim validated** (70-100% confidence) - Evidence strongly supports claim
- **Claim partially true** (40-69% confidence) - Mixed or weak evidence, requires nuance
- **Claim rejected** (0-39% confidence) - Evidence contradicts or insufficient support
**Recommendation**:
[Action to take based on this assessment - what should be believed, decided, or done]
**Key caveats**:
- [Important qualification 1]
- [Important qualification 2]
---
## Guidance for Each Section
### Claim Reformulation Examples
**Vague → Specific:**
- ❌ "Product X is better" → ✓ "Product X loads pages 50% faster than Product Y on benchmark Z"
- ❌ "Most customers are satisfied" → ✓ "NPS score ≥50 based on survey of ≥1000 customers in Q3 2024"
- ❌ "Studies show it works" → ✓ "≥3 peer-reviewed RCTs show ≥20% improvement vs placebo, p<0.05"
**Avoid:**
- Subjective terms ("better", "significant", "many")
- Undefined metrics ("performance", "quality", "efficiency")
- Vague time ranges ("recently", "long-term")
- Unclear comparisons ("faster", "cheaper" - than what?)
### Evidence Categories
**Primary (Strongest):**
- Original research data, raw datasets
- Direct measurements, transaction logs
- First-hand testimony from participants
- Legal documents, contracts, financial filings
- Photographs, videos of events (verified authentic)
**Secondary (Medium):**
- Analysis/synthesis of primary sources
- Peer-reviewed research papers
- News reporting citing primary sources
- Expert analysis with transparent methodology
- Government/institutional reports
**Tertiary (Weakest):**
- Summaries of secondary sources
- Textbooks, encyclopedias, Wikipedia
- Press releases, marketing content
- Opinion pieces, editorials
- Anecdotal reports
**Non-Evidence (Unreliable):**
- Social media claims without verification
- Anonymous sources with no corroboration
- Circular citations (A→B→A)
- "Experts say" without named experts
- Cherry-picked quotes out of context
### Evidence Quality Rating
**High (H):**
- Multiple independent primary sources agree
- Methodology transparent and replicable
- Large sample size, rigorous controls
- Peer-reviewed or independently verified
- Recent and relevant to current context
**Medium (M):**
- Single primary source or multiple secondary sources
- Some methodology disclosed
- Moderate sample size, some controls
- Some independent verification
- Somewhat dated but still applicable
**Low (L):**
- Tertiary sources only
- Methodology opaque or questionable
- Small sample, no controls, anecdotal
- No independent verification
- Outdated or context has changed
### Source Credibility Scoring
**Expertise:**
- High: Domain expert, relevant credentials, published research
- Medium: General knowledge, some relevant experience
- Low: No demonstrated expertise, out of domain
**Independence:**
- High: No financial/personal stake, third-party verification
- Medium: Some potential bias but disclosed
- Low: Direct conflict of interest, undisclosed bias
**Track Record:**
- High: Consistent accuracy, transparent about corrections
- Medium: Unknown history or mixed record
- Low: History of errors, retractions, misinformation
**Methodology:**
- High: Transparent process, data/methods shared, replicable
- Medium: Some details provided, partially verifiable
- Low: Black box, unverifiable, cherry-picked data
### Limitations and Gaps
**Common gaps:**
- Missing primary sources (only secondary summaries available)
- Conflicting evidence without clear resolution
- Outdated information (claim may have changed)
- Incomplete data (partial picture only)
- Methodology unclear (can't assess quality)
- Context missing (claim true but misleading framing)
**Document:**
- What evidence you expected to find but didn't
- What questions you couldn't answer
- What assumptions you had to make to proceed
- What contradictions remain unresolved
### Confidence Level Calibration
**90-100% (Near Certain):**
- Multiple independent primary sources
- High credibility sources with strong methodology
- No significant contradicting evidence
- Minimal assumptions or gaps
- Example: "Earth orbits the Sun"
**70-89% (Confident):**
- Strong secondary sources or single primary source
- Credible sources, some methodology disclosed
- Minor contradictions explainable
- Some assumptions but reasonable
- Example: "Vendor has >5,000 customers based on analyst report"
**50-69% (Uncertain):**
- Mixed evidence quality or conflicting sources
- Moderate credibility, unclear methodology
- Significant gaps or assumptions
- Requires more investigation to be confident
- Example: "Feature will improve retention 10-20%"
**30-49% (Skeptical):**
- More/stronger evidence against than for
- Low credibility sources or weak evidence
- Major gaps, questionable assumptions
- Claim likely exaggerated or misleading
- Example: "Supplement cures disease based on testimonials"
**0-29% (Likely False):**
- Strong evidence contradicting claim
- Unreliable sources, no credible support
- Claim contradicts established facts
- Clear misinformation or fabrication
- Example: "Vaccine contains tracking microchips"
---
## Common Patterns
### Pattern 1: Vendor Due Diligence
**Claim**: Vendor claims product capabilities, performance, customer metrics
**Approach**: Seek independent verification, customer references, trials
**Red flags**: Only vendor sources, vague metrics, "up to X" ranges, cherry-picked case studies
### Pattern 2: News Fact-Check
**Claim**: Event occurred, statistic cited, quote attributed
**Approach**: Trace to primary source, check multiple outlets, verify context
**Red flags**: Single source, anonymous claims, sensational framing, out-of-context quotes
### Pattern 3: Research Validity
**Claim**: Study shows X causes Y, treatment is effective
**Approach**: Check replication, sample size, methodology, competing explanations
**Red flags**: Single study, conflicts of interest, p-hacking, correlation claimed as causation
### Pattern 4: Competitive Intelligence
**Claim**: Competitor has capability, market share, strategic direction
**Approach**: Triangulate public filings, analyst reports, customer feedback
**Red flags**: Rumor/speculation, outdated info, no primary verification
---
## Quality Checklist
- [ ] Claim restated as specific, testable assertion
- [ ] Evidence gathered for both supporting and contradicting
- [ ] Each source rated for evidence quality (Primary/Secondary/Tertiary)
- [ ] Each source assessed for credibility (Expertise, Independence, Track Record, Methodology)
- [ ] Strongest evidence for and against identified
- [ ] Limitations and gaps documented explicitly
- [ ] Assumptions stated clearly
- [ ] Confidence level quantified (0-100%)
- [ ] Recommendation is actionable and evidence-based
- [ ] Caveats and qualifications noted
- [ ] No cherry-picking (actively sought contradicting evidence)
- [ ] Distinction made between "no evidence found" and "evidence against"
- [ ] Sources properly attributed with links/citations
- [ ] Avoided common biases (confirmation, authority, recency, availability)
- [ ] Quality sufficient for decision (if not, flag need for more investigation)