#!/usr/bin/env python3
"""
ScholarEval Score Calculator

Calculate aggregate evaluation scores from dimension-level ratings.
Supports weighted averaging, threshold analysis, and score visualization.

Usage:
    python calculate_scores.py --scores <scores.json> --output <report.txt>
    python calculate_scores.py --scores <scores.json> --weights <weights.json>
    python calculate_scores.py --interactive

Author: ScholarEval Framework
License: MIT
"""

import json
import argparse
import sys
from typing import Dict, List, Optional
from pathlib import Path

# Default dimension weights (total = 100%)
DEFAULT_WEIGHTS = {
    "problem_formulation": 0.15,
    "literature_review": 0.15,
    "methodology": 0.20,
    "data_collection": 0.10,
    "analysis": 0.15,
    "results": 0.10,
    "writing": 0.10,
    "citations": 0.05
}

# Quality level definitions. Bands are half-open [low, high) and
# contiguous, so every score in [0.0, 5.0] maps to exactly one level;
# get_quality_level() special-cases a perfect 5.0 into the top band.
QUALITY_LEVELS = {
    (4.5, 5.0): ("Exceptional", "Ready for top-tier publication"),
    (4.0, 4.5): ("Strong", "Publication-ready with minor revisions"),
    (3.5, 4.0): ("Good", "Major revisions required, promising work"),
    (3.0, 3.5): ("Acceptable", "Significant revisions needed"),
    (2.0, 3.0): ("Weak", "Fundamental issues, major rework required"),
    (0.0, 2.0): ("Poor", "Not suitable without complete revision")
}


def load_scores(filepath: Path) -> Dict[str, float]:
    """Load dimension scores from JSON file."""
    try:
        with open(filepath, 'r') as f:
            scores = json.load(f)
        # Validate scores (must be numeric and on the 1-5 scale)
        for dim, score in scores.items():
            if not isinstance(score, (int, float)) or not 1 <= score <= 5:
                raise ValueError(f"Score for {dim} must be between 1 and 5, got {score}")
        return scores
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON in {filepath}")
        sys.exit(1)
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)


def load_weights(filepath: Optional[Path] = None) -> Dict[str, float]:
    """Load dimension weights from JSON file or return defaults."""
    if filepath is None:
        return DEFAULT_WEIGHTS
    try:
        with open(filepath, 'r') as f:
            weights = json.load(f)
        # Validate weights sum to 1.0
        total = sum(weights.values())
        if not 0.99 <= total <= 1.01:  # Allow small floating point errors
            raise ValueError(f"Weights must sum to 1.0, got {total}")
        return weights
    except FileNotFoundError:
        print(f"Error: File not found: {filepath}")
        sys.exit(1)
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON in {filepath}")
        sys.exit(1)
    except ValueError as e:
        print(f"Error: {e}")
        sys.exit(1)


def calculate_weighted_average(scores: Dict[str, float],
                               weights: Dict[str, float]) -> float:
    """Calculate the weighted average score.

    If only a subset of dimensions is scored, the result is renormalized
    over the weight actually present, so missing dimensions do not drag
    the average toward zero.
    """
    total_score = 0.0
    total_weight = 0.0
    for dimension, score in scores.items():
        # Handle dimension name variations
        # (e.g., "problem_formulation" vs "problem-formulation")
        dim_key = dimension.replace('-', '_').lower()
        weight = weights.get(dim_key, 0.0)
        total_score += score * weight
        total_weight += weight
    # Renormalize over the weight covered by the scored dimensions
    if total_weight > 0:
        return total_score / total_weight
    return 0.0
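
# Worked example of the renormalization above (illustrative values, not
# part of the framework): if only "methodology" (4.0, weight 0.20) and
# "writing" (3.0, weight 0.10) are scored, then total_score = 4.0*0.20 +
# 3.0*0.10 = 1.10 and total_weight = 0.30, so the weighted average is
# 1.10 / 0.30 = 3.67 -- the same result as rescaling the two weights to
# sum to 1.0 before averaging.
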
def get_quality_level(score: float) -> tuple:
    """Get the quality level description for a given score."""
    for (low, high), (level, description) in QUALITY_LEVELS.items():
        # Half-open bands; a perfect 5.0 belongs to the top band.
        if low <= score < high or score == high == 5.0:
            return level, description
    return "Unknown", "Score out of expected range"


def generate_bar_chart(scores: Dict[str, float], max_width: int = 50) -> str:
    """Generate ASCII bar chart of dimension scores."""
    lines = []
    max_name_len = max((len(name) for name in scores), default=0)
    for dimension, score in sorted(scores.items(), key=lambda x: x[1], reverse=True):
        bar_length = int((score / 5.0) * max_width)
        bar = '█' * bar_length
        padding = ' ' * (max_name_len - len(dimension))
        lines.append(f"  {dimension}{padding} │ {bar} {score:.2f}")
    return '\n'.join(lines)


def identify_strengths_weaknesses(scores: Dict[str, float]) -> tuple:
    """Identify top strengths and areas for improvement."""
    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    strengths = [dim for dim, score in sorted_scores[:3] if score >= 4.0]
    weaknesses = [dim for dim, score in sorted_scores[-3:] if score < 3.5]
    return strengths, weaknesses
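
# Illustrative example of the selection logic above (hypothetical scores):
# with {"methodology": 4.5, "writing": 4.2, "citations": 3.0, "results": 2.8},
# the top-3 slice yields strengths ["methodology", "writing"] (only entries
# scoring >= 4.0 survive the filter) and the bottom-3 slice yields
# weaknesses ["citations", "results"] (only entries scoring < 3.5).
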
def generate_report(scores: Dict[str, float],
                    weights: Dict[str, float],
                    output_file: Optional[Path] = None) -> str:
    """Generate comprehensive evaluation report."""
    overall_score = calculate_weighted_average(scores, weights)
    quality_level, quality_desc = get_quality_level(overall_score)
    strengths, weaknesses = identify_strengths_weaknesses(scores)

    report_lines = [
        "=" * 70,
        "SCHOLAREVAL SCORE REPORT",
        "=" * 70,
        "",
        f"Overall Score: {overall_score:.2f} / 5.00",
        f"Quality Level: {quality_level}",
        f"Assessment: {quality_desc}",
        "",
        "=" * 70,
        "DIMENSION SCORES",
        "=" * 70,
        "",
        generate_bar_chart(scores),
        "",
        "=" * 70,
        "DETAILED BREAKDOWN",
        "=" * 70,
        ""
    ]

    # Add detailed scores with weights
    for dimension, score in sorted(scores.items()):
        dim_key = dimension.replace('-', '_').lower()
        weight = weights.get(dim_key, 0.0)
        weighted_contribution = score * weight
        percentage = weight * 100
        report_lines.append(
            f"  {dimension:25s} {score:.2f}/5.00 "
            f"(weight: {percentage:4.1f}%, contribution: {weighted_contribution:.3f})"
        )

    report_lines.extend([
        "",
        "=" * 70,
        "ASSESSMENT SUMMARY",
        "=" * 70,
        ""
    ])

    if strengths:
        report_lines.append("Top Strengths:")
        for dim in strengths:
            report_lines.append(f"  • {dim}: {scores[dim]:.2f}/5.00")
        report_lines.append("")

    if weaknesses:
        report_lines.append("Areas for Improvement:")
        for dim in weaknesses:
            report_lines.append(f"  • {dim}: {scores[dim]:.2f}/5.00")
        report_lines.append("")

    # Add recommendations based on score
    report_lines.extend([
        "=" * 70,
        "RECOMMENDATIONS",
        "=" * 70,
        ""
    ])

    if overall_score >= 4.5:
        report_lines.append("  Excellent work! Ready for submission to top-tier venues.")
    elif overall_score >= 4.0:
        report_lines.append("  Strong work. Address minor issues identified in weaknesses.")
    elif overall_score >= 3.5:
        report_lines.append("  Good foundation. Focus on major revisions in weak dimensions.")
    elif overall_score >= 3.0:
        report_lines.append("  Significant revisions needed. Prioritize weakest dimensions.")
    elif overall_score >= 2.0:
        report_lines.append("  Major rework required. Consider restructuring approach.")
    else:
        report_lines.append("  Fundamental revision needed across multiple dimensions.")

    report_lines.append("")
    report_lines.append("=" * 70)

    report = '\n'.join(report_lines)

    # Write to file if specified
    if output_file:
        try:
            with open(output_file, 'w') as f:
                f.write(report)
            print(f"\nReport saved to: {output_file}")
        except IOError as e:
            print(f"Error writing to {output_file}: {e}")

    return report


def interactive_mode():
    """Run interactive score entry mode."""
    print("ScholarEval Interactive Score Calculator")
    print("=" * 50)
    print("\nEnter scores for each dimension (1-5):")
    print("(Press Enter to skip a dimension)\n")

    scores = {}
    dimensions = [
        "problem_formulation", "literature_review", "methodology",
        "data_collection", "analysis", "results", "writing", "citations"
    ]

    for dim in dimensions:
        while True:
            dim_display = dim.replace('_', ' ').title()
            user_input = input(f"{dim_display}: ").strip()
            if not user_input:
                break  # Skip this dimension
            try:
                score = float(user_input)
                if 1 <= score <= 5:
                    scores[dim] = score
                    break
                else:
                    print("  Score must be between 1 and 5")
            except ValueError:
                print("  Invalid input. Please enter a number between 1 and 5")

    if not scores:
        print("\nNo scores entered. Exiting.")
        return

    print("\n" + "=" * 50)
    print("SCORES ENTERED:")
    for dim, score in scores.items():
        print(f"  {dim.replace('_', ' ').title()}: {score}")

    print("\nCalculating overall assessment...\n")
    report = generate_report(scores, DEFAULT_WEIGHTS)
    print(report)

    # Ask if user wants to save
    save = input("\nSave report to file? (y/n): ").strip().lower()
    if save == 'y':
        filename = input("Enter filename [scholareval_report.txt]: ").strip()
        if not filename:
            filename = "scholareval_report.txt"
        generate_report(scores, DEFAULT_WEIGHTS, Path(filename))


def main():
    parser = argparse.ArgumentParser(
        description="Calculate aggregate ScholarEval scores from dimension ratings",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Calculate from JSON file
  python calculate_scores.py --scores my_scores.json

  # Calculate with custom weights
  python calculate_scores.py --scores my_scores.json --weights custom_weights.json

  # Save report to file
  python calculate_scores.py --scores my_scores.json --output report.txt

  # Interactive mode
  python calculate_scores.py --interactive

Score JSON Format:
  {
    "problem_formulation": 4.5,
    "literature_review": 4.0,
    "methodology": 3.5,
    "data_collection": 4.0,
    "analysis": 3.5,
    "results": 4.0,
    "writing": 4.5,
    "citations": 4.0
  }

Weights JSON Format:
  {
    "problem_formulation": 0.15,
    "literature_review": 0.15,
    "methodology": 0.20,
    "data_collection": 0.10,
    "analysis": 0.15,
    "results": 0.10,
    "writing": 0.10,
    "citations": 0.05
  }
"""
    )
    parser.add_argument('--scores', type=Path,
                        help='Path to JSON file with dimension scores')
    parser.add_argument('--weights', type=Path,
                        help='Path to JSON file with dimension weights (optional)')
    parser.add_argument('--output', type=Path,
                        help='Path to output report file (optional)')
    parser.add_argument('--interactive', '-i', action='store_true',
                        help='Run in interactive mode')

    args = parser.parse_args()

    # Interactive mode
    if args.interactive:
        interactive_mode()
        return

    # File mode
    if not args.scores:
        parser.print_help()
        print("\nError: --scores is required (or use --interactive)")
        sys.exit(1)

    scores = load_scores(args.scores)
    weights = load_weights(args.weights)
    report = generate_report(scores, weights, args.output)

    # Print to stdout if no output file specified
    if not args.output:
        print(report)


if __name__ == '__main__':
    main()
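
# Quick smoke test of the scoring helpers (run manually in a REPL;
# values are illustrative):
#   >>> calculate_weighted_average({"methodology": 4.0}, DEFAULT_WEIGHTS)
#   4.0
#   >>> get_quality_level(4.45)
#   ('Strong', 'Publication-ready with minor revisions')
# A single scored dimension renormalizes to its own score, and every
# value in [0.0, 5.0] falls into exactly one quality band.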