| name | description | category |
|---|---|---|
| validate:web | Validate web pages and detect JavaScript errors automatically using headless browser automation | validation |
# Validate Web Command

Slash command: `/validate:web`

Automatically validate web pages (for example, a local `dashboard.py` instance) and detect JavaScript errors, console issues, and performance problems without manual browser inspection.
## Usage

```
/validate:web <URL> [options]
```
### Examples

```bash
# Validate local dashboard
/validate:web http://127.0.0.1:5000

# Validate with detailed output
/validate:web http://127.0.0.1:5000 --verbose

# Validate and auto-fix issues
/validate:web http://127.0.0.1:5000 --auto-fix

# Save validation report
/validate:web http://127.0.0.1:5000 --report

# Crawl and validate all subpages
/validate:web http://127.0.0.1:5000 --crawl

# Crawl with depth limit
/validate:web http://127.0.0.1:5000 --crawl --max-depth 2

# Crawl specific subpages only
/validate:web http://127.0.0.1:5000 --crawl --include "/api/*,/analytics/*"

# Exclude certain paths from crawling
/validate:web http://127.0.0.1:5000 --crawl --exclude "/admin/*,/debug/*"
```
## Implementation

```python
#!/usr/bin/env python3
import sys
import os
import json
import subprocess
from pathlib import Path
from datetime import datetime
# Add plugin lib directory to path
plugin_lib = Path(__file__).parent.parent.parent / 'lib'
sys.path.insert(0, str(plugin_lib))
try:
from web_page_validator import WebPageValidator, format_validation_report
VALIDATOR_AVAILABLE = True
except ImportError:
VALIDATOR_AVAILABLE = False
# Import additional modules for crawling
try:
from urllib.parse import urljoin, urlparse, urlunparse
from fnmatch import fnmatch
import re
import time
from collections import deque
CRAWLING_AVAILABLE = True
except ImportError:
CRAWLING_AVAILABLE = False
def crawl_and_validate(validator, start_url, max_depth=3, max_pages=50,
include_patterns=None, exclude_patterns=None,
same_domain=True, wait_for_load=3, verbose=False):
"""Crawl and validate all pages discovered from the start URL."""
if not CRAWLING_AVAILABLE:
raise ImportError("Required crawling modules not available")
from urllib.parse import urljoin, urlparse, urlunparse
from fnmatch import fnmatch
import time
from collections import deque
start_domain = urlparse(start_url).netloc
visited = set()
queue = deque([(start_url, 0)]) # (url, depth)
results = []
print(f"[INFO] Starting crawl from: {start_url}")
print(f"[INFO] Domain: {start_domain}")
print()
while queue and len(results) < max_pages:
current_url, depth = queue.popleft()
# Skip if already visited
if current_url in visited:
continue
visited.add(current_url)
# Check depth limit
if depth > max_depth:
continue
# Check domain restriction
if same_domain and urlparse(current_url).netloc != start_domain:
continue
# Check include/exclude patterns
if not should_crawl_url(current_url, include_patterns, exclude_patterns):
continue
print(f"[INFO] Validating (depth {depth}): {current_url}")
try:
# Validate the current page
result = validator.validate_url(current_url, wait_for_load=wait_for_load)
result.depth = depth # Add depth information
results.append(result)
# Show progress
status = "✅ PASS" if result.success else "❌ FAIL"
errors = len(result.console_errors) + len(result.javascript_errors)
print(f"[{status}] {errors} errors, {len(result.console_warnings)} warnings")
# Extract links for further crawling (only from successful pages)
if result.success and depth < max_depth:
links = extract_links_from_page(current_url, result.page_content or "")
for link in links:
if link not in visited and len(visited) + len(queue) < max_pages:
queue.append((link, depth + 1))
# Brief pause to avoid overwhelming the server
time.sleep(0.5)
except Exception as e:
print(f"[ERROR] Failed to validate {current_url}: {e}")
# Create a failed result object
from types import SimpleNamespace
failed_result = SimpleNamespace(
url=current_url,
success=False,
load_time=0,
page_title="Error",
console_errors=[f"Validation failed: {e}"],
console_warnings=[],
javascript_errors=[],
network_errors=[],
depth=depth,
page_content=""
)
results.append(failed_result)
print()
print(f"[INFO] Crawling completed: {len(results)} pages validated")
print(f"[INFO] Pages discovered: {len(visited)}")
return results
def should_crawl_url(url, include_patterns=None, exclude_patterns=None):
"""Check if URL should be crawled based on include/exclude patterns."""
    from urllib.parse import urlparse
    from fnmatch import fnmatch
parsed = urlparse(url)
path = parsed.path
# Skip non-HTML resources
if any(path.endswith(ext) for ext in ['.css', '.js', '.png', '.jpg', '.jpeg', '.gif', '.svg', '.ico', '.pdf', '.zip']):
return False
# Skip hash fragments
if parsed.fragment:
return False
# Check include patterns
if include_patterns:
if not any(fnmatch(path, pattern.strip()) for pattern in include_patterns):
return False
# Check exclude patterns
if exclude_patterns:
if any(fnmatch(path, pattern.strip()) for pattern in exclude_patterns):
return False
return True
def extract_links_from_page(base_url, page_content):
"""Extract all valid links from page content."""
from urllib.parse import urljoin, urlparse
import re
if not page_content:
return []
# Extract links using regex patterns
link_patterns = [
r'href=["\']([^"\']+)["\']', # href attributes
r'action=["\']([^"\']+)["\']', # form actions
r'src=["\']([^"\']+)["\']', # src attributes (for some dynamic content)
]
links = set()
for pattern in link_patterns:
matches = re.findall(pattern, page_content, re.IGNORECASE)
for match in matches:
# Convert relative URLs to absolute
absolute_url = urljoin(base_url, match)
# Validate URL
parsed = urlparse(absolute_url)
if parsed.scheme in ['http', 'https'] and parsed.netloc:
links.add(absolute_url)
return list(links)
def display_crawling_results(results):
"""Display comprehensive crawling results."""
if not results:
print("[ERROR] No pages were validated")
return
# Sort results by URL for consistent output
results.sort(key=lambda r: r.url)
# Summary statistics
total_pages = len(results)
successful_pages = sum(1 for r in results if r.success)
failed_pages = total_pages - successful_pages
total_errors = sum(len(r.console_errors) + len(r.javascript_errors) for r in results)
total_warnings = sum(len(r.console_warnings) for r in results)
print("=" * 80)
print(f"🕷️ CRAWLING VALIDATION RESULTS")
print("=" * 80)
print(f"Total Pages: {total_pages}")
print(f"Successful: {successful_pages} ✅")
print(f"Failed: {failed_pages} ❌")
print(f"Total Errors: {total_errors}")
print(f"Total Warnings: {total_warnings}")
print(f"Success Rate: {(successful_pages/total_pages)*100:.1f}%")
print()
# Show failed pages first
failed_results = [r for r in results if not r.success]
if failed_results:
print("❌ FAILED PAGES:")
for i, result in enumerate(failed_results, 1):
print(f" {i}. {result.url}")
print(f" Status: FAILED")
errors = len(result.console_errors) + len(result.javascript_errors)
if errors > 0:
print(f" Errors: {errors}")
print()
# Show successful pages with warnings
successful_with_warnings = [r for r in results if r.success and
(len(r.console_warnings) > 0 or
len(r.console_errors) > 0 or
len(r.javascript_errors) > 0)]
if successful_with_warnings:
print("[WARN]️ SUCCESSFUL PAGES WITH ISSUES:")
for i, result in enumerate(successful_with_warnings[:10], 1): # Limit to first 10
errors = len(result.console_errors) + len(result.javascript_errors)
warnings = len(result.console_warnings)
status_parts = []
if errors > 0:
status_parts.append(f"{errors} errors")
if warnings > 0:
status_parts.append(f"{warnings} warnings")
print(f" {i}. {result.url}")
print(f" Issues: {', '.join(status_parts)}")
if len(successful_with_warnings) > 10:
print(f" ... and {len(successful_with_warnings) - 10} more pages with issues")
print()
# Show top errors across all pages
all_errors = []
for result in results:
for error in result.console_errors:
all_errors.append(f"[{error.level.upper()}] {error.message[:100]}")
for error in result.javascript_errors:
all_errors.append(f"[JS] {str(error)[:100]}")
if all_errors:
print("🔍 TOP ERRORS ACROSS ALL PAGES:")
from collections import Counter
error_counts = Counter(all_errors)
for i, (error, count) in enumerate(error_counts.most_common(10), 1):
print(f" {i}. ({count}×) {error}")
print()
# Overall status
if successful_pages == total_pages:
print("🎉 OVERALL STATUS: ALL PAGES PASSED ✅")
elif successful_pages > total_pages * 0.8:
print("✅ OVERALL STATUS: MOSTLY HEALTHY ({successful_pages}/{total_pages} pages passed)")
else:
print("[WARN]️ OVERALL STATUS: NEEDS ATTENTION ({successful_pages}/{total_pages} pages passed)")
print("=" * 80)
def save_crawling_report(results, base_url):
"""Save comprehensive crawling report to file."""
from datetime import datetime
report_dir = Path('.claude/reports')
report_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')
report_file = report_dir / f'web-crawling-{timestamp}.md'
# Generate report content
content = generate_crawling_report_content(results, base_url)
report_file.write_text(content, encoding='utf-8')
print(f"[OK] Comprehensive crawling report saved to: {report_file}")
def generate_crawling_report_content(results, base_url):
"""Generate comprehensive markdown report for crawling results."""
from datetime import datetime
total_pages = len(results)
successful_pages = sum(1 for r in results if r.success)
failed_pages = total_pages - successful_pages
total_errors = sum(len(r.console_errors) + len(r.javascript_errors) for r in results)
total_warnings = sum(len(r.console_warnings) for r in results)
content = f"""{'='*80}
WEB CRAWLING VALIDATION REPORT
{'='*80}
**Base URL**: {base_url}
**Timestamp**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
**Status**: {'PASSED' if successful_pages == total_pages else 'FAILED'}
## SUMMARY
{'-'*40}
- **Total Pages**: {total_pages}
- **Successful**: {successful_pages} ✅
- **Failed**: {failed_pages} ❌
- **Success Rate**: {(successful_pages/total_pages)*100:.1f}%
- **Total Errors**: {total_errors}
- **Total Warnings**: {total_warnings}
## DETAILED RESULTS
{'-'*40}
"""
# Sort results by status and URL
results.sort(key=lambda r: (not r.success, r.url))
for i, result in enumerate(results, 1):
status_icon = "✅" if result.success else "❌"
content += f"""
### {i}. {result.url} {status_icon}
**Status**: {'PASSED' if result.success else 'FAILED'}
**Load Time**: {result.load_time:.2f}s
**Page Title**: {result.page_title}
"""
if not result.success:
errors = len(result.console_errors) + len(result.javascript_errors)
content += f"**Total Errors**: {errors}\n"
if result.console_errors:
content += "\n**Console Errors:**\n"
for j, error in enumerate(result.console_errors[:5], 1):
content += f" {j}. [{error.level.upper()}] {error.message}\n"
if len(result.console_errors) > 5:
content += f" ... and {len(result.console_errors) - 5} more errors\n"
if result.javascript_errors:
content += "\n**JavaScript Errors:**\n"
for j, error in enumerate(result.javascript_errors[:5], 1):
content += f" {j}. {str(error)}\n"
if len(result.javascript_errors) > 5:
content += f" ... and {len(result.javascript_errors) - 5} more errors\n"
if result.console_warnings:
content += f"**Warnings**: {len(result.console_warnings)}\n"
if len(result.console_warnings) <= 3:
for j, warning in enumerate(result.console_warnings, 1):
content += f" {j}. {warning.message}\n"
else:
content += f" Top 3 warnings:\n"
for j, warning in enumerate(result.console_warnings[:3], 1):
content += f" {j}. {warning.message}\n"
content += f" ... and {len(result.console_warnings) - 3} more warnings\n"
content += "\n---\n"
# Add recommendations section
content += f"""
## RECOMMENDATIONS
{'-'*40}
"""
if failed_pages > 0:
content += f"""### 🚨 Priority Fixes ({failed_pages} pages failed)
1. **Fix JavaScript Errors**: Review and fix syntax errors in failed pages
2. **Check Server Configuration**: Ensure all resources load correctly
3. **Validate HTML Structure**: Check for malformed HTML causing issues
4. **Test Functionality**: Verify interactive elements work properly
"""
if total_errors > 0:
content += f"""### 🔧 Technical Improvements ({total_errors} total errors)
1. **Console Error Resolution**: Fix JavaScript runtime errors
2. **Resource Loading**: Ensure all assets (CSS, JS, images) are accessible
3. **Network Issues**: Check for failed API calls or missing resources
4. **Error Handling**: Implement proper error handling in JavaScript
"""
if total_warnings > 0:
content += f"""### [WARN]️ Code Quality ({total_warnings} warnings)
1. **Deprecation Warnings**: Update deprecated API usage
2. **Performance Optimization**: Address performance warnings
3. **Best Practices**: Follow modern web development standards
4. **Code Cleanup**: Remove unused code and console logs
"""
content += f"""
## VALIDATION METRICS
{'-'*40}
- **Validation Date**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
- **Tool**: Web Page Validator with Crawling
- **Scope**: Full site validation with subpage discovery
- **Coverage**: {total_pages} pages analyzed
- **Effectiveness**: {((successful_pages/total_pages)*100):.1f}% success rate
{'='*80}
"""
return content
def main():
"""Main execution function"""
import argparse
parser = argparse.ArgumentParser(description='Validate web pages automatically')
parser.add_argument('url', nargs='?', default='http://127.0.0.1:5000',
help='URL to validate (default: http://127.0.0.1:5000)')
parser.add_argument('--verbose', '-v', action='store_true',
help='Show detailed output including warnings')
parser.add_argument('--auto-fix', action='store_true',
help='Attempt to automatically fix detected issues')
parser.add_argument('--report', action='store_true',
                        help='Save detailed report to .claude/reports/')
parser.add_argument('--timeout', type=int, default=30,
help='Page load timeout in seconds')
parser.add_argument('--wait', type=int, default=3,
help='Wait time after page load in seconds')
# Crawling options
parser.add_argument('--crawl', action='store_true',
help='Crawl and validate all subpages found on the site')
parser.add_argument('--max-depth', type=int, default=3,
help='Maximum crawl depth (default: 3)')
parser.add_argument('--max-pages', type=int, default=50,
help='Maximum number of pages to crawl (default: 50)')
parser.add_argument('--include', type=str, default='',
help='Comma-separated list of path patterns to include (e.g., "/api/*,/analytics/*")')
parser.add_argument('--exclude', type=str, default='',
help='Comma-separated list of path patterns to exclude (e.g., "/admin/*,/debug/*")')
    # BooleanOptionalAction (Python 3.9+) also generates a --no-same-domain flag
    parser.add_argument('--same-domain', action=argparse.BooleanOptionalAction, default=True,
                        help='Only crawl pages on the same domain (default: True; use --no-same-domain to allow external links)')
args = parser.parse_args()
print("[INFO] Web Page Validation")
print(f"[INFO] Target URL: {args.url}")
print()
if not VALIDATOR_AVAILABLE:
print("[ERROR] Web page validator not available")
print("[INFO] Install dependencies: pip install selenium")
print("[INFO] Install ChromeDriver: https://chromedriver.chromium.org/")
return 1
# Run validation
if args.crawl:
print("[INFO] Starting web page crawling and validation...")
print(f"[INFO] Max depth: {args.max_depth}, Max pages: {args.max_pages}")
print("[INFO] This may take several minutes depending on site size...")
print()
if args.include:
print(f"[INFO] Including only: {args.include}")
if args.exclude:
print(f"[INFO] Excluding: {args.exclude}")
print()
else:
print("[INFO] Starting headless browser validation...")
print("[INFO] This may take a few seconds...")
print()
try:
with WebPageValidator(headless=True, timeout=args.timeout) as validator:
if args.crawl:
# Enhanced crawling functionality
results = crawl_and_validate(
validator, args.url,
max_depth=args.max_depth,
max_pages=args.max_pages,
include_patterns=args.include.split(',') if args.include else [],
exclude_patterns=args.exclude.split(',') if args.exclude else [],
same_domain=args.same_domain,
wait_for_load=args.wait,
verbose=args.verbose
)
# Display crawling results
display_crawling_results(results)
# Save comprehensive report
if args.report or not all(r.success for r in results):
save_crawling_report(results, args.url)
# Return appropriate exit code
return 0 if all(r.success for r in results) else 1
else:
# Single page validation (existing logic)
result = validator.validate_url(args.url, wait_for_load=args.wait)
# Display results
if result.success:
print("=" * 80)
print("[OK] VALIDATION PASSED")
print("=" * 80)
print(f"URL: {result.url}")
print(f"Page Title: {result.page_title}")
print(f"Load Time: {result.load_time:.2f}s")
print(f"Console Errors: 0")
print(f"Console Warnings: {len(result.console_warnings)}")
print(f"JavaScript Errors: 0")
print()
print("[OK] No errors detected - page is functioning correctly")
print()
if result.console_warnings and args.verbose:
print("WARNINGS:")
for i, warning in enumerate(result.console_warnings[:5], 1):
print(f" {i}. {warning.message}")
if len(result.console_warnings) > 5:
print(f" ... and {len(result.console_warnings) - 5} more warnings")
print()
else:
print("=" * 80)
print("[ERROR] VALIDATION FAILED")
print("=" * 80)
print(f"URL: {result.url}")
print(f"Page Title: {result.page_title}")
print(f"Load Time: {result.load_time:.2f}s")
print()
print("ERROR SUMMARY:")
print(f" Console Errors: {len(result.console_errors)}")
print(f" Console Warnings: {len(result.console_warnings)}")
print(f" JavaScript Errors: {len(result.javascript_errors)}")
print(f" Network Errors: {len(result.network_errors)}")
print()
# Show top errors
if result.console_errors:
print("TOP CONSOLE ERRORS:")
for i, error in enumerate(result.console_errors[:3], 1):
print(f" {i}. [{error.level.upper()}] {error.message[:80]}")
if error.source and error.source != 'unknown':
print(f" Source: {error.source}")
if len(result.console_errors) > 3:
print(f" ... and {len(result.console_errors) - 3} more errors")
print()
# Show JavaScript errors
if result.javascript_errors:
print("JAVASCRIPT ERRORS:")
for i, js_error in enumerate(result.javascript_errors[:3], 1):
print(f" {i}. {js_error[:80]}")
if len(result.javascript_errors) > 3:
print(f" ... and {len(result.javascript_errors) - 3} more errors")
print()
# Auto-fix suggestions
if args.auto_fix:
print("AUTO-FIX ANALYSIS:")
print(" Analyzing errors for automatic fixes...")
print()
# Check for string escaping issues
has_escape_issues = any(
'SyntaxError' in str(e) or 'unexpected token' in str(e).lower()
for e in result.javascript_errors
)
if has_escape_issues:
print(" [DETECTED] String escaping issues in JavaScript")
print(" [FIX] Use Python raw strings (r'...') for JavaScript escape sequences")
print(" [EXAMPLE] Change 'Value\\n' to r'Value\\n' in Python source")
print()
print(" [ACTION] Would you like to apply automatic fixes? (y/n)")
else:
print(" [INFO] No auto-fixable issues detected")
print(" [INFO] Manual review required for detected errors")
print()
# Save detailed report
if args.report or not result.success:
report_dir = Path('.claude/reports')
report_dir.mkdir(parents=True, exist_ok=True)
timestamp = datetime.now().strftime('%Y-%m-%d_%H%M%S')
report_file = report_dir / f'web-validation-{timestamp}.md'
report_content = format_validation_report(result, verbose=True)
report_file.write_text(report_content, encoding='utf-8')
print(f"[OK] Detailed report saved to: {report_file}")
print()
# Performance metrics
if args.verbose and result.performance_metrics:
print("PERFORMANCE METRICS:")
for key, value in result.performance_metrics.items():
if isinstance(value, (int, float)):
print(f" {key}: {value:.2f}ms")
else:
print(f" {key}: {value}")
print()
print("=" * 80)
return 0 if result.success else 1
except KeyboardInterrupt:
print("\n[WARN] Validation interrupted by user")
return 130
except Exception as e:
print(f"[ERROR] Validation failed: {e}")
import traceback
traceback.print_exc()
return 1
if __name__ == '__main__':
    sys.exit(main())
```
## Features
- Automated Error Detection: Captures JavaScript errors without manual browser inspection
- Console Log Monitoring: Captures errors, warnings, and info logs from browser console
- Network Monitoring: Detects failed HTTP requests and resource loading issues
- Performance Metrics: Measures page load time and resource usage
- Auto-Fix Suggestions: Provides guidance on fixing detected issues
- Detailed Reports: Saves comprehensive validation reports to `.claude/reports/`
- 🆕 Subpage Crawling: Automatically discovers and validates all subpages on a website
- 🆕 Comprehensive Coverage: Crawl with configurable depth limits (default: 3 levels)
- 🆕 Smart Filtering: Include/exclude specific paths with pattern matching (see the sketch after this list)
- 🆕 Site-wide Analysis: Aggregates errors and warnings across the entire website
- 🆕 Progress Tracking: Real-time crawling progress with detailed status updates
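The Smart Filtering patterns are shell-style wildcards matched against the URL path, mirroring the `should_crawl_url` logic in the implementation above. A minimal illustrative sketch of how those patterns behave:

```python
# Illustrative only: mirrors the include/exclude matching used by should_crawl_url.
from fnmatch import fnmatch

paths = ["/api/status", "/analytics/report", "/admin/users", "/docs"]
include = ["/api/*", "/analytics/*"]
exclude = ["/admin/*"]

for path in paths:
    included = any(fnmatch(path, p) for p in include)   # must match an include pattern (when given)
    excluded = any(fnmatch(path, p) for p in exclude)   # must not match any exclude pattern
    print(f"{path}: crawl={included and not excluded}")

# Expected output:
#   /api/status: crawl=True
#   /analytics/report: crawl=True
#   /admin/users: crawl=False
#   /docs: crawl=False  (no include pattern matches it)
```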
## Requirements

Recommended (for full functionality):

```bash
pip install selenium
```

ChromeDriver (for Selenium):

- Download from: https://chromedriver.chromium.org/
- Or install it automatically:

```bash
pip install webdriver-manager
```

Alternative (if Selenium is unavailable):

```bash
pip install playwright
playwright install chromium
```
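For reference, a minimal Selenium 4.x setup using `webdriver-manager` typically looks like the sketch below. This is illustrative only; the plugin's `WebPageValidator` configures its own driver internally, and capturing console logs generally requires the `goog:loggingPrefs` capability shown here:

```python
# Minimal sketch of headless Chrome setup (Selenium 4.x + webdriver-manager), not the plugin's code.
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager

options = Options()
options.add_argument("--headless=new")                           # run without a visible window
options.set_capability("goog:loggingPrefs", {"browser": "ALL"})  # enable console log capture

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=options)
try:
    driver.get("http://127.0.0.1:5000")
    for entry in driver.get_log("browser"):  # SEVERE entries usually correspond to JS errors
        print(entry["level"], entry["message"])
finally:
    driver.quit()
```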
## Integration with Dashboard

This command is automatically invoked when starting dashboards via `/monitor:dashboard`, to ensure no JavaScript errors exist before the dashboard is displayed to the user.
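As a rough sketch of that pre-flight check, a dashboard launcher could run the validator as a subprocess and gate startup on its exit code. The script path and URL below are placeholder assumptions for illustration, not the plugin's actual wiring:

```python
# Hypothetical pre-flight check; the command path and URL are placeholder assumptions.
import subprocess
import sys

def preflight_validate(url: str = "http://127.0.0.1:5000") -> bool:
    """Return True if the web validator exits cleanly (exit code 0)."""
    proc = subprocess.run(
        [sys.executable, ".claude/commands/validate/web.py", url, "--report"],
        capture_output=True,
        text=True,
    )
    if proc.returncode != 0:
        print("[WARN] Dashboard validation failed; review the saved report before launching.")
        print(proc.stdout)
    return proc.returncode == 0

if __name__ == "__main__":
    sys.exit(0 if preflight_validate() else 1)
```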
## Output Format

Terminal Output (concise summary):

```
[OK] VALIDATION PASSED
URL: http://127.0.0.1:5000
Page Title: Autonomous Agent Dashboard
Load Time: 1.23s
Console Errors: 0
JavaScript Errors: 0
```

Report File (detailed analysis):

```markdown
# WEB PAGE VALIDATION REPORT

## Summary
- URL: http://127.0.0.1:5000
- Status: PASSED
- Load Time: 1.23s
- Console Errors: 0
- JavaScript Errors: 0

## Console Errors
(none detected)

## Performance Metrics
- Load Time: 1234ms
- DOM Ready: 456ms
- Resources: 15 loaded successfully
```
## Error Categories
- JavaScript Syntax Errors: Invalid JavaScript code
- Runtime Errors: Uncaught exceptions during execution
- Reference Errors: Undefined variables or functions
- Type Errors: Invalid type operations
- Network Errors: Failed HTTP requests
- Resource Errors: Missing CSS, JS, or image files
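A simple keyword heuristic along the lines below could bucket raw console messages into these categories. The rules are illustrative assumptions, not the validator's actual classification logic:

```python
# Illustrative heuristic only; the real validator may classify errors differently.
def categorize_error(message: str) -> str:
    """Map a raw browser console message to one of the documented categories."""
    text = message.lower()
    if "syntaxerror" in text or "unexpected token" in text:
        return "JavaScript Syntax Error"
    if "referenceerror" in text or "is not defined" in text:
        return "Reference Error"
    if "typeerror" in text:
        return "Type Error"
    if "failed to load resource" in text or "404" in text:
        return "Resource Error"
    if "net::err" in text or "failed to fetch" in text:
        return "Network Error"
    return "Runtime Error"

print(categorize_error("Uncaught ReferenceError: initChart is not defined"))  # -> Reference Error
```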
## Best Practices

- Run validation after making changes to web components
- Always validate before committing dashboard changes
- Use `--auto-fix` for common issues like string escaping
- Save reports for debugging with the `--report` flag
- Increase `--timeout` for slow-loading pages
- Use `--verbose` for detailed troubleshooting
## See Also

- `/monitor:dashboard` - Start dashboard with automatic validation
- `/analyze:quality` - Comprehensive quality control including web validation
- Skill: web-validation - Detailed methodology and best practices