#!/usr/bin/env python3 """ artifact.validate skill - Comprehensive artifact validation Validates artifacts against structure, schema, and quality criteria. Generates detailed validation reports with scores and recommendations. """ import sys import os import argparse import re from pathlib import Path from datetime import datetime from typing import Dict, Any, List, Optional, Tuple import yaml import json def load_artifact_registry() -> Dict[str, Any]: """Load artifact registry from artifact.define skill""" registry_file = Path(__file__).parent.parent / "artifact.define" / "artifact_define.py" if not registry_file.exists(): raise FileNotFoundError(f"Artifact registry not found: {registry_file}") with open(registry_file, 'r') as f: content = f.read() # Find KNOWN_ARTIFACT_TYPES dictionary start_marker = "KNOWN_ARTIFACT_TYPES = {" start_idx = content.find(start_marker) if start_idx == -1: raise ValueError("Could not find KNOWN_ARTIFACT_TYPES in registry file") start_idx += len(start_marker) - 1 # Find matching closing brace brace_count = 0 end_idx = start_idx for i in range(start_idx, len(content)): if content[i] == '{': brace_count += 1 elif content[i] == '}': brace_count -= 1 if brace_count == 0: end_idx = i + 1 break dict_str = content[start_idx:end_idx] artifacts = eval(dict_str) return artifacts def detect_artifact_type(file_path: Path, content: str) -> Optional[str]: """Detect artifact type from filename or content""" # Try to detect from filename filename = file_path.stem # Load registry to check against known types registry = load_artifact_registry() # Direct match if filename in registry: return filename # Check for partial matches for artifact_type in registry.keys(): if artifact_type in filename: return artifact_type # Try to detect from content (YAML metadata) if file_path.suffix in ['.yaml', '.yml']: try: data = yaml.safe_load(content) if isinstance(data, dict) and 'metadata' in data: # Check for artifact type in metadata metadata = data['metadata'] if 'artifactType' in metadata: return metadata['artifactType'] except: pass return None def validate_yaml_syntax(content: str) -> Tuple[bool, Optional[str]]: """Validate YAML syntax""" try: yaml.safe_load(content) return True, None except yaml.YAMLError as e: return False, f"YAML syntax error: {str(e)}" def validate_markdown_structure(content: str) -> Tuple[bool, List[str]]: """Validate Markdown structure""" issues = [] # Check for at least one heading if not re.search(r'^#+ ', content, re.MULTILINE): issues.append("No headings found - document should have structured sections") # Check for document control section if 'Document Control' not in content and 'Metadata' not in content: issues.append("Missing document control/metadata section") return len(issues) == 0, issues def check_metadata_completeness(data: Dict[str, Any], file_format: str) -> Dict[str, Any]: """Check metadata section completeness""" issues = [] warnings = [] metadata = {} if file_format in ['yaml', 'yml']: if 'metadata' not in data: issues.append("Missing 'metadata' section") return { 'complete': False, 'score': 0, 'issues': issues, 'warnings': warnings } metadata = data['metadata'] # Required fields required = ['version', 'created', 'author', 'status'] for field in required: if field not in metadata or not metadata[field] or metadata[field] == 'TODO': issues.append(f"Missing or incomplete required field: {field}") # Recommended fields recommended = ['lastModified', 'classification', 'documentOwner'] for field in recommended: if field not in metadata or not metadata[field]: warnings.append(f"Missing recommended field: {field}") # Check for TODO placeholders if isinstance(metadata, dict): for key, value in metadata.items(): if isinstance(value, str) and 'TODO' in value: warnings.append(f"Field '{key}' contains TODO marker: {value}") score = max(0, 100 - (len(issues) * 25) - (len(warnings) * 10)) return { 'complete': len(issues) == 0, 'score': score, 'issues': issues, 'warnings': warnings, 'metadata': metadata } def count_todo_markers(content: str) -> List[str]: """Count and locate TODO markers in content""" todos = [] lines = content.split('\n') for i, line in enumerate(lines, 1): if 'TODO' in line: # Extract the TODO text todo_text = line.strip() if len(todo_text) > 100: todo_text = todo_text[:100] + "..." todos.append(f"Line {i}: {todo_text}") return todos def validate_required_sections(data: Dict[str, Any], artifact_type: str, file_format: str) -> Dict[str, Any]: """Validate that required sections are present""" issues = [] warnings = [] if file_format in ['yaml', 'yml']: # Common required sections for YAML artifacts if 'metadata' not in data: issues.append("Missing 'metadata' section") if 'content' not in data and artifact_type not in ['schema-definition', 'data-model']: warnings.append("Missing 'content' section - artifact may be incomplete") # Check for empty content if 'content' in data: content = data['content'] if isinstance(content, dict): empty_fields = [k for k, v in content.items() if not v or (isinstance(v, str) and v.strip() == 'TODO: ')] if empty_fields: warnings.append(f"Empty content fields: {', '.join(empty_fields)}") score = max(0, 100 - (len(issues) * 30) - (len(warnings) * 15)) return { 'valid': len(issues) == 0, 'score': score, 'issues': issues, 'warnings': warnings } def validate_against_schema(data: Dict[str, Any], schema_path: Optional[Path]) -> Dict[str, Any]: """Validate artifact against JSON schema if available""" if not schema_path or not schema_path.exists(): return { 'validated': False, 'score': None, 'message': 'No schema available for validation' } try: with open(schema_path, 'r') as f: schema = json.load(f) # Note: Would need jsonschema library for full validation # For now, just indicate schema was found return { 'validated': False, 'score': None, 'message': 'Schema validation not yet implemented (requires jsonschema library)' } except Exception as e: return { 'validated': False, 'score': None, 'message': f'Schema validation error: {str(e)}' } def calculate_quality_score(validation_results: Dict[str, Any]) -> int: """Calculate overall quality score from validation results""" scores = [] weights = [] # Syntax validation (weight: 30%) if validation_results['syntax']['valid']: scores.append(100) else: scores.append(0) weights.append(0.30) # Metadata completeness (weight: 25%) scores.append(validation_results['metadata']['score']) weights.append(0.25) # Required sections (weight: 25%) scores.append(validation_results['sections']['score']) weights.append(0.25) # TODO markers - penalty (weight: 20%) todo_count = len(validation_results['todos']) todo_score = max(0, 100 - (todo_count * 5)) scores.append(todo_score) weights.append(0.20) # Calculate weighted average quality_score = sum(s * w for s, w in zip(scores, weights)) return int(quality_score) def generate_recommendations(validation_results: Dict[str, Any]) -> List[str]: """Generate actionable recommendations based on validation results""" recommendations = [] # Syntax issues if not validation_results['syntax']['valid']: recommendations.append("🔴 CRITICAL: Fix syntax errors before proceeding") # Metadata issues metadata = validation_results['metadata'] if metadata['issues']: recommendations.append(f"🔴 Fix {len(metadata['issues'])} required metadata field(s)") if metadata['warnings']: recommendations.append(f"🟡 Complete {len(metadata['warnings'])} recommended metadata field(s)") # Section issues sections = validation_results['sections'] if sections['issues']: recommendations.append(f"🔴 Add {len(sections['issues'])} required section(s)") if sections['warnings']: recommendations.append(f"🟡 Review {len(sections['warnings'])} section warning(s)") # TODO markers todo_count = len(validation_results['todos']) if todo_count > 0: if todo_count > 10: recommendations.append(f"🔴 Replace {todo_count} TODO markers with actual content") else: recommendations.append(f"🟡 Replace {todo_count} TODO marker(s) with actual content") # Quality score recommendations quality_score = validation_results['quality_score'] if quality_score < 50: recommendations.append("🔴 Artifact needs significant work before it's ready for review") elif quality_score < 70: recommendations.append("🟡 Artifact needs refinement before it's ready for approval") elif quality_score < 90: recommendations.append("🟢 Artifact is good - minor improvements recommended") else: recommendations.append("✅ Artifact meets quality standards") return recommendations def validate_artifact( artifact_path: str, artifact_type: Optional[str] = None, strict: bool = False, schema_path: Optional[str] = None ) -> Dict[str, Any]: """ Validate an artifact against structure, schema, and quality criteria Args: artifact_path: Path to artifact file artifact_type: Type of artifact (auto-detected if not provided) strict: Strict mode - treat warnings as errors schema_path: Optional path to JSON schema Returns: Validation report with scores and recommendations """ file_path = Path(artifact_path) # Check file exists if not file_path.exists(): return { 'success': False, 'error': f"Artifact file not found: {artifact_path}", 'is_valid': False, 'quality_score': 0 } # Read file with open(file_path, 'r') as f: content = f.read() # Detect format file_format = file_path.suffix.lstrip('.') if file_format not in ['yaml', 'yml', 'md']: return { 'success': False, 'error': f"Unsupported file format: {file_format}. Expected yaml, yml, or md", 'is_valid': False, 'quality_score': 0 } # Detect or validate artifact type detected_type = detect_artifact_type(file_path, content) if artifact_type and detected_type and artifact_type != detected_type: print(f"Warning: Specified type '{artifact_type}' differs from detected type '{detected_type}'") final_type = artifact_type or detected_type or "unknown" # Initialize validation results validation_results = { 'artifact_path': str(file_path.absolute()), 'artifact_type': final_type, 'file_format': file_format, 'file_size': len(content), 'validated_at': datetime.now().isoformat() } # 1. Syntax validation if file_format in ['yaml', 'yml']: is_valid, error = validate_yaml_syntax(content) validation_results['syntax'] = { 'valid': is_valid, 'error': error } # Parse for further validation if is_valid: data = yaml.safe_load(content) else: # Cannot continue without valid syntax return { 'success': True, 'validation_results': validation_results, 'is_valid': False, 'quality_score': 0, 'recommendations': ['🔴 CRITICAL: Fix YAML syntax errors before proceeding'] } else: # Markdown is_valid, issues = validate_markdown_structure(content) validation_results['syntax'] = { 'valid': is_valid, 'issues': issues if not is_valid else [] } data = {} # Markdown doesn't parse to structured data # 2. Metadata completeness if file_format in ['yaml', 'yml']: validation_results['metadata'] = check_metadata_completeness(data, file_format) else: validation_results['metadata'] = { 'complete': True, 'score': 100, 'issues': [], 'warnings': [] } # 3. TODO markers validation_results['todos'] = count_todo_markers(content) # 4. Required sections if file_format in ['yaml', 'yml']: validation_results['sections'] = validate_required_sections(data, final_type, file_format) else: validation_results['sections'] = { 'valid': True, 'score': 100, 'issues': [], 'warnings': [] } # 5. Schema validation (if schema provided) if schema_path: validation_results['schema'] = validate_against_schema(data, Path(schema_path)) # Calculate quality score quality_score = calculate_quality_score(validation_results) validation_results['quality_score'] = quality_score # Generate recommendations recommendations = generate_recommendations(validation_results) validation_results['recommendations'] = recommendations # Determine overall validity has_critical_issues = ( not validation_results['syntax']['valid'] or len(validation_results['metadata']['issues']) > 0 or len(validation_results['sections']['issues']) > 0 ) has_warnings = ( len(validation_results['metadata']['warnings']) > 0 or len(validation_results['sections']['warnings']) > 0 or len(validation_results['todos']) > 0 ) if strict: is_valid = not has_critical_issues and not has_warnings else: is_valid = not has_critical_issues return { 'success': True, 'validation_results': validation_results, 'is_valid': is_valid, 'quality_score': quality_score, 'recommendations': recommendations } def main(): """Main entry point for artifact.validate skill""" parser = argparse.ArgumentParser( description='Validate artifacts against structure, schema, and quality criteria' ) parser.add_argument( 'artifact_path', type=str, help='Path to artifact file to validate' ) parser.add_argument( '--artifact-type', type=str, help='Type of artifact (auto-detected if not provided)' ) parser.add_argument( '--strict', action='store_true', help='Strict mode - treat warnings as errors' ) parser.add_argument( '--schema-path', type=str, help='Path to JSON schema for validation' ) parser.add_argument( '--output', type=str, help='Save validation report to file' ) args = parser.parse_args() # Validate artifact result = validate_artifact( artifact_path=args.artifact_path, artifact_type=args.artifact_type, strict=args.strict, schema_path=args.schema_path ) # Save to file if requested if args.output: output_path = Path(args.output) output_path.parent.mkdir(parents=True, exist_ok=True) with open(output_path, 'w') as f: yaml.dump(result, f, default_flow_style=False, sort_keys=False) print(f"\nValidation report saved to: {output_path}") # Print report if not result['success']: print(f"\n{'='*70}") print(f"✗ Validation Failed") print(f"{'='*70}") print(f"Error: {result['error']}") print(f"{'='*70}\n") return 1 vr = result['validation_results'] print(f"\n{'='*70}") print(f"Artifact Validation Report") print(f"{'='*70}") print(f"Artifact: {vr['artifact_path']}") print(f"Type: {vr['artifact_type']}") print(f"Format: {vr['file_format']}") print(f"Size: {vr['file_size']} bytes") print(f"") print(f"Overall Status: {'✅ VALID' if result['is_valid'] else '❌ INVALID'}") print(f"Quality Score: {result['quality_score']}/100") print(f"") # Syntax print(f"Syntax Validation:") if vr['syntax']['valid']: print(f" ✅ Valid {vr['file_format'].upper()} syntax") else: print(f" ❌ {vr['syntax'].get('error', 'Syntax errors found')}") if 'issues' in vr['syntax']: for issue in vr['syntax']['issues']: print(f" - {issue}") print() # Metadata print(f"Metadata Completeness: {vr['metadata']['score']}/100") if vr['metadata']['issues']: print(f" Issues:") for issue in vr['metadata']['issues']: print(f" ❌ {issue}") if vr['metadata']['warnings']: print(f" Warnings:") for warning in vr['metadata']['warnings']: print(f" 🟡 {warning}") if not vr['metadata']['issues'] and not vr['metadata']['warnings']: print(f" ✅ All metadata fields complete") print() # Sections print(f"Required Sections: {vr['sections']['score']}/100") if vr['sections']['issues']: print(f" Issues:") for issue in vr['sections']['issues']: print(f" ❌ {issue}") if vr['sections']['warnings']: print(f" Warnings:") for warning in vr['sections']['warnings']: print(f" 🟡 {warning}") if not vr['sections']['issues'] and not vr['sections']['warnings']: print(f" ✅ All required sections present") print() # TODOs todo_count = len(vr['todos']) print(f"TODO Markers: {todo_count}") if todo_count > 0: print(f" 🟡 Found {todo_count} TODO marker(s) - artifact incomplete") if todo_count <= 5: for todo in vr['todos']: print(f" - {todo}") else: for todo in vr['todos'][:5]: print(f" - {todo}") print(f" ... and {todo_count - 5} more") else: print(f" ✅ No TODO markers found") print() # Recommendations print(f"Recommendations:") for rec in result['recommendations']: print(f" {rec}") print(f"{'='*70}\n") return 0 if result['is_valid'] else 1 if __name__ == '__main__': sys.exit(main())