Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions

View File

@@ -0,0 +1,598 @@
#!/usr/bin/env python3
"""
artifact.validate skill - Comprehensive artifact validation
Validates artifacts against structure, schema, and quality criteria.
Generates detailed validation reports with scores and recommendations.
"""
import sys
import os
import argparse
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple
import yaml
import json
def load_artifact_registry() -> Dict[str, Any]:
    """Load the KNOWN_ARTIFACT_TYPES registry from the artifact.define skill.

    The registry lives as a module-level dict literal inside
    ``artifact_define.py``. It is extracted textually (balanced-brace scan)
    and parsed with ``ast.literal_eval`` so arbitrary code in that file is
    never executed — the original implementation used ``eval``, which would
    run any code embedded in the registry file.

    Returns:
        Mapping of artifact type name -> type definition.

    Raises:
        FileNotFoundError: registry file is missing.
        ValueError: the dict literal cannot be located, is unbalanced, or
            is not a pure Python literal.
    """
    import ast  # local import: only this loader needs it

    registry_file = Path(__file__).parent.parent / "artifact.define" / "artifact_define.py"
    if not registry_file.exists():
        raise FileNotFoundError(f"Artifact registry not found: {registry_file}")
    with open(registry_file, 'r') as f:
        content = f.read()
    # Find KNOWN_ARTIFACT_TYPES dictionary
    start_marker = "KNOWN_ARTIFACT_TYPES = {"
    start_idx = content.find(start_marker)
    if start_idx == -1:
        raise ValueError("Could not find KNOWN_ARTIFACT_TYPES in registry file")
    # Position on the opening '{' of the dict literal.
    start_idx += len(start_marker) - 1
    # Scan forward to the matching closing brace.
    brace_count = 0
    end_idx = -1
    for i in range(start_idx, len(content)):
        if content[i] == '{':
            brace_count += 1
        elif content[i] == '}':
            brace_count -= 1
            if brace_count == 0:
                end_idx = i + 1
                break
    if end_idx == -1:
        # Previously this fell through with a truncated slice; fail loudly.
        raise ValueError("Unbalanced braces in KNOWN_ARTIFACT_TYPES literal")
    dict_str = content[start_idx:end_idx]
    try:
        # NOTE: assumes the registry is a pure literal (strings/lists/dicts).
        # literal_eval rejects names and calls — by design, for safety.
        return ast.literal_eval(dict_str)
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Could not parse KNOWN_ARTIFACT_TYPES: {e}") from e
def detect_artifact_type(file_path: Path, content: str) -> Optional[str]:
    """Detect the artifact type from the filename or embedded YAML metadata.

    Detection order:
      1. exact match of the filename stem against the registry,
      2. a registry type name contained in the stem,
      3. ``metadata.artifactType`` inside a YAML document.

    Args:
        file_path: Path to the artifact (stem and suffix are inspected).
        content: Raw file contents (parsed only for .yaml/.yml files).

    Returns:
        The detected type name, or None when nothing matches.
    """
    filename = file_path.stem
    # Load registry to check against known types
    registry = load_artifact_registry()
    # 1. Direct match on the filename stem.
    if filename in registry:
        return filename
    # 2. A known type embedded in the filename (e.g. "my-data-model").
    for artifact_type in registry.keys():
        if artifact_type in filename:
            return artifact_type
    # 3. Fall back to YAML metadata.
    if file_path.suffix in ['.yaml', '.yml']:
        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError:
            # FIX: was a bare `except:`; malformed YAML just means the type
            # cannot be detected from content — not a reason to hide other
            # errors (KeyboardInterrupt, etc.).
            return None
        if isinstance(data, dict) and 'metadata' in data:
            metadata = data['metadata']
            # isinstance guard replaces the bare-except's silencing of
            # TypeError when 'metadata' is not a mapping.
            if isinstance(metadata, dict) and 'artifactType' in metadata:
                return metadata['artifactType']
    return None
def validate_yaml_syntax(content: str) -> Tuple[bool, Optional[str]]:
    """Check that *content* parses as YAML.

    Returns:
        (True, None) on success, (False, error message) on a parse error.
    """
    try:
        yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return False, f"YAML syntax error: {str(exc)}"
    return True, None
def validate_markdown_structure(content: str) -> Tuple[bool, List[str]]:
    """Run lightweight structural checks on a Markdown document.

    Checks: at least one ATX heading, and the presence of a document
    control or metadata section.

    Returns:
        (is_valid, issues) where issues lists every failed check.
    """
    issues: List[str] = []
    has_heading = re.search(r'^#+ ', content, re.MULTILINE) is not None
    if not has_heading:
        issues.append("No headings found - document should have structured sections")
    has_control_section = ('Document Control' in content) or ('Metadata' in content)
    if not has_control_section:
        issues.append("Missing document control/metadata section")
    return not issues, issues
def check_metadata_completeness(data: Dict[str, Any], file_format: str) -> Dict[str, Any]:
    """Score the completeness of an artifact's metadata section.

    Scoring: each missing/incomplete required field costs 25 points
    (issue); each missing recommended field or TODO placeholder costs
    10 points (warning). Score is floored at 0.

    Args:
        data: Parsed artifact document.
        file_format: 'yaml'/'yml' documents must carry a 'metadata' key.

    Returns:
        Dict with 'complete', 'score' (0-100), 'issues', 'warnings' and,
        when a metadata section exists, the extracted 'metadata' mapping.
    """
    issues: List[str] = []
    warnings: List[str] = []
    metadata: Dict[str, Any] = {}
    if file_format in ['yaml', 'yml']:
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")
            # Nothing further can be scored without a metadata section.
            return {
                'complete': False,
                'score': 0,
                'issues': issues,
                'warnings': warnings
            }
        metadata = data['metadata']
    # Required fields: absent, falsy, or a literal 'TODO' all count as missing.
    for field in ('version', 'created', 'author', 'status'):
        if field not in metadata or not metadata[field] or metadata[field] == 'TODO':
            issues.append(f"Missing or incomplete required field: {field}")
    # Recommended fields only produce warnings.
    for field in ('lastModified', 'classification', 'documentOwner'):
        if field not in metadata or not metadata[field]:
            warnings.append(f"Missing recommended field: {field}")
    # Flag any remaining TODO placeholders anywhere in the metadata values.
    if isinstance(metadata, dict):
        for key, value in metadata.items():
            if isinstance(value, str) and 'TODO' in value:
                warnings.append(f"Field '{key}' contains TODO marker: {value}")
    score = max(0, 100 - 25 * len(issues) - 10 * len(warnings))
    return {
        'complete': not issues,
        'score': score,
        'issues': issues,
        'warnings': warnings,
        'metadata': metadata
    }
def count_todo_markers(content: str) -> List[str]:
    """Locate every line of *content* containing a TODO marker.

    Returns:
        A list of "Line N: <text>" entries (1-based line numbers); text
        longer than 100 characters is truncated with an ellipsis.
    """
    found: List[str] = []
    for lineno, raw_line in enumerate(content.split('\n'), start=1):
        if 'TODO' not in raw_line:
            continue
        text = raw_line.strip()
        if len(text) > 100:
            text = text[:100] + "..."
        found.append(f"Line {lineno}: {text}")
    return found
def validate_required_sections(data: Dict[str, Any], artifact_type: str, file_format: str) -> Dict[str, Any]:
    """Validate that required top-level sections are present.

    Scoring: each missing section costs 30 points (issue); each
    incomplete-content finding costs 15 points (warning). Score floors
    at 0.

    Args:
        data: Parsed artifact document.
        artifact_type: Type name; schema-style types may omit 'content'.
        file_format: Only 'yaml'/'yml' documents are structurally checked.

    Returns:
        Dict with 'valid', 'score' (0-100), 'issues', 'warnings'.
    """
    issues = []
    warnings = []
    if file_format in ['yaml', 'yml']:
        # Common required sections for YAML artifacts
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")
        if 'content' not in data and artifact_type not in ['schema-definition', 'data-model']:
            warnings.append("Missing 'content' section - artifact may be incomplete")
        # Check for empty content
        if 'content' in data:
            content = data['content']
            if isinstance(content, dict):
                # A field is "empty" when falsy or a bare TODO placeholder.
                # BUG FIX: the original compared v.strip() == 'TODO: ' — but
                # strip() removes trailing whitespace, so that comparison
                # could never be true and placeholder fields went unreported.
                empty_fields = [
                    k for k, v in content.items()
                    if not v or (isinstance(v, str) and v.strip() == 'TODO:')
                ]
                if empty_fields:
                    warnings.append(f"Empty content fields: {', '.join(empty_fields)}")
    score = max(0, 100 - (len(issues) * 30) - (len(warnings) * 15))
    return {
        'valid': len(issues) == 0,
        'score': score,
        'issues': issues,
        'warnings': warnings
    }
def validate_against_schema(data: Dict[str, Any], schema_path: Optional[Path]) -> Dict[str, Any]:
    """Validate artifact data against a JSON schema, when one is supplied.

    Full JSON-schema validation would need the third-party ``jsonschema``
    package, so this currently only confirms the schema file is readable
    JSON and reports that validation is not yet implemented.

    Returns:
        Dict with 'validated' (always False for now), 'score' (None) and
        a human-readable 'message'.
    """
    no_schema = schema_path is None or not schema_path.exists()
    if no_schema:
        return {
            'validated': False,
            'score': None,
            'message': 'No schema available for validation'
        }
    try:
        with open(schema_path, 'r') as fh:
            json.load(fh)  # parse only — surfaces unreadable/invalid schema files
        return {
            'validated': False,
            'score': None,
            'message': 'Schema validation not yet implemented (requires jsonschema library)'
        }
    except Exception as exc:
        return {
            'validated': False,
            'score': None,
            'message': f'Schema validation error: {str(exc)}'
        }
def calculate_quality_score(validation_results: Dict[str, Any]) -> int:
    """Combine per-check results into one weighted 0-100 quality score.

    Weights: syntax 30%, metadata 25%, sections 25%, TODO-freedom 20%.
    Syntax is all-or-nothing (100/0); each TODO marker deducts 5 points
    from the TODO component, floored at 0.

    Returns:
        The weighted average, truncated to an int.
    """
    syntax_score = 100 if validation_results['syntax']['valid'] else 0
    metadata_score = validation_results['metadata']['score']
    sections_score = validation_results['sections']['score']
    todo_score = max(0, 100 - len(validation_results['todos']) * 5)
    weighted_total = (
        syntax_score * 0.30
        + metadata_score * 0.25
        + sections_score * 0.25
        + todo_score * 0.20
    )
    return int(weighted_total)
def generate_recommendations(validation_results: Dict[str, Any]) -> List[str]:
    """Translate validation findings into prioritized, actionable advice.

    Severity markers: 🔴 blocking, 🟡 should fix, 🟢 minor polish,
    ✅ meets standards. Exactly one quality-score line is always emitted
    last.
    """
    recs: List[str] = []
    # Syntax failures are always blocking.
    if not validation_results['syntax']['valid']:
        recs.append("🔴 CRITICAL: Fix syntax errors before proceeding")
    # Metadata findings.
    meta = validation_results['metadata']
    if meta['issues']:
        recs.append(f"🔴 Fix {len(meta['issues'])} required metadata field(s)")
    if meta['warnings']:
        recs.append(f"🟡 Complete {len(meta['warnings'])} recommended metadata field(s)")
    # Section findings.
    secs = validation_results['sections']
    if secs['issues']:
        recs.append(f"🔴 Add {len(secs['issues'])} required section(s)")
    if secs['warnings']:
        recs.append(f"🟡 Review {len(secs['warnings'])} section warning(s)")
    # TODO markers escalate to blocking past 10.
    todo_count = len(validation_results['todos'])
    if todo_count > 10:
        recs.append(f"🔴 Replace {todo_count} TODO markers with actual content")
    elif todo_count > 0:
        recs.append(f"🟡 Replace {todo_count} TODO marker(s) with actual content")
    # Overall verdict keyed off the weighted quality score.
    quality_score = validation_results['quality_score']
    if quality_score < 50:
        recs.append("🔴 Artifact needs significant work before it's ready for review")
    elif quality_score < 70:
        recs.append("🟡 Artifact needs refinement before it's ready for approval")
    elif quality_score < 90:
        recs.append("🟢 Artifact is good - minor improvements recommended")
    else:
        recs.append("✅ Artifact meets quality standards")
    return recs
def validate_artifact(
    artifact_path: str,
    artifact_type: Optional[str] = None,
    strict: bool = False,
    schema_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Validate an artifact against structure, schema, and quality criteria.

    Pipeline: existence/format checks -> type detection -> syntax check ->
    metadata completeness -> TODO scan -> required-section check ->
    optional schema check -> weighted quality score -> recommendations.

    Args:
        artifact_path: Path to artifact file (.yaml/.yml/.md only)
        artifact_type: Type of artifact (auto-detected if not provided)
        strict: Strict mode - treat warnings as errors
        schema_path: Optional path to JSON schema

    Returns:
        Dict with 'success', 'is_valid', 'quality_score',
        'recommendations' and (when readable) 'validation_results'.
        Unreadable/unsupported input yields success=False plus 'error'.
    """
    file_path = Path(artifact_path)
    # Missing file: report as a failed run rather than raising.
    if not file_path.exists():
        return {
            'success': False,
            'error': f"Artifact file not found: {artifact_path}",
            'is_valid': False,
            'quality_score': 0
        }
    # Read file
    with open(file_path, 'r') as f:
        content = f.read()
    # Format is inferred purely from the file extension.
    file_format = file_path.suffix.lstrip('.')
    if file_format not in ['yaml', 'yml', 'md']:
        return {
            'success': False,
            'error': f"Unsupported file format: {file_format}. Expected yaml, yml, or md",
            'is_valid': False,
            'quality_score': 0
        }
    # An explicitly supplied type wins, but a mismatch with the detected
    # type is surfaced as a console warning for the operator.
    detected_type = detect_artifact_type(file_path, content)
    if artifact_type and detected_type and artifact_type != detected_type:
        print(f"Warning: Specified type '{artifact_type}' differs from detected type '{detected_type}'")
    final_type = artifact_type or detected_type or "unknown"
    # Report skeleton; the checks below append their sections in order.
    validation_results = {
        'artifact_path': str(file_path.absolute()),
        'artifact_type': final_type,
        'file_format': file_format,
        'file_size': len(content),
        'validated_at': datetime.now().isoformat()
    }
    # 1. Syntax validation
    if file_format in ['yaml', 'yml']:
        is_valid, error = validate_yaml_syntax(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'error': error
        }
        # Parse once here; the parsed document feeds checks 2 and 4.
        if is_valid:
            data = yaml.safe_load(content)
        else:
            # Cannot continue without valid syntax: short-circuit with a
            # zero score and a single critical recommendation.
            return {
                'success': True,
                'validation_results': validation_results,
                'is_valid': False,
                'quality_score': 0,
                'recommendations': ['🔴 CRITICAL: Fix YAML syntax errors before proceeding']
            }
    else:  # Markdown
        is_valid, issues = validate_markdown_structure(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'issues': issues if not is_valid else []
        }
        data = {}  # Markdown doesn't parse to structured data
    # 2. Metadata completeness (YAML only; Markdown gets a free pass so the
    # weighted score is not penalized for structure Markdown cannot express).
    if file_format in ['yaml', 'yml']:
        validation_results['metadata'] = check_metadata_completeness(data, file_format)
    else:
        validation_results['metadata'] = {
            'complete': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }
    # 3. TODO markers (scanned in the raw text, both formats)
    validation_results['todos'] = count_todo_markers(content)
    # 4. Required sections (YAML only; Markdown passes automatically)
    if file_format in ['yaml', 'yml']:
        validation_results['sections'] = validate_required_sections(data, final_type, file_format)
    else:
        validation_results['sections'] = {
            'valid': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }
    # 5. Schema validation (optional; only attached when a path was given)
    if schema_path:
        validation_results['schema'] = validate_against_schema(data, Path(schema_path))
    # Weighted 0-100 score across checks 1-4.
    quality_score = calculate_quality_score(validation_results)
    validation_results['quality_score'] = quality_score
    # Human-readable advice derived from the findings above.
    recommendations = generate_recommendations(validation_results)
    validation_results['recommendations'] = recommendations
    # Validity: issues always invalidate; warnings only do so in strict mode.
    has_critical_issues = (
        not validation_results['syntax']['valid'] or
        len(validation_results['metadata']['issues']) > 0 or
        len(validation_results['sections']['issues']) > 0
    )
    has_warnings = (
        len(validation_results['metadata']['warnings']) > 0 or
        len(validation_results['sections']['warnings']) > 0 or
        len(validation_results['todos']) > 0
    )
    if strict:
        is_valid = not has_critical_issues and not has_warnings
    else:
        is_valid = not has_critical_issues
    return {
        'success': True,
        'validation_results': validation_results,
        'is_valid': is_valid,
        'quality_score': quality_score,
        'recommendations': recommendations
    }
def main():
    """CLI entry point for the artifact.validate skill.

    Parses arguments, runs validate_artifact(), optionally writes the
    full report to a YAML file, prints a human-readable summary, and
    returns a process exit code (0 = valid, 1 = invalid or error).
    """
    parser = argparse.ArgumentParser(
        description='Validate artifacts against structure, schema, and quality criteria'
    )
    parser.add_argument(
        'artifact_path',
        type=str,
        help='Path to artifact file to validate'
    )
    parser.add_argument(
        '--artifact-type',
        type=str,
        help='Type of artifact (auto-detected if not provided)'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Strict mode - treat warnings as errors'
    )
    parser.add_argument(
        '--schema-path',
        type=str,
        help='Path to JSON schema for validation'
    )
    parser.add_argument(
        '--output',
        type=str,
        help='Save validation report to file'
    )
    args = parser.parse_args()
    # Validate artifact
    result = validate_artifact(
        artifact_path=args.artifact_path,
        artifact_type=args.artifact_type,
        strict=args.strict,
        schema_path=args.schema_path
    )
    # Save before printing so a report file exists even for failed runs.
    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            yaml.dump(result, f, default_flow_style=False, sort_keys=False)
        print(f"\nValidation report saved to: {output_path}")
    # Unreadable/unsupported input: short error banner, exit 1.
    if not result['success']:
        print(f"\n{'='*70}")
        print(f"✗ Validation Failed")
        print(f"{'='*70}")
        print(f"Error: {result['error']}")
        print(f"{'='*70}\n")
        return 1
    vr = result['validation_results']
    print(f"\n{'='*70}")
    print(f"Artifact Validation Report")
    print(f"{'='*70}")
    print(f"Artifact: {vr['artifact_path']}")
    print(f"Type: {vr['artifact_type']}")
    print(f"Format: {vr['file_format']}")
    print(f"Size: {vr['file_size']} bytes")
    print(f"")
    print(f"Overall Status: {'✅ VALID' if result['is_valid'] else '❌ INVALID'}")
    print(f"Quality Score: {result['quality_score']}/100")
    print(f"")
    # Syntax
    print(f"Syntax Validation:")
    if vr['syntax']['valid']:
        print(f"  ✅ Valid {vr['file_format'].upper()} syntax")
    else:
        # FIX: the error line was printed flush-left with no severity
        # marker, inconsistent with every other report line.
        print(f"  ❌ {vr['syntax'].get('error', 'Syntax errors found')}")
        if 'issues' in vr['syntax']:
            for issue in vr['syntax']['issues']:
                print(f"    - {issue}")
    print()
    # Metadata
    print(f"Metadata Completeness: {vr['metadata']['score']}/100")
    if vr['metadata']['issues']:
        print(f"  Issues:")
        for issue in vr['metadata']['issues']:
            # FIX: issues were printed flush-left with no 🔴 marker,
            # unlike the indented 🟡 warnings below.
            print(f"    🔴 {issue}")
    if vr['metadata']['warnings']:
        print(f"  Warnings:")
        for warning in vr['metadata']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['metadata']['issues'] and not vr['metadata']['warnings']:
        print(f"  ✅ All metadata fields complete")
    print()
    # Sections
    print(f"Required Sections: {vr['sections']['score']}/100")
    if vr['sections']['issues']:
        print(f"  Issues:")
        for issue in vr['sections']['issues']:
            # FIX: same flush-left/no-marker inconsistency as metadata issues.
            print(f"    🔴 {issue}")
    if vr['sections']['warnings']:
        print(f"  Warnings:")
        for warning in vr['sections']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['sections']['issues'] and not vr['sections']['warnings']:
        print(f"  ✅ All required sections present")
    print()
    # TODOs (show at most the first 5 to keep the report readable)
    todo_count = len(vr['todos'])
    print(f"TODO Markers: {todo_count}")
    if todo_count > 0:
        print(f"  🟡 Found {todo_count} TODO marker(s) - artifact incomplete")
        for todo in vr['todos'][:5]:
            print(f"    - {todo}")
        if todo_count > 5:
            print(f"    ... and {todo_count - 5} more")
    else:
        print(f"  ✅ No TODO markers found")
    print()
    # Recommendations
    print(f"Recommendations:")
    for rec in result['recommendations']:
        print(f"  {rec}")
    print(f"{'='*70}\n")
    return 0 if result['is_valid'] else 1
# Script entry point: exit status mirrors the validation outcome
# (0 = valid, 1 = invalid or error), suitable for CI gating.
if __name__ == '__main__':
    sys.exit(main())