Initial commit
598
skills/artifact.validate/artifact_validate.py
Executable file
@@ -0,0 +1,598 @@
#!/usr/bin/env python3
"""
artifact.validate skill - Comprehensive artifact validation

Validates artifacts against structure, schema, and quality criteria.
Generates detailed validation reports with scores and recommendations.
"""

import argparse
import ast
import json
import re
import sys
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple

import yaml


def load_artifact_registry() -> Dict[str, Any]:
    """Load artifact registry from artifact.define skill"""
    registry_file = Path(__file__).parent.parent / "artifact.define" / "artifact_define.py"

    if not registry_file.exists():
        raise FileNotFoundError(f"Artifact registry not found: {registry_file}")

    with open(registry_file, 'r') as f:
        content = f.read()

    # Find the KNOWN_ARTIFACT_TYPES dictionary literal
    start_marker = "KNOWN_ARTIFACT_TYPES = {"
    start_idx = content.find(start_marker)
    if start_idx == -1:
        raise ValueError("Could not find KNOWN_ARTIFACT_TYPES in registry file")

    # Step back one character so start_idx sits on the opening brace
    start_idx += len(start_marker) - 1

    # Find the matching closing brace
    brace_count = 0
    end_idx = start_idx
    for i in range(start_idx, len(content)):
        if content[i] == '{':
            brace_count += 1
        elif content[i] == '}':
            brace_count -= 1
            if brace_count == 0:
                end_idx = i + 1
                break

    dict_str = content[start_idx:end_idx]
    # ast.literal_eval is a safer substitute for eval here; it assumes the
    # registry dict is built entirely from Python literals
    artifacts = ast.literal_eval(dict_str)
    return artifacts
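
# Illustrative sketch of the registry shape this parser expects (hypothetical
# entries; the real KNOWN_ARTIFACT_TYPES lives in artifact_define.py and must
# consist of Python literals for ast.literal_eval to accept it):
#
#   KNOWN_ARTIFACT_TYPES = {
#       'project-charter': {'format': 'yaml'},
#       'risk-register': {'format': 'yaml'},
#   }
#
# load_artifact_registry() would then return that dict keyed by type name.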


def detect_artifact_type(file_path: Path, content: str) -> Optional[str]:
    """Detect artifact type from filename or content"""
    # Try to detect from the filename first
    filename = file_path.stem

    # Load the registry to check against known types
    registry = load_artifact_registry()

    # Direct match
    if filename in registry:
        return filename

    # Partial match: any known type name contained in the filename
    for artifact_type in registry.keys():
        if artifact_type in filename:
            return artifact_type

    # Fall back to content-based detection (YAML metadata)
    if file_path.suffix in ['.yaml', '.yml']:
        try:
            data = yaml.safe_load(content)
            if isinstance(data, dict) and 'metadata' in data:
                # Check for an explicit artifact type in metadata
                metadata = data['metadata']
                if isinstance(metadata, dict) and 'artifactType' in metadata:
                    return metadata['artifactType']
        except yaml.YAMLError:
            pass

    return None
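
# Detection precedence, with hypothetical names: a file 'acme-risk-register.yaml'
# matches the registry key 'risk-register' by substring before any YAML content
# is consulted; a metadata block such as
#
#   metadata:
#     artifactType: project-charter
#
# is only used when the filename yields no direct or partial match.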


def validate_yaml_syntax(content: str) -> Tuple[bool, Optional[str]]:
    """Validate YAML syntax"""
    try:
        yaml.safe_load(content)
        return True, None
    except yaml.YAMLError as e:
        return False, f"YAML syntax error: {str(e)}"
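
# Doctest-style illustration (hypothetical inputs):
#
#   >>> validate_yaml_syntax("key: value")
#   (True, None)
#   >>> ok, err = validate_yaml_syntax("key: [unclosed")
#   >>> ok
#   False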


def validate_markdown_structure(content: str) -> Tuple[bool, List[str]]:
    """Validate Markdown structure"""
    issues = []

    # Check for at least one heading
    if not re.search(r'^#+ ', content, re.MULTILINE):
        issues.append("No headings found - document should have structured sections")

    # Check for a document control section
    if 'Document Control' not in content and 'Metadata' not in content:
        issues.append("Missing document control/metadata section")

    return len(issues) == 0, issues
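
# Doctest-style illustration (hypothetical input): a document with no headings
# and no control section fails both checks.
#
#   >>> ok, issues = validate_markdown_structure("plain text only")
#   >>> ok
#   False
#   >>> len(issues)
#   2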


def check_metadata_completeness(data: Dict[str, Any], file_format: str) -> Dict[str, Any]:
    """Check metadata section completeness"""
    issues = []
    warnings = []
    metadata = {}

    if file_format in ['yaml', 'yml']:
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")
            return {
                'complete': False,
                'score': 0,
                'issues': issues,
                'warnings': warnings
            }

        metadata = data['metadata']

        # Required fields
        required = ['version', 'created', 'author', 'status']
        for field in required:
            if field not in metadata or not metadata[field] or metadata[field] == 'TODO':
                issues.append(f"Missing or incomplete required field: {field}")

        # Recommended fields
        recommended = ['lastModified', 'classification', 'documentOwner']
        for field in recommended:
            if field not in metadata or not metadata[field]:
                warnings.append(f"Missing recommended field: {field}")

        # Check for TODO placeholders
        if isinstance(metadata, dict):
            for key, value in metadata.items():
                if isinstance(value, str) and 'TODO' in value:
                    warnings.append(f"Field '{key}' contains TODO marker: {value}")

    score = max(0, 100 - (len(issues) * 25) - (len(warnings) * 10))

    return {
        'complete': len(issues) == 0,
        'score': score,
        'issues': issues,
        'warnings': warnings,
        'metadata': metadata
    }
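
# Worked scoring example (hypothetical metadata): all four required fields
# present but all three recommended fields missing gives
# max(0, 100 - 0*25 - 3*10) = 70.
#
#   >>> check_metadata_completeness(
#   ...     {'metadata': {'version': '1.0', 'created': '2024-01-01',
#   ...                   'author': 'pm', 'status': 'draft'}}, 'yaml')['score']
#   70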


def count_todo_markers(content: str) -> List[str]:
    """Count and locate TODO markers in content"""
    todos = []
    lines = content.split('\n')

    for i, line in enumerate(lines, 1):
        if 'TODO' in line:
            # Extract the TODO text, truncating very long lines
            todo_text = line.strip()
            if len(todo_text) > 100:
                todo_text = todo_text[:100] + "..."
            todos.append(f"Line {i}: {todo_text}")

    return todos
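
# Doctest-style illustration (hypothetical content):
#
#   >>> count_todo_markers("# Title\nTODO: add scope\ndone\n")
#   ['Line 2: TODO: add scope']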


def validate_required_sections(data: Dict[str, Any], artifact_type: str, file_format: str) -> Dict[str, Any]:
    """Validate that required sections are present"""
    issues = []
    warnings = []

    if file_format in ['yaml', 'yml']:
        # Common required sections for YAML artifacts
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")

        if 'content' not in data and artifact_type not in ['schema-definition', 'data-model']:
            warnings.append("Missing 'content' section - artifact may be incomplete")

        # Check for empty content
        if 'content' in data:
            content = data['content']
            if isinstance(content, dict):
                # str.strip() removes trailing whitespace, so compare against
                # 'TODO:' rather than 'TODO: ' (which could never match)
                empty_fields = [k for k, v in content.items() if not v or (isinstance(v, str) and v.strip() == 'TODO:')]
                if empty_fields:
                    warnings.append(f"Empty content fields: {', '.join(empty_fields)}")

    score = max(0, 100 - (len(issues) * 30) - (len(warnings) * 15))

    return {
        'valid': len(issues) == 0,
        'score': score,
        'issues': issues,
        'warnings': warnings
    }
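
# Scoring note: each missing required section costs 30 points and each warning
# 15, floored at 0. For example, a YAML artifact with 'metadata' present but no
# 'content' section scores max(0, 100 - 0*30 - 1*15) = 85.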


def validate_against_schema(data: Dict[str, Any], schema_path: Optional[Path]) -> Dict[str, Any]:
    """Validate artifact against a JSON schema if one is available"""
    if not schema_path or not schema_path.exists():
        return {
            'validated': False,
            'score': None,
            'message': 'No schema available for validation'
        }

    try:
        with open(schema_path, 'r') as f:
            # Parse the schema to confirm it is at least valid JSON
            json.load(f)

        # Note: full validation would need the jsonschema library.
        # For now, just indicate that the schema was found.
        return {
            'validated': False,
            'score': None,
            'message': 'Schema validation not yet implemented (requires jsonschema library)'
        }
    except Exception as e:
        return {
            'validated': False,
            'score': None,
            'message': f'Schema validation error: {str(e)}'
        }
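
# A minimal sketch of what full validation could look like if the optional
# jsonschema dependency were adopted (jsonschema.validate raises
# jsonschema.exceptions.ValidationError on a mismatch):
#
#   import jsonschema
#   try:
#       jsonschema.validate(instance=data, schema=schema)
#       return {'validated': True, 'score': 100, 'message': 'Schema validation passed'}
#   except jsonschema.exceptions.ValidationError as e:
#       return {'validated': True, 'score': 0, 'message': f'Schema violation: {e.message}'}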


def calculate_quality_score(validation_results: Dict[str, Any]) -> int:
    """Calculate overall quality score from validation results"""
    scores = []
    weights = []

    # Syntax validation (weight: 30%)
    if validation_results['syntax']['valid']:
        scores.append(100)
    else:
        scores.append(0)
    weights.append(0.30)

    # Metadata completeness (weight: 25%)
    scores.append(validation_results['metadata']['score'])
    weights.append(0.25)

    # Required sections (weight: 25%)
    scores.append(validation_results['sections']['score'])
    weights.append(0.25)

    # TODO markers - 5-point penalty each (weight: 20%)
    todo_count = len(validation_results['todos'])
    todo_score = max(0, 100 - (todo_count * 5))
    scores.append(todo_score)
    weights.append(0.20)

    # Calculate the weighted average
    quality_score = sum(s * w for s, w in zip(scores, weights))

    return int(quality_score)
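
# Worked example: valid syntax (100 * 0.30) + metadata score 70 (* 0.25) +
# sections score 85 (* 0.25) + two TODOs -> todo_score 90 (* 0.20) yields
# 30 + 17.5 + 21.25 + 18 = 86.75, truncated to 86 by int().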


def generate_recommendations(validation_results: Dict[str, Any]) -> List[str]:
    """Generate actionable recommendations based on validation results"""
    recommendations = []

    # Syntax issues
    if not validation_results['syntax']['valid']:
        recommendations.append("🔴 CRITICAL: Fix syntax errors before proceeding")

    # Metadata issues
    metadata = validation_results['metadata']
    if metadata['issues']:
        recommendations.append(f"🔴 Fix {len(metadata['issues'])} required metadata field(s)")
    if metadata['warnings']:
        recommendations.append(f"🟡 Complete {len(metadata['warnings'])} recommended metadata field(s)")

    # Section issues
    sections = validation_results['sections']
    if sections['issues']:
        recommendations.append(f"🔴 Add {len(sections['issues'])} required section(s)")
    if sections['warnings']:
        recommendations.append(f"🟡 Review {len(sections['warnings'])} section warning(s)")

    # TODO markers
    todo_count = len(validation_results['todos'])
    if todo_count > 0:
        if todo_count > 10:
            recommendations.append(f"🔴 Replace {todo_count} TODO markers with actual content")
        else:
            recommendations.append(f"🟡 Replace {todo_count} TODO marker(s) with actual content")

    # Quality score recommendations
    quality_score = validation_results['quality_score']
    if quality_score < 50:
        recommendations.append("🔴 Artifact needs significant work before it's ready for review")
    elif quality_score < 70:
        recommendations.append("🟡 Artifact needs refinement before it's ready for approval")
    elif quality_score < 90:
        recommendations.append("🟢 Artifact is good - minor improvements recommended")
    else:
        recommendations.append("✅ Artifact meets quality standards")

    return recommendations
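
# Quality-score bands used above: below 50 needs significant work, 50-69 needs
# refinement, 70-89 is good with minor improvements, and 90+ meets standards.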


def validate_artifact(
    artifact_path: str,
    artifact_type: Optional[str] = None,
    strict: bool = False,
    schema_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Validate an artifact against structure, schema, and quality criteria

    Args:
        artifact_path: Path to artifact file
        artifact_type: Type of artifact (auto-detected if not provided)
        strict: Strict mode - treat warnings as errors
        schema_path: Optional path to JSON schema

    Returns:
        Validation report with scores and recommendations
    """
    file_path = Path(artifact_path)

    # Check that the file exists
    if not file_path.exists():
        return {
            'success': False,
            'error': f"Artifact file not found: {artifact_path}",
            'is_valid': False,
            'quality_score': 0
        }

    # Read the file
    with open(file_path, 'r') as f:
        content = f.read()

    # Detect format from the file extension
    file_format = file_path.suffix.lstrip('.')
    if file_format not in ['yaml', 'yml', 'md']:
        return {
            'success': False,
            'error': f"Unsupported file format: {file_format}. Expected yaml, yml, or md",
            'is_valid': False,
            'quality_score': 0
        }

    # Detect or validate artifact type
    detected_type = detect_artifact_type(file_path, content)
    if artifact_type and detected_type and artifact_type != detected_type:
        print(f"Warning: Specified type '{artifact_type}' differs from detected type '{detected_type}'")

    final_type = artifact_type or detected_type or "unknown"

    # Initialize validation results
    validation_results = {
        'artifact_path': str(file_path.absolute()),
        'artifact_type': final_type,
        'file_format': file_format,
        'file_size': len(content),
        'validated_at': datetime.now().isoformat()
    }

    # 1. Syntax validation
    if file_format in ['yaml', 'yml']:
        is_valid, error = validate_yaml_syntax(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'error': error
        }

        # Parse for further validation
        if is_valid:
            # safe_load returns None for an empty document; normalize to {}
            data = yaml.safe_load(content) or {}
        else:
            # Cannot continue without valid syntax
            return {
                'success': True,
                'validation_results': validation_results,
                'is_valid': False,
                'quality_score': 0,
                'recommendations': ['🔴 CRITICAL: Fix YAML syntax errors before proceeding']
            }
    else:  # Markdown
        is_valid, issues = validate_markdown_structure(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'issues': issues if not is_valid else []
        }
        data = {}  # Markdown doesn't parse to structured data

    # 2. Metadata completeness
    if file_format in ['yaml', 'yml']:
        validation_results['metadata'] = check_metadata_completeness(data, file_format)
    else:
        validation_results['metadata'] = {
            'complete': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }

    # 3. TODO markers
    validation_results['todos'] = count_todo_markers(content)

    # 4. Required sections
    if file_format in ['yaml', 'yml']:
        validation_results['sections'] = validate_required_sections(data, final_type, file_format)
    else:
        validation_results['sections'] = {
            'valid': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }

    # 5. Schema validation (if a schema was provided)
    if schema_path:
        validation_results['schema'] = validate_against_schema(data, Path(schema_path))

    # Calculate quality score
    quality_score = calculate_quality_score(validation_results)
    validation_results['quality_score'] = quality_score

    # Generate recommendations
    recommendations = generate_recommendations(validation_results)
    validation_results['recommendations'] = recommendations

    # Determine overall validity
    has_critical_issues = (
        not validation_results['syntax']['valid'] or
        len(validation_results['metadata']['issues']) > 0 or
        len(validation_results['sections']['issues']) > 0
    )

    has_warnings = (
        len(validation_results['metadata']['warnings']) > 0 or
        len(validation_results['sections']['warnings']) > 0 or
        len(validation_results['todos']) > 0
    )

    if strict:
        is_valid = not has_critical_issues and not has_warnings
    else:
        is_valid = not has_critical_issues

    return {
        'success': True,
        'validation_results': validation_results,
        'is_valid': is_valid,
        'quality_score': quality_score,
        'recommendations': recommendations
    }
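
# Illustrative call (hypothetical path):
#
#   result = validate_artifact('artifacts/project-charter.yaml', strict=True)
#   result['is_valid']         # False on any critical issue or, in strict mode, any warning
#   result['quality_score']    # 0-100 weighted score from calculate_quality_score
#   result['recommendations']  # human-readable action items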


def main():
    """Main entry point for artifact.validate skill"""
    parser = argparse.ArgumentParser(
        description='Validate artifacts against structure, schema, and quality criteria'
    )
    parser.add_argument(
        'artifact_path',
        type=str,
        help='Path to artifact file to validate'
    )
    parser.add_argument(
        '--artifact-type',
        type=str,
        help='Type of artifact (auto-detected if not provided)'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Strict mode - treat warnings as errors'
    )
    parser.add_argument(
        '--schema-path',
        type=str,
        help='Path to JSON schema for validation'
    )
    parser.add_argument(
        '--output',
        type=str,
        help='Save validation report to file'
    )

    args = parser.parse_args()

    # Validate the artifact
    result = validate_artifact(
        artifact_path=args.artifact_path,
        artifact_type=args.artifact_type,
        strict=args.strict,
        schema_path=args.schema_path
    )

    # Save the report to a file if requested
    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            yaml.dump(result, f, default_flow_style=False, sort_keys=False)
        print(f"\nValidation report saved to: {output_path}")

    # Print the report
    if not result['success']:
        print(f"\n{'='*70}")
        print("✗ Validation Failed")
        print('='*70)
        print(f"Error: {result['error']}")
        print(f"{'='*70}\n")
        return 1

    vr = result['validation_results']

    print(f"\n{'='*70}")
    print("Artifact Validation Report")
    print('='*70)
    print(f"Artifact: {vr['artifact_path']}")
    print(f"Type: {vr['artifact_type']}")
    print(f"Format: {vr['file_format']}")
    print(f"Size: {vr['file_size']} bytes")
    print()
    print(f"Overall Status: {'✅ VALID' if result['is_valid'] else '❌ INVALID'}")
    print(f"Quality Score: {result['quality_score']}/100")
    print()

    # Syntax
    print("Syntax Validation:")
    if vr['syntax']['valid']:
        print(f"  ✅ Valid {vr['file_format'].upper()} syntax")
    else:
        print(f"  ❌ {vr['syntax'].get('error', 'Syntax errors found')}")
        if 'issues' in vr['syntax']:
            for issue in vr['syntax']['issues']:
                print(f"    - {issue}")
    print()

    # Metadata
    print(f"Metadata Completeness: {vr['metadata']['score']}/100")
    if vr['metadata']['issues']:
        print("  Issues:")
        for issue in vr['metadata']['issues']:
            print(f"    ❌ {issue}")
    if vr['metadata']['warnings']:
        print("  Warnings:")
        for warning in vr['metadata']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['metadata']['issues'] and not vr['metadata']['warnings']:
        print("  ✅ All metadata fields complete")
    print()

    # Sections
    print(f"Required Sections: {vr['sections']['score']}/100")
    if vr['sections']['issues']:
        print("  Issues:")
        for issue in vr['sections']['issues']:
            print(f"    ❌ {issue}")
    if vr['sections']['warnings']:
        print("  Warnings:")
        for warning in vr['sections']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['sections']['issues'] and not vr['sections']['warnings']:
        print("  ✅ All required sections present")
    print()

    # TODOs
    todo_count = len(vr['todos'])
    print(f"TODO Markers: {todo_count}")
    if todo_count > 0:
        print(f"  🟡 Found {todo_count} TODO marker(s) - artifact incomplete")
        # Show at most the first five markers
        for todo in vr['todos'][:5]:
            print(f"    - {todo}")
        if todo_count > 5:
            print(f"    ... and {todo_count - 5} more")
    else:
        print("  ✅ No TODO markers found")
    print()

    # Recommendations
    print("Recommendations:")
    for rec in result['recommendations']:
        print(f"  {rec}")

    print(f"{'='*70}\n")

    return 0 if result['is_valid'] else 1
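
# Example invocation (hypothetical paths):
#
#   python artifact_validate.py artifacts/project-charter.yaml \
#       --strict --output reports/charter-validation.yaml
#
# The process exits with status 0 when the artifact is valid, 1 otherwise.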


if __name__ == '__main__':
    sys.exit(main())