Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:26:08 +08:00
commit 8f22ddf339
295 changed files with 59710 additions and 0 deletions

View File

@@ -0,0 +1,598 @@
#!/usr/bin/env python3
"""
artifact.validate skill - Comprehensive artifact validation
Validates artifacts against structure, schema, and quality criteria.
Generates detailed validation reports with scores and recommendations.
"""
import sys
import os
import argparse
import re
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple
import yaml
import json
def load_artifact_registry() -> Dict[str, Any]:
    """Load the KNOWN_ARTIFACT_TYPES registry from the artifact.define skill.

    The registry lives as a module-level dict literal inside
    ``artifact_define.py``. It is extracted textually (balanced-brace scan)
    and parsed with ``ast.literal_eval`` so arbitrary code in that file is
    never executed — the original implementation used ``eval``, which would
    run any code embedded in the registry file.

    Returns:
        Mapping of artifact type name -> type definition.

    Raises:
        FileNotFoundError: registry file is missing.
        ValueError: the dict literal cannot be located, is unbalanced, or
            is not a pure Python literal.
    """
    import ast  # local import: only this loader needs it

    registry_file = Path(__file__).parent.parent / "artifact.define" / "artifact_define.py"
    if not registry_file.exists():
        raise FileNotFoundError(f"Artifact registry not found: {registry_file}")
    with open(registry_file, 'r') as f:
        content = f.read()
    # Find KNOWN_ARTIFACT_TYPES dictionary
    start_marker = "KNOWN_ARTIFACT_TYPES = {"
    start_idx = content.find(start_marker)
    if start_idx == -1:
        raise ValueError("Could not find KNOWN_ARTIFACT_TYPES in registry file")
    # Position on the opening '{' of the dict literal.
    start_idx += len(start_marker) - 1
    # Scan forward to the matching closing brace.
    brace_count = 0
    end_idx = -1
    for i in range(start_idx, len(content)):
        if content[i] == '{':
            brace_count += 1
        elif content[i] == '}':
            brace_count -= 1
            if brace_count == 0:
                end_idx = i + 1
                break
    if end_idx == -1:
        # Previously this fell through with a truncated slice; fail loudly.
        raise ValueError("Unbalanced braces in KNOWN_ARTIFACT_TYPES literal")
    dict_str = content[start_idx:end_idx]
    try:
        # NOTE: assumes the registry is a pure literal (strings/lists/dicts).
        # literal_eval rejects names and calls — by design, for safety.
        return ast.literal_eval(dict_str)
    except (ValueError, SyntaxError) as e:
        raise ValueError(f"Could not parse KNOWN_ARTIFACT_TYPES: {e}") from e
def detect_artifact_type(file_path: Path, content: str) -> Optional[str]:
    """Detect the artifact type from the filename or embedded YAML metadata.

    Detection order:
      1. exact match of the filename stem against the registry,
      2. a registry type name contained in the stem,
      3. ``metadata.artifactType`` inside a YAML document.

    Args:
        file_path: Path to the artifact (stem and suffix are inspected).
        content: Raw file contents (parsed only for .yaml/.yml files).

    Returns:
        The detected type name, or None when nothing matches.
    """
    filename = file_path.stem
    # Load registry to check against known types
    registry = load_artifact_registry()
    # 1. Direct match on the filename stem.
    if filename in registry:
        return filename
    # 2. A known type embedded in the filename (e.g. "my-data-model").
    for artifact_type in registry.keys():
        if artifact_type in filename:
            return artifact_type
    # 3. Fall back to YAML metadata.
    if file_path.suffix in ['.yaml', '.yml']:
        try:
            data = yaml.safe_load(content)
        except yaml.YAMLError:
            # FIX: was a bare `except:`; malformed YAML just means the type
            # cannot be detected from content — not a reason to hide other
            # errors (KeyboardInterrupt, etc.).
            return None
        if isinstance(data, dict) and 'metadata' in data:
            metadata = data['metadata']
            # isinstance guard replaces the bare-except's silencing of
            # TypeError when 'metadata' is not a mapping.
            if isinstance(metadata, dict) and 'artifactType' in metadata:
                return metadata['artifactType']
    return None
def validate_yaml_syntax(content: str) -> Tuple[bool, Optional[str]]:
    """Check that *content* parses as YAML.

    Returns:
        (True, None) on success, (False, error message) on a parse error.
    """
    try:
        yaml.safe_load(content)
    except yaml.YAMLError as exc:
        return False, f"YAML syntax error: {str(exc)}"
    return True, None
def validate_markdown_structure(content: str) -> Tuple[bool, List[str]]:
    """Run lightweight structural checks on a Markdown document.

    Checks: at least one ATX heading, and the presence of a document
    control or metadata section.

    Returns:
        (is_valid, issues) where issues lists every failed check.
    """
    issues: List[str] = []
    has_heading = re.search(r'^#+ ', content, re.MULTILINE) is not None
    if not has_heading:
        issues.append("No headings found - document should have structured sections")
    has_control_section = ('Document Control' in content) or ('Metadata' in content)
    if not has_control_section:
        issues.append("Missing document control/metadata section")
    return not issues, issues
def check_metadata_completeness(data: Dict[str, Any], file_format: str) -> Dict[str, Any]:
    """Score the completeness of an artifact's metadata section.

    Scoring: each missing/incomplete required field costs 25 points
    (issue); each missing recommended field or TODO placeholder costs
    10 points (warning). Score is floored at 0.

    Args:
        data: Parsed artifact document.
        file_format: 'yaml'/'yml' documents must carry a 'metadata' key.

    Returns:
        Dict with 'complete', 'score' (0-100), 'issues', 'warnings' and,
        when a metadata section exists, the extracted 'metadata' mapping.
    """
    issues: List[str] = []
    warnings: List[str] = []
    metadata: Dict[str, Any] = {}
    if file_format in ['yaml', 'yml']:
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")
            # Nothing further can be scored without a metadata section.
            return {
                'complete': False,
                'score': 0,
                'issues': issues,
                'warnings': warnings
            }
        metadata = data['metadata']
    # Required fields: absent, falsy, or a literal 'TODO' all count as missing.
    for field in ('version', 'created', 'author', 'status'):
        if field not in metadata or not metadata[field] or metadata[field] == 'TODO':
            issues.append(f"Missing or incomplete required field: {field}")
    # Recommended fields only produce warnings.
    for field in ('lastModified', 'classification', 'documentOwner'):
        if field not in metadata or not metadata[field]:
            warnings.append(f"Missing recommended field: {field}")
    # Flag any remaining TODO placeholders anywhere in the metadata values.
    if isinstance(metadata, dict):
        for key, value in metadata.items():
            if isinstance(value, str) and 'TODO' in value:
                warnings.append(f"Field '{key}' contains TODO marker: {value}")
    score = max(0, 100 - 25 * len(issues) - 10 * len(warnings))
    return {
        'complete': not issues,
        'score': score,
        'issues': issues,
        'warnings': warnings,
        'metadata': metadata
    }
def count_todo_markers(content: str) -> List[str]:
    """Locate every line of *content* containing a TODO marker.

    Returns:
        A list of "Line N: <text>" entries (1-based line numbers); text
        longer than 100 characters is truncated with an ellipsis.
    """
    found: List[str] = []
    for lineno, raw_line in enumerate(content.split('\n'), start=1):
        if 'TODO' not in raw_line:
            continue
        text = raw_line.strip()
        if len(text) > 100:
            text = text[:100] + "..."
        found.append(f"Line {lineno}: {text}")
    return found
def validate_required_sections(data: Dict[str, Any], artifact_type: str, file_format: str) -> Dict[str, Any]:
    """Validate that required top-level sections are present.

    Scoring: each missing section costs 30 points (issue); each
    incomplete-content finding costs 15 points (warning). Score floors
    at 0.

    Args:
        data: Parsed artifact document.
        artifact_type: Type name; schema-style types may omit 'content'.
        file_format: Only 'yaml'/'yml' documents are structurally checked.

    Returns:
        Dict with 'valid', 'score' (0-100), 'issues', 'warnings'.
    """
    issues = []
    warnings = []
    if file_format in ['yaml', 'yml']:
        # Common required sections for YAML artifacts
        if 'metadata' not in data:
            issues.append("Missing 'metadata' section")
        if 'content' not in data and artifact_type not in ['schema-definition', 'data-model']:
            warnings.append("Missing 'content' section - artifact may be incomplete")
        # Check for empty content
        if 'content' in data:
            content = data['content']
            if isinstance(content, dict):
                # A field is "empty" when falsy or a bare TODO placeholder.
                # BUG FIX: the original compared v.strip() == 'TODO: ' — but
                # strip() removes trailing whitespace, so that comparison
                # could never be true and placeholder fields went unreported.
                empty_fields = [
                    k for k, v in content.items()
                    if not v or (isinstance(v, str) and v.strip() == 'TODO:')
                ]
                if empty_fields:
                    warnings.append(f"Empty content fields: {', '.join(empty_fields)}")
    score = max(0, 100 - (len(issues) * 30) - (len(warnings) * 15))
    return {
        'valid': len(issues) == 0,
        'score': score,
        'issues': issues,
        'warnings': warnings
    }
def validate_against_schema(data: Dict[str, Any], schema_path: Optional[Path]) -> Dict[str, Any]:
    """Validate artifact data against a JSON schema, when one is supplied.

    Full JSON-schema validation would need the third-party ``jsonschema``
    package, so this currently only confirms the schema file is readable
    JSON and reports that validation is not yet implemented.

    Returns:
        Dict with 'validated' (always False for now), 'score' (None) and
        a human-readable 'message'.
    """
    no_schema = schema_path is None or not schema_path.exists()
    if no_schema:
        return {
            'validated': False,
            'score': None,
            'message': 'No schema available for validation'
        }
    try:
        with open(schema_path, 'r') as fh:
            json.load(fh)  # parse only — surfaces unreadable/invalid schema files
        return {
            'validated': False,
            'score': None,
            'message': 'Schema validation not yet implemented (requires jsonschema library)'
        }
    except Exception as exc:
        return {
            'validated': False,
            'score': None,
            'message': f'Schema validation error: {str(exc)}'
        }
def calculate_quality_score(validation_results: Dict[str, Any]) -> int:
    """Combine per-check results into one weighted 0-100 quality score.

    Weights: syntax 30%, metadata 25%, sections 25%, TODO-freedom 20%.
    Syntax is all-or-nothing (100/0); each TODO marker deducts 5 points
    from the TODO component, floored at 0.

    Returns:
        The weighted average, truncated to an int.
    """
    syntax_score = 100 if validation_results['syntax']['valid'] else 0
    metadata_score = validation_results['metadata']['score']
    sections_score = validation_results['sections']['score']
    todo_score = max(0, 100 - len(validation_results['todos']) * 5)
    weighted_total = (
        syntax_score * 0.30
        + metadata_score * 0.25
        + sections_score * 0.25
        + todo_score * 0.20
    )
    return int(weighted_total)
def generate_recommendations(validation_results: Dict[str, Any]) -> List[str]:
    """Translate validation findings into prioritized, actionable advice.

    Severity markers: 🔴 blocking, 🟡 should fix, 🟢 minor polish,
    ✅ meets standards. Exactly one quality-score line is always emitted
    last.
    """
    recs: List[str] = []
    # Syntax failures are always blocking.
    if not validation_results['syntax']['valid']:
        recs.append("🔴 CRITICAL: Fix syntax errors before proceeding")
    # Metadata findings.
    meta = validation_results['metadata']
    if meta['issues']:
        recs.append(f"🔴 Fix {len(meta['issues'])} required metadata field(s)")
    if meta['warnings']:
        recs.append(f"🟡 Complete {len(meta['warnings'])} recommended metadata field(s)")
    # Section findings.
    secs = validation_results['sections']
    if secs['issues']:
        recs.append(f"🔴 Add {len(secs['issues'])} required section(s)")
    if secs['warnings']:
        recs.append(f"🟡 Review {len(secs['warnings'])} section warning(s)")
    # TODO markers escalate to blocking past 10.
    todo_count = len(validation_results['todos'])
    if todo_count > 10:
        recs.append(f"🔴 Replace {todo_count} TODO markers with actual content")
    elif todo_count > 0:
        recs.append(f"🟡 Replace {todo_count} TODO marker(s) with actual content")
    # Overall verdict keyed off the weighted quality score.
    quality_score = validation_results['quality_score']
    if quality_score < 50:
        recs.append("🔴 Artifact needs significant work before it's ready for review")
    elif quality_score < 70:
        recs.append("🟡 Artifact needs refinement before it's ready for approval")
    elif quality_score < 90:
        recs.append("🟢 Artifact is good - minor improvements recommended")
    else:
        recs.append("✅ Artifact meets quality standards")
    return recs
def validate_artifact(
    artifact_path: str,
    artifact_type: Optional[str] = None,
    strict: bool = False,
    schema_path: Optional[str] = None
) -> Dict[str, Any]:
    """
    Validate an artifact against structure, schema, and quality criteria.

    Pipeline: existence/format checks -> type detection -> syntax check ->
    metadata completeness -> TODO scan -> required-section check ->
    optional schema check -> weighted quality score -> recommendations.

    Args:
        artifact_path: Path to artifact file (.yaml/.yml/.md only)
        artifact_type: Type of artifact (auto-detected if not provided)
        strict: Strict mode - treat warnings as errors
        schema_path: Optional path to JSON schema

    Returns:
        Dict with 'success', 'is_valid', 'quality_score',
        'recommendations' and (when readable) 'validation_results'.
        Unreadable/unsupported input yields success=False plus 'error'.
    """
    file_path = Path(artifact_path)
    # Missing file: report as a failed run rather than raising.
    if not file_path.exists():
        return {
            'success': False,
            'error': f"Artifact file not found: {artifact_path}",
            'is_valid': False,
            'quality_score': 0
        }
    # Read file
    with open(file_path, 'r') as f:
        content = f.read()
    # Format is inferred purely from the file extension.
    file_format = file_path.suffix.lstrip('.')
    if file_format not in ['yaml', 'yml', 'md']:
        return {
            'success': False,
            'error': f"Unsupported file format: {file_format}. Expected yaml, yml, or md",
            'is_valid': False,
            'quality_score': 0
        }
    # An explicitly supplied type wins, but a mismatch with the detected
    # type is surfaced as a console warning for the operator.
    detected_type = detect_artifact_type(file_path, content)
    if artifact_type and detected_type and artifact_type != detected_type:
        print(f"Warning: Specified type '{artifact_type}' differs from detected type '{detected_type}'")
    final_type = artifact_type or detected_type or "unknown"
    # Report skeleton; the checks below append their sections in order.
    validation_results = {
        'artifact_path': str(file_path.absolute()),
        'artifact_type': final_type,
        'file_format': file_format,
        'file_size': len(content),
        'validated_at': datetime.now().isoformat()
    }
    # 1. Syntax validation
    if file_format in ['yaml', 'yml']:
        is_valid, error = validate_yaml_syntax(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'error': error
        }
        # Parse once here; the parsed document feeds checks 2 and 4.
        if is_valid:
            data = yaml.safe_load(content)
        else:
            # Cannot continue without valid syntax: short-circuit with a
            # zero score and a single critical recommendation.
            return {
                'success': True,
                'validation_results': validation_results,
                'is_valid': False,
                'quality_score': 0,
                'recommendations': ['🔴 CRITICAL: Fix YAML syntax errors before proceeding']
            }
    else:  # Markdown
        is_valid, issues = validate_markdown_structure(content)
        validation_results['syntax'] = {
            'valid': is_valid,
            'issues': issues if not is_valid else []
        }
        data = {}  # Markdown doesn't parse to structured data
    # 2. Metadata completeness (YAML only; Markdown gets a free pass so the
    # weighted score is not penalized for structure Markdown cannot express).
    if file_format in ['yaml', 'yml']:
        validation_results['metadata'] = check_metadata_completeness(data, file_format)
    else:
        validation_results['metadata'] = {
            'complete': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }
    # 3. TODO markers (scanned in the raw text, both formats)
    validation_results['todos'] = count_todo_markers(content)
    # 4. Required sections (YAML only; Markdown passes automatically)
    if file_format in ['yaml', 'yml']:
        validation_results['sections'] = validate_required_sections(data, final_type, file_format)
    else:
        validation_results['sections'] = {
            'valid': True,
            'score': 100,
            'issues': [],
            'warnings': []
        }
    # 5. Schema validation (optional; only attached when a path was given)
    if schema_path:
        validation_results['schema'] = validate_against_schema(data, Path(schema_path))
    # Weighted 0-100 score across checks 1-4.
    quality_score = calculate_quality_score(validation_results)
    validation_results['quality_score'] = quality_score
    # Human-readable advice derived from the findings above.
    recommendations = generate_recommendations(validation_results)
    validation_results['recommendations'] = recommendations
    # Validity: issues always invalidate; warnings only do so in strict mode.
    has_critical_issues = (
        not validation_results['syntax']['valid'] or
        len(validation_results['metadata']['issues']) > 0 or
        len(validation_results['sections']['issues']) > 0
    )
    has_warnings = (
        len(validation_results['metadata']['warnings']) > 0 or
        len(validation_results['sections']['warnings']) > 0 or
        len(validation_results['todos']) > 0
    )
    if strict:
        is_valid = not has_critical_issues and not has_warnings
    else:
        is_valid = not has_critical_issues
    return {
        'success': True,
        'validation_results': validation_results,
        'is_valid': is_valid,
        'quality_score': quality_score,
        'recommendations': recommendations
    }
def main():
    """CLI entry point for the artifact.validate skill.

    Parses arguments, runs validate_artifact(), optionally writes the
    full report to a YAML file, prints a human-readable summary, and
    returns a process exit code (0 = valid, 1 = invalid or error).
    """
    parser = argparse.ArgumentParser(
        description='Validate artifacts against structure, schema, and quality criteria'
    )
    parser.add_argument(
        'artifact_path',
        type=str,
        help='Path to artifact file to validate'
    )
    parser.add_argument(
        '--artifact-type',
        type=str,
        help='Type of artifact (auto-detected if not provided)'
    )
    parser.add_argument(
        '--strict',
        action='store_true',
        help='Strict mode - treat warnings as errors'
    )
    parser.add_argument(
        '--schema-path',
        type=str,
        help='Path to JSON schema for validation'
    )
    parser.add_argument(
        '--output',
        type=str,
        help='Save validation report to file'
    )
    args = parser.parse_args()
    # Validate artifact
    result = validate_artifact(
        artifact_path=args.artifact_path,
        artifact_type=args.artifact_type,
        strict=args.strict,
        schema_path=args.schema_path
    )
    # Save before printing so a report file exists even for failed runs.
    if args.output:
        output_path = Path(args.output)
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with open(output_path, 'w') as f:
            yaml.dump(result, f, default_flow_style=False, sort_keys=False)
        print(f"\nValidation report saved to: {output_path}")
    # Unreadable/unsupported input: short error banner, exit 1.
    if not result['success']:
        print(f"\n{'='*70}")
        print(f"✗ Validation Failed")
        print(f"{'='*70}")
        print(f"Error: {result['error']}")
        print(f"{'='*70}\n")
        return 1
    vr = result['validation_results']
    print(f"\n{'='*70}")
    print(f"Artifact Validation Report")
    print(f"{'='*70}")
    print(f"Artifact: {vr['artifact_path']}")
    print(f"Type: {vr['artifact_type']}")
    print(f"Format: {vr['file_format']}")
    print(f"Size: {vr['file_size']} bytes")
    print(f"")
    print(f"Overall Status: {'✅ VALID' if result['is_valid'] else '❌ INVALID'}")
    print(f"Quality Score: {result['quality_score']}/100")
    print(f"")
    # Syntax
    print(f"Syntax Validation:")
    if vr['syntax']['valid']:
        print(f"  ✅ Valid {vr['file_format'].upper()} syntax")
    else:
        # FIX: the error line was printed flush-left with no severity
        # marker, inconsistent with every other report line.
        print(f"  ❌ {vr['syntax'].get('error', 'Syntax errors found')}")
        if 'issues' in vr['syntax']:
            for issue in vr['syntax']['issues']:
                print(f"    - {issue}")
    print()
    # Metadata
    print(f"Metadata Completeness: {vr['metadata']['score']}/100")
    if vr['metadata']['issues']:
        print(f"  Issues:")
        for issue in vr['metadata']['issues']:
            # FIX: issues were printed flush-left with no 🔴 marker,
            # unlike the indented 🟡 warnings below.
            print(f"    🔴 {issue}")
    if vr['metadata']['warnings']:
        print(f"  Warnings:")
        for warning in vr['metadata']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['metadata']['issues'] and not vr['metadata']['warnings']:
        print(f"  ✅ All metadata fields complete")
    print()
    # Sections
    print(f"Required Sections: {vr['sections']['score']}/100")
    if vr['sections']['issues']:
        print(f"  Issues:")
        for issue in vr['sections']['issues']:
            # FIX: same flush-left/no-marker inconsistency as metadata issues.
            print(f"    🔴 {issue}")
    if vr['sections']['warnings']:
        print(f"  Warnings:")
        for warning in vr['sections']['warnings']:
            print(f"    🟡 {warning}")
    if not vr['sections']['issues'] and not vr['sections']['warnings']:
        print(f"  ✅ All required sections present")
    print()
    # TODOs (show at most the first 5 to keep the report readable)
    todo_count = len(vr['todos'])
    print(f"TODO Markers: {todo_count}")
    if todo_count > 0:
        print(f"  🟡 Found {todo_count} TODO marker(s) - artifact incomplete")
        for todo in vr['todos'][:5]:
            print(f"    - {todo}")
        if todo_count > 5:
            print(f"    ... and {todo_count - 5} more")
    else:
        print(f"  ✅ No TODO markers found")
    print()
    # Recommendations
    print(f"Recommendations:")
    for rec in result['recommendations']:
        print(f"  {rec}")
    print(f"{'='*70}\n")
    return 0 if result['is_valid'] else 1
# Script entry point: exit status mirrors the validation outcome
# (0 = valid, 1 = invalid or error), suitable for CI gating.
if __name__ == '__main__':
    sys.exit(main())