Initial commit

skills/artifact.validate.types/SKILL.md (new file, 378 lines)
@@ -0,0 +1,378 @@
# artifact.validate.types

## Overview

Validates artifact type names against the Betty Framework registry and returns complete metadata for each type. Provides fuzzy matching and suggestions for invalid types.

**Version**: 0.1.0
**Status**: active

## Purpose

This skill ensures that skills reference valid artifact types before they are created. It validates artifact type names against `registry/artifact_types.json`, retrieves complete metadata (file_pattern, content_type, schema), and suggests alternatives for invalid types using three fuzzy matching strategies:

1. **Singular/Plural Detection** (high confidence) - Detects "data-flow-diagram" vs "data-flow-diagrams"
2. **Generic vs Specific Variants** (medium confidence) - Suggests "logical-data-model" for "data-model"
3. **Levenshtein Distance** (low confidence) - Catches typos like "thret-model" → "threat-model"

This skill is specifically designed to be called by `meta.skill` during Step 2 (Validate Artifact Types) of the skill creation workflow.
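Each entry in `registry/artifact_types.json` carries the metadata fields this skill returns (name, file_pattern, content_type, schema, description). A quick way to inspect one entry, mirroring the lookups the validator performs (run from the framework root; the registry schema may carry additional fields):

```python
import json

# Peek at one registry entry to see the metadata the validator returns.
with open("registry/artifact_types.json") as f:
    registry = json.load(f)

entry = registry.get("artifact_types", [])[0]
for field in ("name", "file_pattern", "content_type", "schema", "description"):
    print(f"{field}: {entry.get(field)}")
```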
## Inputs

| Parameter | Type | Required | Default | Description |
|-----------|------|----------|---------|-------------|
| `artifact_types` | array | Yes | - | List of artifact type names to validate |
| `check_schemas` | boolean | No | `true` | Whether to verify schema files exist on filesystem |
| `suggest_alternatives` | boolean | No | `true` | Whether to suggest similar types for invalid ones |
| `max_suggestions` | number | No | `3` | Maximum number of suggestions per invalid type |

## Outputs

| Output | Type | Description |
|--------|------|-------------|
| `validation_results` | object | Validation results for each artifact type with complete metadata |
| `all_valid` | boolean | Whether all artifact types are valid |
| `invalid_types` | array | List of artifact types that don't exist in registry |
| `suggestions` | object | Suggested alternatives for each invalid type |
| `warnings` | array | List of warnings (e.g., schema file missing) |

## Artifact Metadata

### Produces
- **validation-report** (`*.validation.json`) - Validation results with metadata and suggestions

### Consumes
None - reads directly from registry files
## Usage

### Example 1: Validate Single Artifact Type

```bash
python artifact_validate_types.py \
  --artifact_types '["threat-model"]' \
  --check_schemas true
```

**Output:**
```json
{
  "validation_results": {
    "threat-model": {
      "valid": true,
      "file_pattern": "*.threat-model.yaml",
      "content_type": "application/yaml",
      "schema": "schemas/artifacts/threat-model-schema.json",
      "description": "Threat model (STRIDE, attack trees)..."
    }
  },
  "all_valid": true,
  "invalid_types": [],
  "suggestions": {},
  "warnings": []
}
```
### Example 2: Invalid Type with Suggestions

```bash
python artifact_validate_types.py \
  --artifact_types '["data-flow-diagram", "threat-model"]' \
  --suggest_alternatives true
```

**Output:**
```json
{
  "validation_results": {
    "data-flow-diagram": {
      "valid": false
    },
    "threat-model": {
      "valid": true,
      "file_pattern": "*.threat-model.yaml",
      "content_type": "application/yaml",
      "schema": "schemas/artifacts/threat-model-schema.json"
    }
  },
  "all_valid": false,
  "invalid_types": ["data-flow-diagram"],
  "suggestions": {
    "data-flow-diagram": [
      {
        "type": "data-flow-diagrams",
        "reason": "Plural form",
        "confidence": "high"
      },
      {
        "type": "dataflow-diagram",
        "reason": "Similar spelling",
        "confidence": "low"
      }
    ]
  },
  "warnings": [],
  "ok": false,
  "status": "validation_failed"
}
```
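Besides the JSON payload, the CLI reports the outcome through its exit status (0 when every type is valid, 1 otherwise), so callers can branch without parsing the output. A minimal sketch using `subprocess` (paths assume the framework root as working directory):

```python
import subprocess

# Exit code 0 = all types valid; 1 = at least one invalid type or an error.
proc = subprocess.run(
    [
        "python", "skills/artifact.validate.types/artifact_validate_types.py",
        "--artifact_types", '["data-flow-diagram", "threat-model"]',
    ],
    capture_output=True,
    text=True,
)
if proc.returncode != 0:
    print("Validation failed; see the suggestions in the JSON on stdout:")
    print(proc.stdout)
```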
### Example 3: Multiple Invalid Types with Generic → Specific Suggestions

```bash
python artifact_validate_types.py \
  --artifact_types '["data-model", "api-spec", "test-result"]' \
  --max_suggestions 3
```

**Output:**
```json
{
  "all_valid": false,
  "invalid_types": ["data-model", "api-spec"],
  "suggestions": {
    "data-model": [
      {
        "type": "logical-data-model",
        "reason": "Specific variant of model",
        "confidence": "medium"
      },
      {
        "type": "physical-data-model",
        "reason": "Specific variant of model",
        "confidence": "medium"
      },
      {
        "type": "enterprise-data-model",
        "reason": "Specific variant of model",
        "confidence": "medium"
      }
    ],
    "api-spec": [
      {
        "type": "openapi-spec",
        "reason": "Specific variant of spec",
        "confidence": "medium"
      },
      {
        "type": "asyncapi-spec",
        "reason": "Specific variant of spec",
        "confidence": "medium"
      }
    ]
  },
  "validation_results": {
    "test-result": {
      "valid": true,
      "file_pattern": "*.test-result.json",
      "content_type": "application/json"
    }
  }
}
```
### Example 4: Save Validation Report to File

```bash
python artifact_validate_types.py \
  --artifact_types '["threat-model", "architecture-overview"]' \
  --output validation-results.validation.json
```

Creates `validation-results.validation.json` with the complete validation report.
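The validator can also be used in-process rather than through the CLI; the bundled tests and benchmark import it directly. A minimal sketch (run from the framework root):

```python
import sys

# Make the skill module importable, as the bundled tests do.
sys.path.insert(0, "skills/artifact.validate.types")
from artifact_validate_types import validate_artifact_types

result = validate_artifact_types(
    artifact_types=["threat-model", "data-flow-diagram"],
    check_schemas=False,          # skip schema-file existence checks
    suggest_alternatives=True,
    registry_path="registry/artifact_types.json",
)
print(result["all_valid"], result["suggestions"])
```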
## Integration with meta.skill

The `meta.skill` agent calls this skill in Step 2 of its workflow:

```yaml
# meta.skill workflow Step 2
2. **Validate Artifact Types**
   - Extract artifact types from skill description
   - Call artifact.validate.types with all types
   - If all_valid == false:
     → Display suggestions to user
     → Ask user to confirm correct types
     → HALT until types are validated
   - Store validated metadata for use in skill.yaml generation
```

**Example Integration:**

```python
import json
import subprocess

# meta.skill calls artifact.validate.types
result = subprocess.run([
    'python', 'skills/artifact.validate.types/artifact_validate_types.py',
    '--artifact_types', json.dumps(["threat-model", "data-flow-diagrams"]),
    '--suggest_alternatives', 'true'
], capture_output=True, text=True)

validation = json.loads(result.stdout)

if not validation['all_valid']:
    print(f"❌ Invalid artifact types: {validation['invalid_types']}")
    for invalid_type, suggestions in validation['suggestions'].items():
        print(f"\n  Suggestions for '{invalid_type}':")
        for s in suggestions:
            print(f"    - {s['type']} ({s['confidence']} confidence): {s['reason']}")
    # HALT skill creation
else:
    print("✅ All artifact types validated")
    # Continue with skill.yaml generation using validated metadata
```
## Fuzzy Matching Strategies

### Strategy 1: Singular/Plural Detection (High Confidence)

Detects a missing or extra trailing "s":

| Invalid Type | Suggested Type | Reason |
|-------------|----------------|--------|
| `data-flow-diagram` | `data-flow-diagrams` | Plural form |
| `threat-models` | `threat-model` | Singular form |

### Strategy 2: Generic vs Specific Variants (Medium Confidence)

Suggests specific variants when a generic term is used:

| Invalid Type | Suggested Types |
|-------------|-----------------|
| `data-model` | `logical-data-model`, `physical-data-model`, `enterprise-data-model` |
| `api-spec` | `openapi-spec`, `asyncapi-spec`, `graphql-spec` |
| `architecture-diagram` | `system-architecture-diagram`, `component-architecture-diagram` |

### Strategy 3: Levenshtein Distance (Low Confidence)

Catches typos and misspellings (60%+ similarity):

| Invalid Type | Suggested Type | Similarity |
|-------------|----------------|------------|
| `thret-model` | `threat-model` | ~90% |
| `architecure-overview` | `architecture-overview` | ~85% |
| `api-specfication` | `api-specification` | ~92% |
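Strategy 3 is implemented with `difflib` (its Ratcliff/Obershelp ratio serves as a practical stand-in for Levenshtein distance), and the 60% figure is simply the `cutoff` passed to `get_close_matches`. A quick illustration:

```python
from difflib import SequenceMatcher, get_close_matches

known_types = ["threat-model", "architecture-overview", "api-specification"]

# Similarity measure used by get_close_matches; values >= 0.6 survive the cutoff.
ratio = SequenceMatcher(None, "thret-model", "threat-model").ratio()
print(f"similarity: {ratio:.2f}")  # comfortably above the 0.6 cutoff

print(get_close_matches("thret-model", known_types, n=3, cutoff=0.6))
# -> ['threat-model']
```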
## Error Handling

### Missing Registry File

```json
{
  "ok": false,
  "status": "error",
  "error": "Artifact registry not found: registry/artifact_types.json"
}
```

**Resolution**: Run the skill from the Betty Framework root directory, or point it at the registry explicitly with `--registry_path`.

### Invalid JSON in artifact_types Parameter

```json
{
  "ok": false,
  "status": "error",
  "error": "Invalid JSON: Expecting ',' delimiter: line 1 column 15 (char 14)"
}
```

**Resolution**: Ensure `artifact_types` is a valid JSON array with proper quoting.

### Corrupted Registry File

```json
{
  "ok": false,
  "status": "error",
  "error": "Invalid JSON in registry file: ..."
}
```

**Resolution**: Validate and fix the syntax of `registry/artifact_types.json`.
## Performance

- **Single type validation**: <100ms
- **20 types validation**: <1 second
- **All 409 types validation**: <5 seconds

Memory usage is minimal: the registry is loaded once and indexed by name for O(1) lookups.
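The O(1) claim follows from the registry list being indexed by name once at load time; every validation afterwards is a plain dict lookup. Conceptually:

```python
import json

# Build the name -> metadata index once (as load_artifact_registry does);
# each validation is then a constant-time membership test.
with open("registry/artifact_types.json") as f:
    registry = json.load(f)

by_name = {a["name"]: a for a in registry.get("artifact_types", [])}
print("threat-model" in by_name)
```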
## Dependencies

- **Python 3.7+**
- **difflib** - Fuzzy matching (Python stdlib)
- **PyYAML**, **jsonschema** - Declared in `skill.yaml`; the validation script itself only needs the standard library
## Testing

Run the test suite from the Betty Framework root (so the registry-dependent tests are not skipped):

```bash
python skills/artifact.validate.types/test_artifact_validate_types.py
```

**Test Coverage:**
- ✅ Valid artifact type validation
- ✅ Invalid artifact type detection
- ✅ Singular/plural suggestion
- ✅ Generic → specific suggestion
- ✅ Typo detection with Levenshtein distance
- ✅ Max suggestions limit
- ✅ Schema file existence checking
- ✅ Empty input handling
- ✅ Mixed valid/invalid types
## Quality Standards

- **Accuracy**: 100% for exact matches in registry
- **Suggestion Quality**: >80% relevant for common mistakes
- **Performance**: <1s for 20 types, <100ms for single type
- **Schema Verification**: 100% accurate file existence check
- **Error Handling**: Graceful handling of corrupted registry files

## Success Criteria

- ✅ Validates all 409 artifact types correctly
- ✅ Provides accurate suggestions for common mistakes (singular/plural)
- ✅ Returns exact metadata from registry (file_pattern, content_type, schema)
- ✅ Detects missing schema files and warns appropriately
- ✅ Completes validation in <1 second for up to 20 types
- ✅ Fuzzy matching handles typos up to roughly a 40% character difference (the 60% similarity cutoff)
## Troubleshooting

### Skill returns `all_valid=false` but I think the types are correct

1. Check the exact spelling in `registry/artifact_types.json`
2. Look at the suggestions - they often reveal plural/singular issues
3. Use `jq` to search the registry:
```bash
jq '.artifact_types[] | select(.name | contains("your-search"))' registry/artifact_types.json
```

### Fuzzy matching isn't suggesting the type I expect

1. Check whether the type name ends in a common suffix such as "-model" or "-spec" (Strategy 2 keys on the suffix)
2. Increase `max_suggestions` to see more options
3. The type might be too dissimilar (below the 60% similarity threshold)

### Schema warnings appearing for valid types

This is normal if the schema files haven't been created yet; schemas are optional for many artifact types. Set `check_schemas=false` to suppress these warnings.
## Related Skills

- **artifact.define** - Define new artifact types
- **artifact.create** - Create artifact files
- **skill.define** - Validate skill manifests
- **registry.update** - Update skill registry

## References

- [Python difflib](https://docs.python.org/3/library/difflib.html) - Fuzzy string matching
- [Betty Artifact Registry](../../registry/artifact_types.json) - Source of truth for artifact types
- [Levenshtein Distance](https://en.wikipedia.org/wiki/Levenshtein_distance) - String similarity algorithm
- [meta.skill Agent](../../agents/meta.skill/agent.yaml) - Primary consumer of this skill

skills/artifact.validate.types/artifact_validate_types.py (new file, 354 lines)
@@ -0,0 +1,354 @@
#!/usr/bin/env python3
"""
Artifact Type Validation Skill for Betty Framework

Validates artifact type names against registry/artifact_types.json and provides
fuzzy matching suggestions for invalid types using:
- Singular/plural detection
- Generic vs specific variant matching
- Levenshtein distance for typos

Usage:
    python artifact_validate_types.py --artifact_types '["threat-model", "data-flow-diagram"]'
"""

import argparse
import json
import logging
import os
import sys
from difflib import get_close_matches
from pathlib import Path
from typing import Any, Dict, List

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

def load_artifact_registry(registry_path: str = "registry/artifact_types.json") -> Dict[str, Any]:
    """
    Load the artifact types registry.

    Args:
        registry_path: Path to artifact_types.json

    Returns:
        Dictionary with artifact types indexed by name

    Raises:
        FileNotFoundError: If registry file doesn't exist
        json.JSONDecodeError: If registry file is invalid JSON
    """
    if not os.path.exists(registry_path):
        raise FileNotFoundError(f"Artifact registry not found: {registry_path}")

    try:
        with open(registry_path, 'r') as f:
            registry = json.load(f)

        # Index by name for O(1) lookup
        artifact_types_dict = {
            artifact['name']: artifact
            for artifact in registry.get('artifact_types', [])
        }

        logger.info(f"Loaded {len(artifact_types_dict)} artifact types from registry")
        return artifact_types_dict

    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in registry file: {e}")
        raise

def find_similar_types(
    invalid_type: str,
    all_types: List[str],
    max_suggestions: int = 3
) -> List[Dict[str, str]]:
    """
    Find similar artifact types using multiple strategies.

    Strategies:
        1. Singular/Plural variants (high confidence)
        2. Generic vs Specific variants (medium confidence)
        3. Levenshtein distance for typos (low confidence)

    Args:
        invalid_type: The artifact type that wasn't found
        all_types: List of all valid artifact type names
        max_suggestions: Maximum number of suggestions to return

    Returns:
        List of suggestion dictionaries with type, reason, and confidence
    """
    suggestions = []

    # Strategy 1: Singular/Plural variants
    if invalid_type.endswith('s'):
        singular = invalid_type[:-1]
        if singular in all_types:
            suggestions.append({
                'type': singular,
                'reason': 'Singular form',
                'confidence': 'high'
            })
    else:
        plural = invalid_type + 's'
        if plural in all_types:
            suggestions.append({
                'type': plural,
                'reason': 'Plural form',
                'confidence': 'high'
            })

    # Strategy 2: Generic vs Specific variants
    # e.g., "data-model" → ["logical-data-model", "physical-data-model"]
    if '-' in invalid_type:
        parts = invalid_type.split('-')
        base_term = parts[-1]  # e.g., "model" from "data-model"

        # Find types that end with "-{base_term}"
        matches = [t for t in all_types if t.endswith('-' + base_term)]

        for match in matches[:max_suggestions]:
            if match not in [s['type'] for s in suggestions]:
                suggestions.append({
                    'type': match,
                    'reason': f'Specific variant of {base_term}',
                    'confidence': 'medium'
                })

    # Strategy 3: Levenshtein distance for typos
    close_matches = get_close_matches(
        invalid_type,
        all_types,
        n=max_suggestions,
        cutoff=0.6  # 60% similarity threshold
    )

    for match in close_matches:
        if match not in [s['type'] for s in suggestions]:
            suggestions.append({
                'type': match,
                'reason': 'Similar spelling',
                'confidence': 'low'
            })

    return suggestions[:max_suggestions]

def validate_artifact_types(
    artifact_types: List[str],
    check_schemas: bool = True,
    suggest_alternatives: bool = True,
    max_suggestions: int = 3,
    registry_path: str = "registry/artifact_types.json"
) -> Dict[str, Any]:
    """
    Validate artifact types against the registry.

    Args:
        artifact_types: List of artifact type names to validate
        check_schemas: Whether to verify schema files exist
        suggest_alternatives: Whether to suggest similar types for invalid ones
        max_suggestions: Maximum suggestions per invalid type
        registry_path: Path to artifact registry JSON file

    Returns:
        Dictionary with validation results:
        {
            'validation_results': {type_name: {valid, file_pattern, ...}},
            'all_valid': bool,
            'invalid_types': [type_names],
            'suggestions': {type_name: [suggestions]},
            'warnings': [warning_messages]
        }
    """
    # Load registry
    artifact_types_dict = load_artifact_registry(registry_path)
    all_type_names = list(artifact_types_dict.keys())

    # Initialize results
    results = {}
    invalid_types = []
    suggestions_dict = {}
    warnings = []

    # Validate each type
    for artifact_type in artifact_types:
        if artifact_type in artifact_types_dict:
            # Valid - get metadata
            metadata = artifact_types_dict[artifact_type]

            # Check schema file exists (if check_schemas=true)
            if check_schemas and metadata.get('schema'):
                schema_path = metadata['schema']
                if not os.path.exists(schema_path):
                    warning_msg = f"Schema file missing: {schema_path}"
                    warnings.append(warning_msg)
                    logger.warning(warning_msg)

            results[artifact_type] = {
                'valid': True,
                'file_pattern': metadata.get('file_pattern'),
                'content_type': metadata.get('content_type'),
                'schema': metadata.get('schema'),
                'description': metadata.get('description')
            }

            logger.info(f"✓ {artifact_type} - valid")

        else:
            # Invalid - mark as invalid and find suggestions
            results[artifact_type] = {'valid': False}
            invalid_types.append(artifact_type)

            logger.warning(f"✗ {artifact_type} - not found in registry")

            # Generate suggestions if enabled
            if suggest_alternatives:
                suggestions = find_similar_types(
                    artifact_type,
                    all_type_names,
                    max_suggestions
                )
                if suggestions:
                    suggestions_dict[artifact_type] = suggestions
                    logger.info(
                        f"  Suggestions for '{artifact_type}': "
                        f"{', '.join(s['type'] for s in suggestions)}"
                    )

    # Compile final results
    return {
        'validation_results': results,
        'all_valid': len(invalid_types) == 0,
        'invalid_types': invalid_types,
        'suggestions': suggestions_dict,
        'warnings': warnings
    }

def parse_bool(value: str) -> bool:
    """Parse 'true'/'false'-style CLI values (argparse's type=bool would treat any non-empty string, including 'false', as True)."""
    return str(value).strip().lower() in ('1', 'true', 'yes', 'on')


def main():
    """Main entry point for artifact.validate.types skill."""
    parser = argparse.ArgumentParser(
        description="Validate artifact types against Betty Framework registry"
    )

    parser.add_argument(
        '--artifact_types',
        type=str,
        required=True,
        help='JSON array of artifact type names to validate (e.g., \'["threat-model", "data-flow-diagram"]\')'
    )

    parser.add_argument(
        '--check_schemas',
        type=parse_bool,
        default=True,
        help='Whether to verify schema files exist on filesystem (default: true)'
    )

    parser.add_argument(
        '--suggest_alternatives',
        type=parse_bool,
        default=True,
        help='Whether to suggest similar types for invalid ones (default: true)'
    )

    parser.add_argument(
        '--max_suggestions',
        type=int,
        default=3,
        help='Maximum number of suggestions per invalid type (default: 3)'
    )

    parser.add_argument(
        '--registry_path',
        type=str,
        default='registry/artifact_types.json',
        help='Path to artifact registry file (default: registry/artifact_types.json)'
    )

    parser.add_argument(
        '--output',
        type=str,
        help='Output file path for validation report (optional)'
    )

    args = parser.parse_args()

    try:
        # Parse artifact_types JSON
        artifact_types = json.loads(args.artifact_types)

        if not isinstance(artifact_types, list):
            logger.error("artifact_types must be a JSON array")
            sys.exit(1)

        logger.info(f"Validating {len(artifact_types)} artifact types...")

        # Perform validation
        result = validate_artifact_types(
            artifact_types=artifact_types,
            check_schemas=args.check_schemas,
            suggest_alternatives=args.suggest_alternatives,
            max_suggestions=args.max_suggestions,
            registry_path=args.registry_path
        )

        # Add metadata
        result['ok'] = result['all_valid']
        result['status'] = 'success' if result['all_valid'] else 'validation_failed'

        # Save to file if output path specified
        if args.output:
            output_path = Path(args.output)
            output_path.parent.mkdir(parents=True, exist_ok=True)

            with open(output_path, 'w') as f:
                json.dump(result, f, indent=2)

            logger.info(f"Validation report saved to: {output_path}")
            result['validation_report_path'] = str(output_path)

        # Print results
        print(json.dumps(result, indent=2))

        # Exit with error code if validation failed
        sys.exit(0 if result['all_valid'] else 1)

    except json.JSONDecodeError as e:
        logger.error(f"Invalid JSON in artifact_types parameter: {e}")
        print(json.dumps({
            'ok': False,
            'status': 'error',
            'error': f'Invalid JSON: {str(e)}'
        }, indent=2))
        sys.exit(1)

    except FileNotFoundError as e:
        logger.error(str(e))
        print(json.dumps({
            'ok': False,
            'status': 'error',
            'error': str(e)
        }, indent=2))
        sys.exit(1)

    except Exception as e:
        logger.error(f"Unexpected error: {e}", exc_info=True)
        print(json.dumps({
            'ok': False,
            'status': 'error',
            'error': str(e)
        }, indent=2))
        sys.exit(1)


if __name__ == '__main__':
    main()

skills/artifact.validate.types/benchmark_performance.py (new file, 126 lines)
@@ -0,0 +1,126 @@
#!/usr/bin/env python3
"""
Performance benchmarks for artifact.validate.types

Tests validation performance with different numbers of artifact types
to verify the "<1s for 20 types" and "<100ms for a single type" claims.
"""

import json
import sys
import time
from pathlib import Path

sys.path.insert(0, str(Path(__file__).parent))

from artifact_validate_types import validate_artifact_types


def benchmark_validation(artifact_types: list, iterations: int = 5) -> dict:
    """Benchmark validation performance."""
    times = []

    for _ in range(iterations):
        start = time.perf_counter()
        validate_artifact_types(
            artifact_types=artifact_types,
            check_schemas=False,  # Skip filesystem checks for pure validation speed
            suggest_alternatives=True,
            max_suggestions=3,
            registry_path="registry/artifact_types.json"
        )
        end = time.perf_counter()
        times.append((end - start) * 1000)  # Convert to milliseconds

    avg_time = sum(times) / len(times)
    min_time = min(times)
    max_time = max(times)

    return {
        'count': len(artifact_types),
        'iterations': iterations,
        'avg_ms': round(avg_time, 2),
        'min_ms': round(min_time, 2),
        'max_ms': round(max_time, 2),
        'times_ms': [round(t, 2) for t in times]
    }


def main():
    """Run performance benchmarks."""
    print("=== artifact.validate.types Performance Benchmarks ===\n")

    # Load actual artifact types from registry
    with open("registry/artifact_types.json") as f:
        registry = json.load(f)
    all_types = [a['name'] for a in registry['artifact_types']]

    print(f"Registry contains {len(all_types)} artifact types\n")

    # Benchmark different scenarios
    scenarios = [
        ("Single type", [all_types[0]]),
        ("5 types", all_types[:5]),
        ("10 types", all_types[:10]),
        ("20 types", all_types[:20]),
        ("50 types", all_types[:50]),
        ("100 types", all_types[:100]),
        ("All types (409)", all_types),
    ]

    results = []

    for name, types in scenarios:
        print(f"Benchmarking: {name} ({len(types)} types)")
        result = benchmark_validation(types, iterations=5)
        results.append({'name': name, **result})

        avg = result['avg_ms']
        status = "✅ " if avg < 1000 else "⚠️ "
        print(f"  {status}Average: {avg}ms (min: {result['min_ms']}ms, max: {result['max_ms']}ms)")
        print()

    # Print summary
    print("\n=== Summary ===\n")

    # Check claims
    single_result = results[0]
    twenty_result = results[3]

    print("Claim Verification:")
    if single_result['avg_ms'] < 100:
        print(f"  ✅ Single type < 100ms: {single_result['avg_ms']}ms")
    else:
        print(f"  ❌ Single type < 100ms: {single_result['avg_ms']}ms (MISSED TARGET)")

    if twenty_result['avg_ms'] < 1000:
        print(f"  ✅ 20 types < 1s: {twenty_result['avg_ms']}ms")
    else:
        print(f"  ❌ 20 types < 1s: {twenty_result['avg_ms']}ms (MISSED TARGET)")

    all_result = results[-1]
    print(f"\nAll 409 types: {all_result['avg_ms']}ms")

    # Calculate throughput
    throughput = len(all_types) / (all_result['avg_ms'] / 1000)
    print(f"Throughput: {int(throughput)} types/second")

    # Save results
    output_path = "skills/artifact.validate.types/benchmark_results.json"
    with open(output_path, 'w') as f:
        json.dump({
            'benchmark_date': time.strftime('%Y-%m-%d %H:%M:%S'),
            'registry_size': len(all_types),
            'results': results,
            'claims_verified': {
                'single_type_under_100ms': single_result['avg_ms'] < 100,
                'twenty_types_under_1s': twenty_result['avg_ms'] < 1000
            }
        }, f, indent=2)

    print(f"\nResults saved to: {output_path}")


if __name__ == '__main__':
    main()

skills/artifact.validate.types/benchmark_results.json (new file, 115 lines)
@@ -0,0 +1,115 @@
{
  "benchmark_date": "2025-11-02 18:35:38",
  "registry_size": 409,
  "results": [
    {"name": "Single type", "count": 1, "iterations": 5, "avg_ms": 1.02, "min_ms": 0.92, "max_ms": 1.27, "times_ms": [1.27, 0.95, 1.04, 0.93, 0.92]},
    {"name": "5 types", "count": 5, "iterations": 5, "avg_ms": 0.98, "min_ms": 0.89, "max_ms": 1.05, "times_ms": [1.05, 1.01, 0.92, 1.02, 0.89]},
    {"name": "10 types", "count": 10, "iterations": 5, "avg_ms": 0.96, "min_ms": 0.91, "max_ms": 1.02, "times_ms": [1.02, 0.96, 0.95, 0.91, 0.98]},
    {"name": "20 types", "count": 20, "iterations": 5, "avg_ms": 1.22, "min_ms": 1.15, "max_ms": 1.34, "times_ms": [1.15, 1.19, 1.34, 1.23, 1.17]},
    {"name": "50 types", "count": 50, "iterations": 5, "avg_ms": 1.68, "min_ms": 1.63, "max_ms": 1.76, "times_ms": [1.7, 1.76, 1.65, 1.63, 1.66]},
    {"name": "100 types", "count": 100, "iterations": 5, "avg_ms": 2.73, "min_ms": 2.49, "max_ms": 3.54, "times_ms": [2.5, 2.49, 2.53, 2.58, 3.54]},
    {"name": "All types (409)", "count": 409, "iterations": 5, "avg_ms": 7.79, "min_ms": 7.44, "max_ms": 8.43, "times_ms": [7.56, 8.43, 7.69, 7.44, 7.84]}
  ],
  "claims_verified": {
    "single_type_under_100ms": true,
    "twenty_types_under_1s": true
  }
}

skills/artifact.validate.types/skill.yaml (new file, 84 lines)
@@ -0,0 +1,84 @@
name: artifact.validate.types
version: 0.1.0
description: >
  Validate that artifact types exist in the Betty Framework registry and return
  complete metadata for each type. Provides fuzzy matching and suggestions for
  invalid types using singular/plural detection and Levenshtein distance.

inputs:
  - name: artifact_types
    type: array
    required: true
    description: List of artifact type names to validate (e.g., ["threat-model", "architecture-overview"])

  - name: check_schemas
    type: boolean
    required: false
    default: true
    description: Whether to verify schema files exist on filesystem

  - name: suggest_alternatives
    type: boolean
    required: false
    default: true
    description: Whether to suggest similar types for invalid ones

  - name: max_suggestions
    type: number
    required: false
    default: 3
    description: Maximum number of suggestions per invalid type

outputs:
  - name: validation_results
    type: object
    description: Validation results for each artifact type with complete metadata

  - name: all_valid
    type: boolean
    description: Whether all artifact types are valid

  - name: invalid_types
    type: array
    description: List of artifact types that don't exist in registry

  - name: suggestions
    type: object
    description: Suggested alternatives for each invalid type (type name → suggestions)

  - name: warnings
    type: array
    description: List of warnings (e.g., schema file missing)

artifact_metadata:
  produces:
    - type: validation-report
      file_pattern: "*.validation.json"
      content_type: application/json
      schema: schemas/validation-report.json
      description: Artifact type validation results with metadata and suggestions

  consumes: []

entrypoints:
  - command: /artifact/validate/types
    handler: artifact_validate_types.py
    runtime: python
    permissions:
      - filesystem:read

status: active

tags:
  - artifacts
  - validation
  - registry
  - metadata
  - quality

dependencies:
  - pyyaml
  - jsonschema

permissions:
  - filesystem:read

skills/artifact.validate.types/test_artifact_validate_types.py (new file, 326 lines)
@@ -0,0 +1,326 @@
#!/usr/bin/env python3
"""
Tests for artifact.validate.types skill

Tests fuzzy matching strategies:
- Singular/plural detection
- Generic vs specific variants
- Levenshtein distance for typos
"""

import json
import os
import sys
import tempfile
import unittest
from pathlib import Path

# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent))

from artifact_validate_types import (
    load_artifact_registry,
    find_similar_types,
    validate_artifact_types
)


class TestArtifactValidateTypes(unittest.TestCase):
    """Test suite for artifact type validation."""

    @classmethod
    def setUpClass(cls):
        """Set up test fixtures."""
        cls.registry_path = "registry/artifact_types.json"

        # Load actual registry for integration tests
        if os.path.exists(cls.registry_path):
            cls.artifact_types_dict = load_artifact_registry(cls.registry_path)
            cls.all_type_names = list(cls.artifact_types_dict.keys())
        else:
            cls.artifact_types_dict = {}
            cls.all_type_names = []

    def test_load_registry(self):
        """Test loading artifact registry."""
        if not os.path.exists(self.registry_path):
            self.skipTest("Registry file not found")

        registry = load_artifact_registry(self.registry_path)

        self.assertIsInstance(registry, dict)
        self.assertGreater(len(registry), 0, "Registry should contain artifact types")

        # Check structure of first entry
        first_type = next(iter(registry.values()))
        self.assertIn('name', first_type)
        self.assertIn('file_pattern', first_type)

    def test_validate_single_valid_type(self):
        """Test validation of a single valid artifact type."""
        if not self.all_type_names:
            self.skipTest("Registry not available")

        # Use a known artifact type (threat-model exists in registry)
        result = validate_artifact_types(
            artifact_types=["threat-model"],
            check_schemas=False,  # Don't check schema files in tests
            suggest_alternatives=False,
            registry_path=self.registry_path
        )

        self.assertTrue(result['all_valid'])
        self.assertEqual(len(result['invalid_types']), 0)
        self.assertIn('threat-model', result['validation_results'])
        self.assertTrue(result['validation_results']['threat-model']['valid'])
        self.assertIn('file_pattern', result['validation_results']['threat-model'])

    def test_validate_invalid_type(self):
        """Test validation of an invalid artifact type."""
        result = validate_artifact_types(
            artifact_types=["nonexistent-artifact-type"],
            check_schemas=False,
            suggest_alternatives=False,
            registry_path=self.registry_path
        )

        self.assertFalse(result['all_valid'])
        self.assertIn('nonexistent-artifact-type', result['invalid_types'])
        self.assertFalse(
            result['validation_results']['nonexistent-artifact-type']['valid']
        )

    def test_validate_mixed_types(self):
        """Test validation of a mix of valid and invalid types."""
        if not self.all_type_names:
            self.skipTest("Registry not available")

        result = validate_artifact_types(
            artifact_types=["threat-model", "invalid-type", "architecture-overview"],
            check_schemas=False,
            suggest_alternatives=False,
            registry_path=self.registry_path
        )

        self.assertFalse(result['all_valid'])
        self.assertEqual(len(result['invalid_types']), 1)
        self.assertIn('invalid-type', result['invalid_types'])

        # Valid types should have metadata
        self.assertTrue(result['validation_results']['threat-model']['valid'])
        self.assertTrue(result['validation_results']['architecture-overview']['valid'])

    def test_singular_plural_suggestion(self):
        """Test singular/plural fuzzy matching."""
        # Create test data
        all_types = ["data-flow-diagrams", "threat-model", "api-spec"]

        # Test singular → plural suggestion
        suggestions = find_similar_types("data-flow-diagram", all_types, max_suggestions=3)

        # Should suggest plural form
        self.assertTrue(any(
            s['type'] == 'data-flow-diagrams' and s['confidence'] == 'high'
            for s in suggestions
        ))

    def test_generic_specific_suggestion(self):
        """Test generic vs specific variant matching."""
        # Create test data with specific variants
        all_types = [
            "logical-data-model",
            "physical-data-model",
            "enterprise-data-model",
            "threat-model"
        ]

        # Search for generic "data-model"
        suggestions = find_similar_types("data-model", all_types, max_suggestions=3)

        # Should suggest specific variants ending in "-data-model"
        suggested_types = [s['type'] for s in suggestions]
        self.assertTrue(
            any('data-model' in t for t in suggested_types),
            "Should suggest specific data-model variants"
        )

    def test_typo_suggestion(self):
        """Test Levenshtein distance for typo detection."""
        all_types = ["threat-model", "architecture-overview", "api-specification"]

        # Typo: "thret-model" instead of "threat-model"
        suggestions = find_similar_types("thret-model", all_types, max_suggestions=3)

        # Should suggest "threat-model"
        suggested_types = [s['type'] for s in suggestions]
        self.assertIn("threat-model", suggested_types)

    def test_max_suggestions_limit(self):
        """Test that max_suggestions limit is respected."""
        all_types = [
            "logical-data-model",
            "physical-data-model",
            "enterprise-data-model",
            "conceptual-data-model",
            "canonical-data-model"
        ]

        suggestions = find_similar_types("data-model", all_types, max_suggestions=2)

        # Should return at most 2 suggestions
        self.assertLessEqual(len(suggestions), 2)

    def test_suggestions_integration(self):
        """Test end-to-end suggestion workflow."""
        if not self.all_type_names:
            self.skipTest("Registry not available")

        result = validate_artifact_types(
            artifact_types=["data-flow-diagram"],  # Singular (likely plural in registry)
            check_schemas=False,
            suggest_alternatives=True,
            max_suggestions=3,
            registry_path=self.registry_path
        )

        # Should detect as invalid
        self.assertFalse(result['all_valid'])
        self.assertIn('data-flow-diagram', result['invalid_types'])

        # Should provide suggestions
        if 'data-flow-diagram' in result['suggestions']:
            suggestions = result['suggestions']['data-flow-diagram']
            self.assertGreater(len(suggestions), 0)

            # Check suggestion structure
            first_suggestion = suggestions[0]
            self.assertIn('type', first_suggestion)
            self.assertIn('reason', first_suggestion)
            self.assertIn('confidence', first_suggestion)

    def test_schema_checking(self):
        """Test schema file existence checking."""
        if not os.path.exists(self.registry_path):
            self.skipTest("Registry not available")

        # Validate a type that has a schema
        result = validate_artifact_types(
            artifact_types=["threat-model"],
            check_schemas=True,
            suggest_alternatives=False,
            registry_path=self.registry_path
        )

        # If the declared schema file is missing, a warning mentioning the type
        # should be present; if it exists (or none is declared), no warning.
        missing_warnings = [w for w in result['warnings'] if 'threat-model' in w]
        schema_path = result['validation_results']['threat-model'].get('schema')
        if schema_path and not os.path.exists(schema_path):
            self.assertTrue(len(missing_warnings) > 0)
        else:
            self.assertEqual(missing_warnings, [])

    def test_empty_input(self):
        """Test validation with an empty artifact types list."""
        result = validate_artifact_types(
            artifact_types=[],
            check_schemas=False,
            suggest_alternatives=False,
            registry_path=self.registry_path
        )

        self.assertTrue(result['all_valid'])
        self.assertEqual(len(result['invalid_types']), 0)
        self.assertEqual(len(result['validation_results']), 0)

    def test_validation_report_structure(self):
        """Test that the validation report has the correct structure."""
        result = validate_artifact_types(
            artifact_types=["threat-model", "invalid-type"],
            check_schemas=False,
            suggest_alternatives=True,
            max_suggestions=3,
            registry_path=self.registry_path
        )

        # Check required fields
        self.assertIn('validation_results', result)
        self.assertIn('all_valid', result)
        self.assertIn('invalid_types', result)
        self.assertIn('suggestions', result)
        self.assertIn('warnings', result)

        # Check types
        self.assertIsInstance(result['validation_results'], dict)
        self.assertIsInstance(result['all_valid'], bool)
        self.assertIsInstance(result['invalid_types'], list)
        self.assertIsInstance(result['suggestions'], dict)
        self.assertIsInstance(result['warnings'], list)


class TestFuzzMatchingStrategies(unittest.TestCase):
    """Focused tests for fuzzy matching strategies."""

    def test_plural_to_singular(self):
        """Test plural → singular suggestion."""
        all_types = ["threat-model", "data-flow-diagram"]
        suggestions = find_similar_types("threat-models", all_types)

        # Should suggest singular
        self.assertTrue(any(
            s['type'] == 'threat-model' and
            s['reason'] == 'Singular form' and
            s['confidence'] == 'high'
            for s in suggestions
        ))

    def test_singular_to_plural(self):
        """Test singular → plural suggestion."""
        all_types = ["data-flow-diagrams", "threat-models"]
        suggestions = find_similar_types("threat-model", all_types)

        # Should suggest plural
        self.assertTrue(any(
            s['type'] == 'threat-models' and
            s['reason'] == 'Plural form' and
            s['confidence'] == 'high'
            for s in suggestions
        ))

    def test_generic_to_specific(self):
        """Test generic → specific variant suggestions."""
        all_types = [
            "openapi-spec",
            "asyncapi-spec",
            "graphql-spec"
        ]

        suggestions = find_similar_types("api-spec", all_types)

        # Should suggest specific API spec variants
        suggested_types = [s['type'] for s in suggestions]
        self.assertTrue(any('spec' in t for t in suggested_types))

    def test_confidence_levels(self):
        """Test that confidence levels are correctly assigned."""
        all_types = [
            "threat-model",         # For singular/plural (high confidence)
            "logical-data-model",   # For specific variant (medium confidence)
            "thret-model"           # For typo (low confidence)
        ]

        # Plural → singular (high)
        suggestions = find_similar_types("threat-models", all_types)
        high_conf = [s for s in suggestions if s['confidence'] == 'high']
        self.assertTrue(len(high_conf) > 0)


def run_tests():
    """Run all tests."""
    unittest.main(argv=[''], verbosity=2, exit=False)


if __name__ == '__main__':
    run_tests()