Files
gh-epieczko-betty/skills/api.validate/validators/zalando_rules.py
2025-11-29 18:26:08 +08:00

360 lines
14 KiB
Python

"""
Zalando RESTful API Guidelines validation rules.
Based on: https://opensource.zalando.com/restful-api-guidelines/
"""
from typing import Dict, List, Any, Optional
import re
class ValidationError:
    """Represents a single validation finding (an error or a warning).

    Attributes are plain instance fields set from the constructor arguments:
    ``rule_id``, ``message``, ``severity``, ``path`` and ``suggestion``.
    """

    def __init__(
        self,
        rule_id: str,
        message: str,
        severity: str = "error",
        path: Optional[str] = None,
        suggestion: Optional[str] = None
    ):
        """Store the finding's fields.

        Args:
            rule_id: Identifier of the rule that produced the finding.
            message: Human-readable description of the finding.
            severity: "error" or "warning" (stored as given, not validated).
            path: Optional location of the finding inside the spec.
            suggestion: Optional hint on how to resolve the finding.
        """
        self.rule_id = rule_id
        self.message = message
        self.severity = severity  # "error" or "warning"
        self.path = path
        self.suggestion = suggestion

    def __repr__(self) -> str:
        """Return a debugging representation listing every field."""
        return (
            f"{type(self).__name__}(rule_id={self.rule_id!r}, "
            f"message={self.message!r}, severity={self.severity!r}, "
            f"path={self.path!r}, suggestion={self.suggestion!r})"
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary for JSON serialization.

        ``rule_id``, ``message`` and ``severity`` are always present;
        ``path`` and ``suggestion`` are included only when truthy.
        """
        result = {
            "rule_id": self.rule_id,
            "message": self.message,
            "severity": self.severity
        }
        if self.path:
            result["path"] = self.path
        if self.suggestion:
            result["suggestion"] = self.suggestion
        return result
class ZalandoValidator:
    """Validates OpenAPI specs against Zalando guidelines.

    Findings are collected in ``self.errors`` and ``self.warnings`` and
    summarised by :meth:`validate`. In strict mode every warning is recorded
    as an error instead.
    """

    # Operations with these HTTP methods are inspected by the response checks.
    _OPERATION_METHODS = ("GET", "POST", "PUT", "PATCH", "DELETE")
    _VALID_AUDIENCES = (
        "component-internal",
        "company-internal",
        "external-partner",
        "external-public",
    )
    # Patterns are applied with fullmatch(), so no ^/$ anchors are needed.
    _UUID_RE = re.compile(
        r'[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}',
        re.IGNORECASE,
    )
    _PATH_PARAM_RE = re.compile(r'\{[^}]+\}')
    _PATH_SEGMENT_RE = re.compile(r'[a-z0-9_-]+')
    _PROPERTY_RE = re.compile(r'[a-z][a-z0-9_]*')

    def __init__(self, spec: Dict[str, Any], strict: bool = False):
        """Store the spec to validate.

        Args:
            spec: Parsed OpenAPI document.
            strict: When true, warnings are recorded as errors.
        """
        self.spec = spec
        self.strict = strict
        self.errors: List[ValidationError] = []
        self.warnings: List[ValidationError] = []

    def validate(self) -> Dict[str, Any]:
        """
        Run all validation rules.

        Findings from a previous call are discarded first, so calling this
        method repeatedly yields the same report instead of duplicates.

        Returns:
            Validation report with errors and warnings
        """
        self.errors = []
        self.warnings = []

        # Required metadata
        self._check_required_metadata()
        # Naming conventions
        self._check_naming_conventions()
        # HTTP methods and status codes
        self._check_http_methods()
        self._check_status_codes()
        # Error handling
        self._check_error_responses()
        # Headers
        self._check_required_headers()
        # Security
        self._check_security_schemes()

        return {
            "valid": not self.errors and (not self.strict or not self.warnings),
            "errors": [e.to_dict() for e in self.errors],
            "warnings": [w.to_dict() for w in self.warnings],
            "guideline_version": "zalando-1.0",
            "rules_checked": self._get_rules_checked()
        }

    def _add_error(
        self,
        rule_id: str,
        message: str,
        path: Optional[str] = None,
        suggestion: Optional[str] = None
    ):
        """Add a validation error."""
        self.errors.append(ValidationError(rule_id, message, "error", path, suggestion))

    def _add_warning(
        self,
        rule_id: str,
        message: str,
        path: Optional[str] = None,
        suggestion: Optional[str] = None
    ):
        """Add a validation warning (recorded as an error in strict mode)."""
        if self.strict:
            self.errors.append(ValidationError(rule_id, message, "error", path, suggestion))
        else:
            self.warnings.append(ValidationError(rule_id, message, "warning", path, suggestion))

    @staticmethod
    def _as_mapping(value: Any) -> Dict[Any, Any]:
        """Return *value* if it is a dict, otherwise an empty dict.

        Lets the checks tolerate sections that are missing, null or of an
        unexpected type instead of raising.
        """
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _has_header(headers: Any, name: str) -> bool:
        """Return whether *headers* contains *name*, ignoring letter case."""
        if not isinstance(headers, dict):
            return False
        wanted = name.lower()
        return any(str(key).lower() == wanted for key in headers)

    def _iter_operations(self, methods):
        """Yield ``(path, method, operation)`` for operations using *methods*.

        *methods* is a collection of upper-case HTTP method names. Path items
        and operations that are not dicts are skipped.
        """
        for path, path_item in self._as_mapping(self.spec.get("paths")).items():
            if not isinstance(path_item, dict):
                continue
            for method, operation in path_item.items():
                if str(method).upper() not in methods:
                    continue
                if isinstance(operation, dict):
                    yield path, method, operation

    def _responses_of(self, operation: Dict[str, Any]) -> Dict[str, Any]:
        """Return the operation's responses keyed by status code as a string.

        Status codes written without quotes in YAML are loaded as integers;
        converting them to strings lets the checks treat both forms alike.
        Response values that are not dicts are replaced by empty dicts.
        """
        responses = self._as_mapping(operation.get("responses"))
        return {
            str(code): self._as_mapping(response)
            for code, response in responses.items()
        }

    def _check_required_metadata(self):
        """
        Check required metadata fields.
        Zalando requires: x-api-id, x-audience
        """
        info = self._as_mapping(self.spec.get("info"))

        # Check x-api-id (MUST)
        if "x-api-id" not in info:
            self._add_error(
                "MUST_001",
                "Missing required field 'info.x-api-id'",
                "info.x-api-id",
                "Add a UUID to uniquely identify this API: x-api-id: 'd0184f38-b98d-11e7-9c56-68f728c1ba70'"
            )
        else:
            # Validate UUID format
            api_id = info["x-api-id"]
            if not self._UUID_RE.fullmatch(str(api_id)):
                self._add_error(
                    "MUST_001",
                    f"'info.x-api-id' must be a valid UUID, got: {api_id}",
                    "info.x-api-id"
                )

        # Check x-audience (MUST)
        if "x-audience" not in info:
            self._add_error(
                "MUST_002",
                "Missing required field 'info.x-audience'",
                "info.x-audience",
                "Specify target audience: x-audience: 'component-internal' | 'company-internal' | 'external-partner' | 'external-public'"
            )
        elif info["x-audience"] not in self._VALID_AUDIENCES:
            self._add_error(
                "MUST_002",
                f"'info.x-audience' must be one of: {', '.join(self._VALID_AUDIENCES)}",
                "info.x-audience"
            )

        # Check contact information (SHOULD)
        if "contact" not in info:
            self._add_warning(
                "SHOULD_001",
                "Missing 'info.contact' - should provide API owner contact information",
                "info.contact",
                "Add contact information: contact: {name: 'Team Name', email: 'team@company.com'}"
            )

    def _check_naming_conventions(self):
        """
        Check naming conventions.
        Zalando requires: snake_case for properties, kebab-case or snake_case for paths
        """
        # Check path naming
        for path in self._as_mapping(self.spec.get("paths")):
            # Remove path parameters for checking
            path_without_params = self._PATH_PARAM_RE.sub('', str(path))
            for segment in path_without_params.split('/'):
                if not segment:
                    continue
                # Should be kebab-case or snake_case
                if not self._PATH_SEGMENT_RE.fullmatch(segment):
                    self._add_error(
                        "MUST_003",
                        f"Path segment '{segment}' should use lowercase kebab-case or snake_case",
                        f"paths.{path}",
                        f"Use lowercase: {segment.lower()}"
                    )

        # Check schema property naming (should be snake_case)
        components = self._as_mapping(self.spec.get("components"))
        schemas = self._as_mapping(components.get("schemas"))
        for schema_name, schema in schemas.items():
            # Schemas that are not dicts (e.g. booleans) have no properties.
            properties = schema.get("properties") if isinstance(schema, dict) else None
            if not isinstance(properties, dict):
                continue
            for prop_name in properties:
                # Keys may be non-strings when loaded from YAML; compare as text.
                prop_text = str(prop_name)
                if not self._PROPERTY_RE.fullmatch(prop_text):
                    self._add_error(
                        "MUST_004",
                        f"Property '{prop_name}' in schema '{schema_name}' should use snake_case",
                        f"components.schemas.{schema_name}.properties.{prop_name}",
                        f"Use snake_case: {self._to_snake_case(prop_text)}"
                    )

    def _check_http_methods(self):
        """
        Check HTTP methods are used correctly.
        """
        # Only GET and POST have method-specific rules here.
        for path, method, operation in self._iter_operations(("GET", "POST")):
            verb = str(method).upper()

            # GET should not have requestBody
            if verb == "GET" and "requestBody" in operation:
                self._add_error(
                    "MUST_005",
                    "GET operation should not have requestBody",
                    f"paths.{path}.get.requestBody"
                )

            # POST should return 201 for resource creation
            if verb == "POST":
                responses = self._responses_of(operation)
                if "201" not in responses and "200" not in responses:
                    self._add_warning(
                        "SHOULD_002",
                        "POST operation should return 201 (Created) for resource creation",
                        f"paths.{path}.post.responses"
                    )

    def _check_status_codes(self):
        """
        Check proper use of HTTP status codes.
        """
        for path, method, operation in self._iter_operations(self._OPERATION_METHODS):
            verb = str(method).upper()
            responses = self._responses_of(operation)

            # All operations should document error responses
            if "400" not in responses:
                self._add_warning(
                    "SHOULD_003",
                    f"{verb} operation should document 400 (Bad Request) response",
                    f"paths.{path}.{method}.responses"
                )
            if "500" not in responses:
                self._add_warning(
                    "SHOULD_004",
                    f"{verb} operation should document 500 (Internal Error) response",
                    f"paths.{path}.{method}.responses"
                )

            # Check 201 has Location header (header names are case-insensitive)
            if "201" in responses:
                if not self._has_header(responses["201"].get("headers"), "Location"):
                    self._add_warning(
                        "SHOULD_005",
                        "201 (Created) response should include Location header",
                        f"paths.{path}.{method}.responses.201.headers",
                        "Add: headers: {Location: {schema: {type: string, format: uri}}}"
                    )

    def _check_error_responses(self):
        """
        Check error responses use RFC 7807 Problem JSON.
        Zalando requires: application/problem+json for errors
        """
        # Check if Problem schema exists
        components = self._as_mapping(self.spec.get("components"))
        schemas = self._as_mapping(components.get("schemas"))
        if "Problem" not in schemas:
            self._add_warning(
                "SHOULD_006",
                "Missing 'Problem' schema for RFC 7807 error responses",
                "components.schemas",
                "Add Problem schema following RFC 7807: https://datatracker.ietf.org/doc/html/rfc7807"
            )

        # Check error responses use application/problem+json
        for path, method, operation in self._iter_operations(self._OPERATION_METHODS):
            for status_code, response in self._responses_of(operation).items():
                # Check 4xx and 5xx responses
                if not status_code.startswith(("4", "5")):
                    continue
                content = self._as_mapping(response.get("content"))
                # Responses without any documented content are not flagged.
                if content and "application/problem+json" not in content:
                    self._add_warning(
                        "SHOULD_007",
                        f"Error response {status_code} should use 'application/problem+json' content type",
                        f"paths.{path}.{method}.responses.{status_code}.content"
                    )

    def _check_required_headers(self):
        """
        Check for required headers.
        Zalando requires: X-Flow-ID for request tracing
        """
        # Check if responses document X-Flow-ID
        missing_flow_id = []
        for path, method, operation in self._iter_operations(self._OPERATION_METHODS):
            for status_code, response in self._responses_of(operation).items():
                if not status_code.startswith("2"):  # Success responses only
                    continue
                if not self._has_header(response.get("headers"), "X-Flow-ID"):
                    missing_flow_id.append(f"paths.{path}.{method}.responses.{status_code}")

        # A single warning is emitted, pointing at the first offending response.
        if missing_flow_id:
            self._add_warning(
                "SHOULD_008",
                "Success responses should include X-Flow-ID header for request tracing",
                missing_flow_id[0],
                "Add: headers: {X-Flow-ID: {schema: {type: string, format: uuid}}}"
            )

    def _check_security_schemes(self):
        """
        Check security schemes are defined.
        """
        components = self._as_mapping(self.spec.get("components"))
        if not components.get("securitySchemes"):
            self._add_warning(
                "SHOULD_009",
                "No security schemes defined - consider adding authentication",
                "components.securitySchemes",
                "Add security schemes: bearerAuth, oauth2, etc."
            )

    def _get_rules_checked(self) -> List[str]:
        """Get list of rules that were checked."""
        return [
            "MUST_001: Required x-api-id metadata",
            "MUST_002: Required x-audience metadata",
            "MUST_003: Path naming conventions",
            "MUST_004: Property naming conventions (snake_case)",
            "MUST_005: HTTP method usage",
            "SHOULD_001: Contact information",
            "SHOULD_002: POST returns 201",
            "SHOULD_003: Document 400 errors",
            "SHOULD_004: Document 500 errors",
            "SHOULD_005: 201 includes Location header",
            "SHOULD_006: Problem schema for errors",
            "SHOULD_007: Error responses use application/problem+json",
            "SHOULD_008: X-Flow-ID header for tracing",
            "SHOULD_009: Security schemes defined"
        ]

    @staticmethod
    def _to_snake_case(text: str) -> str:
        """Convert text to snake_case.

        Hyphens, dots and whitespace become underscores, word boundaries in
        camelCase/PascalCase get an underscore, repeated underscores are
        collapsed, and the result is lower-cased.
        """
        # Treat common separators as word boundaries
        normalized = re.sub(r'[\s\-.]+', '_', text)
        # Insert underscore before a capitalized word
        s1 = re.sub(r'(.)([A-Z][a-z]+)', r'\1_\2', normalized)
        # Insert underscore before uppercase letters preceded by lowercase
        s2 = re.sub(r'([a-z0-9])([A-Z])', r'\1_\2', s1)
        # Boundaries next to an existing underscore would otherwise double it
        return re.sub(r'_+', '_', s2).lower()