Initial commit

This commit is contained in:
Zhongwei Li
2025-11-29 18:51:24 +08:00
commit af30225584
13 changed files with 2319 additions and 0 deletions

View File

@@ -0,0 +1,16 @@
{
"name": "jeremy-adk-orchestrator",
"description": "Production ADK orchestrator for A2A protocol and multi-agent coordination on Vertex AI",
"version": "1.0.0",
"author": {
"name": "Jeremy Longshore",
"email": "jeremy@intentsolutions.io"
},
"skills": [
"./skills"
],
"agents": [
"./agents",
"./agent"
]
}

3
README.md Normal file
View File

@@ -0,0 +1,3 @@
# jeremy-adk-orchestrator
Production ADK orchestrator for A2A protocol and multi-agent coordination on Vertex AI

135
agent/.env.example Normal file
View File

@@ -0,0 +1,135 @@
# ADK Orchestrator Environment Configuration
# Copy to .env and fill in actual values
# Google Cloud Configuration
PROJECT_ID=your-project-id
LOCATION=us-central1
REGION=us-central1
# Vertex AI Configuration
VERTEX_AI_ENDPOINT=https://us-central1-aiplatform.googleapis.com
VERTEX_AI_API_VERSION=v1
# Agent Engine Configuration
AGENT_ENGINE_ENDPOINT=https://agent-engine.googleapis.com
AGENT_REGISTRY_URL=https://agent-engine.googleapis.com/v1/agents
# Memory Bank Configuration
MEMORY_BANK_CORPUS=adk-orchestrator-memory
MEMORY_BANK_TTL_DAYS=14
# Session Service Configuration
SESSION_SERVICE_TYPE=vertex-ai
SESSION_TTL_DAYS=30
SESSION_AUTO_SAVE=true
# Model Configuration
MODEL_NAME=models/gemini-2.0-flash-exp
MODEL_TEMPERATURE=0.7
MODEL_MAX_TOKENS=8192
MODEL_TOP_P=0.95
MODEL_TOP_K=40
# A2A Protocol Configuration
A2A_ENABLED=true
A2A_DISCOVERY_REFRESH_MINUTES=5
A2A_MAX_CONCURRENT_INVOCATIONS=50
A2A_DEFAULT_TIMEOUT_SECONDS=30
# Monitoring & Observability
ENABLE_TRACING=true
ENABLE_METRICS=true
ENABLE_LOGGING=true
LOG_LEVEL=INFO
TRACE_SAMPLING_RATE=0.1
# Security
ENABLE_AUTH=true
AUTH_TYPE=oauth2
OAUTH_CLIENT_ID=your-client-id
OAUTH_CLIENT_SECRET=your-client-secret
API_KEY=your-api-key
# Rate Limiting
RATE_LIMIT_ENABLED=true
RATE_LIMIT_RPM=1000
RATE_LIMIT_BURST=100
# Circuit Breaker
CIRCUIT_BREAKER_ENABLED=true
CIRCUIT_BREAKER_THRESHOLD=5
CIRCUIT_BREAKER_TIMEOUT=30
CIRCUIT_BREAKER_RESET_TIMEOUT=60
# Performance Tuning
MAX_WORKERS=10
CONNECTION_POOL_SIZE=20
REQUEST_TIMEOUT=300
KEEPALIVE_TIMEOUT=600
# Development Settings
DEBUG=false
DEVELOPMENT_MODE=false
HOT_RELOAD=false
# Testing Configuration
TEST_MODE=false
TEST_PROJECT_ID=test-project
TEST_MOCK_AGENTS=false
# Deployment Configuration
DEPLOYMENT_ENV=production
DEPLOYMENT_VERSION=1.0.0
DEPLOYMENT_REVISION=1
# Service Account (for local development)
GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account-key.json
# Additional GCP Services
ENABLE_CLOUD_STORAGE=true
STORAGE_BUCKET=adk-orchestrator-storage
ENABLE_BIGQUERY=true
BIGQUERY_DATASET=adk_orchestrator
# Compliance
COMPLIANCE_MODE=R5
DATA_RETENTION_DAYS=14
ENABLE_AUDIT_LOG=true
AUDIT_LOG_BUCKET=adk-orchestrator-audit
# Feature Flags
ENABLE_PARALLEL_WORKFLOWS=true
ENABLE_LOOP_WORKFLOWS=true
ENABLE_TEAM_CREATION=true
ENABLE_AUTO_RECOVERY=true
ENABLE_HEALTH_MONITORING=true
# External Integrations
SLACK_WEBHOOK_URL=https://hooks.slack.com/services/YOUR/WEBHOOK/URL
PAGERDUTY_API_KEY=your-pagerduty-key
DATADOG_API_KEY=your-datadog-key
# Cache Configuration
CACHE_ENABLED=true
CACHE_TTL_SECONDS=3600
CACHE_MAX_SIZE_MB=512
# Database (if using Cloud SQL)
DATABASE_ENABLED=false
DATABASE_HOST=
DATABASE_PORT=5432
DATABASE_NAME=adk_orchestrator
DATABASE_USER=
DATABASE_PASSWORD=
# Redis (if using Memorystore)
REDIS_ENABLED=false
REDIS_HOST=
REDIS_PORT=6379
REDIS_PASSWORD=
# Custom Settings
CUSTOM_AGENT_TIMEOUT_MULTIPLIER=1.5
CUSTOM_RETRY_ATTEMPTS=3
CUSTOM_BACKOFF_BASE=2
CUSTOM_MAX_BACKOFF=60

12
agent/__init__.py Normal file
View File

@@ -0,0 +1,12 @@
# Copyright 2025 Jeremy Longshore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
"""ADK Orchestrator Agent - A2A protocol manager for Vertex AI Engine"""
from .agent import get_agent, create_runner, root_agent
__all__ = ["get_agent", "create_runner", "root_agent"]
__version__ = "1.0.0"

147
agent/agent.py Normal file
View File

@@ -0,0 +1,147 @@
# Copyright 2025 Jeremy Longshore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ADK Orchestrator Agent - Production-ready A2A protocol manager for Vertex AI Engine"""
from typing import Optional, List, Dict, Any
from google.adk.agents import LlmAgent
from google.adk.tools import FunctionTool
from google.adk.runners import Runner
from google.adk.sessions import VertexAiSessionService
from google.adk.memory import VertexAiMemoryBankService
from google.adk.artifacts import InMemoryArtifactService
from google.adk.auth.credential_service import InMemoryCredentialService
from .tools import (
discover_agents,
invoke_agent,
manage_agent_session,
validate_agent_card,
deploy_to_vertex_engine,
monitor_agent_health,
create_agent_team,
coordinate_workflow
)
def get_agent() -> LlmAgent:
    """Build the ADK Orchestrator agent configured for A2A protocol management.

    This agent specializes in:
    - Agent discovery via AgentCards
    - A2A protocol implementation
    - Multi-agent coordination
    - Vertex AI Engine deployment
    - Session and memory management
    - Production monitoring

    Returns:
        A configured LlmAgent instance.

    Raises:
        FileNotFoundError: If system-prompt.md is missing next to this module.
    """
    from pathlib import Path

    # Load the system prompt relative to this module, not the process CWD.
    # The previous open('system-prompt.md') broke whenever the agent was
    # launched from a different working directory.
    prompt_path = Path(__file__).with_name("system-prompt.md")
    system_instruction = prompt_path.read_text(encoding="utf-8")
    return LlmAgent(
        name="adk-orchestrator",
        model="models/gemini-2.0-flash-exp",  # Latest Gemini for orchestration
        description="Production ADK orchestrator for A2A protocol and multi-agent coordination",
        instruction=system_instruction,
        tools=[
            # Agent Discovery & Management
            FunctionTool(discover_agents),
            FunctionTool(invoke_agent),
            FunctionTool(validate_agent_card),
            # Session & Memory Management
            FunctionTool(manage_agent_session),
            # Deployment & Operations
            FunctionTool(deploy_to_vertex_engine),
            FunctionTool(monitor_agent_health),
            # Multi-Agent Coordination
            FunctionTool(create_agent_team),
            FunctionTool(coordinate_workflow),
        ],
        # Enable features for production
        enable_parallel_tool_calls=True,
        enable_code_execution=True,
        context_window_size=2_000_000,  # 2M token context for Gemini 2.0
        output_key="orchestration_result",
        metadata={
            "version": "1.0.0",
            "deployment_target": "vertex-ai-engine",
            "capabilities": ["a2a", "multi-agent", "session-management", "monitoring"],
            "compliance": "R5-ready"
        }
    )
async def create_runner() -> Runner:
    """Create a production-ready runner with dual memory (Session + Memory Bank).

    This configuration provides:
    - VertexAiSessionService for conversation state (30-day TTL)
    - VertexAiMemoryBankService for long-term memory (14-day TTL, R5)
    - Auto-save callback for R5 compliance
    - Proper resource management

    Project, location, and corpus are read from the PROJECT_ID / LOCATION /
    MEMORY_BANK_CORPUS environment variables (see .env.example); the previous
    hard-coded "your-project-id" placeholder could never reach a real project.
    """
    import os

    project_id = os.getenv("PROJECT_ID", "your-project-id")
    location = os.getenv("LOCATION", "us-central1")
    # Initialize services
    session_service = VertexAiSessionService(
        project_id=project_id,
        location=location,
        session_ttl_days=30
    )
    memory_service = VertexAiMemoryBankService(
        project_id=project_id,
        location=location,
        corpus_name=os.getenv("MEMORY_BANK_CORPUS", "adk-orchestrator-memory"),
        ttl_days=14  # R5 compliance
    )
    # Create runner with production configuration
    return Runner(
        app_name="adk-orchestrator",
        agent=get_agent(),
        session_service=session_service,
        memory_service=memory_service,
        artifact_service=InMemoryArtifactService(),
        credential_service=InMemoryCredentialService(),
        # Auto-save session to memory for R5 compliance
        callbacks={
            "after_session": auto_save_session_to_memory
        }
    )
async def auto_save_session_to_memory(session, memory_service):
    """Persist a finished session into the long-term memory bank.

    Ensures R5 compliance by writing session state to memory after each
    interaction. Silently does nothing when either the session or the
    memory service is unavailable.
    """
    if not (session and memory_service):
        return
    await memory_service.save_session(
        session_id=session.id,
        session_data=session.to_dict(),
        metadata={
            "timestamp": session.updated_at,
            "agent": "adk-orchestrator",
            "compliance": "R5",
        },
    )
# Export for ADK CLI
# Module-level instance discovered by convention (see agent/deploy.yaml
# entrypoint: agent.root_agent).
root_agent = get_agent()

179
agent/agent_card.yaml Normal file
View File

@@ -0,0 +1,179 @@
# Agent Card for A2A Protocol Discovery
# Compliant with A2A Specification v1.0
name: adk-orchestrator
description: |
Production-grade ADK orchestrator specializing in Agent-to-Agent (A2A) protocol
management, multi-agent coordination, and Vertex AI Engine deployment. Provides
comprehensive agent discovery, invocation, session management, and workflow
orchestration capabilities.
version: 1.0.0
url: https://agent-engine.googleapis.com/v1/agents/adk-orchestrator
# Agent Provider Information
provider:
name: Jeremy Longshore
organization: Claude Code Plugins
contact: jeremy@intentsolutions.io
# Agent Capabilities
capabilities:
# Core Capabilities
a2a_protocol: true
multi_agent_coordination: true
session_management: true
memory_bank: true
vertex_deployment: true
# Workflow Patterns
sequential_workflows: true
parallel_workflows: true
loop_workflows: true
# Advanced Features
agent_discovery: true
health_monitoring: true
auto_recovery: true
compliance: ["R5"]
# Performance
max_concurrent_agents: 100
max_workflow_depth: 10
timeout_seconds: 300
# Agent Skills (What this agent can do)
skills:
- id: discover-agents
name: Agent Discovery
description: Discovers and validates available agents via A2A protocol
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["discovery", "a2a", "registry"]
- id: invoke-agent
name: Agent Invocation
description: Invokes specific agents with proper A2A protocol handling
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["invocation", "a2a", "execution"]
- id: manage-session
name: Session Management
description: Creates and manages stateful agent sessions
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["session", "state", "memory"]
- id: validate-card
name: Card Validation
description: Validates agent cards against A2A specification
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["validation", "a2a", "compliance"]
- id: deploy-vertex
name: Vertex Deployment
description: Deploys agents to Vertex AI Engine
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["deployment", "vertex", "gcp"]
- id: monitor-health
name: Health Monitoring
description: Monitors agent health and performance metrics
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["monitoring", "health", "metrics"]
- id: create-team
name: Team Creation
description: Creates coordinated agent teams for complex tasks
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["team", "coordination", "multi-agent"]
- id: coordinate-workflow
name: Workflow Coordination
description: Orchestrates multi-agent workflows (sequential, parallel, loop)
input_modes: ["application/json"]
output_modes: ["application/json"]
tags: ["workflow", "orchestration", "patterns"]
# Input/Output Configuration
default_input_modes: ["application/json", "text/plain"]
default_output_modes: ["application/json", "text/plain"]
# Security Configuration
security_schemes:
bearer:
type: http
scheme: bearer
description: OAuth 2.0 Bearer Token
api_key:
type: apiKey
in: header
name: X-API-Key
description: API Key authentication
# Authentication Support
supports_authenticated_extended_card: true
# Operational Configuration
operational:
# Endpoints
health_endpoint: /health
metrics_endpoint: /metrics
# Rate Limiting
rate_limit:
requests_per_minute: 1000
burst_size: 100
# Timeouts
default_timeout_seconds: 30
max_timeout_seconds: 300
# Resource Limits
max_memory_mb: 2048
max_cpu_cores: 4
# Compliance & Standards
compliance:
- standard: R5
description: "Compliant with R5 data retention and session management"
- standard: SOC2
description: "Follows SOC2 security and availability principles"
- standard: GDPR
description: "GDPR-compliant data handling and privacy"
# Dependencies (Other agents this agent may invoke)
dependencies:
- name: "*"
description: "Can discover and invoke any A2A-compliant agent"
optional: true
# Monitoring & Observability
observability:
tracing: true
metrics: true
logging: true
trace_endpoint: https://trace.googleapis.com
metrics_endpoint: https://monitoring.googleapis.com
log_endpoint: https://logging.googleapis.com
# Metadata
metadata:
created_at: "2025-11-19T00:00:00Z"
updated_at: "2025-11-19T00:00:00Z"
documentation: https://github.com/jeremylongshore/claude-code-plugins
repository: https://github.com/jeremylongshore/claude-code-plugins
license: Apache-2.0
tags:
- adk
- orchestrator
- a2a
- vertex-ai
- multi-agent
- production

220
agent/deploy.yaml Normal file
View File

@@ -0,0 +1,220 @@
# Vertex AI Engine Deployment Configuration
# For production deployment of ADK Orchestrator
apiVersion: agents.vertex.ai/v1
kind: AgentDeployment
metadata:
name: adk-orchestrator
labels:
app: adk-orchestrator
version: "1.0.0"
environment: production
team: platform
spec:
# Agent Configuration
agent:
source: ./
entrypoint: agent.root_agent
runtime: python310
# Resource Configuration
resources:
cpu: 4
memory: 8Gi
gpu: 0 # No GPU required for orchestration
# Scaling Configuration
replicas:
min: 2
max: 10
target_cpu_utilization: 70
# Environment Variables
env:
- name: PROJECT_ID
valueFrom:
configMapRef:
name: gcp-config
key: project_id
- name: LOCATION
value: us-central1
- name: LOG_LEVEL
value: INFO
- name: ENABLE_TRACING
value: "true"
- name: ENABLE_METRICS
value: "true"
# Service Configuration
service:
type: LoadBalancer
port: 8080
targetPort: 8080
annotations:
cloud.google.com/neg: '{"ingress": true}'
cloud.google.com/backend-config: '{"ports": {"8080":"adk-orchestrator-backend"}}'
# Health Checks
healthCheck:
path: /health
port: 8080
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
successThreshold: 1
failureThreshold: 3
# Liveness Probe
livenessProbe:
httpGet:
path: /health/live
port: 8080
initialDelaySeconds: 45
periodSeconds: 10
# Readiness Probe
readinessProbe:
httpGet:
path: /health/ready
port: 8080
initialDelaySeconds: 30
periodSeconds: 5
# Network Policy
networkPolicy:
ingress:
- from:
- namespaceSelector:
matchLabels:
name: agent-engine
ports:
- protocol: TCP
port: 8080
# Security Context
securityContext:
runAsNonRoot: true
runAsUser: 1000
fsGroup: 1000
allowPrivilegeEscalation: false
# Service Account
serviceAccount:
name: adk-orchestrator-sa
annotations:
iam.gke.io/gcp-service-account: adk-orchestrator@PROJECT_ID.iam.gserviceaccount.com
# Monitoring
monitoring:
enabled: true
prometheus:
enabled: true
port: 9090
stackdriver:
enabled: true
projectId: ${PROJECT_ID}
# Logging
logging:
level: INFO
format: json
stackdriver:
enabled: true
# Tracing
tracing:
enabled: true
sampling_rate: 0.1
exporter: stackdriver
# Secrets
secrets:
- name: api-keys
mountPath: /etc/secrets/api-keys
- name: oauth-config
mountPath: /etc/secrets/oauth
# Config Maps
configMaps:
- name: agent-config
mountPath: /etc/config/agent
- name: a2a-registry
mountPath: /etc/config/registry
# Persistent Storage (for session data)
storage:
- name: session-data
size: 10Gi
storageClass: standard-rwo
mountPath: /data/sessions
# Memory Bank Configuration
memoryBank:
enabled: true
corpus: adk-orchestrator-memory
ttl_days: 14
search_index: semantic
# Session Service Configuration
sessionService:
type: vertex-ai
ttl_days: 30
auto_save: true
# A2A Configuration
a2a:
enabled: true
discovery_endpoint: https://agent-engine.googleapis.com/v1/agents
registry_refresh_minutes: 5
max_concurrent_invocations: 50
# Circuit Breaker
circuitBreaker:
enabled: true
threshold: 5
timeout: 30s
resetTimeout: 60s
# Rate Limiting
rateLimiting:
enabled: true
requests_per_minute: 1000
burst_size: 100
# Deployment Strategy
strategy:
type: RollingUpdate
maxSurge: 1
maxUnavailable: 0
# PodDisruptionBudget
podDisruptionBudget:
minAvailable: 1
# Annotations
annotations:
deployment.vertex.ai/revision: "1"
deployment.vertex.ai/managed-by: "adk-cli"
---
# Backend Configuration for Cloud Load Balancer
apiVersion: cloud.google.com/v1
kind: BackendConfig
metadata:
name: adk-orchestrator-backend
spec:
timeoutSec: 300
connectionDraining:
drainingTimeoutSec: 60
healthCheck:
checkIntervalSec: 10
timeoutSec: 5
healthyThreshold: 1
unhealthyThreshold: 3
type: HTTP
requestPath: /health
port: 8080
sessionAffinity:
affinityType: "CLIENT_IP"
affinityCookieTtlSec: 3600

33
agent/requirements.txt Normal file
View File

@@ -0,0 +1,33 @@
# ADK Orchestrator Agent Dependencies
# Python 3.10+ required
# Core ADK SDK
google-adk>=1.15.1
# Google Cloud dependencies
google-cloud-aiplatform>=1.73.0
google-cloud-logging>=3.11.0
google-cloud-monitoring>=2.15.0
google-cloud-trace>=1.13.0
# A2A Protocol support
a2a>=0.1.0 # When available
# Async HTTP client
httpx>=0.27.0
# Data validation
pydantic>=2.9.0
# Utilities
python-dotenv>=1.0.1
structlog>=24.4.0
tenacity>=9.0.0 # For retry logic
# Development dependencies
pytest>=8.3.0
pytest-asyncio>=0.24.0
pytest-cov>=6.0.0
black>=24.10.0
isort>=5.13.0
mypy>=1.11.0

194
agent/system-prompt.md Normal file
View File

@@ -0,0 +1,194 @@
# ADK Orchestrator System Prompt
You are the ADK Orchestrator, a production-grade agent specializing in Agent-to-Agent (A2A) protocol management and multi-agent coordination for Vertex AI Engine deployments.
## Core Responsibilities
### 1. Agent Discovery & Registration
- Discover available agents via AgentCard protocol
- Validate agent capabilities and compatibility
- Maintain registry of active agents
- Monitor agent health and availability
### 2. A2A Protocol Management
- Implement full A2A protocol specification
- Handle agent-to-agent communication
- Manage authentication and authorization
- Coordinate request/response patterns
### 3. Multi-Agent Orchestration
- Create and manage agent teams
- Coordinate Sequential workflows
- Coordinate Parallel workflows
- Implement Loop patterns for iterative tasks
- Handle error recovery and retries
### 4. Session & Memory Management
- Manage agent sessions with VertexAiSessionService
- Persist context in VertexAiMemoryBankService
- Implement auto-save for R5 compliance
- Handle session recovery and migration
### 5. Vertex AI Engine Deployment
- Deploy agents to Vertex AI Engine
- Configure scaling and resource allocation
- Set up monitoring and alerting
- Manage production rollouts
## Operational Guidelines
### Discovery Process
When discovering agents:
1. Check for AgentCard at standard endpoints
2. Validate card schema and required fields
3. Test agent connectivity and response
4. Register in active agent pool
5. Set up health monitoring
### Invocation Protocol
When invoking agents:
1. Validate request against agent capabilities
2. Set up session context
3. Handle authentication if required
4. Execute request with timeout
5. Process response and handle errors
6. Update session state
### Coordination Patterns
#### Sequential Workflow
```
Agent A → Agent B → Agent C
Each agent completes before next starts
```
#### Parallel Workflow
```
→ Agent A →
Start → Agent B → Merge
→ Agent C →
All agents run simultaneously
```
#### Loop Workflow
```
Start → Agent A → Condition → (repeat or exit)
Iterate until condition met
```
### Memory Management
- Save sessions after each interaction
- Index by agent, timestamp, and task
- Implement 14-day TTL for compliance
- Enable semantic search across memories
- Support memory-based agent selection
### Production Standards
- All operations must be idempotent
- Implement circuit breakers for failing agents
- Log all interactions for audit trail
- Monitor latency and error rates
- Support graceful degradation
## Error Handling
### Agent Failures
- Retry with exponential backoff (max 3 attempts)
- Fall back to alternative agents if available
- Log failure details for debugging
- Alert on repeated failures
### Network Issues
- Implement request timeout (30s default)
- Handle partial responses
- Queue requests during outages
- Provide status updates to users
### Data Validation
- Validate all inputs and outputs
- Sanitize data before passing between agents
- Check response schemas
- Handle malformed responses gracefully
## Security Requirements
### Authentication
- Validate agent credentials
- Implement OAuth 2.0 flows
- Support service account authentication
- Manage token refresh
### Authorization
- Check agent permissions
- Implement role-based access
- Audit all authorization decisions
- Support policy-based controls
### Data Protection
- Encrypt sensitive data in transit
- Implement PII detection and masking
- Support data residency requirements
- Enable audit logging
## Performance Targets
- Agent discovery: < 1 second
- Agent invocation: < 5 seconds (excluding agent processing)
- Session save: < 500ms
- Memory search: < 1 second
- Health check: < 100ms
## Monitoring & Alerting
Track and alert on:
- Agent availability (< 99.9% triggers alert)
- Response times (p99 > 10s triggers alert)
- Error rates (> 1% triggers alert)
- Memory usage (> 80% triggers alert)
- Session failures (any failure triggers alert)
## Compliance Requirements
### R5 Compliance
- Auto-save all sessions to memory
- Maintain 14-day data retention
- Implement proper data deletion
- Support compliance audits
### Logging Standards
- Structured JSON logging
- Include correlation IDs
- Log at appropriate levels
- Support log aggregation
## Best Practices
1. **Always validate before invoking** - Check agent capabilities match request
2. **Use appropriate coordination pattern** - Sequential for dependent, Parallel for independent
3. **Implement proper error handling** - Never fail silently
4. **Monitor continuously** - Track all metrics in production
5. **Document decisions** - Log why specific agents or patterns were chosen
6. **Optimize for latency** - Cache agent cards, reuse sessions
7. **Plan for scale** - Design for 1000+ agent invocations per minute
## Response Format
When responding to orchestration requests, always provide:
```json
{
"status": "success|partial|failure",
"agents_invoked": ["agent1", "agent2"],
"coordination_pattern": "sequential|parallel|loop",
"results": {
"agent1": { ... },
"agent2": { ... }
},
"session_id": "uuid",
"memory_saved": true,
"latency_ms": 1234,
"errors": []
}
```
Remember: You are the conductor of the agent orchestra. Ensure harmony, handle discord, and deliver a perfect performance every time.

513
agent/tools.py Normal file
View File

@@ -0,0 +1,513 @@
# Copyright 2025 Jeremy Longshore
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
"""Tools for ADK Orchestrator Agent - A2A protocol and coordination functions"""
import asyncio
import json
from typing import Dict, List, Optional, Any, Union
from datetime import datetime
import httpx
from pydantic import BaseModel, Field
# Pydantic models for structured data
class AgentCard(BaseModel):
    """Agent Card for A2A protocol discovery"""
    # Required identity fields.
    name: str
    description: str
    url: str
    version: str = "1.0.0"
    # Capability flags advertised by the agent (free-form key -> value).
    capabilities: Dict[str, Any] = Field(default_factory=dict)
    # Skill descriptors (e.g. id/name/description entries; see agent_card.yaml).
    skills: List[Dict[str, Any]] = Field(default_factory=list)
    # Supported MIME types for inputs/outputs; plain text by default.
    input_modes: List[str] = Field(default_factory=lambda: ["text/plain"])
    output_modes: List[str] = Field(default_factory=lambda: ["text/plain"])
class AgentInvocation(BaseModel):
    """Request structure for agent invocation"""
    # Target agent name (as discovered via the registry).
    agent_name: str
    # JSON-serializable payload forwarded as the A2A request input.
    input_data: Dict[str, Any]
    # Per-call timeout in seconds.
    timeout_seconds: int = 30
    # Optional stateful session to attach the call to.
    session_id: Optional[str] = None
    # Optional token sent as an Authorization: Bearer header when set.
    auth_token: Optional[str] = None
class WorkflowConfig(BaseModel):
    """Configuration for multi-agent workflows"""
    pattern: str  # "sequential", "parallel", "loop"
    # Agents to run, in order for sequential/loop patterns.
    agents: List[str]
    # Safety cap on loop-pattern iterations.
    max_iterations: int = 10
    timeout_seconds: int = 300
    # "fail_fast" aborts on the first failed step; "continue_on_error" proceeds.
    error_strategy: str = "fail_fast"  # or "continue_on_error"
# Tool Functions
async def discover_agents(
    registry_url: Optional[str] = None,
    filter_capabilities: Optional[List[str]] = None
) -> Dict[str, Any]:
    """Discovers available agents via A2A protocol.

    Args:
        registry_url: Optional registry endpoint (defaults to the Vertex AI
            Engine agent registry)
        filter_capabilities: Optional capability filter. An agent is kept when
            it advertises at least ONE of the listed capabilities (OR match).

    Returns:
        Dictionary containing discovered agents and their cards, plus the
        registry queried and an ISO timestamp. On failure returns
        status "error" with the message and an empty agent list.
    """
    try:
        # Default to Vertex AI Engine agent registry
        if not registry_url:
            registry_url = "https://agent-engine.googleapis.com/v1/agents"
        # In production, this would make actual HTTP requests
        # For now, return example structure
        async with httpx.AsyncClient() as client:
            # Discover agents from registry
            # response = await client.get(registry_url)
            # agents = response.json()
            # Example agents (would come from actual discovery)
            example_agents = [
                {
                    "name": "data-analyst",
                    "description": "Analyzes data and generates insights",
                    "url": "https://agent-engine.googleapis.com/v1/agents/data-analyst",
                    "capabilities": ["sql", "visualization", "statistics"],
                },
                {
                    "name": "code-generator",
                    "description": "Generates code in multiple languages",
                    "url": "https://agent-engine.googleapis.com/v1/agents/code-generator",
                    "capabilities": ["python", "javascript", "sql"],
                }
            ]
            if filter_capabilities:
                # OR-match: keep agents advertising any requested capability.
                discovered_agents = [
                    agent for agent in example_agents
                    if any(cap in agent.get("capabilities", []) for cap in filter_capabilities)
                ]
            else:
                discovered_agents = list(example_agents)
        return {
            "status": "success",
            "discovered_count": len(discovered_agents),
            "agents": discovered_agents,
            "registry": registry_url,
            "timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
            "discovered_count": 0,
            "agents": []
        }
async def invoke_agent(
    invocation: AgentInvocation,
    retry_count: int = 3
) -> Dict[str, Any]:
    """Invoke a single agent via the A2A protocol.

    Args:
        invocation: Agent invocation configuration.
        retry_count: Number of retry attempts. NOTE: not yet consulted by
            this placeholder implementation.

    Returns:
        Dict describing the outcome: status is "success", "timeout", or
        "error", always alongside the agent name and session id.
    """
    try:
        request_id = f"req-{datetime.utcnow().timestamp()}"
        # JSON-RPC 2.0 envelope as used by the A2A protocol.
        a2a_request = {
            "jsonrpc": "2.0",
            "method": "agent.invoke",
            "params": {
                "input": invocation.input_data,
                "session_id": invocation.session_id,
            },
            "id": request_id,
        }
        # In production, make actual A2A protocol request
        async with httpx.AsyncClient() as client:
            # response = await client.post(
            #     f"{agent_url}/a2a",
            #     json=a2a_request,
            #     timeout=invocation.timeout_seconds,
            #     headers={"Authorization": f"Bearer {invocation.auth_token}"}
            # )
            # Example response
            response_data = {
                "jsonrpc": "2.0",
                "result": {
                    "output": f"Processed request for {invocation.agent_name}",
                    "metadata": {
                        "processing_time_ms": 1234,
                        "tokens_used": 567,
                    },
                },
                "id": a2a_request["id"],
            }
            return {
                "status": "success",
                "agent": invocation.agent_name,
                "result": response_data["result"],
                "session_id": invocation.session_id,
                "timestamp": datetime.utcnow().isoformat(),
            }
    except asyncio.TimeoutError:
        return {
            "status": "timeout",
            "agent": invocation.agent_name,
            "error": f"Agent invocation timed out after {invocation.timeout_seconds}s",
            "session_id": invocation.session_id,
        }
    except Exception as e:
        return {
            "status": "error",
            "agent": invocation.agent_name,
            "error": str(e),
            "session_id": invocation.session_id,
        }
async def manage_agent_session(
action: str, # "create", "get", "update", "delete"
session_id: Optional[str] = None,
session_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
"""Manages agent sessions for stateful interactions.
Args:
action: Session action to perform
session_id: Session identifier
session_data: Session data to store/update
Returns:
Session information and status
"""
try:
if action == "create":
# Create new session
new_session_id = f"session-{datetime.utcnow().timestamp()}"
return {
"status": "success",
"action": "created",
"session_id": new_session_id,
"created_at": datetime.utcnow().isoformat()
}
elif action == "get":
# Retrieve session
return {
"status": "success",
"action": "retrieved",
"session_id": session_id,
"data": session_data or {},
"retrieved_at": datetime.utcnow().isoformat()
}
elif action == "update":
# Update session
return {
"status": "success",
"action": "updated",
"session_id": session_id,
"updated_at": datetime.utcnow().isoformat()
}
elif action == "delete":
# Delete session
return {
"status": "success",
"action": "deleted",
"session_id": session_id,
"deleted_at": datetime.utcnow().isoformat()
}
else:
return {
"status": "error",
"error": f"Unknown action: {action}"
}
except Exception as e:
return {
"status": "error",
"action": action,
"error": str(e)
}
async def validate_agent_card(
    agent_url: str,
    strict_mode: bool = True
) -> Dict[str, Any]:
    """Validate an agent's card against the A2A specification.

    Args:
        agent_url: URL from which the agent card would be fetched.
        strict_mode: Whether to enforce strict validation (currently only
            reflected in the response's validation_mode field).

    Returns:
        status "valid" with the parsed card, or status "invalid" with the
        validation error and the offending URL.
    """
    try:
        async with httpx.AsyncClient() as client:
            # Fetch agent card
            # response = await client.get(f"{agent_url}/agent-card")
            # card_data = response.json()
            # Example payload until live fetching is wired up.
            card_data = {
                "name": "example-agent",
                "description": "An example agent",
                "url": agent_url,
                "version": "1.0.0",
                "capabilities": {"nlp": True, "code": True},
            }
            # Pydantic performs the structural validation.
            card = AgentCard(**card_data)
            mode = "strict" if strict_mode else "lenient"
            return {
                "status": "valid",
                "agent_card": card.model_dump(),
                "validation_mode": mode,
                "validated_at": datetime.utcnow().isoformat(),
            }
    except Exception as e:
        return {
            "status": "invalid",
            "error": str(e),
            "agent_url": agent_url,
        }
async def deploy_to_vertex_engine(
    agent_name: str,
    project_id: str,
    location: str = "us-central1",
    config: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Deploy an agent to Vertex AI Engine.

    Args:
        agent_name: Name of agent to deploy.
        project_id: GCP project ID.
        location: Deployment location.
        config: Deployment configuration; a standard production profile is
            applied when empty or omitted.

    Returns:
        status "deployed" with endpoint details, or "deployment_failed"
        with the error.
    """
    try:
        # Fall back to the standard production profile.
        deployment_config = config or {
            "machine_type": "n1-standard-4",
            "replica_count": 2,
            "auto_scaling": True,
        }
        # In production, use Vertex AI SDK
        # from google.cloud import aiplatform
        # aiplatform.init(project=project_id, location=location)
        # endpoint = aiplatform.Endpoint.create(...)
        endpoint_url = (
            f"https://{location}-aiplatform.googleapis.com/v1/projects/"
            f"{project_id}/locations/{location}/endpoints/{agent_name}"
        )
        return {
            "status": "deployed",
            "agent": agent_name,
            "project": project_id,
            "location": location,
            "endpoint": endpoint_url,
            "config": deployment_config,
            "deployed_at": datetime.utcnow().isoformat(),
        }
    except Exception as e:
        return {
            "status": "deployment_failed",
            "agent": agent_name,
            "error": str(e),
        }
async def monitor_agent_health(
    agent_names: List[str],
    include_metrics: bool = True
) -> Dict[str, Any]:
    """Report health status (and optional metrics) for deployed agents.

    Args:
        agent_names: List of agents to monitor.
        include_metrics: Attach a detailed metrics section per agent.

    Returns:
        Per-agent health reports plus a summary with healthy/unhealthy
        counts, or status "error" on failure.
    """
    try:
        reports: Dict[str, Any] = {}
        for name in agent_names:
            # In production, query actual health endpoints
            report: Dict[str, Any] = {
                "status": "healthy",
                "availability": 99.95,
                "response_time_ms": 234,
                "error_rate": 0.001,
            }
            if include_metrics:
                report["metrics"] = {
                    "requests_per_minute": 120,
                    "tokens_per_request": 450,
                    "cache_hit_rate": 0.85,
                    "memory_usage_mb": 512,
                }
            reports[name] = report
        healthy_count = sum(1 for r in reports.values() if r["status"] == "healthy")
        return {
            "status": "success",
            "timestamp": datetime.utcnow().isoformat(),
            "agents": reports,
            "summary": {
                "total_agents": len(agent_names),
                "healthy": healthy_count,
                "unhealthy": len(reports) - healthy_count,
            },
        }
    except Exception as e:
        return {
            "status": "error",
            "error": str(e),
        }
async def create_agent_team(
    team_name: str,
    agent_roles: Dict[str, str],
    coordination_rules: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """Creates a team of agents for collaborative tasks.

    Args:
        team_name: Name for the agent team.
        agent_roles: Mapping of agent names to their roles in the team.
        coordination_rules: Rules for agent coordination; when omitted (or
            empty), a default single-leader configuration is used.

    Returns:
        Dict with creation status, the assembled team configuration, and
        the agent count; or an error payload if construction fails.
    """
    try:
        # Default coordination: the first agent listed leads, with no
        # voting or consensus requirements.
        default_coordination = {
            "decision_maker": next(iter(agent_roles), None),
            "voting_enabled": False,
            "consensus_required": False
        }
        team_config = {
            "name": team_name,
            "agents": agent_roles,
            "coordination": coordination_rules or default_coordination,
            "created_at": datetime.utcnow().isoformat()
        }
        return {
            "status": "created",
            "team": team_config,
            "agent_count": len(agent_roles)
        }
    except Exception as exc:
        return {
            "status": "error",
            "error": str(exc)
        }
async def coordinate_workflow(
    workflow: WorkflowConfig,
    input_data: Dict[str, Any]
) -> Dict[str, Any]:
    """Coordinates multi-agent workflow execution.

    Supported patterns:
        sequential: agents run one after another; each successful agent's
            output (if any) feeds the next; an "error_strategy" of
            "fail_fast" stops at the first non-success result.
        parallel: all agents run concurrently on the same input.
        loop: agents run in repeated passes up to max_iterations, stopping
            early when a result reports {"complete": True}.
    Any other pattern value currently returns "completed" with no results.

    Args:
        workflow: Workflow configuration (pattern, agents, error strategy,
            iteration limits).
        input_data: Input data for the workflow.

    Returns:
        Dict with execution status, the pattern used, per-agent results,
        the measured execution time in ms, and a timestamp; or an error
        payload containing the exception message.
    """
    # Use the event loop's monotonic clock so execution_time_ms is a real
    # measurement instead of a hardcoded placeholder.
    started = asyncio.get_running_loop().time()
    try:
        results = []
        if workflow.pattern == "sequential":
            # Execute agents in sequence, threading each output forward.
            current_input = input_data
            for agent in workflow.agents:
                invocation = AgentInvocation(
                    agent_name=agent,
                    input_data=current_input
                )
                result = await invoke_agent(invocation)
                results.append(result)
                if result["status"] == "success":
                    # Pass output to next agent; fall back to the previous
                    # input when the agent produced no explicit output.
                    current_input = result.get("result", {}).get("output", current_input)
                elif workflow.error_strategy == "fail_fast":
                    break
        elif workflow.pattern == "parallel":
            # Execute agents concurrently; return_exceptions=True keeps one
            # failure from masking the other agents' results.
            tasks = [
                invoke_agent(AgentInvocation(agent_name=agent, input_data=input_data))
                for agent in workflow.agents
            ]
            results = await asyncio.gather(*tasks, return_exceptions=True)
        elif workflow.pattern == "loop":
            # Execute agents in repeated passes until an agent signals
            # completion or the iteration budget is exhausted.
            # NOTE(review): current_input is never updated here, so every
            # pass re-sends the original input — confirm this is intended.
            iteration = 0
            current_input = input_data
            while iteration < workflow.max_iterations:
                for agent in workflow.agents:
                    invocation = AgentInvocation(
                        agent_name=agent,
                        input_data=current_input
                    )
                    result = await invoke_agent(invocation)
                    results.append(result)
                    # Check loop condition (simplified): a "complete" flag
                    # ends both the inner pass and the outer loop.
                    if result.get("result", {}).get("complete", False):
                        iteration = workflow.max_iterations
                        break
                iteration += 1
        elapsed_ms = int((asyncio.get_running_loop().time() - started) * 1000)
        return {
            "status": "completed",
            "pattern": workflow.pattern,
            "results": results,
            "agents_invoked": workflow.agents,
            "execution_time_ms": elapsed_ms,
            "timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        return {
            "status": "error",
            "pattern": workflow.pattern,
            "error": str(e)
        }

View File

@@ -0,0 +1,401 @@
---
name: a2a-protocol-manager
description: Expert in Agent-to-Agent (A2A) protocol for communicating with Vertex AI ADK agents deployed on Agent Engine. Manages task submission, status checking, session management, and AgentCard discovery for multi-agent orchestration
model: sonnet
---
# A2A Protocol Manager
You are an expert in the Agent-to-Agent (A2A) Protocol for communicating between Claude Code and Vertex AI ADK agents deployed on the Agent Engine runtime.
## Core Responsibilities
### 1. Understanding A2A Protocol Architecture
The A2A protocol enables standardized communication between different agent systems. Key components:
```
Claude Code Plugin (You)
↓ HTTP/REST
AgentCard Discovery → Metadata about agent capabilities
Task Submission → POST /v1/agents/{agent_id}/tasks:send
Session Management → session_id for Memory Bank persistence
Status Polling → GET /v1/tasks/{task_id}
Result Retrieval → Task output or streaming results
```
### 2. AgentCard Discovery & Metadata
Before invoking an ADK agent, discover its capabilities via its AgentCard:
```python
import requests
def discover_agent_capabilities(agent_endpoint):
"""
Fetch AgentCard to understand agent's tools and capabilities.
AgentCard contains:
- name: Agent identifier
- description: What the agent does
- tools: Available tools the agent can use
- input_schema: Expected input format
- output_schema: Expected output format
"""
response = requests.get(f"{agent_endpoint}/.well-known/agent-card")
agent_card = response.json()
return {
"name": agent_card.get("name"),
"description": agent_card.get("description"),
"tools": agent_card.get("tools", []),
"capabilities": agent_card.get("capabilities", {}),
}
```
Example AgentCard for GCP Deployment Specialist:
```json
{
"name": "gcp-deployment-specialist",
"description": "Deploys and manages Google Cloud resources using Code Execution Sandbox with ADK orchestration",
"version": "1.0.0",
"tools": [
{
"name": "deploy_gke_cluster",
"description": "Create a GKE cluster",
"input_schema": {
"type": "object",
"properties": {
"cluster_name": {"type": "string"},
"node_count": {"type": "integer"},
"region": {"type": "string"}
},
"required": ["cluster_name", "node_count", "region"]
}
},
{
"name": "deploy_cloud_run",
"description": "Deploy a containerized service to Cloud Run",
"input_schema": {
"type": "object",
"properties": {
"service_name": {"type": "string"},
"image": {"type": "string"},
"region": {"type": "string"}
},
"required": ["service_name", "image", "region"]
}
}
],
"capabilities": {
"code_execution": true,
"memory_bank": true,
"async_tasks": true
}
}
```
### 3. Task Submission with Session Management
Submit tasks to ADK agents with proper session tracking for Memory Bank:
```python
import uuid
from typing import Dict, Any, Optional
class A2AClient:
def __init__(self, agent_endpoint: str, project_id: str):
self.agent_endpoint = agent_endpoint
self.project_id = project_id
self.session_id = None # Will be created per conversation
def send_task(
self,
message: str,
context: Optional[Dict[str, Any]] = None,
session_id: Optional[str] = None
) -> Dict[str, Any]:
"""
Send a task to the ADK agent via A2A protocol.
Args:
message: Natural language instruction
context: Additional context (project_id, region, etc.)
session_id: Conversation session ID for Memory Bank
Returns:
Task response with task_id for async operations
"""
# Create or reuse session ID
if session_id is None:
self.session_id = self.session_id or str(uuid.uuid4())
else:
self.session_id = session_id
payload = {
"message": message,
"session_id": self.session_id,
"context": context or {},
"config": {
"enable_code_execution": True,
"enable_memory_bank": True,
}
}
response = requests.post(
f"{self.agent_endpoint}/v1/tasks:send",
json=payload,
headers={
"Content-Type": "application/json",
"Authorization": f"Bearer {self._get_auth_token()}",
}
)
return response.json()
def get_task_status(self, task_id: str) -> Dict[str, Any]:
"""
Check status of a long-running task.
Returns:
{
"task_id": "...",
"status": "PENDING" | "RUNNING" | "SUCCESS" | "FAILURE",
"output": "...", # If completed
"error": "...", # If failed
"progress": 0.75 # Optional progress indicator
}
"""
response = requests.get(
f"{self.agent_endpoint}/v1/tasks/{task_id}",
headers={"Authorization": f"Bearer {self._get_auth_token()}"}
)
return response.json()
```
### 4. Handling Long-Running Operations
Many GCP operations (creating GKE clusters, deploying services) are asynchronous:
**Pattern 1: Submit and Poll**
```python
def execute_async_deployment(client, deployment_request):
"""
Submit deployment task and poll until completion.
"""
# Step 1: Submit task
task_response = client.send_task(
message=f"Deploy GKE cluster named {deployment_request['cluster_name']}",
context=deployment_request
)
task_id = task_response["task_id"]
print(f"✅ Task submitted: {task_id}")
# Step 2: Poll for completion
import time
while True:
status = client.get_task_status(task_id)
if status["status"] == "SUCCESS":
print(f"✅ Deployment succeeded!")
print(f"Output: {status['output']}")
return status["output"]
elif status["status"] == "FAILURE":
print(f"❌ Deployment failed!")
print(f"Error: {status['error']}")
raise Exception(status["error"])
elif status["status"] in ["PENDING", "RUNNING"]:
progress = status.get("progress", 0)
print(f"⏳ Status: {status['status']} ({progress*100:.0f}%)")
time.sleep(10) # Poll every 10 seconds
```
**Pattern 2: Immediate Response for User**
```python
def start_deployment_task(client, deployment_request):
"""
Submit task and return task_id immediately to user.
User can check status later.
"""
task_response = client.send_task(
message=f"Deploy GKE cluster named {deployment_request['cluster_name']}",
context=deployment_request
)
task_id = task_response["task_id"]
return {
"message": f"✅ Deployment task started!",
"task_id": task_id,
"check_status": f"Use /check-task-status {task_id} to monitor progress",
}
```
### 5. Memory Bank Integration
The session_id enables the ADK agent to remember context across multiple interactions:
**Multi-Turn Conversation Example**:
```
Turn 1:
User: "Deploy a GKE cluster named prod-cluster in us-central1"
Claude → ADK Agent (session_id: abc-123)
ADK: Creates cluster, stores context in Memory Bank
Turn 2:
User: "Now deploy a Cloud Run service that connects to that cluster"
Claude → ADK Agent (session_id: abc-123)
ADK: Retrieves cluster info from Memory Bank, deploys service with connection
Turn 3:
User: "What's the status of the cluster?"
Claude → ADK Agent (session_id: abc-123)
ADK: Knows which cluster from Memory Bank, returns current status
```
Implementation:
```python
class ConversationalA2AClient:
    def __init__(self, agent_endpoint: str, project_id: str):
        self.client = A2AClient(agent_endpoint, project_id)
self.conversation_history = []
def chat(self, user_message: str) -> str:
"""
Maintain conversational context via Memory Bank.
"""
# Session ID persists across conversation
result = self.client.send_task(
message=user_message,
context={
"conversation_history": self.conversation_history[-5:], # Last 5 turns
}
)
self.conversation_history.append({
"user": user_message,
"agent": result["output"]
})
return result["output"]
```
### 6. Multi-Agent Orchestration via A2A
Coordinate multiple ADK agents for complex workflows:
```python
class MultiAgentOrchestrator:
    def __init__(self, project_id: str):
        self.agents = {
            "deployer": A2AClient("https://deployer-agent.run.app", project_id),
            "validator": A2AClient("https://validator-agent.run.app", project_id),
            "monitor": A2AClient("https://monitor-agent.run.app", project_id),
        }
        self.session_id = str(uuid.uuid4())  # Shared session across agents
def deploy_with_validation(self, deployment_config):
"""
Orchestrate deployment with validation and monitoring.
"""
# Step 1: Validate configuration
validation_result = self.agents["validator"].send_task(
message="Validate this GKE configuration",
context=deployment_config,
session_id=self.session_id
)
if validation_result["status"] != "VALID":
return {"error": "Configuration validation failed"}
# Step 2: Deploy
deploy_result = self.agents["deployer"].send_task(
message="Deploy validated configuration",
context=deployment_config,
session_id=self.session_id # Can access validation context
)
task_id = deploy_result["task_id"]
# Step 3: Monitor deployment
monitor_result = self.agents["monitor"].send_task(
message=f"Monitor deployment task {task_id}",
context={"task_id": task_id},
session_id=self.session_id
)
return {
"validation": validation_result,
"deployment_task_id": task_id,
"monitoring_enabled": True
}
```
### 7. Error Handling & Retry Logic
```python
from tenacity import retry, stop_after_attempt, wait_exponential
class ResilientA2AClient(A2AClient):
@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10)
)
def send_task_with_retry(self, message: str, context: dict = None):
"""
Send task with automatic retry on transient failures.
"""
try:
return self.send_task(message, context)
except requests.exceptions.Timeout:
print("⏱️ Request timeout, retrying...")
raise
except requests.exceptions.ConnectionError:
print("🔌 Connection error, retrying...")
raise
```
## When to Use This Agent
Activate this agent when:
- Communicating with deployed ADK agents on Agent Engine
- Setting up multi-agent workflows
- Managing stateful conversations with Memory Bank
- Coordinating async GCP deployments
- Orchestrating ADK, LangChain, and Genkit agents
## Best Practices
1. **Always maintain session_id** for conversational context
2. **Poll async tasks** with exponential backoff
3. **Discover AgentCard** before invoking unknown agents
4. **Handle failures gracefully** with retries
5. **Log all interactions** for debugging
6. **Use structured context** (JSON objects, not freeform strings)
7. **Implement timeouts** for long-running operations
## Security Considerations
1. **Authentication**: Always include proper Authorization headers
2. **Input Validation**: Validate all user inputs before sending to ADK agents
3. **Least Privilege**: ADK agents run with Native Agent Identities (IAM principals)
4. **Audit Logging**: All A2A calls are logged in Cloud Logging
## References
- A2A Protocol Spec: https://google.github.io/adk-docs/a2a/
- ADK Documentation: https://google.github.io/adk-docs/
- Python SDK: `pip install google-adk`
- Agent Engine Overview: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/overview

81
plugin.lock.json Normal file
View File

@@ -0,0 +1,81 @@
{
"$schema": "internal://schemas/plugin.lock.v1.json",
"pluginId": "gh:jeremylongshore/claude-code-plugins-plus:plugins/ai-ml/jeremy-adk-orchestrator",
"normalized": {
"repo": null,
"ref": "refs/tags/v20251128.0",
"commit": "520e4b3d4caf2bdc12659b834a252a482fad044b",
"treeHash": "da069a2d7dd9b6e7b2112dcf8c87c197d9632939820df6c3ddc7267295fb2e62",
"generatedAt": "2025-11-28T10:18:53.894685Z",
"toolVersion": "publish_plugins.py@0.2.0"
},
"origin": {
"remote": "git@github.com:zhongweili/42plugin-data.git",
"branch": "master",
"commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390",
"repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data"
},
"manifest": {
"name": "jeremy-adk-orchestrator",
"description": "Production ADK orchestrator for A2A protocol and multi-agent coordination on Vertex AI",
"version": "1.0.0"
},
"content": {
"files": [
{
"path": "README.md",
"sha256": "05ad72e0c434a6c1f5781b1e3f77bcd6e02066e720e1770fbbb0a1e94acd2993"
},
{
"path": "agents/a2a-protocol-manager.md",
"sha256": "6ae2a7dad1d18b3b34fe932c4489e626b83395e0539b4e83f6f6cad733b31b22"
},
{
"path": "agent/requirements.txt",
"sha256": "e22d8822194cf3aed3b3e5817c9a4e132186f5d4ab9d3d06917a06507723925a"
},
{
"path": "agent/tools.py",
"sha256": "a9ac0a60df1a7887ae5725b8402731ca951b9fd6531c6ec2996d7ee2247dab14"
},
{
"path": "agent/deploy.yaml",
"sha256": "8123b9cca46af8b15b419f77eee2928ef446afba4ead17d72c3906233629bc34"
},
{
"path": "agent/__init__.py",
"sha256": "9df32a8eef8c72d42d62e8d32d84c830f22264962ba8e33e66e2d99d767263f2"
},
{
"path": "agent/agent.py",
"sha256": "e72f76a57f324b18d0463e97dd61e41f2ef4c9590e4d9e3fa97f3a4992a84077"
},
{
"path": "agent/.env.example",
"sha256": "048acab28f24a004bceee7349e851e16768412bba6110880ad2483c4d1576922"
},
{
"path": "agent/system-prompt.md",
"sha256": "90898152b1e5efc1e54dca30c6a2b65ba7548c35b76804fa35520b361881c780"
},
{
"path": "agent/agent_card.yaml",
"sha256": "288055fdf047058bbb1ee7c2ba3072c95d6c1e040d022d15212071e1b1a0d0e7"
},
{
"path": ".claude-plugin/plugin.json",
"sha256": "db4655fe5ecfb91f608914134eb38ec7bb6f104b8b26cbd37e74d37854778ad0"
},
{
"path": "skills/adk-deployment-specialist/SKILL.md",
"sha256": "71a6963ff9128f6c401358c467c924f22494f58dde6a2d5dfc2ea631fd784f7a"
}
],
"dirSha256": "da069a2d7dd9b6e7b2112dcf8c87c197d9632939820df6c3ddc7267295fb2e62"
},
"security": {
"scannedAt": null,
"scannerVersion": null,
"flags": []
}
}

View File

@@ -0,0 +1,385 @@
---
name: adk-deployment-specialist
description: |
A2A (Agent-to-Agent) protocol manager for Vertex AI ADK agents on Agent Engine.
Orchestrates communication between Claude Code and ADK agents via standardized A2A protocol: AgentCard discovery, task submission, status polling, and session management.
NEW 2025: Observability dashboard, Cloud Trace integration, BigQuery analytics export.
Triggers: "communicate with ADK agent", "A2A protocol", "send task to agent", "orchestrate agents", "discover agent capabilities"
allowed-tools: Read, Write, Edit, Grep, Glob, Bash
version: 1.0.1
---
## What This Skill Does
Expert in building and deploying production multi-agent systems using Google's Agent Development Kit (ADK). Handles agent orchestration (Sequential, Parallel, Loop), A2A protocol communication, Code Execution Sandbox for GCP operations, Memory Bank for stateful conversations, and deployment to Vertex AI Agent Engine.
### Core Capabilities
1. **ADK Agent Creation**: Build agents in Python (stable), Java (0.3.0), or Go (Nov 2025)
2. **Multi-Agent Orchestration**: Sequential/Parallel/Loop agent patterns
3. **A2A Protocol Management**: Agent-to-Agent communication and task delegation
4. **Code Execution**: Secure sandbox for running gcloud commands and Python/Go code
5. **Memory Bank**: Persistent conversation memory across sessions (14-day TTL)
6. **Production Deployment**: One-command deployment with `adk deploy`
7. **Observability**: Agent Engine UI dashboards, token tracking, error monitoring
## When This Skill Activates
### Trigger Phrases
- "Deploy ADK agent to Agent Engine"
- "Create multi-agent system with ADK"
- "Implement A2A protocol"
- "Use Code Execution Sandbox"
- "Set up Memory Bank for agent"
- "Orchestrate multiple agents"
- "Build ADK agent in Python/Java/Go"
- "Deploy to Vertex AI Agent Engine"
### Use Case Patterns
- Building GCP deployment automation agents
- Creating RAG agents with LangChain integration
- Orchestrating Genkit flows with ADK supervisors
- Implementing stateful conversational agents
- Deploying secure code execution environments
## How It Works
### Phase 1: Agent Architecture Design
```
User Request → Analyze:
- Single agent vs multi-agent system?
- Tools needed (Code Exec, Memory Bank, custom tools)?
- Orchestration pattern (Sequential, Parallel, Loop)?
- Integration with LangChain/Genkit?
- Deployment target (local, Agent Engine, Cloud Run)?
```
### Phase 2: ADK Agent Implementation
**Simple Agent (Python)**:
```python
from google import adk
# Define agent with tools
agent = adk.Agent(
model="gemini-2.5-flash",
tools=[
adk.tools.CodeExecution(), # Secure sandbox
adk.tools.MemoryBank(), # Persistent memory
],
system_instruction="""
You are a GCP deployment specialist.
Help users deploy resources securely using gcloud commands.
"""
)
# Run agent
response = agent.run("Deploy a GKE cluster named prod in us-central1")
print(response)
```
**Multi-Agent Orchestrator (Python)**:
```python
from google import adk
# Define specialized sub-agents
validator_agent = adk.Agent(
model="gemini-2.5-flash",
system_instruction="Validate GCP configurations"
)
deployer_agent = adk.Agent(
model="gemini-2.5-flash",
tools=[adk.tools.CodeExecution()],
system_instruction="Deploy validated GCP resources"
)
monitor_agent = adk.Agent(
model="gemini-2.5-flash",
system_instruction="Monitor deployment status"
)
# Orchestrate with Sequential pattern
orchestrator = adk.SequentialAgent(
agents=[validator_agent, deployer_agent, monitor_agent],
system_instruction="Coordinate validation → deployment → monitoring"
)
result = orchestrator.run("Deploy a production GKE cluster")
```
### Phase 3: Code Execution Integration
The Code Execution Sandbox provides:
- **Security**: Isolated environment, no access to your system
- **State Persistence**: 14-day memory, configurable TTL
- **Stateful Sessions**: Builds on previous executions
```python
# Agent with Code Execution
agent = adk.Agent(
model="gemini-2.5-flash",
tools=[adk.tools.CodeExecution()],
system_instruction="""
Execute gcloud commands in the secure sandbox.
Remember previous operations in this session.
"""
)
# Turn 1: Create cluster
agent.run("Create GKE cluster named dev-cluster with 3 nodes")
# Sandbox executes: gcloud container clusters create dev-cluster --num-nodes=3
# Turn 2: Deploy to cluster (remembers cluster from Turn 1)
agent.run("Deploy my-app:latest to that cluster")
# Sandbox remembers dev-cluster, executes kubectl commands
```
### Phase 4: Memory Bank Integration
Persistent conversation memory across sessions:
```python
agent = adk.Agent(
model="gemini-2.5-flash",
tools=[adk.tools.MemoryBank()],
system_instruction="Remember user preferences and project context"
)
# Session 1 (Monday)
agent.run("I prefer deploying to us-central1 region", session_id="user-123")
# Session 2 (Wednesday) - same session_id
agent.run("Deploy a Cloud Run service", session_id="user-123")
# Agent remembers: uses us-central1 automatically
```
### Phase 5: A2A Protocol Deployment
Deploy agent to Agent Engine with A2A endpoint:
```bash
# Install ADK
pip install google-adk
# Deploy with one command
adk deploy \
--agent-file agent.py \
--project-id my-project \
--region us-central1 \
--service-name gcp-deployer-agent
```
Agent Engine creates:
- **A2A Endpoint**: `https://gcp-deployer-agent-{hash}.run.app`
- **AgentCard**: `/.well-known/agent-card` metadata
- **Task API**: `/v1/tasks:send` for task submission
- **Status API**: `/v1/tasks/{task_id}` for polling
### Phase 6: Calling from Claude
Once deployed, Claude can invoke via A2A protocol:
```python
# In Claude Code plugin / external script
import requests
def invoke_adk_agent(message, session_id=None):
"""
Call deployed ADK agent via A2A protocol.
"""
response = requests.post(
"https://gcp-deployer-agent-xyz.run.app/v1/tasks:send",
json={
"message": message,
"session_id": session_id or "claude-session-123",
"config": {
"enable_code_execution": True,
"enable_memory_bank": True,
}
},
headers={"Authorization": f"Bearer {get_token()}"}
)
return response.json()
# Use from Claude
result = invoke_adk_agent("Deploy GKE cluster named prod-api")
```
## Workflow Examples
### Example 1: GCP Deployment Agent
**User**: "Create an ADK agent that deploys GCP resources"
**Implementation**:
```python
from google import adk
deployment_agent = adk.Agent(
model="gemini-2.5-flash",
tools=[
adk.tools.CodeExecution(),
adk.tools.MemoryBank(),
],
system_instruction="""
You are a GCP deployment specialist.
CAPABILITIES:
- Deploy GKE clusters
- Deploy Cloud Run services
- Deploy Vertex AI Pipelines
- Manage IAM permissions
- Monitor deployments
SECURITY:
- Validate all configurations before deployment
- Use least-privilege IAM
- Log all operations
- Never expose credentials
"""
)
# Deploy to Agent Engine
# $ adk deploy --agent-file deployment_agent.py --service-name gcp-deployer
```
### Example 2: Multi-Agent RAG System
**User**: "Build a RAG system with ADK orchestrating a LangChain retriever"
**Implementation**:
```python
from google import adk
from langchain.retrievers import VertexAISearchRetriever
# Sub-Agent 1: LangChain RAG
class RAGAgent(adk.Agent):
def __init__(self):
self.retriever = VertexAISearchRetriever(...)
super().__init__(model="gemini-2.5-flash")
def retrieve_docs(self, query):
return self.retriever.get_relevant_documents(query)
# Sub-Agent 2: ADK Answer Generator
answer_agent = adk.Agent(
model="gemini-2.5-pro", # More powerful for final answer
system_instruction="Generate comprehensive answers from retrieved docs"
)
# Orchestrator
orchestrator = adk.SequentialAgent(
agents=[RAGAgent(), answer_agent],
system_instruction="First retrieve docs, then generate answer"
)
```
### Example 3: Async Deployment with Status Polling
**User**: "Deploy a GKE cluster and monitor progress"
**Implementation**:
```python
# Submit async task
task_response = invoke_adk_agent(
"Deploy GKE cluster named prod-api with 5 nodes in us-central1"
)
task_id = task_response["task_id"]
print(f"✅ Task submitted: {task_id}")
# Poll for status
import time
while True:
status = requests.get(
f"https://gcp-deployer-agent-xyz.run.app/v1/tasks/{task_id}",
headers={"Authorization": f"Bearer {get_token()}"}
).json()
if status["status"] == "SUCCESS":
print(f"✅ Cluster deployed!")
break
elif status["status"] == "FAILURE":
print(f"❌ Deployment failed: {status['error']}")
break
else:
print(f"⏳ Status: {status['status']} ({status.get('progress', 0)*100}%)")
time.sleep(10)
```
## Production Best Practices
1. **Agent Identities**: ADK agents get Native Agent Identities (IAM principals)
2. **Least Privilege**: Grant minimum required permissions
3. **VPC Service Controls**: Enable for enterprise security
4. **Model Armor**: Protects against prompt injection
5. **Session Management**: Use consistent session_ids for Memory Bank
6. **Error Handling**: Implement retries with exponential backoff
7. **Observability**: Monitor via Agent Engine UI dashboard
## Tool Permissions
- **Read**: Analyze existing agent code
- **Write**: Create new agent files
- **Edit**: Modify agent configurations
- **Grep**: Find integration points
- **Glob**: Locate related files
- **Bash**: Install ADK, deploy agents, run tests
## Integration Patterns
### ADK + Genkit
```python
# Use Genkit for flows, ADK for orchestration
genkit_flow_agent = create_genkit_flow()
orchestrator = adk.SequentialAgent(
agents=[validator, genkit_flow_agent, monitor]
)
```
### ADK + LangChain
```python
# LangChain for RAG, ADK for multi-agent coordination
langchain_rag = create_langchain_retriever()
orchestrator = adk.ParallelAgent(
agents=[langchain_rag, fact_checker, answer_generator]
)
```
## Deployment Commands
```bash
# Install ADK
pip install google-adk # Python
go get google.golang.org/adk # Go
# Deploy to Agent Engine
adk deploy \
--agent-file my_agent.py \
--project-id my-project \
--region us-central1 \
--service-name my-agent
# Deploy to Cloud Run (custom)
gcloud run deploy my-agent \
--source . \
--region us-central1
# Deploy locally for testing
adk run --agent-file my_agent.py
```
## Version History
- **1.0.0** (2025): ADK Preview with Python/Java/Go support, Agent Engine GA, Code Execution Sandbox, Memory Bank
## References
- ADK Docs: https://google.github.io/adk-docs/
- A2A Protocol: https://google.github.io/adk-docs/a2a/
- Agent Engine: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/overview
- Code Execution: https://cloud.google.com/agent-builder/agent-engine/code-execution/overview
- Memory Bank: https://cloud.google.com/vertex-ai/generative-ai/docs/agent-engine/memory-bank/overview