commit 47e178c6cb4b9be0418d70270b6c4b751ea7d4bf Author: Zhongwei Li Date: Sun Nov 30 08:54:41 2025 +0800 Initial commit diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..7fe0e25 --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,12 @@ +{ + "name": "sap-ai-core", + "description": "Guides development with SAP AI Core and AI Launchpad for enterprise AI/ML workloads. Covers generative AI models, orchestration workflows, RAG, ML pipelines, and content filtering.", + "version": "1.0.0", + "author": { + "name": "Zhongwei Li", + "email": "zhongweili@tubi.tv" + }, + "skills": [ + "./" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..2f8b9b7 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# sap-ai-core + +Guides development with SAP AI Core and AI Launchpad for enterprise AI/ML workloads. Covers generative AI models, orchestration workflows, RAG, ML pipelines, and content filtering. diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..d3239b7 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,263 @@ +--- +name: sap-ai-core +description: | + Guides development with SAP AI Core and SAP AI Launchpad for enterprise AI/ML workloads on SAP BTP. Use when: deploying generative AI models (GPT, Claude, Gemini, Llama), building orchestration workflows with templating/filtering/grounding, implementing RAG with vector databases, managing ML training pipelines with Argo Workflows, configuring content filtering and data masking for PII protection, using the Generative AI Hub for prompt experimentation, or integrating AI capabilities into SAP applications. Covers service plans (Free/Standard/Extended), model providers (Azure OpenAI, AWS Bedrock, GCP Vertex AI, Mistral, IBM), orchestration modules, embeddings, tool calling, and structured outputs. +license: GPL-3.0 +metadata: + version: "1.1.0" + last_verified: "2025-11-27" + production_tested: "Yes, examples verified against SAP documentation" +--- + +# SAP AI Core & AI Launchpad Skill + +## Related Skills + +- **sap-btp-cloud-platform**: Use for platform context, BTP account setup, and service integration +- **sap-cap-capire**: Use for building AI-powered applications with CAP or integrating AI services +- **sap-cloud-sdk-ai**: Use for SDK integration, AI service calls, and Java/JavaScript implementations +- **sap-btp-best-practices**: Use for production deployment patterns and AI governance guidelines + +## Table of Contents + +1. [Overview](#overview) +2. [Quick Start](#quick-start) +3. [Service Plans](#service-plans) +4. [Model Providers](#model-providers) +5. [Orchestration](#orchestration) +6. [Content Filtering](#content-filtering) +7. [Data Masking](#data-masking) +8. [Grounding (RAG)](#grounding-rag) +9. [Tool Calling](#tool-calling) +10. [Structured Output](#structured-output) +11. [Embeddings](#embeddings) +12. [ML Training](#ml-training) +13. [Deployments](#deployments) +14. [SAP AI Launchpad](#sap-ai-launchpad) +15. [API Reference](#api-reference) +16. [Common Patterns](#common-patterns) +17. [Troubleshooting](#troubleshooting) +18. [References](#references) + +## Overview + +SAP AI Core is a service on SAP Business Technology Platform (BTP) that manages AI asset execution in a standardized, scalable, hyperscaler-agnostic manner. SAP AI Launchpad provides the management UI for AI runtimes including the Generative AI Hub. 
+ +### Core Capabilities + +| Capability | Description | +|------------|-------------| +| **Generative AI Hub** | Access to LLMs from multiple providers with unified API | +| **Orchestration** | Modular pipeline for templating, filtering, grounding, masking | +| **ML Training** | Argo Workflows-based batch pipelines for model training | +| **Inference Serving** | Deploy models as HTTPS endpoints for predictions | +| **Grounding/RAG** | Vector database integration for contextual AI | + +### Three Components + +1. **SAP AI Core**: Execution engine for AI workflows and model serving +2. **SAP AI Launchpad**: Management UI for AI runtimes and GenAI Hub +3. **AI API**: Standardized lifecycle management across runtimes + +## Quick Start + +### Prerequisites + +- SAP BTP enterprise account +- SAP AI Core service instance (Extended plan for GenAI) +- Service key with credentials + +### 1. Get Authentication Token + +```bash +# Set environment variables from service key +export AI_API_URL="" +export AUTH_URL="" +export CLIENT_ID="" +export CLIENT_SECRET="" + +# Get OAuth token +AUTH_TOKEN=$(curl -s -X POST "$AUTH_URL/oauth/token" \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$CLIENT_ID&client_secret=$CLIENT_SECRET" \ + | jq -r '.access_token') +``` + +### 2. Create Orchestration Deployment + +```bash +# Check for existing orchestration deployment +curl -X GET "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" + +# Create orchestration deployment if needed +curl -X POST "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "" + }' +``` + +### 3. Use Harmonized API for Model Inference + +```bash +ORCHESTRATION_URL="" + +curl -X POST "$ORCHESTRATION_URL/v2/completion" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 1000, + "temperature": 0.7 + } + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "{{?user_query}}"} + ] + } + } + }, + "input_params": { + "user_query": "What is SAP AI Core?" + } + }' +``` + +## Service Plans + +| Plan | Cost | GenAI Hub | Support | Resource Groups | +|------|------|-----------|---------|-----------------| +| **Free** | Free | No | Community only | Default only | +| **Standard** | Per resource + baseline | No | Full SLA | Multiple | +| **Extended** | Per resource + tokens | Yes | Full SLA | Multiple | + +**Key Restrictions:** +- Free and Standard mutually exclusive in same subaccount +- Free → Standard upgrade possible; downgrade not supported +- Max 50 resource groups per tenant + +## Model Providers + +SAP AI Core provides access to models from six providers: +- **Azure OpenAI**: GPT-4o, GPT-4 Turbo, GPT-3.5 +- **SAP Open Source**: Llama, Falcon, Mistral variants +- **Google Vertex AI**: Gemini Pro, PaLM 2 +- **AWS Bedrock**: Claude, Amazon Titan +- **Mistral AI**: Mistral Large, Medium, Small +- **IBM**: Granite models + +For detailed provider configurations and model lists, see `references/model-providers.md`. 
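To check which provider models are actually enabled in your tenant, you can query the `foundation-models` scenario through the AI API (a minimal sketch using the endpoint documented in `references/api-reference.md`; the `jq` filter is illustrative):

```bash
# List models exposed by the foundation-models scenario (availability varies by tenant)
curl -s -X GET "$AI_API_URL/v2/lm/scenarios/foundation-models/models" \
  -H "Authorization: Bearer $AUTH_TOKEN" \
  -H "AI-Resource-Group: default" \
  | jq -r '.resources[] | "\(.provider)\t\(.model)"'
```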
+ +## Orchestration + +The orchestration service provides unified access to multiple models through a modular pipeline with 8 execution stages: +1. Grounding → 2. Templating (mandatory) → 3. Input Translation → 4. Data Masking → 5. Input Filtering → 6. Model Configuration (mandatory) → 7. Output Filtering → 8. Output Translation + +For complete orchestration module configurations, examples, and advanced patterns, see `references/orchestration-modules.md`. + +## Content Filtering + +**Azure Content Safety**: Filters content across 4 categories (Hate, Violence, Sexual, SelfHarm) with severity levels 0-6. Azure OpenAI blocks severity 4+ automatically. Additional features include PromptShield and Protected Material detection. + +**Llama Guard 3**: Covers 14 categories including violent crimes, privacy violations, and code interpreter abuse. + +## Data Masking + +**Two PII protection methods**: +- **Anonymization**: `MASKED_ENTITY` (non-reversible) +- **Pseudonymization**: `MASKED_ENTITY_ID` (reversible) + +**Supported entities** (25 total): Personal data, IDs, financial information, SAP-specific IDs, and sensitive attributes. For complete entity list and implementation details, see `references/orchestration-modules.md`. + +## Grounding (RAG) + +Integrate external data from SharePoint, S3, SFTP, SAP Build Work Zone, and DMS. Supports PDF, HTML, DOCX, images, and more. Limit: 2,000 documents per pipeline with daily refresh. For detailed setup, see `references/grounding-rag.md`. + +## Tool Calling + +Enable LLMs to execute functions through a 5-step workflow: define tools → receive tool_calls → execute functions → return results → LLM incorporates responses. Templates available in `templates/tool-definition.json`. + +## Structured Output + +Force model responses to match JSON schemas using strict validation. Useful for structured data extraction and API responses. + +## Embeddings + +Generate semantic embeddings for RAG and similarity search via `/v2/embeddings` endpoint. Supports document, query, and text input types. + +## ML Training + +Uses Argo Workflows for training pipelines. Key requirements: create `default` object store secret, define workflow template, create configuration with parameters, and execute training. For complete workflow patterns, see `references/ml-operations.md`. + +## Deployments + +Deploy models via two-step process: create configuration (with model binding), then create deployment with TTL. Statuses: Pending → Running → Stopping → Stopped/Dead. Templates in `templates/deployment-config.json`. + +## SAP AI Launchpad + +Web-based UI with 4 key applications: +- **Workspaces**: Manage connections and resource groups +- **ML Operations**: Train, deploy, monitor models +- **Generative AI Hub**: Prompt experimentation and orchestration +- **Functions Explorer**: Explore available AI functions + +Required roles include `genai_manager`, `genai_experimenter`, `prompt_manager`, `orchestration_executor`, and `mloperations_editor`. For complete guide, see `references/ai-launchpad-guide.md`. + +## API Reference + +### Core Endpoints + +Key endpoints: `/v2/lm/scenarios`, `/v2/lm/configurations`, `/v2/lm/deployments`, `/v2/lm/executions`, `/lm/meta`. For complete API reference with examples, see `references/api-reference.md`. 
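As a quick connectivity check, the `/lm/meta` endpoint listed above returns the runtime's capabilities and limits. A minimal sketch, reusing the token and headers from the Quick Start:

```bash
# Query runtime capabilities and limits (e.g., max running deployments, TTL bounds)
curl -s -X GET "$AI_API_URL/lm/meta" \
  -H "Authorization: Bearer $AUTH_TOKEN" \
  -H "AI-Resource-Group: default" \
  | jq '{capabilities, limits}'
```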
+ +## Common Patterns + +**Simple Chat**: Basic model invocation with templating module +**RAG with Grounding**: Combine vector search with LLM for context-aware responses +**Secure Enterprise Chat**: Filtering + masking + grounding for PII protection +Templates available in `templates/orchestration-workflow.json`. + "masking_providers": [{ + ## Troubleshooting + +**Common Issues**: +- 401 Unauthorized: Refresh OAuth token +- 403 Forbidden: Check IAM roles, request quota increase +- 404 Not Found: Verify AI-Resource-Group header +- Deployment DEAD: Check deployment logs +- Training failed: Create `default` object store secret + +Request quota increases via support ticket (Component: `CA-ML-AIC`). + +## References + +**Reference Documentation**: +1. `references/orchestration-modules.md` - All orchestration modules in detail +2. `references/generative-ai-hub.md` - Complete GenAI hub documentation +3. `references/model-providers.md` - Model providers and configurations +4. `references/api-reference.md` - Complete API endpoint reference +5. `references/grounding-rag.md` - Grounding and RAG implementation +6. `references/ml-operations.md` - ML operations and training +7. `references/advanced-features.md` - Chat, applications, security, auditing +8. `references/ai-launchpad-guide.md` - Complete SAP AI Launchpad UI guide + +**Templates**: +1. `templates/deployment-config.json` - Deployment configuration template +2. `templates/orchestration-workflow.json` - Orchestration workflow template +3. `templates/tool-definition.json` - Tool calling definition template + +**Official Sources**: +- SAP AI Core Guide: [https://help.sap.com/docs/sap-ai-core](https://help.sap.com/docs/sap-ai-core) +- SAP AI Launchpad Guide: [https://help.sap.com/docs/sap-ai-launchpad](https://help.sap.com/docs/sap-ai-launchpad) +- SAP Note 3437766: Model token rates and limits diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..e287044 --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,89 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:secondsky/sap-skills:skills/sap-ai-core", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "0ae4451ba14d10baa6076e5d9ca561ef5a35ad0f", + "treeHash": "c459205b6c23f3adc060fc6f69c0d7a5626644f848d4ccb5d7ff1c26764503a0", + "generatedAt": "2025-11-28T10:28:10.671301Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "sap-ai-core", + "description": "Guides development with SAP AI Core and AI Launchpad for enterprise AI/ML workloads. 
Covers generative AI models, orchestration workflows, RAG, ML pipelines, and content filtering.", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "7a840bdd95c4ef7b0c34ac0e7cf246d7dc248e9d7b93995952df8c308630501b" + }, + { + "path": "SKILL.md", + "sha256": "afae1abed31137ef29e72bfcd5fdc3a48db0b048dbf1cbef1f1207c007c9f9b5" + }, + { + "path": "references/ai-launchpad-guide.md", + "sha256": "0e479a98330ae9f8bc4d06501faf504e5f9d6bf755f41419e86f859ea9ef9515" + }, + { + "path": "references/ml-operations.md", + "sha256": "0e584619afa0e040afca4adc4dd2fa20f29130310e67fa460d4db0bea6ab24f3" + }, + { + "path": "references/advanced-features.md", + "sha256": "25b47cf2575dea58bc9550caaa9059efe9f5707b52787ae980bc90bbd4b5d89d" + }, + { + "path": "references/model-providers.md", + "sha256": "ac3aaab1969093ebe2499bf0d1cad5c9e71d466ebf3e6de154905f4a7f750a93" + }, + { + "path": "references/grounding-rag.md", + "sha256": "98432a0ba9a601ce92fec7b6f6081f77bcb093262d0ebac51f6f277d32a68677" + }, + { + "path": "references/generative-ai-hub.md", + "sha256": "0973a4f82a8d8c6115d45b2a826ebd42be4b477c978f6f31ae905051a39b0160" + }, + { + "path": "references/api-reference.md", + "sha256": "95c80df74523b873366a2319235dbd96e85743db25c8cb1ce91c215c2c57b3dd" + }, + { + "path": "references/orchestration-modules.md", + "sha256": "0fe0ec0b1a9c71baa46312151a79db9f07987f7fcec4482c2f09156252ad806f" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "0afb4ed9a67701387796ac0019aa19b4ca79f72c6010bee77fbe68547fc60709" + }, + { + "path": "templates/orchestration-workflow.json", + "sha256": "98b3ddd63ef184d6a3f49433f6f08ccb825103fb9a85c5fcd50c7cbd955bfc8b" + }, + { + "path": "templates/tool-definition.json", + "sha256": "5a5229fabe702cecc9e26ac0831757dfd68c90cc071f9e3c51d3f2ec33dbb0b6" + }, + { + "path": "templates/deployment-config.json", + "sha256": "d349059573bd01a6fb6a7ae65922c5a8047aa83345971e2d0f126e2b2e41d5bb" + } + ], + "dirSha256": "c459205b6c23f3adc060fc6f69c0d7a5626644f848d4ccb5d7ff1c26764503a0" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/references/advanced-features.md b/references/advanced-features.md new file mode 100644 index 0000000..40f3ad3 --- /dev/null +++ b/references/advanced-features.md @@ -0,0 +1,653 @@ +# Advanced Features Reference + +Complete reference for additional SAP AI Core features not covered in other reference files. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Table of Contents + +1. [Chat Conversations](#chat-conversations) +2. [Applications (Git Sync)](#applications-git-sync) +3. [Prompt Templates](#prompt-templates) +4. [Prompt Optimization](#prompt-optimization) +5. [AI Content as a Service](#ai-content-as-a-service) +6. [AI Content Security](#ai-content-security) +7. [Data Protection and Privacy](#data-protection-and-privacy) +8. [Auditing and Logging](#auditing-and-logging) +9. [ServingTemplate Schema](#servingtemplate-schema) +10. [Contextualized Retrieval with Metadata](#contextualized-retrieval-with-metadata) +11. [Content Packages](#content-packages) + +--- + +## Chat Conversations + +Multi-turn conversation handling using the orchestration service. 
+ +### Message History Management + +The orchestration service manages conversation history through the `messages_history` parameter, storing user and assistant role exchanges. + +### Request Structure + +```json +{ + "orchestration_config": { + "module_configurations": { + "templating_module_config": { + "template": [ + {"role": "user", "content": "{{?current_message}}"} + ] + }, + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 300, + "temperature": 0.1 + } + } + } + }, + "messages_history": [ + {"role": "user", "content": "What is machine learning?"}, + {"role": "assistant", "content": "Machine learning is a branch of AI..."}, + {"role": "user", "content": "Can you give an example?"}, + {"role": "assistant", "content": "Sure, email spam filtering is an example..."} + ], + "input_params": { + "current_message": "What about deep learning?" + } +} +``` + +### Key Behavior + +- The templating module appends the current user message to the message history +- The combined history generates the prompt sent to the LLM module +- Response `module_results.templating` and `orchestration_result.choices` can be used as message history for subsequent requests + +### Continuation Pattern + +```python +def continue_conversation(history, new_message, response): + """Update conversation history with new exchange.""" + history.append({"role": "user", "content": new_message}) + history.append({ + "role": "assistant", + "content": response["orchestration_result"]["choices"][0]["message"]["content"] + }) + return history +``` + +--- + +## Applications (Git Sync) + +Applications synchronize workflow templates from GitHub repositories. + +### Key Features + +- **Automatic Sync:** Applications sync with GitHub every ~3 minutes +- **Manual Sync:** Trigger via `POST {{apiurl}}/admin/applications/{{appName}}/refresh` + +### API Endpoints + +| Endpoint | Method | Purpose | +|----------|--------|---------| +| `/v2/admin/applications` | POST | Create application | +| `/v2/admin/applications` | GET | List applications | +| `/v2/admin/applications/{name}` | DELETE | Remove application | +| `/v2/admin/applications/{name}/status` | GET | Get sync status | +| `/admin/applications/{name}/refresh` | POST | Trigger manual sync | + +### Create Application + +```bash +curl -X POST "$AI_API_URL/v2/admin/applications" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "applicationName": "my-workflows", + "repositoryUrl": "[https://github.com/org/ai-workflows",](https://github.com/org/ai-workflows",) + "revision": "HEAD", + "path": "workflows/" + }' +``` + +### Required Configuration + +| Parameter | Description | +|-----------|-------------| +| `applicationName` | Application identifier (becomes executable ID) | +| `repositoryUrl` | GitHub repository URL | +| `path` | Path within repository | +| `revision` | Branch, commit SHA, or HEAD | + +### Sync Status Response + +```json +{ + "health": "healthy", + "lastSyncTime": "2024-01-15T10:00:00Z", + "status": "Synced", + "message": "" +} +``` + +### Validation Checks + +The system validates: +- No duplicate workflow names +- Correct scenario labels on templates +- Valid YAML syntax +- Proper metadata structure (WorkflowTemplate kind) + +--- + +## Prompt Templates + +Manage prompts through declarative (Git) or imperative (API) approaches. 
+ +### Declarative Approach (Git-managed) + +#### File Format + +Filename: `.prompttemplate.ai.sap.yaml` + +```yaml +name: customer-support-prompt +version: 0.0.1 +scenario: customer-service +spec: + template: + - role: system + content: "You are a helpful customer support agent for {{?company_name}}." + - role: user + content: "{{?customer_query}}" +defaults: + company_name: "Acme Corp" +additional_fields: + metadata: + author: "AI Team" + category: "support" + model_restrictions: + blocked_models: + - model_name: "gpt-3.5-turbo" + versions: ["0613"] +``` + +#### Key Characteristics + +- Managed through git commits +- Auto-sync with prompt registry +- Marked as `managedBy: declarative` +- Always reflects HEAD version +- Cannot be edited via imperative API + +### Imperative Approach (API-managed) + +#### Create Prompt Template + +```bash +curl -X POST "$AI_API_URL/v2/lm/promptTemplates" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "api-managed-prompt", + "version": "1.0.0", + "scenario": "foundation-models", + "spec": { + "template": [ + {"role": "user", "content": "{{?user_input}}"} + ] + } + }' +``` + +### List Prompt Templates + +```bash +curl -X GET "$AI_API_URL/v2/lm/promptTemplates" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +### Placeholder Syntax + +| Syntax | Description | +|--------|-------------| +| `{{?variable}}` | Required input parameter | +| `{{?variable}}` with defaults | Optional if default provided | + +--- + +## Prompt Optimization + +Automated prompt improvement using optimization runs. + +### Overview + +Prompt Optimization takes an input prompt template and a dataset of desirable responses to maximize a specified metric. + +### Prerequisites + +- Required roles: `genai_manager` or `custom_evaluation` +- Service plan: `extended` tier required +- Object store named `default` must be registered +- Prompt template saved in prompt registry +- Dataset artifact prepared and registered + +### Dataset Preparation + +| Requirement | Value | +|-------------|-------| +| **Minimum samples** | 25 | +| **Maximum samples** | 200 | +| **Format** | JSON array | + +#### Dataset Structure + +```json +[ + { + "fields": { + "input": "Customer complaint about delivery delay", + "category": "logistics" + }, + "answer": { + "sentiment": "negative", + "urgency": "high", + "category": "delivery" + } + } +] +``` + +**Important:** Placeholder names in `fields` must match those in the template exactly. Do not include confidential or personally identifiable information. + +### Process + +1. Submit optimization job with prompt template and dataset +2. System generates multiple prompt variations +3. Evaluates variations against target metric +4. Returns optimized prompt to registry +5. Stores additional results in object store + +### Launchpad UI Flow (7 Steps) + +1. **Access**: Connect to SAP AI Core via Workspaces app +2. **Navigate**: Generative AI Hub → Optimization +3. **Initiate**: Create → Prompt Optimization +4. **Configure Artifacts**: Select template, models, and dataset +5. **Select Metric**: Choose evaluation metric +6. **Advanced Settings**: Configure template name/version (optional) +7. 
**Review & Deploy**: Verify and start job + +### Limitations + +| Constraint | Details | +|------------|---------| +| **Duration** | Minutes to multiple hours | +| **Requests** | Submits large number of prompt requests | +| **Model Support** | Mistral and DeepSeek NOT supported | + +### Operations (AI Launchpad) + +- Create a new prompt optimization +- View existing prompt optimizations +- View detailed run information + +--- + +## AI Content as a Service + +Publish AI content to SAP BTP Service Marketplace. + +### Capabilities + +- Publish workflows, serving templates, or Docker images +- Distribute as managed service on SAP BTP +- Other tenants can consume via standard APIs + +### Use Cases + +- Monetize AI models and workflows +- Share AI components across organization +- Provide standardized AI services + +--- + +## AI Content Security + +Security best practices for AI content (workflows, templates, Docker images). + +### Required Practices + +| Practice | Description | +|----------|-------------| +| **Threat Modeling** | Conduct security risk workshops | +| **Static Code Scans** | Use SAST tools for vulnerability analysis | +| **OSS Vulnerability Scan** | Evaluate third-party components | +| **OSS Update Strategy** | Define update cadence for open-source components | +| **Code Reviews** | Peer review with security focus | +| **Malware Scanning** | Scan uploaded data before deployment | +| **Secure Code Protection** | Use Docker image digest and signature verification | +| **Docker Base Image Security** | Use minimal, hardened base images | + +### Key Responsibility + +> "Users of AI Core are responsible for the content of their Docker images and assume the risk of running compromised containers in the platform." + +### Docker Security Guidelines + +1. Select minimal, hardened base images +2. Keep images updated +3. Remove unnecessary components +4. Use multi-stage builds +5. Scan images for vulnerabilities +6. Sign images for verification + +--- + +## Data Protection and Privacy + +Compliance features for data protection. + +### Supported Capabilities + +| Feature | Description | +|---------|-------------| +| **Data Blocking** | Simplified blocking of personal data | +| **Data Deletion** | Simplified deletion of personal data | +| **Change Logging** | Audit trail for data changes | +| **Read-Access Logging** | Track data access | +| **Consent Management** | Manage user consent | +| **Data Storage Controls** | Control data storage and processing | + +### Compliance Scope + +- General data protection acts (GDPR, etc.) +- Industry-specific legislation +- Regional privacy requirements + +### Important Notes + +- SAP does not provide legal advice +- Compliance requires secure system operation +- Case-by-case evaluation required + +--- + +## Auditing and Logging + +Security event logging in SAP AI Core. 
+ +### Events Logged + +| Category | Events | +|----------|--------| +| **Object Store** | Create, delete, retrieve secrets | +| **Resource Groups** | Provision, deprovision | +| **Tenants** | Provision, retrieve, deprovision | +| **Docker Registry** | Create, delete secrets | +| **Deployments** | Create, delete | +| **Executions** | Create, delete | +| **Repositories** | Create, delete | +| **Applications** | Create, delete | + +### Log Details by Operation Type + +| Operation | Logged Details | +|-----------|----------------| +| List/Get/Watch | Timestamp, tenant IDs, source IPs, request URI, level | +| Create/Update/Patch | Above + request/response objects | +| Delete | Above + response object | + +### Authentication Events + +| Event | Message | +|-------|---------| +| Token expired | `Jwt is expired` | +| Missing auth header | `RBAC: access denied` | +| Invalid token | `Jwt issuer is not configured` | +| Wrong tenant | `Jwt verification fails` | + +--- + +## ServingTemplate Schema + +API schema for serving templates (KServe integration) for model deployment. + +### Quotas and Limits + +| Limit | Value | +|-------|-------| +| **Max ServingTemplates per tenant** | 50 | +| **Max WorkflowTemplates per tenant** | 50 | +| **Bulk operations** | Requires `bulkUpdates` annotation | + +### Resource Structure + +```yaml +apiVersion: ai.sap.com/v1alpha1 +kind: ServingTemplate +metadata: + name: my-serving-template + annotations: + scenarios.ai.sap.com/description: "Description of scenario" + scenarios.ai.sap.com/name: "scenario-name" + executables.ai.sap.com/description: "Description of executable" + executables.ai.sap.com/name: "executable-name" + ai.sap.com/bulkUpdates: "true" # Enable bulk operations + labels: + ai.sap.com/version: "1.0.0" + scenarios.ai.sap.com/id: "unique-scenario-id" +spec: + inputs: + parameters: + - name: modelUri + default: "" + type: string + artifacts: + - name: model + template: + apiVersion: serving.kserve.io/v1beta1 + metadata: + name: "{{inputs.parameters.name}}" + spec: + predictor: + containers: + - name: kserve-container + image: "{{inputs.parameters.image}}" + env: + - name: STORAGE_URI + value: "{{inputs.artifacts.model}}" +``` + +### Model Path Configuration + +| Environment Variable | Description | +|---------------------|-------------| +| `STORAGE_URI` | Points to artifact location for model download | +| **Default Mount Path** | `/mnt/models` (typical in SAP AI Core examples) | + +**Important:** The `/mnt/models` path is the typical default used in SAP AI Core examples, but the mount path is configurable via the ServingRuntime/ServingTemplate and container args (e.g., `--model_dir`). 
Your inference code should read the path from configuration or environment variables rather than assuming a hardcoded path: + +```python +import os + +# Read from environment or use default +MODEL_PATH = os.environ.get("MODEL_DIR", "/mnt/models") + +def load_model(): + """Load model from configured mount path.""" + return load_from_path(MODEL_PATH) +``` + +**Configuration Options:** +- Set via container args: `--model_dir=/custom/path` +- Set via environment variable in ServingTemplate +- Override in KServe InferenceService spec + +### Annotations Reference + +| Annotation | Purpose | +|------------|---------| +| `scenarios.ai.sap.com/description` | Scenario description | +| `scenarios.ai.sap.com/name` | Scenario display name | +| `executables.ai.sap.com/description` | Executable description | +| `executables.ai.sap.com/name` | Executable display name | +| `ai.sap.com/bulkUpdates` | Enable bulk stop/delete operations | + +### Labels Reference + +| Label | Purpose | +|-------|---------| +| `ai.sap.com/version` | Version number | +| `scenarios.ai.sap.com/id` | Unique scenario ID | + +### Parameter Types + +Only `string` type is supported for input parameters. + +### Placeholder Syntax + +Use `{{inputs.parameters.ParameterName}}` and `{{inputs.artifacts.ArtifactName}}` in template spec. + +### Bulk Operations + +When `ai.sap.com/bulkUpdates: "true"` is set: + +```bash +# Bulk stop deployments +curl -X PATCH "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "targetStatus": "STOPPED", + "deploymentIds": ["deploy-1", "deploy-2", "deploy-3"] + }' + +# Bulk delete deployments +curl -X DELETE "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "deploymentIds": ["deploy-1", "deploy-2", "deploy-3"] + }' +``` + +--- + +## Contextualized Retrieval with Metadata + +Include metadata in grounding retrieval results. + +### Configuration + +Add `metadata_params` to grounding configuration: + +```json +{ + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "metadata_params": ["source", "webUrl", "title"], + "filters": [{"id": ""}] + } + } +} +``` + +### Metadata Levels + +| Level | Description | +|-------|-------------| +| Data Repository | Repository-level metadata | +| Document | Document-level metadata | +| Chunk | Chunk-level metadata | + +### Discovery + +Query available metadata keys: + +```bash +curl -X POST "$AI_API_URL/v2/lm/document-grounding/retrieval/search" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "query": "test query", + "filters": [{"id": ""}] + }' +``` + +### Naming Convention for Conflicts + +When metadata keys exist at multiple levels: +- Chunk-level: `webUrl` +- Document-level: `document_webUrl` +- Repository-level: `repository_webUrl` + +### Using Metadata in Prompts + +```json +{ + "templating_module_config": { + "template": [ + { + "role": "system", + "content": "Answer based on the context. Include source references.\n\nContext: {{$context}}" + }, + {"role": "user", "content": "{{?user_query}}"} + ] + } +} +``` + +--- + +## Content Packages + +Additional Python packages extending SAP AI Core. 
+ +### Available Packages + +| Package | Purpose | PyPI Link | +|---------|---------|-----------| +| `sap-ai-core-datarobot` | DataRobot integration | [https://pypi.org/project/sap-ai-core-datarobot/](https://pypi.org/project/sap-ai-core-datarobot/) | +| `sap-computer-vision-package` | Image classification and feature extraction | [https://pypi.org/project/sap-computer-vision-package/](https://pypi.org/project/sap-computer-vision-package/) | + +### Installation + +```bash +pip install sap-ai-core-datarobot +pip install sap-computer-vision-package +``` + +### Computer Vision Package Capabilities + +- Image classification +- Feature extraction +- Integration with SAP AI SDK Core + +--- + +## Documentation Links + +- Chat: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/chat-39321a9.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/chat-39321a9.md) +- Applications: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/application-7f1e35b.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/application-7f1e35b.md) +- Prompt Templates: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-prompt-template-declarative-815def5.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-prompt-template-declarative-815def5.md) +- AI Content Security: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/ai-content-security-d1cd77f.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/ai-content-security-d1cd77f.md) +- Data Protection: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/data-protection-and-privacy-d25e4c9.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/data-protection-and-privacy-d25e4c9.md) +- Auditing: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/auditing-and-logging-information-e19844a.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/auditing-and-logging-information-e19844a.md) +- API Schema: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/api-schema-spec-ai-sap-com-v1alpha1-4d1ffd2.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/api-schema-spec-ai-sap-com-v1alpha1-4d1ffd2.md) diff --git a/references/ai-launchpad-guide.md b/references/ai-launchpad-guide.md new file mode 100644 index 0000000..6381711 --- /dev/null +++ b/references/ai-launchpad-guide.md @@ -0,0 +1,598 @@ +# SAP AI Launchpad Complete Guide + +Comprehensive reference for SAP AI Launchpad features and operations. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-launchpad](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-launchpad) + +--- + +## Table of Contents + +1. [Overview](#overview) +2. [Initial Setup](#initial-setup) +3. [Workspaces and Connections](#workspaces-and-connections) +4. [User Roles](#user-roles) +5. [Generative AI Hub](#generative-ai-hub) +6. [Prompt Editor](#prompt-editor) +7. [Orchestration Workflows](#orchestration-workflows) +8. [ML Operations](#ml-operations) +9. [Configurations](#configurations) +10. [Deployments](#deployments) +11. [Executions and Runs](#executions-and-runs) +12. [Schedules](#schedules) +13. 
[Datasets and Artifacts](#datasets-and-artifacts) +14. [Model Comparison](#model-comparison) +15. [Applications](#applications) +16. [Meta API and Custom Runtime Capabilities](#meta-api-and-custom-runtime-capabilities) + +--- + +## Overview + +SAP AI Launchpad is a multitenant SaaS application on SAP BTP that provides: + +- Management UI for AI runtimes (SAP AI Core) +- Generative AI Hub for prompt experimentation +- ML Operations for model lifecycle management +- Analytics and monitoring dashboards + +### Two User Types + +| Type | Description | +|------|-------------| +| **AI Scenario Producer** | Engineers developing and productizing AI scenarios | +| **AI Scenario Consumer** | Business analysts subscribing to and using AI scenarios | + +--- + +## Initial Setup + +### Prerequisites + +1. SAP BTP enterprise account +2. Subaccount with Cloud Foundry enabled +3. SAP AI Launchpad subscription +4. SAP AI Core instance (for runtime connection) + +### Setup Steps + +1. **Create Subaccount** with Cloud Foundry environment +2. **Subscribe to SAP AI Launchpad** in Service Marketplace +3. **Create Service Instance** of SAP AI Core (if needed) +4. **Assign Role Collections** to users +5. **Add Connection** to SAP AI Core runtime + +### Service Plans + +| Plan | Cost | Support | GenAI Hub | +|------|------|---------|-----------| +| **Free** | Free | Community only, no SLA | No | +| **Standard** | Monthly fixed price | Full SAP support | Yes | + +**Note:** Free → Standard upgrade preserves data; downgrade not supported. + +--- + +## Workspaces and Connections + +### Adding a Connection + +1. Navigate to **Administration** → **Connections** +2. Click **Add** +3. Enter connection details: + - Name + - Service Key (from SAP AI Core) +4. Test connection +5. Save + +### Managing Connections + +| Operation | Description | +|-----------|-------------| +| Edit | Modify connection settings | +| Delete | Remove connection | +| Test | Verify connectivity | +| Set Default | Make primary connection | + +### Assigning Connection to Workspace + +1. Navigate to **Workspaces** +2. Select workspace +3. Click **Assign Connection** +4. Select connection from dropdown +5. Confirm + +--- + +## User Roles + +### Administrative Roles + +| Role | Capabilities | +|------|--------------| +| `ailaunchpad_admin` | Full administrative access | +| `ailaunchpad_connections_editor` | Manage connections | +| `ailaunchpad_aicore_admin` | SAP AI Core integration management | + +### ML Operations Roles + +| Role | Capabilities | +|------|--------------| +| `ailaunchpad_mloperations_viewer` | View ML operations | +| `ailaunchpad_mloperations_editor` | Full ML operations access | + +### Generative AI Hub Roles + +| Role | Capabilities | +|------|--------------| +| `genai_manager` | Full GenAI hub access, save prompts | +| `genai_experimenter` | Prompt experimentation only | +| `prompt_manager` | Manage saved prompts | +| `prompt_experimenter` | Use saved prompts | + +### Functions Explorer Roles + +| Role | Capabilities | +|------|--------------| +| `ailaunchpad_functions_explorer_editor_v2` | Edit functions explorer | +| `ailaunchpad_functions_explorer_viewer_v2` | View functions explorer | + +**Note:** Role names `prompt_media_executor` and `orchestration_executor` may be deprecated. Verify current role names in SAP documentation. 
+ +--- + +## Generative AI Hub + +### Access Path + +**Workspaces** → Select workspace → **Generative AI Hub** + +### Features + +| Feature | Description | +|---------|-------------| +| **Prompt Editor** | Interactive prompt testing | +| **Model Library** | Browse available models | +| **Grounding Management** | Manage document pipelines | +| **Orchestration** | Build workflow configurations | +| **Chat** | Direct model interaction | +| **Saved Prompts** | Prompt management | + +### Model Library + +View model specifications including: +- Capabilities (chat, embeddings, vision) +- Context window sizes +- Performance benchmarks +- Cost per token +- Deprecation dates + +--- + +## Prompt Editor + +### Access + +**Generative AI Hub** → **Prompt Editor** + +### Interface Elements + +| Element | Description | +|---------|-------------| +| **Name** | Prompt identifier (manager roles only) | +| **Collection** | Organize prompts (manager roles only) | +| **Messages** | Configure message blocks with roles | +| **Variables** | Define input placeholders | +| **Model Selection** | Choose model and version | +| **Parameters** | Adjust model parameters | +| **Metadata** | Tags and notes (manager roles only) | + +### Message Roles + +- **System**: Instructions for the model +- **User**: User input +- **Assistant**: Previous assistant responses + +### Variable Syntax + +Use `{{variable_name}}` for placeholders with definitions section. + +### Running Prompts + +1. Configure messages and variables +2. Select model (optional - uses default) +3. Adjust parameters +4. Click **Run** +5. View response (streaming available) + +### Image Inputs + +- Supported for select models (GPT-4o, Gemini, Llama Vision) +- Maximum 5MB across all inputs +- Requires `prompt_media_executor` role + +### Saving Prompts + +- Click **Save** (manager roles only) +- Assign to collection +- Add tags and notes +- Version automatically managed + +### Prompt Types + +| Type | Description | +|------|-------------| +| Question Answering | Q&A interactions | +| Summarization | Extract key points | +| Inferencing | Sentiment, entity extraction | +| Transformations | Translation, format conversion | +| Expansions | Content generation | + +--- + +## Orchestration Workflows + +### Access + +**Generative AI Hub** → **Orchestration** → **Create** + +### Workflow Modules + +| Order | Module | Required | +|-------|--------|----------| +| 1 | Grounding | Optional | +| 2 | Templating | **Mandatory** | +| 3 | Input Translation | Optional | +| 4 | Data Masking | Optional | +| 5 | Input Filtering | Optional | +| 6 | Model Configuration | **Mandatory** | +| 7 | Output Filtering | Optional | +| 8 | Output Translation | Optional | + +**Required Modules Explained:** +- **Templating**: Constructs the actual prompt/messages sent to the LLM using input variables and context +- **Model Configuration**: Specifies which LLM model to use and its parameters (temperature, max_tokens, etc.) + +### Building Workflows + +1. Click **Create** to start new workflow +2. Configure required modules (Templating, Model) +3. Enable optional modules via **Edit** +4. Configure each enabled module +5. Click **Test** to run workflow +6. 
Click **Save** to store configuration + +### JSON Upload + +- Maximum file size: 200 KB +- Format: JSON with `module_configurations` +- Note: Workflows with images can be downloaded but not uploaded + +### Saving Workflows + +- Save as configuration for reuse +- Assign name and description +- Link to deployments + +--- + +## ML Operations + +### Access + +**Workspaces** → Select workspace → **ML Operations** + +### Components + +| Component | Purpose | +|-----------|---------| +| **Configurations** | Parameter and artifact settings | +| **Executions** | Training jobs | +| **Deployments** | Model serving | +| **Schedules** | Automated executions | +| **Datasets** | Training data | +| **Models** | Trained models | +| **Result Sets** | Inference outputs | +| **Other Artifacts** | Miscellaneous artifacts | + +--- + +## Configurations + +### Creating Configuration + +1. Navigate to **ML Operations** → **Configurations** +2. Click **Create** +3. Enter details: + - Name + - Scenario + - Executable + - Parameters + - Input artifacts +4. Save + +### Configuration Contents + +| Field | Description | +|-------|-------------| +| Name | Configuration identifier | +| Scenario | AI scenario reference | +| Executable | Workflow or serving template | +| Parameter Bindings | Key-value parameters | +| Artifact Bindings | Input artifact references | + +--- + +## Deployments + +### Creating Deployment + +1. Navigate to **ML Operations** → **Deployments** +2. Click **Create** +3. Select configuration +4. Set duration (optional TTL) +5. Click **Create** + +### Deployment Details + +| Field | Description | +|-------|-------------| +| ID | Unique identifier | +| Status | Current state | +| URL | Inference endpoint | +| Configuration | Associated config | +| Created | Timestamp | +| Duration | TTL if set | + +### Deployment Statuses + +| Status | Description | Actions | +|--------|-------------|---------| +| Pending | Starting | Stop | +| Running | Active | Stop | +| Stopping | Shutting down | Wait | +| Stopped | Inactive | Delete | +| Dead | Failed | Delete | +| Unknown | Initial | Delete | + +### Operations + +| Operation | Description | +|-----------|-------------| +| View | See deployment details | +| View Logs | Access pipeline logs | +| Update | Change configuration | +| Stop | Halt deployment | +| Delete | Remove deployment | + +### Bulk Operations + +- Stop multiple deployments +- Delete multiple deployments (up to 100) + +--- + +## Executions and Runs + +### Creating Execution + +1. Navigate to **ML Operations** → **Executions** +2. Click **Create** +3. Select configuration +4. Click **Create** + +### Execution Statuses + +| Status | Description | +|--------|-------------| +| Pending | Queued | +| Running | Executing | +| Completed | Finished successfully | +| Dead | Failed | +| Stopped | Manually stopped | + +### Viewing Execution Details + +- Parameters and artifacts +- Status and timing +- Logs from pipeline +- Output artifacts +- Metrics + +### Comparing Executions + +1. Select multiple executions +2. Click **Compare** +3. View side-by-side: + - Parameters + - Metrics + - Durations +4. Create charts for visualization + +--- + +## Schedules + +### Creating Schedule + +1. Navigate to **ML Operations** → **Schedules** +2. Click **Create** +3. Select configuration +4. Set cron expression +5. Define start/end dates +6. 
Save + +### Cron Expression Format + +``` +┌───────── minute (0-59) +│ ┌─────── hour (0-23) +│ │ ┌───── day of month (1-31) +│ │ │ ┌─── month (1-12) +│ │ │ │ ┌─ day of week (0-6) +│ │ │ │ │ +* * * * * +``` + +### Schedule Operations + +| Operation | Description | +|-----------|-------------| +| View | See schedule details | +| Edit | Modify schedule | +| Stop | Pause schedule | +| Resume | Restart schedule | +| Delete | Remove schedule | + +--- + +## Datasets and Artifacts + +### Dataset Registration + +1. Navigate to **ML Operations** → **Datasets** +2. Click **Register** +3. Enter details: + - Name + - URL (ai://secret-name/path) + - Scenario + - Description +4. Save + +### Artifact Types + +| Type | Description | +|------|-------------| +| Dataset | Training/validation data | +| Model | Trained model | +| Result Set | Inference results | +| Other | Miscellaneous | + +### Finding Artifacts + +- Filter by scenario +- Search by name +- Sort by date +- View details + +--- + +## Model Comparison + +### Comparing Models + +1. Navigate to **ML Operations** → **Models** +2. Select multiple models +3. Click **Compare** +4. View: + - Configuration differences + - Metric comparisons + - Performance charts + +### Creating Comparison Charts + +1. Select metrics to compare +2. Choose chart type +3. Configure axes +4. Generate visualization + +--- + +## Applications + +### Managing Applications + +Access: **Administration** → **Applications** + +### Operations + +| Operation | Description | +|-----------|-------------| +| Create | Add new application | +| View | See application details | +| Edit | Modify settings | +| Delete | Remove application | +| Create Disclaimer | Add usage disclaimer | + +### Chat Application + +Create chat interfaces using deployed models: + +1. Create application +2. Configure model deployment +3. Set disclaimer (optional) +4. Share application URL + +--- + +## Meta API and Custom Runtime Capabilities + +The Meta API identifies which capabilities apply to a given AI runtime, allowing SAP AI Launchpad to display only relevant features. + +### Purpose + +| Function | Description | +|----------|-------------| +| **Capability Management** | Enable/disable capabilities based on AI use case | +| **UI Streamlining** | Hide unnecessary features to reduce confusion | +| **API Decoupling** | Reduce impact of backend API changes | + +### Supported Capabilities + +| Capability | Description | +|------------|-------------| +| `userDeployments` | Allows users to create custom deployments | +| `userExecutions` | Enables execution functionality | +| `staticDeployments` | System-managed deployments | +| `timeToLiveDeployments` | TTL-based deployment limits | +| `bulkUpdates` | Bulk operations support | +| `executionSchedules` | Scheduling functionality | +| `analytics` | Analytics dashboard | + +### Metadata Refresh + +- **Automatic**: Refreshed periodically on schedule +- **On-demand**: Users can trigger manual refresh +- **Administration**: SAP Runtime team manages active capabilities + +### Custom Runtime Usage + +Custom runtimes can selectively implement only necessary capabilities, creating a tailored experience: + +``` +AI Runtime → Meta API Query → Capability List → Filtered UI +``` + +--- + +## Accessibility Features + +SAP AI Launchpad provides: +- Keyboard navigation +- Screen reader support +- High contrast themes +- Accessible UI components + +--- + +## Language Settings + +Change interface language: +1. Navigate to user settings +2. Select language preference +3. 
Save changes + +Supported languages vary by region and deployment. + +--- + +## Documentation Links + +- What is AI Launchpad: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/what-is-sap-ai-launchpad-760889a.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/what-is-sap-ai-launchpad-760889a.md) +- Initial Setup: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/initial-setup-5d8adb6.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/initial-setup-5d8adb6.md) +- Service Plans: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/service-plans-ec1717d.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/service-plans-ec1717d.md) +- ML Operations: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/ml-operations-df78271.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/ml-operations-df78271.md) +- Generative AI Hub: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/generative-ai-hub-b0b935b.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/generative-ai-hub-b0b935b.md) +- Prompt Experimentation: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/prompt-experimentation-384cc0c.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/prompt-experimentation-384cc0c.md) +- Orchestration: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/build-your-orchestration-workflow-b7dc8b4.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/build-your-orchestration-workflow-b7dc8b4.md) +- Deployments: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/deployments-0543c2c.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/deployments-0543c2c.md) diff --git a/references/api-reference.md b/references/api-reference.md new file mode 100644 index 0000000..f4b3735 --- /dev/null +++ b/references/api-reference.md @@ -0,0 +1,734 @@ +# SAP AI Core API Reference + +Complete API reference for SAP AI Core. 
+ +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Authentication + +### OAuth Token Endpoint + +```bash +curl -X POST "[https://.authentication..hana.ondemand.com/oauth/token"](https://.authentication..hana.ondemand.com/oauth/token") \ + -H "Content-Type: application/x-www-form-urlencoded" \ + -d "grant_type=client_credentials&client_id=$CLIENT_ID&client_secret=$CLIENT_SECRET" +``` + +### Required Headers + +| Header | Description | +|--------|-------------| +| `Authorization` | `Bearer ` | +| `AI-Resource-Group` | Resource group name (e.g., `default`) | +| `Content-Type` | `application/json` | + +--- + +## Base URLs + +| Environment | URL Pattern | +|-------------|-------------| +| AI API | `[https://api.ai.prod..hana.ondemand.com`](https://api.ai.prod..hana.ondemand.com`) | +| Inference | `[https://api.ai.prod..hana.ondemand.com/v2/inference/deployments/`](https://api.ai.prod..hana.ondemand.com/v2/inference/deployments/`) | +| OAuth | `[https://.authentication..hana.ondemand.com/oauth/token`](https://.authentication..hana.ondemand.com/oauth/token`) | + +**Regions:** `eu10`, `eu11`, `us10`, `us21`, `jp10`, `ap10`, `ap11` + +--- + +## API Versioning + +All endpoints use the `/v2/*` versioned routes: +- `/v2/lm/*` - Language model operations +- `/v2/inference/*` - Inference deployments +- `/v2/admin/*` - Administrative operations (secrets, repositories) + +--- + +## Scenarios + +### List Scenarios + +```bash +GET $AI_API_URL/v2/lm/scenarios +``` + +**Response:** +```json +{ + "count": 2, + "resources": [ + { + "id": "foundation-models", + "name": "Foundation Models", + "description": "Access to generative AI models" + }, + { + "id": "orchestration", + "name": "Orchestration", + "description": "Unified model access with pipeline features" + } + ] +} +``` + +--- + +## Models + +### List Available Models + +```bash +GET $AI_API_URL/v2/lm/scenarios/foundation-models/models +``` + +**Response:** +```json +{ + "count": 50, + "resources": [ + { + "model": "gpt-4o", + "accessType": "Remote", + "displayName": "GPT-4o", + "provider": "azure-openai", + "executableId": "azure-openai", + "versions": [ + { + "name": "2024-05-13", + "isLatest": true, + "capabilities": ["text-generation", "chat"], + "contextLength": 128000, + "inputCost": 5.0, + "outputCost": 15.0, + "isStreamingSupported": true + } + ] + } + ] +} +``` + +--- + +## Configurations + +### Create Configuration + +```bash +POST $AI_API_URL/v2/lm/configurations +``` + +**Request Body:** +```json +{ + "name": "my-config", + "executableId": "azure-openai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "latest"} + ], + "inputArtifactBindings": [] +} +``` + +**Response:** +```json +{ + "id": "abc123-def456-ghi789", + "message": "Configuration created" +} +``` + +### List Configurations + +```bash +GET $AI_API_URL/v2/lm/configurations +``` + +**Query Parameters:** +| Parameter | Description | +|-----------|-------------| +| `scenarioId` | Filter by scenario | +| `executableId` | Filter by executable | +| `$top` | Limit results | +| `$skip` | Skip results | + +### Get Configuration + +```bash +GET $AI_API_URL/v2/lm/configurations/{configurationId} +``` + +### Delete Configuration + +```bash +DELETE $AI_API_URL/v2/lm/configurations/{configurationId} +``` + +--- + +## Deployments + +### Create 
Deployment + +```bash +POST $AI_API_URL/v2/lm/deployments +``` + +**Request Body:** +```json +{ + "configurationId": "", + "ttl": "24h" +} +``` + +**TTL Format:** Natural numbers with units: `m` (minutes), `h` (hours), `d` (days) +- Valid: `5m`, `2h`, `7d` +- Invalid: `4.5h`, `4h30m` (fractional and combined units not supported) +- **Tip:** Convert combined durations to a single unit (e.g., `270m` instead of `4h30m`) + +**Response:** +```json +{ + "id": "d12345-abcd-efgh", + "deploymentUrl": "[https://...",](https://...",) + "status": "PENDING", + "message": "Deployment created" +} +``` + +### Get Deployment + +```bash +GET $AI_API_URL/v2/lm/deployments/{deploymentId} +``` + +**Response:** +```json +{ + "id": "d12345-abcd-efgh", + "configurationId": "c12345-abcd", + "configurationName": "my-config", + "scenarioId": "foundation-models", + "status": "RUNNING", + "statusMessage": "", + "deploymentUrl": "[https://...",](https://...",) + "createdAt": "2024-01-15T10:00:00Z", + "modifiedAt": "2024-01-15T10:05:00Z" +} +``` + +### Deployment Statuses + +| Status | Description | +|--------|-------------| +| `UNKNOWN` | Initial state | +| `PENDING` | Starting up | +| `RUNNING` | Active, serving requests | +| `STOPPING` | Shutting down | +| `STOPPED` | Inactive | +| `DEAD` | Failed | + +### List Deployments + +```bash +GET $AI_API_URL/v2/lm/deployments +``` + +### Update Deployment + +```bash +PATCH $AI_API_URL/v2/lm/deployments/{deploymentId} +``` + +**Request Body:** +```json +{ + "configurationId": "" +} +``` + +### Stop Deployment + +```bash +PATCH $AI_API_URL/v2/lm/deployments/{deploymentId} +``` + +**Request Body:** +```json +{ + "targetStatus": "STOPPED" +} +``` + +### Delete Deployment + +```bash +DELETE $AI_API_URL/v2/lm/deployments/{deploymentId} +``` + +--- + +## Executions + +### Create Execution + +```bash +POST $AI_API_URL/v2/lm/executions +``` + +**Request Body:** +```json +{ + "configurationId": "" +} +``` + +### Get Execution + +```bash +GET $AI_API_URL/v2/lm/executions/{executionId} +``` + +### Execution Statuses + +| Status | Description | +|--------|-------------| +| `UNKNOWN` | Initial state | +| `PENDING` | Queued | +| `RUNNING` | Executing | +| `COMPLETED` | Finished successfully | +| `DEAD` | Failed | +| `STOPPED` | Manually stopped | + +### List Executions + +```bash +GET $AI_API_URL/v2/lm/executions +``` + +### Stop Execution + +```bash +PATCH $AI_API_URL/v2/lm/executions/{executionId} +``` + +**Request Body:** +```json +{ + "targetStatus": "STOPPED" +} +``` + +### Delete Execution + +```bash +DELETE $AI_API_URL/v2/lm/executions/{executionId} +``` + +### Get Execution Logs + +```bash +GET $AI_API_URL/v2/lm/executions/{executionId}/logs +``` + +--- + +## Artifacts + +### Register Artifact + +```bash +POST $AI_API_URL/v2/lm/artifacts +``` + +**Request Body:** +```json +{ + "name": "training-data", + "kind": "dataset", + "url": "ai:///", + "scenarioId": "", + "description": "Training dataset" +} +``` + +**Artifact Kinds:** +- `dataset`: Training data +- `model`: Trained model +- `resultset`: Inference results +- `other`: Other artifacts + +### Get Artifact + +```bash +GET $AI_API_URL/v2/lm/artifacts/{artifactId} +``` + +### List Artifacts + +```bash +GET $AI_API_URL/v2/lm/artifacts +``` + +--- + +## Resource Groups + +### Create Resource Group + +```bash +POST $AI_API_URL/v2/admin/resourceGroups +``` + +**Request Body:** +```json +{ + "resourceGroupId": "my-resource-group" +} +``` + +### List Resource Groups + +```bash +GET $AI_API_URL/v2/admin/resourceGroups +``` + +### Delete 
Resource Group + +```bash +DELETE $AI_API_URL/v2/admin/resourceGroups/{resourceGroupId} +``` + +--- + +## Secrets + +### Create Generic Secret + +```bash +POST $AI_API_URL/v2/admin/secrets +``` + +**Request Body:** +```json +{ + "name": "my-secret", + "data": { + "key1": "value1", + "key2": "value2" + } +} +``` + +### Create Object Store Secret + +```bash +POST $AI_API_URL/v2/admin/objectStoreSecrets +``` + +**AWS S3:** +```json +{ + "name": "default", + "type": "S3", + "pathPrefix": "my-bucket/path", + "data": { + "AWS_ACCESS_KEY_ID": "", + "AWS_SECRET_ACCESS_KEY": "" + } +} +``` + +### List Secrets + +```bash +GET $AI_API_URL/v2/admin/secrets +``` + +### Delete Secret + +```bash +DELETE $AI_API_URL/v2/admin/secrets/{secretName} +``` + +--- + +## Meta API + +### Get Runtime Capabilities + +```bash +GET $AI_API_URL/lm/meta +``` + +**Response:** +```json +{ + "capabilities": { + "logs.executions": true, + "logs.deployments": true, + "multitenant": true, + "shareable": false, + "staticDeployments": true, + "userDeployments": true, + "userExecutions": true, + "timeToLiveDeployments": true, + "analytics": true, + "bulkUpdates": true, + "executionSchedules": true + }, + "limits": { + "deployments.maxRunningCount": 10, + "executions.maxRunningCount": 10, + "minimumFrequencyHour": 1, + "timeToLiveDeployments.minimum": "5m", + "timeToLiveDeployments.maximum": "90d" + }, + "extensions": { + "analytics": "1.0", + "metrics": "1.0", + "resourceGroups": "1.0", + "dataset": "1.0" + } +} +``` + +--- + +## Orchestration API + +### Chat Completion + +```bash +POST $ORCHESTRATION_URL/v2/completion +``` + +**Request Body:** +```json +{ + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 1000, + "temperature": 0.7 + } + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "{{?system}}"}, + {"role": "user", "content": "{{?user}}"} + ] + } + } + }, + "input_params": { + "system": "You are a helpful assistant.", + "user": "Hello!" 
+ } +} +``` + +### Streaming Completion + +```bash +POST $ORCHESTRATION_URL/v2/completion +``` + +**Request Body:** +```json +{ + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "stream": true + } + }, + "templating_module_config": { + "template": [{"role": "user", "content": "{{?prompt}}"}] + } + } + }, + "input_params": {"prompt": "Tell me a story"} +} +``` + +### Embeddings + +```bash +POST $ORCHESTRATION_URL/v2/embeddings +``` + +**Request Body:** +```json +{ + "config": { + "module_configurations": { + "embedding_module_config": { + "model_name": "text-embedding-3-large", + "model_version": "latest", + "model_params": { + "encoding_format": "float", + "dimensions": 1024 + } + } + } + }, + "input": ["Text to embed"] +} +``` + +--- + +## Grounding API + +### Create Pipeline + +```bash +POST $AI_API_URL/v2/lm/groundingPipelines +``` + +**Request Body (SharePoint):** +```json +{ + "name": "hr-docs-pipeline", + "configuration": { + "dataSource": { + "type": "sharepoint", + "configuration": { + "siteUrl": "[https://company.sharepoint.com/sites/HR",](https://company.sharepoint.com/sites/HR",) + "folderPath": "/Documents/Policies" + } + }, + "secretName": "sharepoint-secret" + } +} +``` + +### List Pipelines + +```bash +GET $AI_API_URL/v2/lm/groundingPipelines +``` + +### Delete Pipeline + +```bash +DELETE $AI_API_URL/v2/lm/groundingPipelines/{pipelineId} +``` + +--- + +## Error Responses + +### Standard Error Format + +```json +{ + "error": { + "code": "ERROR_CODE", + "message": "Human-readable error message", + "requestId": "req-12345", + "target": "deployments" + } +} +``` + +### Common Error Codes + +| Code | HTTP Status | Description | +|------|-------------|-------------| +| `UNAUTHORIZED` | 401 | Invalid or expired token | +| `FORBIDDEN` | 403 | Missing permissions or quota exceeded | +| `NOT_FOUND` | 404 | Resource not found | +| `CONFLICT` | 409 | Resource already exists | +| `QUOTA_EXCEEDED` | 429 | Rate limit or quota exceeded | +| `INTERNAL_ERROR` | 500 | Server error | + +--- + +## Bulk Operations + +### Bulk Update Deployments + +```bash +PATCH $AI_API_URL/v2/lm/deployments +``` + +**Request Body:** +```json +{ + "deployments": [ + {"id": "dep1", "targetStatus": "STOPPED"}, + {"id": "dep2", "targetStatus": "STOPPED"} + ] +} +``` + +**Limit:** 100 items per request + +### Bulk Delete Deployments + +```bash +DELETE $AI_API_URL/v2/lm/deployments +``` + +**Request Body:** +```json +{ + "deploymentIds": ["dep1", "dep2", "dep3"] +} +``` + +--- + +## Schedules + +### Create Schedule + +```bash +POST $AI_API_URL/v2/lm/executionSchedules +``` + +**Request Body:** +```json +{ + "configurationId": "", + "cron": "0 0 * * *", + "start": "2024-01-01T00:00:00Z", + "end": "2024-12-31T23:59:59Z" +} +``` + +**Cron Format:** `minute hour day month weekday` + +### List Schedules + +```bash +GET $AI_API_URL/v2/lm/executionSchedules +``` + +### Delete Schedule + +```bash +DELETE $AI_API_URL/v2/lm/executionSchedules/{scheduleId} +``` + +--- + +## Documentation Links + +- AI API Overview: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/ai-api-overview-716d4c3.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/ai-api-overview-716d4c3.md) +- Configurations: 
[https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-configurations-884ae34.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-configurations-884ae34.md) +- Deployments: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/deploy-models-dd16e8e.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/deploy-models-dd16e8e.md) diff --git a/references/generative-ai-hub.md b/references/generative-ai-hub.md new file mode 100644 index 0000000..d871d31 --- /dev/null +++ b/references/generative-ai-hub.md @@ -0,0 +1,464 @@ +# Generative AI Hub Reference + +Complete reference for SAP AI Core Generative AI Hub. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Overview + +The Generative AI Hub integrates large language models (LLMs) into SAP AI Core and SAP AI Launchpad, providing unified access to models from multiple providers. + +### Key Features + +- Access to LLMs from 6 providers via unified API +- Harmonized API for model switching without code changes +- Prompt experimentation in AI Launchpad UI +- Orchestration workflows with filtering, masking, grounding +- Token-based metering and billing + +### Prerequisites + +- SAP AI Core with **Extended** service plan +- Valid service key credentials +- Resource group created + +--- + +## Global Scenarios + +Two scenarios provide generative AI access: + +| Scenario ID | Description | Use Case | +|-------------|-------------|----------| +| `foundation-models` | Direct model access | Single model deployment | +| `orchestration` | Unified multi-model access | Pipeline workflows | + +--- + +## Model Providers + +### 1. Azure OpenAI (`azure-openai`) + +Access to OpenAI models via Azure's private instance. + +**Models:** +- GPT-4o, GPT-4o-mini +- GPT-4 Turbo, GPT-4 +- GPT-3.5 Turbo +- text-embedding-3-large, text-embedding-3-small + +**Capabilities:** Chat, embeddings, vision + +### 2. SAP-Hosted Open Source (`aicore-opensource`) + +SAP-hosted open source models via OpenAI-compatible API. + +**Models:** +- Llama 3.1 (8B, 70B, 405B) +- Llama 3.2 (1B, 3B, 11B-Vision, 90B-Vision) +- Mistral 7B, Mixtral 8x7B +- Falcon 40B + +**Capabilities:** Chat, embeddings, vision (select models) + +### 3. Google Vertex AI (`gcp-vertexai`) + +Access to Google's AI models. + +**Models:** +- Gemini 1.5 Pro, Gemini 1.5 Flash +- Gemini 1.0 Pro +- PaLM 2 (text-bison, chat-bison) +- text-embedding-004 + +**Capabilities:** Chat, embeddings, vision, code + +### 4. AWS Bedrock (`aws-bedrock`) + +Access to models via AWS Bedrock. + +**Models:** +- Anthropic Claude 3.5 Sonnet, Claude 3 Opus/Sonnet/Haiku +- Amazon Titan Text, Titan Embeddings +- Meta Llama 3 +- Cohere Command + +**Capabilities:** Chat, embeddings + +### 5. Mistral AI (`aicore-mistralai`) + +SAP-hosted Mistral models. + +**Models:** +- Mistral Large +- Mistral Medium +- Mistral Small +- Mistral 7B Instruct +- Codestral + +**Capabilities:** Chat, code + +### 6. IBM (`aicore-ibm`) + +SAP-hosted IBM models. 
+ +**Models:** +- Granite 13B Chat, Granite 13B Instruct +- Granite Code + +**Capabilities:** Chat, code + +--- + +## API: List Available Models + +```bash +curl -X GET "$AI_API_URL/v2/lm/scenarios/foundation-models/models" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" +``` + +### Response Structure + +```json +{ + "count": 50, + "resources": [ + { + "model": "gpt-4o", + "accessType": "Remote", + "displayName": "GPT-4o", + "provider": "azure-openai", + "allowedScenarios": ["foundation-models"], + "executableId": "azure-openai", + "description": "OpenAI's most advanced model", + "versions": [ + { + "name": "2024-05-13", + "isLatest": true, + "capabilities": ["text-generation", "chat", "vision"], + "contextLength": 128000, + "inputCost": 5.0, + "outputCost": 15.0, + "deprecationDate": null, + "retirementDate": null, + "isStreamingSupported": true + } + ] + } + ] +} +``` + +### Model Metadata Fields + +| Field | Description | +|-------|-------------| +| `model` | Model identifier for API calls | +| `accessType` | "Remote" (external) or "Local" (SAP-hosted) | +| `provider` | Provider identifier | +| `executableId` | Executable ID for deployments | +| `contextLength` | Maximum context window tokens | +| `inputCost` | Cost per 1K input tokens | +| `outputCost` | Cost per 1K output tokens | +| `deprecationDate` | Date version becomes deprecated | +| `retirementDate` | Date version is removed | +| `isStreamingSupported` | Streaming capability | + +--- + +## Deploying a Model + +### Step 1: Create Configuration + +```bash +curl -X POST "$AI_API_URL/v2/lm/configurations" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "gpt4o-deployment-config", + "executableId": "azure-openai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "latest"} + ] + }' +``` + +### Step 2: Create Deployment + +```bash +curl -X POST "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "" + }' +``` + +### Step 3: Check Status + +```bash +curl -X GET "$AI_API_URL/v2/lm/deployments/" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +Wait for status `RUNNING` and note the `deploymentUrl`. + +--- + +## Using the Harmonized API + +The harmonized API provides unified access without model-specific code. 
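+The sketch below is a minimal Python illustration of the same call, assuming the `requests` library and environment variables holding the deployment URL and OAuth token (the variable names are placeholders, not fixed by SAP AI Core). Because the request body keeps the same shape shown in the curl examples that follow, pointing the client at a different model deployment needs no code change beyond the `model` field.
+
+```python
+import os
+import requests
+
+# Assumed environment variables; take the values from your service key and
+# from the deploymentUrl of a RUNNING foundation-models deployment.
+DEPLOYMENT_URL = os.environ["DEPLOYMENT_URL"]
+AUTH_TOKEN = os.environ["AUTH_TOKEN"]
+RESOURCE_GROUP = os.environ.get("AI_RESOURCE_GROUP", "default")
+
+def chat(messages, model="gpt-4o", max_tokens=1000, temperature=0.7):
+    """Call the harmonized chat completions endpoint."""
+    response = requests.post(
+        f"{DEPLOYMENT_URL}/chat/completions",
+        headers={
+            "Authorization": f"Bearer {AUTH_TOKEN}",
+            "AI-Resource-Group": RESOURCE_GROUP,
+            "Content-Type": "application/json",
+        },
+        json={
+            "model": model,
+            "messages": messages,
+            "max_tokens": max_tokens,
+            "temperature": temperature,
+        },
+        timeout=60,
+    )
+    response.raise_for_status()
+    # Response parsing assumes the OpenAI-style choices/message structure.
+    return response.json()["choices"][0]["message"]["content"]
+
+if __name__ == "__main__":
+    print(chat([
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is SAP AI Core?"},
+    ]))
+```
+
+The equivalent curl calls, including streaming and embeddings, follow below.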
+ +### Chat Completion + +```bash +curl -X POST "$DEPLOYMENT_URL/chat/completions" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o", + "messages": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is SAP AI Core?"} + ], + "max_tokens": 1000, + "temperature": 0.7 + }' +``` + +### With Streaming + +```bash +curl -X POST "$DEPLOYMENT_URL/chat/completions" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "gpt-4o", + "messages": [{"role": "user", "content": "Tell me a story"}], + "stream": true + }' +``` + +### Embeddings + +```bash +curl -X POST "$DEPLOYMENT_URL/embeddings" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "model": "text-embedding-3-large", + "input": ["Document chunk to embed"], + "encoding_format": "float" + }' +``` + +--- + +## Orchestration Deployment + +For unified access to multiple models: + +### Create Orchestration Deployment + +```bash +# Get orchestration configuration ID +curl -X GET "$AI_API_URL/v2/lm/configurations?scenarioId=orchestration" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" + +# Create deployment +curl -X POST "$AI_API_URL/v2/lm/deployments" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "" + }' +``` + +### Use Orchestration API + +```bash +curl -X POST "$ORCHESTRATION_URL/v2/completion" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + }, + "templating_module_config": { + "template": [ + {"role": "user", "content": "{{?prompt}}"} + ] + } + } + }, + "input_params": { + "prompt": "What is machine learning?" 
+ } + }' +``` + +--- + +## Model Version Management + +### Auto-Upgrade Strategy + +Set `modelVersion` to `"latest"` for automatic upgrades: + +```json +{ + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +### Pinned Version Strategy + +Specify exact version for stability: + +```json +{ + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "2024-05-13"} + ] +} +``` + +### Manual Version Upgrade + +Patch deployment with new configuration: + +```bash +curl -X PATCH "$AI_API_URL/v2/lm/deployments/" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "" + }' +``` + +--- + +## SAP AI Launchpad UI + +### Prompt Experimentation + +Access: **Workspaces** → **Generative AI Hub** → **Prompt Editor** + +Features: +- Interactive prompt testing +- Model selection and parameter tuning +- Variable placeholders +- Image inputs (select models) +- Streaming responses +- Save prompts (manager roles) + +### Required Roles + +| Role | Capabilities | +|------|--------------| +| `genai_manager` | Full access, save prompts | +| `genai_experimenter` | Test only, no save | +| `prompt_manager` | Manage saved prompts | +| `prompt_experimenter` | Use saved prompts | +| `prompt_media_executor` | Upload images | + +### Prompt Types + +- **Question Answering**: Q&A interactions +- **Summarization**: Extract key points +- **Inferencing**: Sentiment, entity extraction +- **Transformations**: Translation, format conversion +- **Expansions**: Content generation + +--- + +## Model Library + +View model specifications and benchmarks in AI Launchpad: + +**Access:** Generative AI Hub → Model Library + +Information available: +- Model capabilities +- Context window sizes +- Performance benchmarks (win rates, arena scores) +- Cost per token +- Deprecation schedules + +--- + +## Rate Limits and Quotas + +Refer to **SAP Note 3437766** for: +- Token conversion rates per model +- Rate limits (requests/minute, tokens/minute) +- Regional availability +- Deprecation dates + +### Quota Increase Request + +Submit support ticket: +- Component: `CA-ML-AIC` +- Include: tenant ID, current limits, requested limits, justification + +--- + +## Best Practices + +### Model Selection + +| Use Case | Recommended Model | +|----------|-------------------| +| General chat | GPT-4o, Claude 3.5 Sonnet | +| Cost-sensitive | GPT-4o-mini, Mistral Small | +| Long context | GPT-4o (128K), Claude 3 (200K) | +| Embeddings | text-embedding-3-large | +| Code | Codestral, GPT-4o | +| Vision | GPT-4o, Gemini 1.5 Pro | + +### Cost Optimization + +1. Use smaller models for simple tasks +2. Implement caching for repeated queries +3. Set appropriate `max_tokens` limits +4. Use streaming for better UX without extra cost +5. Monitor token usage via AI Launchpad analytics + +### Reliability + +1. Implement fallback configurations +2. Pin model versions in production +3. Monitor deprecation dates +4. 
Test before upgrading versions + +--- + +## Documentation Links + +- Generative AI Hub: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/generative-ai-hub-7db524e.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/generative-ai-hub-7db524e.md) +- Supported Models: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/supported-models-509e588.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/supported-models-509e588.md) +- SAP Note 3437766: Token rates, limits, deprecation +- SAP Discovery Center: [https://discovery-center.cloud.sap/serviceCatalog/sap-ai-core](https://discovery-center.cloud.sap/serviceCatalog/sap-ai-core) diff --git a/references/grounding-rag.md b/references/grounding-rag.md new file mode 100644 index 0000000..ed40745 --- /dev/null +++ b/references/grounding-rag.md @@ -0,0 +1,549 @@ +# Grounding and RAG Reference + +Complete reference for SAP AI Core grounding capabilities (Retrieval-Augmented Generation). + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Overview + +Grounding integrates external, contextually relevant data into AI processes, enhancing LLM capabilities beyond general training material using vector databases. + +### Key Benefits + +- Provide domain-specific context +- Access real-time data +- Reduce hallucinations +- Enable enterprise knowledge retrieval + +--- + +## Architecture + +### Indexing Pipeline + +``` +Documents → Preprocessing → Chunking → Embedding → Vector Database +``` + +1. Upload documents to supported repository +2. Pipeline preprocesses and chunks documents +3. Embedding model generates vectors +4. Vectors stored in managed vector database + +### Retrieval Pipeline + +``` +User Query → Embedding → Vector Search → Retrieved Chunks → LLM Context +``` + +1. User query converted to embedding +2. Vector similarity search in database +3. Relevant chunks retrieved +4. Chunks injected into LLM prompt + +--- + +## Supported Data Sources + +| Source | Type | Configuration | +|--------|------|---------------| +| **Microsoft SharePoint** | Cloud | Site URL, folder path | +| **AWS S3** | Object storage | Bucket, prefix | +| **SFTP** | File server | Host, path | +| **SAP Build Work Zone** | SAP | Site, content | +| **SAP Document Management** | SAP | Repository, folder | + +--- + +## Document Specifications + +### Supported Formats + +| Format | Content Types | +|--------|---------------| +| PDF | Text, tables, images | +| HTML | Text, structure | +| TXT | Plain text | +| DOCX | Text, tables | +| PPT/PPTX | Text, tables, images | +| JPEG/JPG | Images with OCR | +| PNG | Images with OCR | +| TIFF | Images with OCR | + +### Limits + +- **Maximum documents per pipeline:** 2,000 +- **Refresh rate:** Daily automatic refresh +- **File size:** Varies by format + +--- + +## Data Management APIs + +Three primary APIs for document processing and retrieval: + +### Pipelines API + +Creates data management pipelines that fetch documents from supported data sources. 
+ +| Feature | Description | +|---------|-------------| +| **Purpose** | Automated document fetching, preprocessing, chunking, embedding | +| **Best for** | Documents in external repositories | +| **Output** | Vectors stored in HANA Vector Store | +| **Note** | No need to call Vector API after using Pipelines API | + +### Vector API + +REST APIs for direct document ingestion and retrieval using vector embeddings. + +| Feature | Description | +|---------|-------------| +| **Purpose** | Manual document upload and embedding | +| **Best for** | Directly uploaded/managed documents | +| **Process** | Preprocesses chunks and stores semantic embeddings | + +### Retrieval API + +Performs similarity searches on the vector database. + +| Feature | Description | +|---------|-------------| +| **Purpose** | Information retrieval using semantic search | +| **Works with** | Repositories (Pipelines API) or collections (Vector API) | +| **Output** | Ranked relevant document chunks | + +### API Comparison + +| Use Case | Recommended API | +|----------|-----------------| +| Documents in SharePoint/S3/SFTP | Pipelines API | +| Direct file uploads | Vector API | +| Custom chunking needed | Vector API | +| Full automation | Pipelines API | + +--- + +## Implementation Options + +### Option 1: Pipeline API + +Automated document processing pipeline. + +#### Create SharePoint Pipeline + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingPipelines" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "hr-policies-pipeline", + "configuration": { + "dataSource": { + "type": "sharepoint", + "configuration": { + "siteUrl": "[https://company.sharepoint.com/sites/HR",](https://company.sharepoint.com/sites/HR",) + "folderPath": "/Documents/Policies" + } + }, + "secretName": "sharepoint-credentials" + } + }' +``` + +#### Create S3 Pipeline + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingPipelines" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "knowledge-base-pipeline", + "configuration": { + "dataSource": { + "type": "s3", + "configuration": { + "bucket": "my-knowledge-base", + "prefix": "documents/" + } + }, + "secretName": "s3-credentials" + } + }' +``` + +#### Create SFTP Pipeline + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingPipelines" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "docs-sftp-pipeline", + "configuration": { + "dataSource": { + "type": "sftp", + "configuration": { + "host": "sftp.company.com", + "port": 22, + "path": "/documents" + } + }, + "secretName": "sftp-credentials" + } + }' +``` + +### Option 2: Vector API + +Direct vector upload for custom chunking/embedding. + +#### Create Collection + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingCollections" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "custom-knowledge-base", + "embeddingConfig": { + "model": "text-embedding-3-small", + "dimensions": 1536 + } + }' +``` + +**Note:** Use `text-embedding-3-small` for 1536 dimensions or `text-embedding-3-large` with 3072 dimensions. Ensure model and dimensions align with OpenAI/SAP AI Core specifications. 
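+When you need custom chunking (one of the main reasons to choose the Vector API), content has to be split client-side before upload. The sketch below is a minimal, illustrative chunker in plain Python; the ID scheme and metadata keys are placeholders, and the character-based sizes are rough equivalents of the 200-500 token guidance in the Best Practices section. Its output is the `documents` array consumed by the Add Documents call in the next step.
+
+```python
+def chunk_text(text, source, chunk_size=1500, overlap=200):
+    """Split raw text into overlapping chunks for upload via the Vector API.
+
+    chunk_size and overlap are character counts (roughly 300-400 tokens per
+    chunk); tune them to your documents.
+    """
+    chunks = []
+    start, index = 0, 0
+    while start < len(text):
+        piece = text[start:start + chunk_size]
+        chunks.append({
+            "id": f"{source}-chunk-{index:04d}",
+            "content": piece,
+            "metadata": {"source": source, "chunk_index": index},
+        })
+        index += 1
+        start += chunk_size - overlap
+    return chunks
+
+# Build the request body for POST .../groundingCollections/{collectionId}/documents
+with open("policy-manual.txt", encoding="utf-8") as f:
+    payload = {"documents": chunk_text(f.read(), source="policy-manual.txt")}
+```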
+ +#### Add Documents + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingCollections/{collectionId}/documents" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "documents": [ + { + "id": "doc-001", + "content": "Document chunk text...", + "metadata": { + "source": "policy-manual.pdf", + "page": 5, + "department": "HR" + } + }, + { + "id": "doc-002", + "content": "Another chunk...", + "metadata": { + "source": "policy-manual.pdf", + "page": 6, + "department": "HR" + } + } + ] + }' +``` + +#### Add Pre-computed Vectors + +```bash +curl -X POST "$AI_API_URL/v2/lm/groundingCollections/{collectionId}/documents" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "documents": [ + { + "id": "doc-001", + "content": "Document chunk text...", + "vector": [0.123, -0.456, 0.789, ...], + "metadata": {"source": "manual.pdf"} + } + ] + }' +``` + +--- + +## Creating Secrets + +### SharePoint Secret + +```bash +curl -X POST "$AI_API_URL/v2/admin/secrets" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "sharepoint-credentials", + "data": { + "clientId": "", + "clientSecret": "", + "tenantId": "" + } + }' +``` + +### S3 Secret + +```bash +curl -X POST "$AI_API_URL/v2/admin/secrets" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "s3-credentials", + "data": { + "AWS_ACCESS_KEY_ID": "", + "AWS_SECRET_ACCESS_KEY": "", + "AWS_REGION": "us-east-1" + } + }' +``` + +### SFTP Secret + +```bash +curl -X POST "$AI_API_URL/v2/admin/secrets" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "sftp-credentials", + "data": { + "username": "", + "password": "" + } + }' +``` + +--- + +## Using Grounding in Orchestration + +### Basic Grounding Configuration + +```json +{ + "config": { + "module_configurations": { + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "search_configuration": { + "max_chunk_count": 5 + } + } + ] + } + }, + "templating_module_config": { + "template": [ + { + "role": "system", + "content": "Answer based on the following context:\n\n{{$context}}\n\nIf the answer is not in the context, say you don't know." + }, + { + "role": "user", + "content": "{{?user_query}}" + } + ] + }, + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + } + } + }, + "input_params": { + "user_query": "What is the vacation policy?" 
+ } +} +``` + +### Grounding with Metadata Filters + +```json +{ + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "data_repositories": [""], + "document_metadata": [ + { + "key": "department", + "value": "HR" + }, + { + "key": "document_type", + "value": "policy" + } + ], + "search_configuration": { + "max_chunk_count": 10, + "max_document_count": 5, + "similarity_threshold": 0.7 + } + } + ] + } + } +} +``` + +### Multiple Pipeline Sources + +```json +{ + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "search_configuration": {"max_chunk_count": 3} + }, + { + "id": "", + "search_configuration": {"max_chunk_count": 3} + }, + { + "id": "", + "search_configuration": {"max_chunk_count": 3} + } + ] + } + } +} +``` + +--- + +## Search Configuration Options + +| Parameter | Type | Description | Default | +|-----------|------|-------------|---------| +| `max_chunk_count` | int | Maximum chunks to retrieve | 5 | +| `max_document_count` | int | Maximum source documents | No limit | +| `similarity_threshold` | float | Minimum similarity score (0-1) | 0.0 | + +--- + +## Managing Pipelines + +### List Pipelines + +```bash +curl -X GET "$AI_API_URL/v2/lm/groundingPipelines" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +### Get Pipeline Status + +```bash +curl -X GET "$AI_API_URL/v2/lm/groundingPipelines/{pipelineId}" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +**Pipeline Statuses:** +- `PENDING`: Initializing +- `INDEXING`: Processing documents +- `READY`: Available for queries +- `FAILED`: Error occurred + +### Delete Pipeline + +```bash +curl -X DELETE "$AI_API_URL/v2/lm/groundingPipelines/{pipelineId}" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +--- + +## Best Practices + +### Document Preparation + +1. **Clean content**: Remove irrelevant headers, footers, boilerplate +2. **Consistent formatting**: Use clear headings and structure +3. **Metadata tagging**: Add useful metadata for filtering +4. **Regular updates**: Keep documents current + +### Chunking Strategy + +1. **Semantic chunks**: Break at logical boundaries (sections, paragraphs) +2. **Appropriate size**: 200-500 tokens per chunk typically works well +3. **Overlap**: Consider 10-20% overlap between chunks +4. **Context preservation**: Include section headers in chunks + +### Query Optimization + +1. **Clear questions**: Rephrase vague queries +2. **Keyword inclusion**: Include relevant technical terms +3. **Context addition**: Add domain context to queries + +### Retrieval Tuning + +| Use Case | max_chunk_count | similarity_threshold | +|----------|-----------------|---------------------| +| Precise answers | 3-5 | 0.8 | +| Comprehensive | 10-15 | 0.6 | +| Exploratory | 20+ | 0.5 | + +--- + +## Troubleshooting + +### No Results Returned + +1. Check pipeline status is `READY` +2. Verify documents were indexed successfully +3. Lower similarity threshold +4. Increase max_chunk_count +5. Check metadata filters match documents + +### Irrelevant Results + +1. Increase similarity threshold +2. Add metadata filters +3. 
Review document chunking +4. Check embedding model matches query style + +### Performance Issues + +1. Reduce max_chunk_count +2. Add specific metadata filters +3. Use multiple smaller pipelines +4. Consider pagination for large result sets + +--- + +## Documentation Links + +- Grounding Overview: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/grounding-035c455.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/grounding-035c455.md) +- Pipeline API: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-document-grounding-pipeline-using-the-pipelines-api-0a13e1c.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-document-grounding-pipeline-using-the-pipelines-api-0a13e1c.md) +- SharePoint Pipeline: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-pipeline-with-microsoft-sharepoint-4b8d58c.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-pipeline-with-microsoft-sharepoint-4b8d58c.md) +- S3 Pipeline: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-pipeline-with-aws-s3-7f97adf.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-pipeline-with-aws-s3-7f97adf.md) diff --git a/references/ml-operations.md b/references/ml-operations.md new file mode 100644 index 0000000..b000aa1 --- /dev/null +++ b/references/ml-operations.md @@ -0,0 +1,505 @@ +# ML Operations Reference + +Complete reference for SAP AI Core ML training and operations. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Overview + +SAP AI Core uses Argo Workflows for training pipelines, supporting batch jobs for model preprocessing, training, and inference. + +### Key Components + +| Component | Description | +|-----------|-------------| +| **Scenarios** | AI use case implementations | +| **Executables** | Reusable workflow templates | +| **Configurations** | Parameters and artifact bindings | +| **Executions** | Running instances of workflows | +| **Artifacts** | Datasets, models, and results | + +--- + +## Workflow Engine + +### Argo Workflows + +SAP AI Core uses Argo Workflows (container-native workflow engine) supporting: + +- Direct Acyclic Graph (DAG) structures +- Parallel step execution +- Container-based steps +- Data ingestion and preprocessing +- Model training and batch inference + +**Limitation:** Not optimized for time-critical tasks due to scheduling overhead. + +--- + +## Prerequisites + +### 1. Object Store Secret (Required) + +Create a secret named `default` for training output artifacts: + +```bash +curl -X POST "$AI_API_URL/v2/admin/objectStoreSecrets" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "default", + "type": "S3", + "pathPrefix": "my-bucket/training-output", + "data": { + "AWS_ACCESS_KEY_ID": "", + "AWS_SECRET_ACCESS_KEY": "" + } + }' +``` + +**Note:** Without a `default` secret, training pipelines will fail. + +### 2. 
Docker Registry Secret + +For custom training images: + +```bash +curl -X POST "$AI_API_URL/v2/admin/dockerRegistrySecrets" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "docker-registry", + "data": { + ".dockerconfigjson": "" + } + }' +``` + +### 3. Git Repository + +Sync workflow templates from Git: + +```bash +curl -X POST "$AI_API_URL/v2/admin/repositories" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "training-repo", + "url": "[https://github.com/org/training-workflows",](https://github.com/org/training-workflows",) + "username": "", + "password": "" + }' +``` + +--- + +## Workflow Template + +### Basic Structure + +```yaml +apiVersion: ai.sap.com/v1alpha1 +kind: WorkflowTemplate +metadata: + name: text-classifier-training + annotations: + scenarios.ai.sap.com/description: "Train text classification model" + scenarios.ai.sap.com/name: "text-classifier" + executables.ai.sap.com/description: "Training executable" + executables.ai.sap.com/name: "text-classifier-train" + artifacts.ai.sap.com/training-data.kind: "dataset" + artifacts.ai.sap.com/trained-model.kind: "model" + labels: + scenarios.ai.sap.com/id: "text-classifier" + executables.ai.sap.com/id: "text-classifier-train" + ai.sap.com/version: "1.0.0" +spec: + imagePullSecrets: + - name: docker-registry + entrypoint: main + arguments: + parameters: + - name: learning_rate + default: "0.001" + - name: epochs + default: "10" + artifacts: + - name: training-data + path: /data/input + archive: + none: {} + templates: + - name: main + steps: + - - name: preprocess + template: preprocess-data + - - name: train + template: train-model + - - name: evaluate + template: evaluate-model + + - name: preprocess-data + container: + image: my-registry/preprocessing:latest + command: ["python", "preprocess.py"] + args: ["--input", "/data/input", "--output", "/data/processed"] + + - name: train-model + container: + image: my-registry/training:latest + command: ["python", "train.py"] + args: + - "--data=/data/processed" + - "--lr={{workflow.parameters.learning_rate}}" + - "--epochs={{workflow.parameters.epochs}}" + - "--output=/data/model" + outputs: + artifacts: + - name: trained-model + path: /data/model + globalName: trained-model + archive: + none: {} + + - name: evaluate-model + container: + image: my-registry/evaluation:latest + command: ["python", "evaluate.py"] + args: ["--model", "/data/model"] +``` + +### Annotations Reference + +| Annotation | Description | +|------------|-------------| +| `scenarios.ai.sap.com/name` | Human-readable scenario name | +| `scenarios.ai.sap.com/id` | Scenario identifier | +| `executables.ai.sap.com/name` | Executable name | +| `executables.ai.sap.com/id` | Executable identifier | +| `artifacts.ai.sap.com/.kind` | Artifact type (dataset, model, etc.) 
| + +--- + +## Artifacts + +### Types + +| Kind | Description | Use Case | +|------|-------------|----------| +| `dataset` | Training/validation data | Input for training | +| `model` | Trained model | Output from training | +| `resultset` | Inference results | Output from batch inference | +| `other` | Miscellaneous | Logs, metrics, configs | + +### Register Input Artifact + +```bash +curl -X POST "$AI_API_URL/v2/lm/artifacts" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "training-dataset-v1", + "kind": "dataset", + "url": "ai://default/datasets/training-v1", + "scenarioId": "text-classifier", + "description": "Training dataset version 1" + }' +``` + +### URL Format + +- `ai://default/` - Uses default object store secret +- `ai:///` - Uses named object store secret + +### List Artifacts + +```bash +curl -X GET "$AI_API_URL/v2/lm/artifacts?scenarioId=text-classifier" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +--- + +## Configurations + +### Create Training Configuration + +```bash +curl -X POST "$AI_API_URL/v2/lm/configurations" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "text-classifier-config-v1", + "executableId": "text-classifier-train", + "scenarioId": "text-classifier", + "parameterBindings": [ + {"key": "learning_rate", "value": "0.001"}, + {"key": "epochs", "value": "20"}, + {"key": "batch_size", "value": "32"} + ], + "inputArtifactBindings": [ + {"key": "training-data", "artifactId": ""} + ] + }' +``` + +--- + +## Executions + +### Create Execution + +```bash +curl -X POST "$AI_API_URL/v2/lm/executions" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "" + }' +``` + +### Execution Statuses + +| Status | Description | +|--------|-------------| +| `UNKNOWN` | Initial state | +| `PENDING` | Queued for execution | +| `RUNNING` | Currently executing | +| `COMPLETED` | Finished successfully | +| `DEAD` | Failed | +| `STOPPED` | Manually stopped | + +### Check Execution Status + +```bash +curl -X GET "$AI_API_URL/v2/lm/executions/" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +### Get Execution Logs + +```bash +curl -X GET "$AI_API_URL/v2/lm/executions//logs" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +### Stop Execution + +```bash +curl -X PATCH "$AI_API_URL/v2/lm/executions/" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{"targetStatus": "STOPPED"}' +``` + +--- + +## Metrics + +### Write Metrics from Training + +In your training code: + +```python +import requests +import os + +def log_metrics(metrics: dict, step: int): + """Log metrics to SAP AI Core.""" + api_url = os.environ.get("AICORE_API_URL") + token = os.environ.get("AICORE_AUTH_TOKEN") + execution_id = os.environ.get("AICORE_EXECUTION_ID") + + response = requests.post( + f"{api_url}/v2/lm/executions/{execution_id}/metrics", + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json" + }, + json={ + "metrics": [ + {"name": name, "value": value, "step": step} + for name, value in metrics.items() + ] + } + ) + +# Usage in training loop +for epoch in range(epochs): + train_loss = train_epoch() + val_loss = 
validate() + log_metrics({ + "train_loss": train_loss, + "val_loss": val_loss, + "accuracy": accuracy + }, step=epoch) +``` + +### Read Metrics + +```bash +curl -X GET "$AI_API_URL/v2/lm/executions//metrics" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +--- + +## Training Schedules + +### Create Schedule + +```bash +curl -X POST "$AI_API_URL/v2/lm/executionSchedules" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" \ + -H "Content-Type: application/json" \ + -d '{ + "configurationId": "", + "cron": "0 0 * * 0", + "start": "2024-01-01T00:00:00Z", + "end": "2024-12-31T23:59:59Z" + }' +``` + +### Cron Expression Format + +SAP AI Core uses 5-field cron expressions with **3-letter day-of-week names**: + +``` +┌───────── minute (0-59) +│ ┌─────── hour (0-23) +│ │ ┌───── day of month (1-31) +│ │ │ ┌─── month (1-12) +│ │ │ │ ┌─ day of week (mon, tue, wed, thu, fri, sat, sun) +│ │ │ │ │ +* * * * * +``` + +Examples: +- `0 0 * * *` - Daily at midnight +- `0 0 * * sun` - Weekly on Sunday +- `0 0 * * fri` - Weekly on Friday +- `0 0 1 * *` - Monthly on 1st +- `0 */6 * * *` - Every 6 hours + +**Note:** Using `* * * * *` treats the schedule as "Run Always" (continuous check), which differs from standard cron behavior. Minimum interval for pipeline schedules is 1 hour. + +### List Schedules + +```bash +curl -X GET "$AI_API_URL/v2/lm/executionSchedules" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +### Delete Schedule + +```bash +curl -X DELETE "$AI_API_URL/v2/lm/executionSchedules/" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" +``` + +--- + +## SAP AI Launchpad + +### ML Operations App + +Access: **Workspaces** → **ML Operations** + +Features: +- View scenarios and executables +- Create/manage configurations +- Run/monitor executions +- View training metrics +- Manage artifacts +- Create schedules + +### Required Roles + +| Role | Capabilities | +|------|--------------| +| `operations_manager` | Access ML Operations app | +| `mloperations_viewer` | View-only access | +| `mloperations_editor` | Full edit access | + +### Comparing Runs + +1. Navigate to ML Operations → Executions +2. Select multiple executions +3. Click "Compare" +4. View side-by-side metrics and parameters + +--- + +## Best Practices + +### Workflow Design + +1. **Modular steps**: Break workflow into reusable templates +2. **Parameterization**: Use parameters for hyperparameters +3. **Artifact management**: Define clear input/output artifacts +4. **Error handling**: Include retry logic for flaky operations + +### Resource Management + +1. **Appropriate sizing**: Match container resources to workload +2. **GPU allocation**: Request GPUs only when needed +3. **Storage**: Use object store for large datasets +4. **Cleanup**: Delete old executions and artifacts + +### Monitoring + +1. **Log metrics**: Track loss, accuracy, etc. during training +2. **Check logs**: Review execution logs for errors +3. **Compare runs**: Analyze different hyperparameter settings +4. **Set alerts**: Monitor for failed executions + +--- + +## Troubleshooting + +### Execution Failed + +1. Check execution logs: `GET /v2/lm/executions/{id}/logs` +2. Verify object store secret exists and is named `default` +3. Check Docker image is accessible +4. Verify artifact paths are correct +5. Check resource quota not exceeded + +### Artifacts Not Found + +1. Verify artifact URL format: `ai://default/` +2. 
Check object store secret permissions +3. Verify file exists in object store +4. Check artifact registered in correct scenario + +### Schedule Not Running + +1. Verify schedule is active (not paused) +2. Check cron expression is valid +3. Verify start/end dates bracket current time +4. Check configuration still exists + +--- + +## Documentation Links + +- Training Overview: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/train-your-model-a9ceb06.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/train-your-model-a9ceb06.md) +- ML Operations (Launchpad): [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/ml-operations-df78271.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-launchpad/ml-operations-df78271.md) +- Schedules: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-training-schedule-bd409a9.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/create-a-training-schedule-bd409a9.md) +- Metrics: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/view-the-metric-resource-for-an-execution-d85dd44.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/view-the-metric-resource-for-an-execution-d85dd44.md) diff --git a/references/model-providers.md b/references/model-providers.md new file mode 100644 index 0000000..2541793 --- /dev/null +++ b/references/model-providers.md @@ -0,0 +1,389 @@ +# Model Providers Reference + +Complete reference for SAP AI Core model providers and available models. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +**Latest Models:** SAP Note 3437766 + +--- + +## Overview + +SAP AI Core provides access to models from six providers via the Generative AI Hub. All models are accessed through a unified API, allowing easy switching between providers. + +--- + +## Provider Summary + +| Provider | Executable ID | Access Type | Model Categories | +|----------|---------------|-------------|------------------| +| Azure OpenAI | `azure-openai` | Remote | Chat, Embeddings, Vision | +| SAP Open Source | `aicore-opensource` | Local | Chat, Embeddings, Vision | +| Google Vertex AI | `gcp-vertexai` | Remote | Chat, Embeddings, Code | +| AWS Bedrock | `aws-bedrock` | Remote | Chat, Embeddings | +| Mistral AI | `aicore-mistralai` | Local | Chat, Code | +| IBM | `aicore-ibm` | Local | Chat, Code | + +--- + +## 1. 
Azure OpenAI + +**Executable ID:** `azure-openai` +**Access Type:** Remote (Azure-hosted) + +### Chat Models + +| Model | Context | Capabilities | Use Case | +|-------|---------|--------------|----------| +| gpt-4o | 128K | Chat, Vision | Advanced reasoning, multimodal | +| gpt-4o-mini | 128K | Chat, Vision | Cost-efficient, fast | +| gpt-4-turbo | 128K | Chat, Vision | Previous flagship | +| gpt-4 | 8K/32K | Chat | Reasoning, analysis | +| gpt-35-turbo | 4K/16K | Chat | Fast, economical | + +### Embedding Models + +| Model | Dimensions | Use Case | +|-------|------------|----------| +| text-embedding-3-large | 3072 | High accuracy embeddings | +| text-embedding-3-small | 1536 | Cost-efficient embeddings | +| text-embedding-ada-002 | 1536 | Legacy embeddings | + +### Configuration Example + +```json +{ + "name": "azure-gpt4o-config", + "executableId": "azure-openai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "2024-05-13"} + ] +} +``` + +--- + +## 2. SAP-Hosted Open Source + +**Executable ID:** `aicore-opensource` +**Access Type:** Local (SAP-hosted) + +### Llama Models + +| Model | Parameters | Context | Capabilities | +|-------|------------|---------|--------------| +| llama-3.1-405b | 405B | 128K | Advanced reasoning | +| llama-3.1-70b | 70B | 128K | Strong reasoning | +| llama-3.1-8b | 8B | 128K | Fast, efficient | +| llama-3.2-90b-vision | 90B | 128K | Vision + text | +| llama-3.2-11b-vision | 11B | 128K | Vision + text | +| llama-3.2-3b | 3B | 128K | Lightweight | +| llama-3.2-1b | 1B | 128K | Edge deployment | + +### Mistral Models (Open Source) + +| Model | Parameters | Context | +|-------|------------|---------| +| mistral-7b-instruct | 7B | 32K | +| mixtral-8x7b | 46.7B | 32K | + +### Falcon Models + +| Model | Parameters | Context | +|-------|------------|---------| +| falcon-40b | 40B | 2K | + +### Configuration Example + +```json +{ + "name": "llama-config", + "executableId": "aicore-opensource", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "meta--llama-3.1-70b-instruct"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +--- + +## 3. Google Vertex AI + +**Executable ID:** `gcp-vertexai` +**Access Type:** Remote (Google Cloud) + +### Gemini Models + +| Model | Context | Capabilities | +|-------|---------|--------------| +| gemini-2.5-pro | 2M | Chat, Vision, Code, Long context | +| gemini-2.5-flash | 1M | Fast, multimodal | +| gemini-2.5-flash-lite | 1M | Fast, lower-cost multimodal | +| gemini-2.0-flash | 1M | Flash family, multimodal | +| gemini-2.0-flash-lite | 1M | Flash family, lower-cost | + +### PaLM 2 Models + +| Model | Use Case | +|-------|----------| +| text-bison | Text generation | +| chat-bison | Conversational | +| code-bison | Code generation | + +### Embedding Models + +| Model | Dimensions | +|-------|------------| +| text-embedding-004 | 768 | +| textembedding-gecko | 768 | + +### Configuration Example + +```json +{ + "name": "gemini-config", + "executableId": "gcp-vertexai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "gemini-1.5-pro"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +--- + +## 4. 
AWS Bedrock + +**Executable ID:** `aws-bedrock` +**Access Type:** Remote (AWS) + +### Anthropic Claude Models + +| Model | Context | Capabilities | +|-------|---------|--------------| +| claude-sonnet-4-5 | 200K | Latest, advanced reasoning | +| claude-4-opus | 200K | Highest capability | +| claude-4-sonnet | 200K | Balanced, high performance | +| claude-opus-4-1 | 200K | Extended Opus capabilities | +| claude-3-7-sonnet | 200K | Improved Sonnet 3.5 | +| claude-3-5-sonnet | 200K | Advanced reasoning | +| claude-3-opus | 200K | High capability | +| claude-3-sonnet | 200K | Balanced performance | +| claude-3-haiku | 200K | Fast, efficient | + +### Amazon Titan Models + +| Model | Use Case | +|-------|----------| +| titan-text-express | General text | +| titan-text-lite | Lightweight | +| titan-embed-text | Embeddings | + +### Meta Llama 3 (Bedrock) + +| Model | Parameters | +|-------|------------| +| llama-3-70b | 70B | +| llama-3-8b | 8B | + +### Configuration Example + +```json +{ + "name": "claude-config", + "executableId": "aws-bedrock", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "anthropic--claude-3-5-sonnet"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +--- + +## 5. Mistral AI + +**Executable ID:** `aicore-mistralai` +**Access Type:** Local (SAP-hosted) + +### Models + +| Model | Parameters | Context | Use Case | +|-------|------------|---------|----------| +| mistral-large | - | 32K | Advanced reasoning | +| mistral-medium | - | 32K | Balanced | +| mistral-small | - | 32K | Cost-efficient | +| codestral | - | 32K | Code generation | + +### Configuration Example + +```json +{ + "name": "mistral-config", + "executableId": "aicore-mistralai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "mistralai--mistral-large"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +--- + +## 6. 
IBM + +**Executable ID:** `aicore-ibm` +**Access Type:** Local (SAP-hosted) + +### Granite Models + +| Model | Parameters | Use Case | +|-------|------------|----------| +| granite-13b-chat | 13B | Conversational | +| granite-13b-instruct | 13B | Task completion | +| granite-code | - | Code generation | + +### Configuration Example + +```json +{ + "name": "granite-config", + "executableId": "aicore-ibm", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "ibm--granite-13b-chat"}, + {"key": "modelVersion", "value": "latest"} + ] +} +``` + +--- + +## Model Selection Guide + +### By Use Case + +| Use Case | Recommended Models | +|----------|-------------------| +| General chat | gpt-4o, claude-3-5-sonnet, gemini-1.5-pro | +| Code generation | gpt-4o, codestral, claude-3-5-sonnet | +| Long documents | gemini-1.5-pro (2M), claude-3 (200K), gpt-4o (128K) | +| Vision/images | gpt-4o, gemini-1.5-pro, llama-3.2-vision | +| Embeddings | text-embedding-3-large, text-embedding-004 | +| Cost-sensitive | gpt-4o-mini, mistral-small, llama-3.1-8b | +| High throughput | gpt-35-turbo, claude-3-haiku, mistral-small | + +### By Budget + +| Budget | Tier | Models | +|--------|------|--------| +| Low | Economy | gpt-4o-mini, claude-3-haiku, mistral-small | +| Medium | Standard | gpt-4o, claude-3-sonnet, gemini-1.5-flash | +| High | Premium | claude-3-opus, gpt-4-turbo, gemini-1.5-pro | + +### By Capability + +| Capability | Best Models | +|------------|-------------| +| Reasoning | claude-3-opus, gpt-4o, llama-3.1-405b | +| Speed | claude-3-haiku, gpt-35-turbo, mistral-small | +| Context length | gemini-1.5-pro (2M), claude-3 (200K) | +| Multimodal | gpt-4o, gemini-1.5-pro, llama-3.2-vision | +| Code | codestral, gpt-4o, claude-3-5-sonnet | + +--- + +## Model Version Management + +### Version Strategies + +| Strategy | Configuration | Use Case | +|----------|---------------|----------| +| Latest | `"modelVersion": "latest"` | Development, auto-upgrade | +| Pinned | `"modelVersion": "2024-05-13"` | Production stability | + +### Checking Available Versions + +```bash +curl -X GET "$AI_API_URL/v2/lm/scenarios/foundation-models/models" \ + -H "Authorization: Bearer $AUTH_TOKEN" \ + -H "AI-Resource-Group: default" | \ + jq '.resources[] | select(.model == "gpt-4o") | .versions' +``` + +### Handling Deprecation + +1. Monitor `deprecationDate` in model metadata +2. Plan migration before `retirementDate` +3. Test new version in staging +4. Update configuration with new version +5. Patch existing deployments + +--- + +## Pricing Considerations + +Pricing varies by: +- Model complexity (larger = more expensive) +- Input vs output tokens (output often 2-3x input cost) +- Provider region +- Access type (Remote vs Local) + +**Reference:** SAP Note 3437766 for current token rates. + +### Cost Optimization + +1. **Right-size models**: Use smaller models for simple tasks +2. **Batch requests**: Combine multiple queries when possible +3. **Cache responses**: Store and reuse common query results +4. **Limit tokens**: Set appropriate `max_tokens` limits +5. 
**Use streaming**: No additional cost, better UX + +--- + +## Rate Limits + +Rate limits vary by: +- Service plan tier +- Model provider +- Specific model + +**Default limits** (vary by configuration): +- Requests per minute: 60-600 +- Tokens per minute: 40K-400K + +### Handling Rate Limits + +```python +import time +from requests.exceptions import HTTPError + +def call_with_retry(func, max_retries=3): + for attempt in range(max_retries): + try: + return func() + except HTTPError as e: + if e.response.status_code == 429: + wait_time = 2 ** attempt + time.sleep(wait_time) + else: + raise + raise Exception("Max retries exceeded") +``` + +--- + +## Documentation Links + +- Supported Models: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/supported-models-509e588.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/supported-models-509e588.md) +- Generative AI Hub: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/generative-ai-hub-7db524e.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/generative-ai-hub-7db524e.md) +- SAP Note 3437766: Token rates, limits, deprecation dates +- SAP Discovery Center: [https://discovery-center.cloud.sap/serviceCatalog/sap-ai-core](https://discovery-center.cloud.sap/serviceCatalog/sap-ai-core) diff --git a/references/orchestration-modules.md b/references/orchestration-modules.md new file mode 100644 index 0000000..04ffda8 --- /dev/null +++ b/references/orchestration-modules.md @@ -0,0 +1,579 @@ +# Orchestration Modules Reference + +Complete reference for all SAP AI Core orchestration modules. + +**Documentation Source:** [https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core](https://github.com/SAP-docs/sap-artificial-intelligence/tree/main/docs/sap-ai-core) + +--- + +## Orchestration V2 API + +### Endpoint + +**V2 Endpoint:** `POST {{deployment_url}}/v2/completion` + +### V1 to V2 Migration + +If migrating from V1 to V2: + +1. Update endpoint from `/completion` to `/v2/completion` +2. Modify payload structure to use `config.modules` format +3. Test with existing orchestration configurations + +### V2 Request Structure + +```json +{ + "config": { + "modules": { + "prompt_templating": { /* template config */ }, + "llm": { /* model config */ }, + "grounding": { /* optional */ }, + "filtering": { /* optional */ }, + "masking": { /* optional */ }, + "translation": { /* optional */ } + } + }, + "placeholder_values": { + "variable_name": "value" + } +} +``` + +### Key V2 Changes + +| Aspect | V1 | V2 | +|--------|----|----| +| Endpoint | `/completion` | `/v2/completion` | +| Module structure | `module_configurations` | `config.modules` | +| Embeddings | Not available | `POST /v2/embeddings` | + +--- + +## Module Execution Order + +The orchestration pipeline executes modules in this fixed order: + +``` +1. Grounding → 2. Templating → 3. Input Translation → 4. Data Masking → +5. Input Filtering → 6. Model Configuration → 7. Output Filtering → 8. Output Translation +``` + +Only **Templating** and **Model Configuration** are mandatory. + +--- + +## 1. Templating Module (Mandatory) + +Compose prompts with placeholders that get populated during inference. 
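+As a concrete illustration, the sketch below sends a templated request from Python using the `module_configurations` payload shape used throughout this skill's examples; the template stays fixed in the configuration while the placeholder values are supplied per request via `input_params`. The endpoint and environment variable names are assumptions.
+
+```python
+import os
+import requests
+
+ORCHESTRATION_URL = os.environ["ORCHESTRATION_URL"]  # deploymentUrl of the orchestration deployment
+AUTH_TOKEN = os.environ["AUTH_TOKEN"]
+
+payload = {
+    "config": {
+        "module_configurations": {
+            "llm_module_config": {"model_name": "gpt-4o", "model_version": "latest"},
+            "templating_module_config": {
+                "template": [
+                    {"role": "system", "content": "You are {{?assistant_type}}"},
+                    {"role": "user", "content": "{{?user_message}}"},
+                ]
+            },
+        }
+    },
+    # Placeholders are resolved at inference time from these values.
+    "input_params": {
+        "assistant_type": "an SAP BTP expert",
+        "user_message": "Explain resource groups in one paragraph.",
+    },
+}
+
+response = requests.post(
+    f"{ORCHESTRATION_URL}/v2/completion",
+    headers={
+        "Authorization": f"Bearer {AUTH_TOKEN}",
+        "AI-Resource-Group": "default",
+        "Content-Type": "application/json",
+    },
+    json=payload,
+    timeout=60,
+)
+response.raise_for_status()
+print(response.json())
+```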
+ +### Configuration + +```json +{ + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are {{?assistant_type}}"}, + {"role": "user", "content": "{{?user_message}}"} + ], + "defaults": { + "assistant_type": "a helpful assistant" + } + } +} +``` + +### Placeholder Syntax + +| Syntax | Description | +|--------|-------------| +| `{{?variable}}` | Required placeholder (must be provided) | +| `{{?variable}}` with defaults | Optional if default provided | +| `{{$grounding_output}}` | System variable from grounding module | + +### Message Roles + +- `system`: System instructions +- `user`: User input +- `assistant`: Assistant responses (for multi-turn) +- `tool`: Tool call results + +--- + +## 2. Model Configuration Module (Mandatory) + +Configure the LLM parameters. + +### Configuration + +```json +{ + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 2000, + "temperature": 0.7, + "top_p": 0.95, + "frequency_penalty": 0, + "presence_penalty": 0, + "stop": ["\n\n"] + } + } +} +``` + +### Common Parameters + +| Parameter | Type | Description | Range | +|-----------|------|-------------|-------| +| `max_tokens` | int | Maximum response tokens | 1-4096+ | +| `temperature` | float | Randomness | 0.0-2.0 | +| `top_p` | float | Nucleus sampling | 0.0-1.0 | +| `frequency_penalty` | float | Repetition penalty | -2.0 to 2.0 | +| `presence_penalty` | float | Topic diversity | -2.0 to 2.0 | +| `stop` | array | Stop sequences | Up to 4 | + +### Model Version Options + +- `"latest"`: Auto-upgrade to newest version +- Specific version: e.g., `"2024-05-13"` for pinned version + +--- + +## 3. Content Filtering Module + +Filter harmful content in input and output. + +### Azure Content Safety Configuration + +```json +{ + "filtering_module_config": { + "input": { + "filters": [ + { + "type": "azure_content_safety", + "config": { + "Hate": 2, + "Violence": 2, + "Sexual": 2, + "SelfHarm": 2 + } + } + ] + }, + "output": { + "filters": [ + { + "type": "azure_content_safety", + "config": { + "Hate": 0, + "Violence": 0, + "Sexual": 0, + "SelfHarm": 0 + } + } + ] + } + } +} +``` + +### Azure Content Safety Categories + +| Category | Description | Severity Levels | +|----------|-------------|-----------------| +| `Hate` | Discriminatory, hateful content | 0, 2, 4, 6 | +| `Violence` | Violent content and threats | 0, 2, 4, 6 | +| `Sexual` | Sexual content | 0, 2, 4, 6 | +| `SelfHarm` | Self-harm promotion | 0, 2, 4, 6 | + +**Severity Scale:** +- 0: Safe +- 2: Low severity +- 4: Medium severity (blocked by Azure global filter) +- 6: High severity (blocked by Azure global filter) + +### PromptShield Configuration + +Detect prompt injection attacks: + +```json +{ + "filtering_module_config": { + "input": { + "filters": [ + { + "type": "azure_content_safety", + "config": { + "PromptShield": true + } + } + ] + } + } +} +``` + +### Llama Guard 3 Configuration + +```json +{ + "filtering_module_config": { + "input": { + "filters": [ + { + "type": "llama_guard_3", + "config": { + "categories": [ + "violent_crimes", + "hate", + "sexual_content", + "self_harm" + ] + } + } + ] + } + } +} +``` + +### Llama Guard 3 Categories (14) + +| Category | Description | +|----------|-------------| +| `violent_crimes` | Violence and violent crimes | +| `non_violent_crimes` | Non-violent criminal activities | +| `sex_crimes` | Sexual crimes | +| `child_exploitation` | Child sexual abuse material | +| `defamation` | Defamation and libel | +| 
`specialized_advice` | Unqualified professional advice | +| `privacy` | Privacy violations | +| `intellectual_property` | IP infringement | +| `indiscriminate_weapons` | Weapons of mass destruction | +| `hate` | Hate speech | +| `self_harm` | Self-harm content | +| `sexual_content` | Explicit sexual content | +| `elections` | Election interference | +| `code_interpreter_abuse` | Malicious code execution | + +--- + +## 4. Data Masking Module + +Anonymize or pseudonymize PII before sending to LLM. + +### Pseudonymization Configuration + +```json +{ + "masking_module_config": { + "masking_providers": [ + { + "type": "sap_data_privacy_integration", + "method": "pseudonymization", + "entities": [ + {"type": "profile-person"}, + {"type": "profile-email"}, + {"type": "profile-phone"}, + {"type": "profile-credit-card-number"} + ] + } + ] + } +} +``` + +### Anonymization Configuration + +```json +{ + "masking_module_config": { + "masking_providers": [ + { + "type": "sap_data_privacy_integration", + "method": "anonymization", + "entities": [ + {"type": "profile-person"}, + {"type": "profile-ssn"} + ] + } + ] + } +} +``` + +### Complete Entity Type Reference (25) + +**Personal Identifiers:** +| Entity Type | Coverage | Description | +|-------------|----------|-------------| +| `profile-person` | English | Person names | +| `profile-email` | Global | Email addresses | +| `profile-phone` | International | Phone numbers with country codes | +| `profile-address` | US | Physical addresses | +| `profile-url` | Global | User-accessible URLs | +| `profile-username-password` | Global | Credentials via keywords | + +**Organizations:** +| Entity Type | Coverage | Description | +|-------------|----------|-------------| +| `profile-org` | Global | SAP customers + Fortune 1000 | +| `profile-university` | Global | Public universities | +| `profile-location` | US | US locations | + +**Government/Financial IDs:** +| Entity Type | Coverage | Description | +|-------------|----------|-------------| +| `profile-nationalid` | 20+ countries | National ID numbers | +| `profile-ssn` | US, Canada | Social Security Numbers | +| `profile-passport` | 30+ countries | Passport numbers | +| `profile-driverlicense` | 30+ countries | Driver's license numbers | +| `profile-iban` | 70+ countries | Bank account numbers | +| `profile-credit-card-number` | Global | Credit card numbers | + +**SAP-Specific:** +| Entity Type | Coverage | Description | +|-------------|----------|-------------| +| `profile-sapids-internal` | SAP | Staff IDs (C/I/D + 6-8 digits) | +| `profile-sapids-public` | SAP | S-user (S + 6-11 digits), P-user (P + 10 digits) | + +**Sensitive Attributes:** +| Entity Type | Coverage | Description | +|-------------|----------|-------------| +| `profile-nationality` | 190+ countries | Country names and codes | +| `profile-religious-group` | 200+ groups | Religious affiliations | +| `profile-political-group` | 100+ parties | Political affiliations | +| `profile-pronouns-gender` | Global | Gender pronouns | +| `profile-gender` | Global | Gender identifiers | +| `profile-sexual-orientation` | Global | Sexual orientation | +| `profile-trade-union` | Global | Trade union membership | +| `profile-ethnicity` | Global | Ethnic identifiers | +| `profile-sensitive-data` | Global | Composite of sensitive attributes | + +### Custom Entity with Regex + +```json +{ + "masking_module_config": { + "masking_providers": [ + { + "type": "sap_data_privacy_integration", + "method": "pseudonymization", + "entities": [ + { + "type": "custom", + 
"pattern": "EMP-[0-9]{6}", + "replacement": "EMPLOYEE_ID" + } + ] + } + ] + } +} +``` + +--- + +## 5. Grounding Module + +Inject external context from vector databases (RAG). + +### Basic Grounding Configuration + +```json +{ + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "search_configuration": { + "max_chunk_count": 5 + } + } + ] + } + } +} +``` + +### Grounding with Metadata Filters + +```json +{ + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "data_repositories": [""], + "document_metadata": [ + { + "key": "department", + "value": "HR" + } + ], + "search_configuration": { + "max_chunk_count": 10, + "max_document_count": 5 + } + } + ] + } + } +} +``` + +### Using Grounding Output in Template + +```json +{ + "templating_module_config": { + "template": [ + { + "role": "system", + "content": "Answer questions using only the following context:\n\n{{$context}}" + }, + { + "role": "user", + "content": "{{?user_query}}" + } + ] + } +} +``` + +--- + +## 6. Translation Module + +Translate input and output between languages. + +### Input Translation Configuration + +```json +{ + "translation_module_config": { + "input": { + "source_language": "auto", + "target_language": "en" + } + } +} +``` + +### Output Translation Configuration + +```json +{ + "translation_module_config": { + "output": { + "source_language": "en", + "target_language": "{{?user_language}}" + } + } +} +``` + +### Combined Translation + +```json +{ + "translation_module_config": { + "input": { + "source_language": "auto", + "target_language": "en" + }, + "output": { + "source_language": "en", + "target_language": "auto" + } + } +} +``` + +--- + +## Complete Orchestration Example + +All modules combined: + +```json +{ + "config": { + "module_configurations": { + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [{"id": ""}] + } + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are a helpful assistant. Use this context:\n{{$context}}"}, + {"role": "user", "content": "{{?user_query}}"} + ] + }, + "translation_module_config": { + "input": {"source_language": "auto", "target_language": "en"}, + "output": {"source_language": "en", "target_language": "auto"} + }, + "masking_module_config": { + "masking_providers": [{ + "type": "sap_data_privacy_integration", + "method": "pseudonymization", + "entities": [ + {"type": "profile-person"}, + {"type": "profile-email"} + ] + }] + }, + "filtering_module_config": { + "input": { + "filters": [{ + "type": "azure_content_safety", + "config": {"Hate": 2, "Violence": 2, "Sexual": 2, "SelfHarm": 2} + }] + }, + "output": { + "filters": [{ + "type": "azure_content_safety", + "config": {"Hate": 0, "Violence": 0, "Sexual": 0, "SelfHarm": 0} + }] + } + }, + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 2000, + "temperature": 0.5 + } + } + } + }, + "input_params": { + "user_query": "What are the company's vacation policies?" 
+ } +} +``` + +--- + +## Documentation Links + +- Orchestration Overview: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/orchestration-8d02235.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/orchestration-8d02235.md) +- Content Filtering: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/content-filtering-f804175.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/content-filtering-f804175.md) +- Data Masking: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/data-masking-8b87002.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/data-masking-8b87002.md) +- Grounding: [https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/grounding-035c455.md](https://github.com/SAP-docs/sap-artificial-intelligence/blob/main/docs/sap-ai-core/grounding-035c455.md) diff --git a/templates/deployment-config.json b/templates/deployment-config.json new file mode 100644 index 0000000..5bd3977 --- /dev/null +++ b/templates/deployment-config.json @@ -0,0 +1,51 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$comment": "SAP AI Core Deployment Configuration Template", + + "foundation_model_configuration": { + "name": "my-model-deployment-config", + "executableId": "azure-openai", + "scenarioId": "foundation-models", + "parameterBindings": [ + {"key": "modelName", "value": "gpt-4o"}, + {"key": "modelVersion", "value": "latest"} + ] + }, + + "orchestration_configuration": { + "name": "my-orchestration-config", + "executableId": "orchestration", + "scenarioId": "orchestration", + "parameterBindings": [] + }, + + "deployment_request": { + "configurationId": "", + "ttl": "24h" + }, + + "deployment_with_replicas": { + "configurationId": "", + "ttl": "7d", + "minReplicas": 1, + "maxReplicas": 3 + }, + + "_documentation": { + "ttl_format": "Natural numbers with units: m (minutes), h (hours), d (days)", + "ttl_examples": ["5m", "2h", "7d", "30d"], + "executable_ids": { + "azure-openai": "Azure OpenAI models (GPT-4o, GPT-4, GPT-3.5)", + "aicore-opensource": "SAP-hosted open source (Llama, Mistral, Falcon)", + "gcp-vertexai": "Google Vertex AI (Gemini, PaLM)", + "aws-bedrock": "AWS Bedrock (Claude, Titan)", + "aicore-mistralai": "Mistral AI models", + "aicore-ibm": "IBM Granite models", + "orchestration": "Orchestration service" + }, + "model_version_options": { + "latest": "Auto-upgrade to newest version", + "specific": "Pin to specific version (e.g., '2024-05-13')" + } + } +} diff --git a/templates/orchestration-workflow.json b/templates/orchestration-workflow.json new file mode 100644 index 0000000..6d01242 --- /dev/null +++ b/templates/orchestration-workflow.json @@ -0,0 +1,219 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$comment": "SAP AI Core Orchestration Workflow Template", + + "basic_chat_completion": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 1000, + "temperature": 0.7 + } + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "{{?user_message}}"} + ] + } + } + }, + "input_params": { + "user_message": "Hello, how can you help me?" 
+ } + }, + + "with_content_filtering": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + }, + "templating_module_config": { + "template": [ + {"role": "user", "content": "{{?user_message}}"} + ] + }, + "filtering_module_config": { + "input": { + "filters": [ + { + "type": "azure_content_safety", + "config": { + "Hate": 2, + "Violence": 2, + "Sexual": 2, + "SelfHarm": 2 + } + } + ] + }, + "output": { + "filters": [ + { + "type": "azure_content_safety", + "config": { + "Hate": 0, + "Violence": 0, + "Sexual": 0, + "SelfHarm": 0 + } + } + ] + } + } + } + } + }, + + "with_data_masking": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + }, + "templating_module_config": { + "template": [ + {"role": "user", "content": "{{?user_message}}"} + ] + }, + "masking_module_config": { + "masking_providers": [ + { + "type": "sap_data_privacy_integration", + "method": "pseudonymization", + "entities": [ + {"type": "profile-person"}, + {"type": "profile-email"}, + {"type": "profile-phone"}, + {"type": "profile-credit-card-number"} + ] + } + ] + } + } + } + }, + + "with_grounding_rag": { + "config": { + "module_configurations": { + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [ + { + "id": "", + "search_configuration": { + "max_chunk_count": 5 + } + } + ] + } + }, + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + }, + "templating_module_config": { + "template": [ + { + "role": "system", + "content": "Answer questions based only on the following context:\n\n{{$context}}\n\nIf the answer is not in the context, say you don't know." + }, + {"role": "user", "content": "{{?user_query}}"} + ] + } + } + }, + "input_params": { + "user_query": "What is the vacation policy?" + } + }, + + "full_enterprise_workflow": { + "config": { + "module_configurations": { + "grounding_module_config": { + "grounding_service": "document_grounding_service", + "grounding_service_configuration": { + "grounding_input_parameters": ["user_query"], + "grounding_output_parameter": "context", + "filters": [{"id": ""}] + } + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are a helpful enterprise assistant. Use the following context:\n{{$context}}"}, + {"role": "user", "content": "{{?user_query}}"} + ] + }, + "translation_module_config": { + "input": {"source_language": "auto", "target_language": "en"}, + "output": {"source_language": "en", "target_language": "auto"} + }, + "masking_module_config": { + "masking_providers": [{ + "type": "sap_data_privacy_integration", + "method": "pseudonymization", + "entities": [ + {"type": "profile-person"}, + {"type": "profile-email"}, + {"type": "profile-phone"} + ] + }] + }, + "filtering_module_config": { + "input": { + "filters": [{ + "type": "azure_content_safety", + "config": {"Hate": 2, "Violence": 2, "Sexual": 2, "SelfHarm": 2} + }] + }, + "output": { + "filters": [{ + "type": "azure_content_safety", + "config": {"Hate": 0, "Violence": 0, "Sexual": 0, "SelfHarm": 0} + }] + } + }, + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 2000, + "temperature": 0.5 + } + } + } + } + }, + + "_documentation": { + "module_execution_order": [ + "1. 
Grounding", + "2. Templating (mandatory)", + "3. Input Translation", + "4. Data Masking", + "5. Input Filtering", + "6. Model Configuration (mandatory)", + "7. Output Filtering", + "8. Output Translation" + ], + "placeholder_syntax": { + "{{?variable}}": "Required input parameter", + "{{$output}}": "Output from previous module (e.g., grounding)" + }, + "masking_methods": { + "anonymization": "Irreversible - MASKED_ENTITY placeholder", + "pseudonymization": "Reversible - MASKED_ENTITY_ID placeholder" + } + } +} diff --git a/templates/tool-definition.json b/templates/tool-definition.json new file mode 100644 index 0000000..fe62a2f --- /dev/null +++ b/templates/tool-definition.json @@ -0,0 +1,277 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$comment": "SAP AI Core Tool Calling Definition Template", + + "single_tool_definition": { + "tools": [ + { + "type": "function", + "function": { + "name": "get_product_info", + "description": "Get information about a product including price, availability, and description", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "product_id": { + "type": "string", + "description": "The unique product identifier (e.g., 'PROD-12345')" + } + }, + "required": ["product_id"], + "additionalProperties": false + } + } + } + ] + }, + + "multiple_tools_definition": { + "tools": [ + { + "type": "function", + "function": { + "name": "get_inventory", + "description": "Get current inventory quantity for a product", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "product_id": { + "type": "string", + "description": "The product identifier" + }, + "warehouse": { + "type": "string", + "enum": ["US-EAST", "US-WEST", "EU-CENTRAL", "APAC"], + "description": "The warehouse location" + } + }, + "required": ["product_id"], + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "create_order", + "description": "Create a new order for a customer", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "customer_id": { + "type": "string", + "description": "The customer identifier" + }, + "product_id": { + "type": "string", + "description": "The product to order" + }, + "quantity": { + "type": "integer", + "minimum": 1, + "description": "Number of items to order" + }, + "shipping_address": { + "type": "object", + "properties": { + "street": {"type": "string"}, + "city": {"type": "string"}, + "country": {"type": "string"}, + "postal_code": {"type": "string"} + }, + "required": ["street", "city", "country"] + } + }, + "required": ["customer_id", "product_id", "quantity"], + "additionalProperties": false + } + } + }, + { + "type": "function", + "function": { + "name": "get_customer_history", + "description": "Get order history for a customer", + "strict": true, + "parameters": { + "type": "object", + "properties": { + "customer_id": { + "type": "string", + "description": "The customer identifier" + }, + "limit": { + "type": "integer", + "minimum": 1, + "maximum": 100, + "default": 10, + "description": "Maximum number of orders to return" + } + }, + "required": ["customer_id"], + "additionalProperties": false + } + } + } + ] + }, + + "orchestration_request_with_tools": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "max_tokens": 1000 + } + }, + "templating_module_config": { + "template": [ + { + "role": "system", + "content": "You are a helpful sales 
assistant. Use the available tools to help customers with their orders and product inquiries." + }, + { + "role": "user", + "content": "{{?user_message}}" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_product_info", + "description": "Get product information", + "parameters": { + "type": "object", + "properties": { + "product_id": {"type": "string"} + }, + "required": ["product_id"] + } + } + } + ] + } + } + }, + "input_params": { + "user_message": "What is the price of product PROD-12345?" + } + }, + + "tool_response_continuation": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest" + }, + "templating_module_config": { + "template": [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": "What is the inventory for product ABC123?"}, + { + "role": "assistant", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_inventory", + "arguments": "{\"product_id\": \"ABC123\"}" + } + } + ] + }, + { + "role": "tool", + "tool_call_id": "call_abc123", + "content": "{\"product_id\": \"ABC123\", \"quantity\": 150, \"warehouse\": \"US-EAST\"}" + } + ] + } + } + } + }, + + "structured_output_json_schema": { + "config": { + "module_configurations": { + "llm_module_config": { + "model_name": "gpt-4o", + "model_version": "latest", + "model_params": { + "response_format": { + "type": "json_schema", + "json_schema": { + "name": "product_analysis", + "strict": true, + "schema": { + "type": "object", + "properties": { + "product_name": {"type": "string"}, + "category": { + "type": "string", + "enum": ["electronics", "clothing", "food", "other"] + }, + "price_range": { + "type": "string", + "enum": ["budget", "mid-range", "premium"] + }, + "key_features": { + "type": "array", + "items": {"type": "string"} + }, + "recommendation_score": { + "type": "number", + "minimum": 0, + "maximum": 10 + } + }, + "required": ["product_name", "category", "recommendation_score"], + "additionalProperties": false + } + } + } + } + }, + "templating_module_config": { + "template": [ + {"role": "user", "content": "Analyze this product: {{?product_description}}"} + ] + } + } + } + }, + + "_documentation": { + "tool_calling_workflow": [ + "1. Send request with tool definitions", + "2. Model returns tool_calls with function name and arguments", + "3. Execute function externally with provided arguments", + "4. Return result in 'tool' role message with matching tool_call_id", + "5. Model incorporates result in final response" + ], + "best_practices": [ + "Use descriptive function names and descriptions", + "Enable strict mode for parameter validation", + "Define clear parameter types and constraints", + "Include examples in descriptions when helpful", + "Handle tool call errors gracefully" + ], + "supported_parameter_types": [ + "string", + "integer", + "number", + "boolean", + "array", + "object", + "enum (via type + enum array)" + ] + } +}
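The `tool_calling_workflow` steps documented in the template above can be sketched end to end. The following Python outline is illustrative only, not part of the template: the `get_inventory` handler, the environment variables, and the response JSON path are assumptions; the request and message shapes follow the `orchestration_request_with_tools` and `tool_response_continuation` examples in this file.

```python
# Hedged sketch of the tool-calling round trip: send tool definitions, detect
# tool_calls, run the function locally, and return the result as a 'tool'
# message with the matching tool_call_id. Names and the response path
# (orchestration_result.choices[0].message) are assumptions.
import json
import os

import requests

ORCHESTRATION_URL = os.environ["ORCHESTRATION_URL"]
HEADERS = {
    "Authorization": f"Bearer {os.environ['AUTH_TOKEN']}",
    "AI-Resource-Group": "default",
    "Content-Type": "application/json",
}

def get_inventory(product_id: str, warehouse: str = "US-EAST") -> dict:
    # Local stand-in for the real business function the tool represents.
    return {"product_id": product_id, "quantity": 150, "warehouse": warehouse}

tools = [{
    "type": "function",
    "function": {
        "name": "get_inventory",
        "description": "Get current inventory quantity for a product",
        "parameters": {
            "type": "object",
            "properties": {"product_id": {"type": "string"}},
            "required": ["product_id"],
        },
    },
}]

def complete(messages: list) -> dict:
    payload = {
        "config": {
            "module_configurations": {
                "llm_module_config": {"model_name": "gpt-4o", "model_version": "latest"},
                "templating_module_config": {"template": messages, "tools": tools},
            }
        }
    }
    resp = requests.post(f"{ORCHESTRATION_URL}/completion", headers=HEADERS, json=payload, timeout=60)
    resp.raise_for_status()
    return resp.json()

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "What is the inventory for product ABC123?"},
]

# Steps 1-2: send the request with tool definitions and inspect the reply.
first = complete(messages)
choice = first["orchestration_result"]["choices"][0]["message"]

if choice.get("tool_calls"):
    call = choice["tool_calls"][0]
    args = json.loads(call["function"]["arguments"])
    # Step 3: execute the function externally with the provided arguments.
    result = get_inventory(**args)
    # Step 4: return the result in a 'tool' message with the matching id.
    messages += [
        choice,
        {"role": "tool", "tool_call_id": call["id"], "content": json.dumps(result)},
    ]
    # Step 5: the model incorporates the result into its final response.
    final = complete(messages)
    print(final["orchestration_result"]["choices"][0]["message"]["content"])
```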