Initial commit

Zhongwei Li
2025-11-30 08:55:25 +08:00
commit e23395aeb2
19 changed files with 6391 additions and 0 deletions

templates/basic-graph.json

@@ -0,0 +1,100 @@
{
"name": "basic_data_pipeline",
"description": "Basic data processing pipeline template",
"version": "1.0.0",
"properties": {
"autoRecovery": {
"enabled": true,
"snapshotInterval": "60s"
}
},
"parameters": [
{
"name": "source_connection",
"type": "string",
"description": "Connection ID for source system (configure in Connection Management)",
"default": ""
},
{
"name": "target_connection",
"type": "string",
"description": "Connection ID for target system (configure in Connection Management)",
"default": ""
},
{
"name": "source_path",
"type": "string",
"description": "Source file path",
"default": "/data/input/"
},
{
"name": "target_path",
"type": "string",
"description": "Target file path",
"default": "/data/output/"
},
{
"name": "batch_size",
"type": "int32",
"description": "Processing batch size",
"default": 10000
}
],
"operators": [
{
"name": "source",
"component": "com.sap.system.structuredFileConsumer",
"config": {
"connection": "${source_connection}",
"path": "${source_path}",
"format": "csv",
"header": true
}
},
{
"name": "transform",
"component": "com.sap.dataTransform",
"config": {
"transformations": []
}
},
{
"name": "target",
"component": "com.sap.system.structuredFileProducer",
"config": {
"connection": "${target_connection}",
"path": "${target_path}",
"format": "parquet",
"compression": "SNAPPY"
}
}
],
"connections": [
{
"source": {
"operator": "source",
"port": "output"
},
"target": {
"operator": "transform",
"port": "input"
}
},
{
"source": {
"operator": "transform",
"port": "output"
},
"target": {
"operator": "target",
"port": "input"
}
}
],
"notes": [
"This is a basic Gen2 pipeline template.",
"Configure source and target connections before running.",
"Customize transformations in the Data Transform operator.",
"Auto-recovery is enabled with 60-second snapshot intervals."
]
}
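
The `${...}` placeholders in the operator configs refer back to the parameter declarations above. A minimal sketch of how they could be resolved client-side before deployment, assuming plain textual substitution; the `resolve_parameters` helper and the `S3_DEV` connection ID are illustrative, not part of SAP Data Intelligence:

```python
import json
import re

def resolve_parameters(template, overrides=None):
    """Substitute ${name} placeholders with parameter defaults or overrides.

    Plain textual substitution over the serialized graph; values containing
    quotes or backslashes would need escaping in a real implementation.
    """
    params = {p["name"]: p["default"] for p in template.get("parameters", [])}
    if overrides:
        params.update(overrides)
    text = json.dumps(template)
    resolved = re.sub(
        r"\$\{(\w+)\}",
        lambda m: str(params.get(m.group(1), m.group(0))),
        text,
    )
    return json.loads(resolved)

with open("templates/basic-graph.json") as f:
    graph = resolve_parameters(json.load(f), {"source_connection": "S3_DEV"})
print(graph["operators"][0]["config"]["connection"])  # -> S3_DEV
```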


@@ -0,0 +1,99 @@
{
"$schema": "https://sap.github.io/data-intelligence/schemas/graph.json",
"name": "ml_training_pipeline",
"description": "Machine learning training pipeline template",
"version": "1.0.0",
"properties": {
"autoRecovery": {
"enabled": true,
"snapshotInterval": "120s"
}
},
"parameters": [
{
"name": "dataset_path",
"type": "string",
"description": "Training dataset path",
"default": "/ml/datasets/training.parquet"
},
{
"name": "model_output_path",
"type": "string",
"description": "Model output path",
"default": "/ml/models/"
},
{
"name": "test_split",
"type": "float64",
"description": "Test data split ratio",
"default": 0.2
},
{
"name": "n_estimators",
"type": "int32",
"description": "Number of estimators for RandomForest",
"default": 100
},
{
"name": "max_depth",
"type": "int32",
"description": "Maximum tree depth",
"default": 10
}
],
"operators": [
{
"name": "data_loader",
"component": "com.sap.system.structuredFileConsumer",
"config": {
"connection": "${data_connection}",
"path": "${dataset_path}",
"format": "parquet"
}
},
{
"name": "trainer",
"component": "com.sap.system.python3Operator",
"config": {
"script": "# ML Training Script\nimport pandas as pd\nimport pickle\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, f1_score\nfrom sapdi import tracking\n\ndef on_input(msg_id, header, body):\n # Get parameters\n test_split = float(api.config.test_split)\n n_estimators = int(api.config.n_estimators)\n max_depth = int(api.config.max_depth)\n \n # Load data\n df = pd.DataFrame(body)\n \n # Prepare features and target\n X = df.drop('target', axis=1)\n y = df['target']\n \n # Split data\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=test_split, random_state=42\n )\n \n # Train model\n model = RandomForestClassifier(\n n_estimators=n_estimators,\n max_depth=max_depth,\n random_state=42\n )\n model.fit(X_train, y_train)\n \n # Evaluate\n y_pred = model.predict(X_test)\n accuracy = accuracy_score(y_test, y_pred)\n f1 = f1_score(y_test, y_pred, average='weighted')\n \n # Track with ML Scenario Manager\n with tracking.start_run(run_name='rf_training') as run:\n run.log_param('algorithm', 'RandomForest')\n run.log_param('n_estimators', n_estimators)\n run.log_param('max_depth', max_depth)\n run.log_param('test_split', test_split)\n run.log_metric('accuracy', accuracy)\n run.log_metric('f1_score', f1)\n run.log_artifact('model.pkl', pickle.dumps(model))\n \n # Send results\n result = {\n 'accuracy': accuracy,\n 'f1_score': f1,\n 'model_path': f'{api.config.model_output_path}model.pkl'\n }\n api.send('output', api.Message(result))\n\napi.set_port_callback('input', on_input)"
}
},
{
"name": "metrics_logger",
"component": "com.sap.ml.submitMetrics",
"config": {
"metricsType": "training"
}
}
],
"connections": [
{
"source": {
"operator": "data_loader",
"port": "output"
},
"target": {
"operator": "trainer",
"port": "input"
}
},
{
"source": {
"operator": "trainer",
"port": "output"
},
"target": {
"operator": "metrics_logger",
"port": "input"
}
}
],
"notes": [
"This is a machine learning training pipeline template.",
"Customize the Python script for your specific model.",
"Configure dataset path and model parameters.",
"Metrics are automatically tracked in ML Scenario Manager.",
"Model artifacts are logged for versioning and deployment.",
"Adjust test_split, n_estimators, max_depth as needed."
]
}
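
The trainer script above is stored as an escaped JSON string, which makes it hard to follow. Here is a local sketch of its train/evaluate core; the `sapdi.tracking` calls and the operator `api` object exist only inside the pipeline runtime, so this sketch substitutes synthetic data and plain prints for illustration:

```python
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the message body the pipeline would deliver.
X_arr, y_arr = make_classification(n_samples=1000, n_features=10, random_state=42)
df = pd.DataFrame(X_arr)
df["target"] = y_arr

# Same feature/target split and model setup as the embedded script.
X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))
print("f1_score:", f1_score(y_test, y_pred, average="weighted"))
# Inside the pipeline these values go to sapdi.tracking instead of stdout.
```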


@@ -0,0 +1,90 @@
{
"name": "abap_to_hana_replication",
"description": "Replication flow from ABAP system to HANA Cloud",
"version": "1.0.0",
"source": {
"type": "ABAP",
"connection": "${abap_connection}",
"properties": {
"extractionType": "CDS",
"packageSize": 50000
}
},
"target": {
"type": "HANA",
"connection": "${hana_connection}",
"properties": {
"schema": "${target_schema}",
"writeMode": "UPSERT",
"batchSize": 10000
}
},
"tasks": [
{
"name": "customer_master",
"source": {
"object": "I_Customer",
"type": "CDS_VIEW",
"filter": ""
},
"target": {
"table": "CUSTOMER_MASTER",
"keyColumns": ["Customer"]
},
"mapping": {
"mode": "auto",
"customMappings": []
},
"loadType": {
"initial": true,
"delta": true
}
},
{
"name": "sales_orders",
"source": {
"object": "I_SalesOrder",
"type": "CDS_VIEW",
"filter": "CreationDate ge datetime'2024-01-01T00:00:00'"
},
"target": {
"table": "SALES_ORDERS",
"keyColumns": ["SalesOrder"]
},
"mapping": {
"mode": "auto",
"customMappings": [
{
"source": "SalesOrder",
"target": "SALES_ORDER_ID"
},
{
"source": "SoldToParty",
"target": "CUSTOMER_ID"
}
]
},
"loadType": {
"initial": true,
"delta": true
}
}
],
"schedule": {
"enabled": false,
"cron": "0 0 * * * *",
"timezone": "UTC"
},
"settings": {
"parallelTasks": 4,
"errorHandling": "CONTINUE",
"logging": "INFO"
},
"notes": [
"Configure ABAP and HANA connections before deployment.",
"Modify CDS view names for your specific data.",
"Adjust filters based on data volume requirements.",
"Enable schedule after successful initial load.",
"Uses UPSERT for exactly-once delivery semantics."
]
}
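
Because key columns are named in source terms while `customMappings` renames columns on the target side, the two are easy to misalign. A small sanity-check sketch, assuming the flow is saved locally as `templates/abap-replication.json` (the file name and the specific checks are illustrative):

```python
import json

# Assumed local copy of the replication flow definition (illustrative name).
with open("templates/abap-replication.json") as f:
    flow = json.load(f)

for task in flow["tasks"]:
    renames = {m["source"]: m["target"] for m in task["mapping"]["customMappings"]}
    if task["loadType"]["delta"] and not task["target"]["keyColumns"]:
        raise ValueError(f"{task['name']}: delta load requires key columns")
    for key in task["target"]["keyColumns"]:
        # With mapping mode "auto", columns without an explicit entry keep
        # their source names, so resolve each key through the rename map.
        print(f"{task['name']}: key {key!r} -> target column {renames.get(key, key)!r}")
```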