Initial commit

templates/basic-graph.json (new file, 100 lines)
@@ -0,0 +1,100 @@
{
  "name": "basic_data_pipeline",
  "description": "Basic data processing pipeline template",
  "version": "1.0.0",
  "properties": {
    "autoRecovery": {
      "enabled": true,
      "snapshotInterval": "60s"
    }
  },
  "parameters": [
    {
      "name": "source_connection",
      "type": "string",
      "description": "Connection ID for source system (configure in Connection Management)",
      "default": ""
    },
    {
      "name": "target_connection",
      "type": "string",
      "description": "Connection ID for target system (configure in Connection Management)",
      "default": ""
    },
    {
      "name": "source_path",
      "type": "string",
      "description": "Source file path",
      "default": "/data/input/"
    },
    {
      "name": "target_path",
      "type": "string",
      "description": "Target file path",
      "default": "/data/output/"
    },
    {
      "name": "batch_size",
      "type": "int32",
      "description": "Processing batch size",
      "default": 10000
    }
  ],
  "operators": [
    {
      "name": "source",
      "component": "com.sap.system.structuredFileConsumer",
      "config": {
        "connection": "${source_connection}",
        "path": "${source_path}",
        "format": "csv",
        "header": true
      }
    },
    {
      "name": "transform",
      "component": "com.sap.dataTransform",
      "config": {
        "transformations": []
      }
    },
    {
      "name": "target",
      "component": "com.sap.system.structuredFileProducer",
      "config": {
        "connection": "${target_connection}",
        "path": "${target_path}",
        "format": "parquet",
        "compression": "SNAPPY"
      }
    }
  ],
  "connections": [
    {
      "source": {
        "operator": "source",
        "port": "output"
      },
      "target": {
        "operator": "transform",
        "port": "input"
      }
    },
    {
      "source": {
        "operator": "transform",
        "port": "output"
      },
      "target": {
        "operator": "target",
        "port": "input"
      }
    }
  ],
  "notes": [
    "This is a basic Gen2 pipeline template.",
    "Configure source and target connections before running.",
    "Customize transformations in the Data Transform operator.",
    "Auto-recovery is enabled with 60-second snapshot intervals."
  ]
}
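
The ${...} placeholders in the operator configs above are resolved from the parameters section when the template is instantiated. For local experimentation, a minimal sketch along these lines can substitute the declared defaults; the file path and the resolve helper are illustrative assumptions, not part of the template or of any SAP Data Intelligence API.

# Minimal sketch: resolve ${parameter} placeholders in a template
# using the defaults declared in its "parameters" array.
# Assumes the template is saved as templates/basic-graph.json.
import json
import re

with open("templates/basic-graph.json") as f:
    template = json.load(f)

defaults = {p["name"]: p["default"] for p in template["parameters"]}

def resolve(value):
    # Replace ${name} with its declared default; leave unknown names untouched.
    if isinstance(value, str):
        return re.sub(r"\$\{(\w+)\}",
                      lambda m: str(defaults.get(m.group(1), m.group(0))),
                      value)
    if isinstance(value, dict):
        return {k: resolve(v) for k, v in value.items()}
    if isinstance(value, list):
        return [resolve(v) for v in value]
    return value

print(json.dumps(resolve(template["operators"]), indent=2))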

templates/ml-training-pipeline.json (new file, 99 lines)
@@ -0,0 +1,99 @@
{
  "$schema": "https://sap.github.io/data-intelligence/schemas/graph.json",
  "name": "ml_training_pipeline",
  "description": "Machine learning training pipeline template",
  "version": "1.0.0",
  "properties": {
    "autoRecovery": {
      "enabled": true,
      "snapshotInterval": "120s"
    }
  },
  "parameters": [
    {
      "name": "dataset_path",
      "type": "string",
      "description": "Training dataset path",
      "default": "/ml/datasets/training.parquet"
    },
    {
      "name": "model_output_path",
      "type": "string",
      "description": "Model output path",
      "default": "/ml/models/"
    },
    {
      "name": "test_split",
      "type": "float64",
      "description": "Test data split ratio",
      "default": 0.2
    },
    {
      "name": "n_estimators",
      "type": "int32",
      "description": "Number of estimators for RandomForest",
      "default": 100
    },
    {
      "name": "max_depth",
      "type": "int32",
      "description": "Maximum tree depth",
      "default": 10
    }
  ],
  "operators": [
    {
      "name": "data_loader",
      "component": "com.sap.system.structuredFileConsumer",
      "config": {
        "connection": "${data_connection}",
        "path": "${dataset_path}",
        "format": "parquet"
      }
    },
    {
      "name": "trainer",
      "component": "com.sap.system.python3Operator",
      "config": {
        "script": "# ML Training Script\nimport pandas as pd\nimport pickle\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, f1_score\nfrom sapdi import tracking\n\ndef on_input(msg_id, header, body):\n    # Get parameters\n    test_split = float(api.config.test_split)\n    n_estimators = int(api.config.n_estimators)\n    max_depth = int(api.config.max_depth)\n\n    # Load data\n    df = pd.DataFrame(body)\n\n    # Prepare features and target\n    X = df.drop('target', axis=1)\n    y = df['target']\n\n    # Split data\n    X_train, X_test, y_train, y_test = train_test_split(\n        X, y, test_size=test_split, random_state=42\n    )\n\n    # Train model\n    model = RandomForestClassifier(\n        n_estimators=n_estimators,\n        max_depth=max_depth,\n        random_state=42\n    )\n    model.fit(X_train, y_train)\n\n    # Evaluate\n    y_pred = model.predict(X_test)\n    accuracy = accuracy_score(y_test, y_pred)\n    f1 = f1_score(y_test, y_pred, average='weighted')\n\n    # Track with ML Scenario Manager\n    with tracking.start_run(run_name='rf_training') as run:\n        run.log_param('algorithm', 'RandomForest')\n        run.log_param('n_estimators', n_estimators)\n        run.log_param('max_depth', max_depth)\n        run.log_param('test_split', test_split)\n        run.log_metric('accuracy', accuracy)\n        run.log_metric('f1_score', f1)\n        run.log_artifact('model.pkl', pickle.dumps(model))\n\n    # Send results\n    result = {\n        'accuracy': accuracy,\n        'f1_score': f1,\n        'model_path': f'{api.config.model_output_path}model.pkl'\n    }\n    api.send('output', api.Message(result))\n\napi.set_port_callback('input', on_input)"
      }
    },
    {
      "name": "metrics_logger",
      "component": "com.sap.ml.submitMetrics",
      "config": {
        "metricsType": "training"
      }
    }
  ],
  "connections": [
    {
      "source": {
        "operator": "data_loader",
        "port": "output"
      },
      "target": {
        "operator": "trainer",
        "port": "input"
      }
    },
    {
      "source": {
        "operator": "trainer",
        "port": "output"
      },
      "target": {
        "operator": "metrics_logger",
        "port": "input"
      }
    }
  ],
  "notes": [
    "This is a machine learning training pipeline template.",
    "Customize the Python script for your specific model.",
    "Configure dataset path and model parameters.",
    "Metrics are automatically tracked in ML Scenario Manager.",
    "Model artifacts are logged for versioning and deployment.",
    "Adjust test_split, n_estimators, max_depth as needed."
  ]
}
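
The trainer operator embeds its training logic as an escaped script string, which is awkward to test in place. The following is a rough standalone sketch of that logic for local runs, with the SAP runtime objects (api.*) and sapdi tracking left out; the 'target' column name and the default parameter values come from the template, everything else is an assumption. Note also that ${data_connection} in the data_loader config is not declared under parameters, so it has to be supplied when the graph is configured.

# Local sketch of the embedded trainer logic, without the SAP runtime
# (api.*) or ML Scenario Manager tracking (sapdi.tracking).
# Parameter defaults mirror the template; the 'target' column name
# comes from the embedded script.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

def train(df, test_split=0.2, n_estimators=100, max_depth=10):
    X = df.drop("target", axis=1)
    y = df["target"]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_split, random_state=42
    )
    model = RandomForestClassifier(
        n_estimators=n_estimators, max_depth=max_depth, random_state=42
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    return model, {
        "accuracy": accuracy_score(y_test, y_pred),
        "f1_score": f1_score(y_test, y_pred, average="weighted"),
    }

# Example (path taken from the template default):
# df = pd.read_parquet("/ml/datasets/training.parquet")
# model, metrics = train(df)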

templates/replication-flow.json (new file, 90 lines)
@@ -0,0 +1,90 @@
{
  "name": "abap_to_hana_replication",
  "description": "Replication flow from ABAP system to HANA Cloud",
  "version": "1.0.0",
  "source": {
    "type": "ABAP",
    "connection": "${abap_connection}",
    "properties": {
      "extractionType": "CDS",
      "packageSize": 50000
    }
  },
  "target": {
    "type": "HANA",
    "connection": "${hana_connection}",
    "properties": {
      "schema": "${target_schema}",
      "writeMode": "UPSERT",
      "batchSize": 10000
    }
  },
  "tasks": [
    {
      "name": "customer_master",
      "source": {
        "object": "I_Customer",
        "type": "CDS_VIEW",
        "filter": ""
      },
      "target": {
        "table": "CUSTOMER_MASTER",
        "keyColumns": ["Customer"]
      },
      "mapping": {
        "mode": "auto",
        "customMappings": []
      },
      "loadType": {
        "initial": true,
        "delta": true
      }
    },
    {
      "name": "sales_orders",
      "source": {
        "object": "I_SalesOrder",
        "type": "CDS_VIEW",
        "filter": "CreationDate ge datetime'2024-01-01T00:00:00'"
      },
      "target": {
        "table": "SALES_ORDERS",
        "keyColumns": ["SalesOrder"]
      },
      "mapping": {
        "mode": "auto",
        "customMappings": [
          {
            "source": "SalesOrder",
            "target": "SALES_ORDER_ID"
          },
          {
            "source": "SoldToParty",
            "target": "CUSTOMER_ID"
          }
        ]
      },
      "loadType": {
        "initial": true,
        "delta": true
      }
    }
  ],
  "schedule": {
    "enabled": false,
    "cron": "0 0 * * * *",
    "timezone": "UTC"
  },
  "settings": {
    "parallelTasks": 4,
    "errorHandling": "CONTINUE",
    "logging": "INFO"
  },
  "notes": [
    "Configure ABAP and HANA connections before deployment.",
    "Modify CDS view names for your specific data.",
    "Adjust filters based on data volume requirements.",
    "Enable schedule after successful initial load.",
    "Uses UPSERT for exactly-once delivery semantics."
  ]
}
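
Before enabling the schedule, it can help to sanity-check the flow definition, for example that every task declares key columns (UPSERT needs a key) and that delta loads are paired with an initial load. The sketch below assumes the file is saved as templates/replication-flow.json; the checks themselves are illustrative and not part of any SAP tooling.

# Minimal consistency check for the replication flow definition above.
import json

with open("templates/replication-flow.json") as f:
    flow = json.load(f)

problems = []
for task in flow["tasks"]:
    if not task["target"].get("keyColumns"):
        problems.append(f"{task['name']}: no key columns (UPSERT needs a key)")
    if task["loadType"].get("delta") and not task["loadType"].get("initial"):
        problems.append(f"{task['name']}: delta load without initial load")

if flow["schedule"]["enabled"] and not flow["schedule"].get("cron"):
    problems.append("schedule enabled but no cron expression set")

print("\n".join(problems) or "replication flow looks consistent")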