Initial commit

Zhongwei Li
2025-11-30 08:55:25 +08:00
commit e23395aeb2
19 changed files with 6391 additions and 0 deletions

templates/basic-graph.json

@@ -0,0 +1,100 @@
{
"name": "basic_data_pipeline",
"description": "Basic data processing pipeline template",
"version": "1.0.0",
"properties": {
"autoRecovery": {
"enabled": true,
"snapshotInterval": "60s"
}
},
"parameters": [
{
"name": "source_connection",
"type": "string",
"description": "Connection ID for source system (configure in Connection Management)",
"default": ""
},
{
"name": "target_connection",
"type": "string",
"description": "Connection ID for target system (configure in Connection Management)",
"default": ""
},
{
"name": "source_path",
"type": "string",
"description": "Source file path",
"default": "/data/input/"
},
{
"name": "target_path",
"type": "string",
"description": "Target file path",
"default": "/data/output/"
},
{
"name": "batch_size",
"type": "int32",
"description": "Processing batch size",
"default": 10000
}
],
"operators": [
{
"name": "source",
"component": "com.sap.system.structuredFileConsumer",
"config": {
"connection": "${source_connection}",
"path": "${source_path}",
"format": "csv",
"header": true
}
},
{
"name": "transform",
"component": "com.sap.dataTransform",
"config": {
"transformations": []
}
},
{
"name": "target",
"component": "com.sap.system.structuredFileProducer",
"config": {
"connection": "${target_connection}",
"path": "${target_path}",
"format": "parquet",
"compression": "SNAPPY"
}
}
],
"connections": [
{
"source": {
"operator": "source",
"port": "output"
},
"target": {
"operator": "transform",
"port": "input"
}
},
{
"source": {
"operator": "transform",
"port": "output"
},
"target": {
"operator": "target",
"port": "input"
}
}
],
"notes": [
"This is a basic Gen2 pipeline template.",
"Configure source and target connections before running.",
"Customize transformations in the Data Transform operator.",
"Auto-recovery is enabled with 60-second snapshot intervals."
]
}
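
The `${...}` placeholders in the operator configs refer back to the parameter declarations above. A minimal sketch of how they could be resolved client-side before deployment, assuming plain textual substitution; the `resolve_parameters` helper and the `S3_DEV` connection ID are illustrative, not part of SAP Data Intelligence:

```python
import json
import re

def resolve_parameters(template, overrides=None):
    """Substitute ${name} placeholders with parameter defaults or overrides.

    Plain textual substitution over the serialized graph; values containing
    quotes or backslashes would need escaping in a real implementation.
    """
    params = {p["name"]: p["default"] for p in template.get("parameters", [])}
    if overrides:
        params.update(overrides)
    text = json.dumps(template)
    resolved = re.sub(
        r"\$\{(\w+)\}",
        lambda m: str(params.get(m.group(1), m.group(0))),
        text,
    )
    return json.loads(resolved)

with open("templates/basic-graph.json") as f:
    graph = resolve_parameters(json.load(f), {"source_connection": "S3_DEV"})
print(graph["operators"][0]["config"]["connection"])  # -> S3_DEV
```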


@@ -0,0 +1,99 @@
{
"$schema": "https://sap.github.io/data-intelligence/schemas/graph.json",
"name": "ml_training_pipeline",
"description": "Machine learning training pipeline template",
"version": "1.0.0",
"properties": {
"autoRecovery": {
"enabled": true,
"snapshotInterval": "120s"
}
},
"parameters": [
{
"name": "dataset_path",
"type": "string",
"description": "Training dataset path",
"default": "/ml/datasets/training.parquet"
},
{
"name": "model_output_path",
"type": "string",
"description": "Model output path",
"default": "/ml/models/"
},
{
"name": "test_split",
"type": "float64",
"description": "Test data split ratio",
"default": 0.2
},
{
"name": "n_estimators",
"type": "int32",
"description": "Number of estimators for RandomForest",
"default": 100
},
{
"name": "max_depth",
"type": "int32",
"description": "Maximum tree depth",
"default": 10
}
],
"operators": [
{
"name": "data_loader",
"component": "com.sap.system.structuredFileConsumer",
"config": {
"connection": "${data_connection}",
"path": "${dataset_path}",
"format": "parquet"
}
},
{
"name": "trainer",
"component": "com.sap.system.python3Operator",
"config": {
"script": "# ML Training Script\nimport pandas as pd\nimport pickle\nfrom sklearn.ensemble import RandomForestClassifier\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.metrics import accuracy_score, f1_score\nfrom sapdi import tracking\n\ndef on_input(msg_id, header, body):\n # Get parameters\n test_split = float(api.config.test_split)\n n_estimators = int(api.config.n_estimators)\n max_depth = int(api.config.max_depth)\n \n # Load data\n df = pd.DataFrame(body)\n \n # Prepare features and target\n X = df.drop('target', axis=1)\n y = df['target']\n \n # Split data\n X_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=test_split, random_state=42\n )\n \n # Train model\n model = RandomForestClassifier(\n n_estimators=n_estimators,\n max_depth=max_depth,\n random_state=42\n )\n model.fit(X_train, y_train)\n \n # Evaluate\n y_pred = model.predict(X_test)\n accuracy = accuracy_score(y_test, y_pred)\n f1 = f1_score(y_test, y_pred, average='weighted')\n \n # Track with ML Scenario Manager\n with tracking.start_run(run_name='rf_training') as run:\n run.log_param('algorithm', 'RandomForest')\n run.log_param('n_estimators', n_estimators)\n run.log_param('max_depth', max_depth)\n run.log_param('test_split', test_split)\n run.log_metric('accuracy', accuracy)\n run.log_metric('f1_score', f1)\n run.log_artifact('model.pkl', pickle.dumps(model))\n \n # Send results\n result = {\n 'accuracy': accuracy,\n 'f1_score': f1,\n 'model_path': f'{api.config.model_output_path}model.pkl'\n }\n api.send('output', api.Message(result))\n\napi.set_port_callback('input', on_input)"
}
},
{
"name": "metrics_logger",
"component": "com.sap.ml.submitMetrics",
"config": {
"metricsType": "training"
}
}
],
"connections": [
{
"source": {
"operator": "data_loader",
"port": "output"
},
"target": {
"operator": "trainer",
"port": "input"
}
},
{
"source": {
"operator": "trainer",
"port": "output"
},
"target": {
"operator": "metrics_logger",
"port": "input"
}
}
],
"notes": [
"This is a machine learning training pipeline template.",
"Customize the Python script for your specific model.",
"Configure dataset path and model parameters.",
"Metrics are automatically tracked in ML Scenario Manager.",
"Model artifacts are logged for versioning and deployment.",
"Adjust test_split, n_estimators, max_depth as needed."
]
}
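
The trainer script above is stored as an escaped JSON string, which makes it hard to follow. Here is a local sketch of its train/evaluate core; the `sapdi.tracking` calls and the operator `api` object exist only inside the pipeline runtime, so this sketch substitutes synthetic data and plain prints for illustration:

```python
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the message body the pipeline would deliver.
X_arr, y_arr = make_classification(n_samples=1000, n_features=10, random_state=42)
df = pd.DataFrame(X_arr)
df["target"] = y_arr

# Same feature/target split and model setup as the embedded script.
X = df.drop("target", axis=1)
y = df["target"]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
print("accuracy:", accuracy_score(y_test, y_pred))
print("f1_score:", f1_score(y_test, y_pred, average="weighted"))
# Inside the pipeline these values go to sapdi.tracking instead of stdout.
```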


@@ -0,0 +1,90 @@
{
"name": "abap_to_hana_replication",
"description": "Replication flow from ABAP system to HANA Cloud",
"version": "1.0.0",
"source": {
"type": "ABAP",
"connection": "${abap_connection}",
"properties": {
"extractionType": "CDS",
"packageSize": 50000
}
},
"target": {
"type": "HANA",
"connection": "${hana_connection}",
"properties": {
"schema": "${target_schema}",
"writeMode": "UPSERT",
"batchSize": 10000
}
},
"tasks": [
{
"name": "customer_master",
"source": {
"object": "I_Customer",
"type": "CDS_VIEW",
"filter": ""
},
"target": {
"table": "CUSTOMER_MASTER",
"keyColumns": ["Customer"]
},
"mapping": {
"mode": "auto",
"customMappings": []
},
"loadType": {
"initial": true,
"delta": true
}
},
{
"name": "sales_orders",
"source": {
"object": "I_SalesOrder",
"type": "CDS_VIEW",
"filter": "CreationDate ge datetime'2024-01-01T00:00:00'"
},
"target": {
"table": "SALES_ORDERS",
"keyColumns": ["SalesOrder"]
},
"mapping": {
"mode": "auto",
"customMappings": [
{
"source": "SalesOrder",
"target": "SALES_ORDER_ID"
},
{
"source": "SoldToParty",
"target": "CUSTOMER_ID"
}
]
},
"loadType": {
"initial": true,
"delta": true
}
}
],
"schedule": {
"enabled": false,
"cron": "0 0 * * * *",
"timezone": "UTC"
},
"settings": {
"parallelTasks": 4,
"errorHandling": "CONTINUE",
"logging": "INFO"
},
"notes": [
"Configure ABAP and HANA connections before deployment.",
"Modify CDS view names for your specific data.",
"Adjust filters based on data volume requirements.",
"Enable schedule after successful initial load.",
"Uses UPSERT for exactly-once delivery semantics."
]
}
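
Because key columns are named in source terms while `customMappings` renames columns on the target side, the two are easy to misalign. A small sanity-check sketch, assuming the flow is saved locally as `templates/abap-replication.json` (the file name and the specific checks are illustrative):

```python
import json

# Assumed local copy of the replication flow definition (illustrative name).
with open("templates/abap-replication.json") as f:
    flow = json.load(f)

for task in flow["tasks"]:
    renames = {m["source"]: m["target"] for m in task["mapping"]["customMappings"]}
    if task["loadType"]["delta"] and not task["target"]["keyColumns"]:
        raise ValueError(f"{task['name']}: delta load requires key columns")
    for key in task["target"]["keyColumns"]:
        # With mapping mode "auto", columns without an explicit entry keep
        # their source names, so resolve each key through the rename map.
        print(f"{task['name']}: key {key!r} -> target column {renames.get(key, key)!r}")
```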