Initial commit
This commit is contained in:
11
skills/log-aggregation-setup/assets/README.md
Normal file
11
skills/log-aggregation-setup/assets/README.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Assets
|
||||
|
||||
Bundled resources for log-aggregation-setup skill
|
||||
|
||||
- [ ] elk_config_template.conf: Template configuration file for Logstash.
|
||||
- [ ] loki_config_template.yaml: Template configuration file for Loki.
|
||||
- [ ] splunk_config_template.conf: Template configuration file for Splunk.
|
||||
- [ ] example_log_data.json: Example log data in JSON format for testing the log aggregation setup.
|
||||
- [ ] dashboard_elk.json: Example Kibana dashboard configuration.
|
||||
- [ ] dashboard_loki.json: Example Grafana dashboard configuration for Loki.
|
||||
- [ ] dashboard_splunk.json: Example Splunk dashboard configuration.
|
||||
204
skills/log-aggregation-setup/assets/dashboard_elk.json
Normal file
204
skills/log-aggregation-setup/assets/dashboard_elk.json
Normal file
@@ -0,0 +1,204 @@
|
||||
{
|
||||
"_comment": "Kibana Dashboard Configuration for ELK Stack",
|
||||
"title": "System Performance and Log Analysis",
|
||||
"description": "Dashboard providing insights into system performance and log data.",
|
||||
"panels": [
|
||||
{
|
||||
"id": "cpu_usage",
|
||||
"type": "visualization",
|
||||
"title": "CPU Usage",
|
||||
"description": "Displays CPU usage over time.",
|
||||
"visState": {
|
||||
"type": "timeseries",
|
||||
"params": {
|
||||
"indexPattern": "system-metrics-*",
|
||||
"timeField": "@timestamp",
|
||||
"interval": "auto",
|
||||
"metrics": [
|
||||
{
|
||||
"field": "system.cpu.usage",
|
||||
"type": "avg",
|
||||
"alias": "Average CPU Usage"
|
||||
}
|
||||
],
|
||||
"xAxisMode": "timeseries",
|
||||
"yAxisMode": "normal"
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"w": 12,
|
||||
"h": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "memory_usage",
|
||||
"type": "visualization",
|
||||
"title": "Memory Usage",
|
||||
"description": "Displays memory usage over time.",
|
||||
"visState": {
|
||||
"type": "timeseries",
|
||||
"params": {
|
||||
"indexPattern": "system-metrics-*",
|
||||
"timeField": "@timestamp",
|
||||
"interval": "auto",
|
||||
"metrics": [
|
||||
{
|
||||
"field": "system.memory.actual.used.pct",
|
||||
"type": "avg",
|
||||
"alias": "Average Memory Usage"
|
||||
}
|
||||
],
|
||||
"xAxisMode": "timeseries",
|
||||
"yAxisMode": "normal"
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 0,
|
||||
"y": 6,
|
||||
"w": 12,
|
||||
"h": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "disk_usage",
|
||||
"type": "visualization",
|
||||
"title": "Disk Usage",
|
||||
"description": "Displays disk usage over time.",
|
||||
"visState": {
|
||||
"type": "timeseries",
|
||||
"params": {
|
||||
"indexPattern": "system-metrics-*",
|
||||
"timeField": "@timestamp",
|
||||
"interval": "auto",
|
||||
"metrics": [
|
||||
{
|
||||
"field": "system.disk.used.pct",
|
||||
"type": "avg",
|
||||
"alias": "Average Disk Usage"
|
||||
}
|
||||
],
|
||||
"xAxisMode": "timeseries",
|
||||
"yAxisMode": "normal"
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 12,
|
||||
"y": 0,
|
||||
"w": 12,
|
||||
"h": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "log_level_distribution",
|
||||
"type": "visualization",
|
||||
"title": "Log Level Distribution",
|
||||
"description": "Displays the distribution of log levels.",
|
||||
"visState": {
|
||||
"type": "pie",
|
||||
"params": {
|
||||
"indexPattern": "application-logs-*",
|
||||
"timeField": "@timestamp",
|
||||
"interval": "auto",
|
||||
"metrics": [
|
||||
{
|
||||
"field": "log.level",
|
||||
"type": "count",
|
||||
"alias": "Count"
|
||||
}
|
||||
],
|
||||
"xAxisMode": "categorical",
|
||||
"yAxisMode": "normal",
|
||||
"terms": {
|
||||
"field": "log.level",
|
||||
"size": 5
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 12,
|
||||
"y": 6,
|
||||
"w": 6,
|
||||
"h": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "error_rate",
|
||||
"type": "visualization",
|
||||
"title": "Error Rate",
|
||||
"description": "Displays the rate of error logs over time.",
|
||||
"visState": {
|
||||
"type": "timeseries",
|
||||
"params": {
|
||||
"indexPattern": "application-logs-*",
|
||||
"timeField": "@timestamp",
|
||||
"interval": "auto",
|
||||
"metrics": [
|
||||
{
|
||||
"field": "log.level",
|
||||
"type": "count",
|
||||
"alias": "Error Count",
|
||||
"filters": [
|
||||
{
|
||||
"field": "log.level",
|
||||
"operator": "is",
|
||||
"value": "error"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"xAxisMode": "timeseries",
|
||||
"yAxisMode": "normal"
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 18,
|
||||
"y": 6,
|
||||
"w": 6,
|
||||
"h": 6
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "log_table",
|
||||
"type": "visualization",
|
||||
"title": "Recent Logs",
|
||||
"description": "Displays a table of recent log entries.",
|
||||
"visState": {
|
||||
"type": "table",
|
||||
"params": {
|
||||
"indexPattern": "application-logs-*",
|
||||
"timeField": "@timestamp",
|
||||
"columns": [
|
||||
"@timestamp",
|
||||
"log.level",
|
||||
"message",
|
||||
"service.name"
|
||||
],
|
||||
"sort": {
|
||||
"field": "@timestamp",
|
||||
"direction": "desc"
|
||||
},
|
||||
"pageSize": 10
|
||||
}
|
||||
},
|
||||
"gridData": {
|
||||
"x": 0,
|
||||
"y": 12,
|
||||
"w": 24,
|
||||
"h": 6
|
||||
}
|
||||
}
|
||||
],
|
||||
"timeRestore": true,
|
||||
"timeTo": "now",
|
||||
"timeFrom": "now-15m",
|
||||
"refreshInterval": {
|
||||
"pause": false,
|
||||
"value": 15000
|
||||
},
|
||||
"indexPatternRefName": "kibana_index_pattern_ref",
|
||||
"kibanaSavedObjectMeta": {
|
||||
"searchSourceJSON": "{\"filter\": []}"
|
||||
}
|
||||
}
|
||||
288
skills/log-aggregation-setup/assets/dashboard_loki.json
Normal file
288
skills/log-aggregation-setup/assets/dashboard_loki.json
Normal file
@@ -0,0 +1,288 @@
|
||||
{
|
||||
"_comment": "Grafana dashboard for Loki",
|
||||
"dashboard": {
|
||||
"annotations": {
|
||||
"list": []
|
||||
},
|
||||
"description": "Example Grafana dashboard for Loki log aggregation.",
|
||||
"editable": true,
|
||||
"gnetId": null,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"panels": [
|
||||
{
|
||||
"_comment": "Panel: Logs overview",
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "palette-classic"
|
||||
},
|
||||
"custom": {
|
||||
"display": "auto",
|
||||
"filterable": true
|
||||
},
|
||||
"mappings": [],
|
||||
"min": null,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 24,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"dedupStrategy": "none",
|
||||
"enableExemplar": true,
|
||||
"prettifyJson": true,
|
||||
"showCommonContext": true,
|
||||
"showTime": true,
|
||||
"sortOrder": "Descending",
|
||||
"wrapLines": true
|
||||
},
|
||||
"pluginVersion": "7.5.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"editorMode": "code",
|
||||
"expr": "{job=\"my-app\"} |= \"error\"",
|
||||
"instant": false,
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Error Logs",
|
||||
"type": "logs"
|
||||
},
|
||||
{
|
||||
"_comment": "Panel: Log volume over time",
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": "normal",
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"min": null,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 8
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "7.5.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"editorMode": "code",
|
||||
"expr": "rate({job=\"my-app\"} |= `error` [1m])",
|
||||
"instant": false,
|
||||
"legendFormat": "{{job}}",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Error Log Volume",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"_comment": "Panel: HTTP Request Latency",
|
||||
"datasource": null,
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"fillOpacity": 20,
|
||||
"gradientMode": "none",
|
||||
"lineInterpolation": "linear",
|
||||
"lineWidth": 1,
|
||||
"showPoints": "auto",
|
||||
"spanNulls": false,
|
||||
"stacking": "normal",
|
||||
"thresholdsStyle": {
|
||||
"mode": "off"
|
||||
}
|
||||
},
|
||||
"mappings": [],
|
||||
"min": null,
|
||||
"max": null,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 80
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ms"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 8
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"legend": {
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "7.5.7",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": "${DS_LOKI}",
|
||||
"editorMode": "code",
|
||||
"expr": "quantile_over_time(0.99, {job=\"my-app\", endpoint=\"/api/users\"} | json | unwrap duration [1m]) by (job)",
|
||||
"instant": false,
|
||||
"legendFormat": "99th percentile",
|
||||
"queryType": "range",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Request Latency (99th percentile)",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "1m",
|
||||
"schemaVersion": 30,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"loki",
|
||||
"logs",
|
||||
"example"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"_comment": "Loki datasource variable",
|
||||
"current": {
|
||||
"text": "Loki",
|
||||
"value": "Loki"
|
||||
},
|
||||
"datasource": null,
|
||||
"definition": "Loki",
|
||||
"hide": 0,
|
||||
"includeAll": false,
|
||||
"label": "Loki Datasource",
|
||||
"multi": false,
|
||||
"name": "DS_LOKI",
|
||||
"options": [],
|
||||
"query": "Loki",
|
||||
"refresh": 1,
|
||||
"regex": "",
|
||||
"sort": 0,
|
||||
"tagValuesQuery": "",
|
||||
"tagsQuery": "",
|
||||
"type": "datasource",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-1h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"5s",
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "",
|
||||
"title": "Loki Log Aggregation Dashboard",
|
||||
"uid": "logi-aggregation-dashboard",
|
||||
"version": 1
|
||||
}
|
||||
}
|
||||
141
skills/log-aggregation-setup/assets/dashboard_splunk.json
Normal file
141
skills/log-aggregation-setup/assets/dashboard_splunk.json
Normal file
@@ -0,0 +1,141 @@
|
||||
{
|
||||
"_comment": "Splunk Dashboard Configuration - Example",
|
||||
"dashboard": {
|
||||
"label": "Application Performance Overview",
|
||||
"description": "A dashboard providing insights into application performance and health.",
|
||||
"version": "1.0",
|
||||
"layout": {
|
||||
"type": "absolute",
|
||||
"options": {
|
||||
"width": "100%",
|
||||
"height": "100%"
|
||||
}
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": "panel1",
|
||||
"title": "Requests per Minute",
|
||||
"description": "Shows the rate of incoming requests.",
|
||||
"type": "timeseries",
|
||||
"options": {
|
||||
"xAxisTitle": "Time",
|
||||
"yAxisTitle": "Requests/Minute"
|
||||
},
|
||||
"search": {
|
||||
"query": "index=main sourcetype=access_combined | timechart span=1m count",
|
||||
"earliest": "-15m",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 0,
|
||||
"y": 0,
|
||||
"width": 6,
|
||||
"height": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "panel2",
|
||||
"title": "Error Rate",
|
||||
"description": "Displays the percentage of error responses.",
|
||||
"type": "singlevalue",
|
||||
"options": {
|
||||
"unit": "%",
|
||||
"underLabel": "Error Rate (Last 15 minutes)"
|
||||
},
|
||||
"search": {
|
||||
"query": "index=main sourcetype=access_combined | stats count as total, count(eval(status>=500)) as errors | eval error_rate=round((errors/total)*100,2) | fields error_rate",
|
||||
"earliest": "-15m",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 6,
|
||||
"y": 0,
|
||||
"width": 3,
|
||||
"height": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "panel3",
|
||||
"title": "Average Response Time",
|
||||
"description": "Measures the average time taken to process requests.",
|
||||
"type": "singlevalue",
|
||||
"options": {
|
||||
"unit": "ms",
|
||||
"underLabel": "Average Response Time (Last 15 minutes)"
|
||||
},
|
||||
"search": {
|
||||
"query": "index=main sourcetype=access_combined | stats avg(response_time) as avg_rt | eval avg_rt=round(avg_rt,2)",
|
||||
"earliest": "-15m",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 9,
|
||||
"y": 0,
|
||||
"width": 3,
|
||||
"height": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "panel4",
|
||||
"title": "Top 10 Slowest Endpoints",
|
||||
"description": "Lists the endpoints with the highest average response times.",
|
||||
"type": "table",
|
||||
"options": {
|
||||
"drilldown": "none"
|
||||
},
|
||||
"search": {
|
||||
"query": "index=main sourcetype=access_combined | stats avg(response_time) as avg_rt by uri | sort -avg_rt | head 10",
|
||||
"earliest": "-1h",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 0,
|
||||
"y": 4,
|
||||
"width": 6,
|
||||
"height": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "panel5",
|
||||
"title": "Server CPU Utilization",
|
||||
"description": "Displays the CPU utilization across all servers.",
|
||||
"type": "timeseries",
|
||||
"options": {
|
||||
"xAxisTitle": "Time",
|
||||
"yAxisTitle": "% CPU Utilization"
|
||||
},
|
||||
"search": {
|
||||
"query": "index=os sourcetype=cpu | eval cpu_utilization=100-percentIdle | timechart span=1m avg(cpu_utilization) by host",
|
||||
"earliest": "-15m",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 6,
|
||||
"y": 4,
|
||||
"width": 6,
|
||||
"height": 4
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": "panel6",
|
||||
"title": "Recent Error Logs",
|
||||
"description": "Shows the most recent error logs.",
|
||||
"type": "event",
|
||||
"options": {
|
||||
"count": 5
|
||||
},
|
||||
"search": {
|
||||
"query": "index=main sourcetype=application log_level=ERROR",
|
||||
"earliest": "-1h",
|
||||
"latest": "now"
|
||||
},
|
||||
"position": {
|
||||
"x": 0,
|
||||
"y": 8,
|
||||
"width": 12,
|
||||
"height": 4
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
107
skills/log-aggregation-setup/assets/example_log_data.json
Normal file
107
skills/log-aggregation-setup/assets/example_log_data.json
Normal file
@@ -0,0 +1,107 @@
|
||||
[
|
||||
{
|
||||
"_comment": "Example log entry from a web server",
|
||||
"timestamp": "2024-01-26T10:00:00.000Z",
|
||||
"log_level": "INFO",
|
||||
"component": "web_server",
|
||||
"message": "Request received",
|
||||
"request_id": "a1b2c3d4e5f6",
|
||||
"client_ip": "192.168.1.100",
|
||||
"http_method": "GET",
|
||||
"http_path": "/api/users",
|
||||
"http_status_code": 200,
|
||||
"response_time_ms": 123
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry from a database",
|
||||
"timestamp": "2024-01-26T10:00:01.000Z",
|
||||
"log_level": "DEBUG",
|
||||
"component": "database",
|
||||
"message": "SQL query executed",
|
||||
"query": "SELECT * FROM users WHERE id = 1",
|
||||
"execution_time_ms": 5,
|
||||
"rows_returned": 1
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry from an application",
|
||||
"timestamp": "2024-01-26T10:00:02.000Z",
|
||||
"log_level": "ERROR",
|
||||
"component": "application",
|
||||
"message": "Error processing request",
|
||||
"error_code": 500,
|
||||
"error_message": "Internal server error",
|
||||
"request_id": "a1b2c3d4e5f6",
|
||||
"user_id": 123
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry from a system",
|
||||
"timestamp": "2024-01-26T10:00:03.000Z",
|
||||
"log_level": "WARN",
|
||||
"component": "system",
|
||||
"message": "Disk space nearing capacity",
|
||||
"disk_usage_percent": 90,
|
||||
"disk_path": "/var/log"
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry from a security component",
|
||||
"timestamp": "2024-01-26T10:00:04.000Z",
|
||||
"log_level": "INFO",
|
||||
"component": "security",
|
||||
"message": "Authentication successful",
|
||||
"user_id": 456,
|
||||
"username": "testuser",
|
||||
"client_ip": "192.168.1.200"
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry for authentication failure",
|
||||
"timestamp": "2024-01-26T10:00:05.000Z",
|
||||
"log_level": "WARN",
|
||||
"component": "security",
|
||||
"message": "Authentication failed",
|
||||
"username": "invaliduser",
|
||||
"client_ip": "192.168.1.200",
|
||||
"reason": "Invalid password"
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry from a microservice",
|
||||
"timestamp": "2024-01-26T10:00:06.000Z",
|
||||
"log_level": "INFO",
|
||||
"component": "microservice-auth",
|
||||
"message": "User authenticated",
|
||||
"user_id": 789,
|
||||
"username": "validuser",
|
||||
"service_name": "auth-service"
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry with exception details",
|
||||
"timestamp": "2024-01-26T10:00:07.000Z",
|
||||
"log_level": "ERROR",
|
||||
"component": "application",
|
||||
"message": "Unhandled exception",
|
||||
"exception_type": "NullReferenceException",
|
||||
"exception_message": "Object reference not set to an instance of an object.",
|
||||
"stack_trace": "at MyApp.Main.DoSomething() in MyApp.cs:line 20",
|
||||
"request_id": "g7h8i9j0k1l2"
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry with metrics",
|
||||
"timestamp": "2024-01-26T10:00:08.000Z",
|
||||
"log_level": "INFO",
|
||||
"component": "monitoring",
|
||||
"message": "System metrics",
|
||||
"cpu_usage_percent": 35,
|
||||
"memory_usage_percent": 60,
|
||||
"network_throughput_kbps": 1024
|
||||
},
|
||||
{
|
||||
"_comment": "Example log entry with audit information",
|
||||
"timestamp": "2024-01-26T10:00:09.000Z",
|
||||
"log_level": "INFO",
|
||||
"component": "audit",
|
||||
"message": "User profile updated",
|
||||
"user_id": 123,
|
||||
"updated_field": "email",
|
||||
"old_value": "old@example.com",
|
||||
"new_value": "new@example.com"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,75 @@
|
||||
# Loki Configuration File
|
||||
# This file configures the Loki log aggregation system.
|
||||
|
||||
auth_enabled: false # Disable authentication for simplicity (REPLACE_ME: Enable authentication in production)
|
||||
|
||||
server:
|
||||
http_listen_port: 3100 # Port Loki listens on for HTTP requests
|
||||
grpc_listen_port: 9096 # Port Loki listens on for gRPC requests
|
||||
|
||||
ingester:
|
||||
lifecycler:
|
||||
address: 127.0.0.1 # Address of the ingester
|
||||
ring:
|
||||
kvstore:
|
||||
store: inmemory # Use in-memory store for simplicity (REPLACE_ME: Use a persistent store like Consul or etcd in production)
|
||||
replication_factor: 1 # Number of replicas for log data
|
||||
wal:
|
||||
enabled: true # Enable Write-Ahead Log for durability
|
||||
dir: /tmp/loki/wal # Directory for the Write-Ahead Log (REPLACE_ME: Use a persistent volume in production)
|
||||
chunk_idle_period: 1h # Time after which an inactive chunk is flushed to storage
|
||||
chunk_block_size: 262144 # Size of each chunk block (256KB)
|
||||
chunk_retain_period: 24h # How long already-flushed chunks are kept in ingester memory before being dropped
|
||||
max_transfer_retries: 0 # Maximum number of retries for transferring chunks
|
||||
|
||||
schema_config:
|
||||
configs:
|
||||
- from: 2020-10-24 # Start date for this schema
|
||||
store: boltdb-shipper # Use BoltDB shipper for index storage
|
||||
object_store: filesystem # Use filesystem for chunk storage
|
||||
schema: v11 # Schema version
|
||||
index:
|
||||
prefix: index_ # Prefix for index keys
|
||||
period: 24h # Index rotation period
|
||||
|
||||
storage_config:
|
||||
boltdb_shipper:
|
||||
active_index_directory: /tmp/loki/index # Directory for the active index (REPLACE_ME: Use a persistent volume in production)
|
||||
shared_dir: /tmp/loki/chunks # Directory for shared chunks (REPLACE_ME: Use a persistent volume in production)
|
||||
filesystem:
|
||||
path: /tmp/loki/chunks # Directory for chunk storage (REPLACE_ME: Use a persistent volume in production)
|
||||
|
||||
limits_config:
|
||||
enforce_metric_name: false # Disable enforcement of metric names
|
||||
reject_old_samples: true # Reject samples older than the configured time
|
||||
reject_old_samples_max_age: 168h # Maximum age of samples (7 days)
|
||||
max_global_streams_per_user: 0 # 0 means unlimited
|
||||
max_streams_per_user: 0 # 0 means unlimited
|
||||
ingestion_rate_mb: 100 # Maximum ingestion rate in MB/s
|
||||
ingestion_burst_size_mb: 200 # Maximum burst size in MB
|
||||
max_line_size: 512000 # Maximum line size in bytes (500KB)
|
||||
# NOTE: max_line_length is not a recognized limits_config option; max_line_size above is the setting that applies.
|
||||
max_query_lookback: 720h # Maximum query lookback (30 days)
|
||||
split_queries_by_interval: 12h # Split queries by this interval
|
||||
# NOTE: max_concurrent_queries is not a limits_config option; query concurrency is set with max_concurrent in the querier section (e.g. 30).
|
||||
max_query_series: 1000 # Maximum number of series returned by a query
|
||||
max_query_parallelism: 16 # Maximum query parallelism
|
||||
max_query_length: 720h # Maximum query length (30 days)
|
||||
|
||||
compactor:
|
||||
working_directory: /tmp/loki/compactor # Directory for compactor working files (REPLACE_ME: Use a persistent volume in production)
|
||||
shared_store: filesystem # Use filesystem for shared storage
|
||||
compaction_interval: 1h # Interval between compactor runs
|
||||
retention_enabled: true # Enable retention of old chunks
|
||||
retention_delete_delay: 24h # Delay before deleting old chunks
|
||||
# NOTE: retention_max_age is not a compactor option; set the retention window with retention_period under limits_config (e.g. 720h for 30 days).
|
||||
|
||||
ruler:
|
||||
storage:
|
||||
type: local
|
||||
local:
|
||||
directory: /tmp/loki/rules # Directory to store the rules (REPLACE_ME: Use a persistent volume in production)
|
||||
rule_path: /tmp/loki/rules # Path where rules are stored (REPLACE_ME: Use a persistent volume in production)
|
||||
alertmanager_url: "" # URL of the Alertmanager instance (REPLACE_ME: YOUR_ALERTMANAGER_URL)
|
||||
poll_interval: 30s # Interval to poll for rule changes
|
||||
enable_api: true # Enable the API for managing rules
|
||||
114
skills/log-aggregation-setup/assets/splunk_config_template.conf
Normal file
114
skills/log-aggregation-setup/assets/splunk_config_template.conf
Normal file
@@ -0,0 +1,114 @@
|
||||
# Splunk Configuration Template
|
||||
|
||||
# This file provides a template for configuring Splunk to collect and index logs.
|
||||
# It includes examples for various log sources and configurations.
|
||||
# Please review and modify this file according to your specific environment and requirements.
|
||||
|
||||
# ==============================================================================
|
||||
# Global Settings
|
||||
# ==============================================================================
|
||||
|
||||
[default]
|
||||
# Replace with the actual hostname of the Splunk instance
host = <YOUR_HOSTNAME>
|
||||
|
||||
# ==============================================================================
|
||||
# Input Configuration: System Logs (Syslog)
|
||||
# ==============================================================================
|
||||
|
||||
# Configure a UDP input for receiving syslog messages.
|
||||
# Ensure your syslog daemon is configured to forward logs to this Splunk instance.
|
||||
|
||||
[udp://514]
|
||||
connection_host = ip
|
||||
sourcetype = syslog
|
||||
# Change if you want to index into a different index
index = main
|
||||
disabled = false
|
||||
|
||||
# ==============================================================================
|
||||
# Input Configuration: File Monitoring (Tail)
|
||||
# ==============================================================================
|
||||
|
||||
# Monitor a specific log file. Useful for application logs.
|
||||
# Adjust the path and sourcetype accordingly.
|
||||
|
||||
[monitor:///var/log/<YOUR_APPLICATION>/<YOUR_APPLICATION>.log]
|
||||
sourcetype = <YOUR_APPLICATION>_log
|
||||
# Change if you want to index into a different index
index = main
|
||||
disabled = false
|
||||
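# Optional: Multiline event breaking (if needed). Event-breaking settings are read from props.conf, so place them under the sourcetype stanza there rather than in this input stanza.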
|
||||
# BREAK_ONLY_BEFORE = ^\d{4}-\d{2}-\d{2}
|
||||
|
||||
# ==============================================================================
|
||||
# Input Configuration: Windows Event Logs (Windows)
|
||||
# ==============================================================================
|
||||
|
||||
# Configure Splunk to collect Windows Event Logs.
|
||||
# Adjust the event logs to monitor as needed.
|
||||
|
||||
[WinEventLog://Application]
|
||||
disabled = false
|
||||
index = wineventlog
|
||||
sourcetype = WinEventLog:Application
|
||||
# Optional: Filter events by event code
|
||||
# Resolve AD objects:
# evt_resolve_ad_obj = 1
|
||||
# Example: only collect events with ID 4624 and 4625:
# whitelist = 4624,4625
|
||||
|
||||
[WinEventLog://System]
|
||||
disabled = false
|
||||
index = wineventlog
|
||||
sourcetype = WinEventLog:System
|
||||
|
||||
[WinEventLog://Security]
|
||||
disabled = false
|
||||
index = wineventlog
|
||||
sourcetype = WinEventLog:Security
|
||||
# IMPORTANT: Consider the volume of security logs and storage implications.
|
||||
|
||||
# ==============================================================================
|
||||
# Input Configuration: Scripted Input (Example: CPU Utilization)
|
||||
# ==============================================================================
|
||||
|
||||
# Example of a scripted input to collect CPU utilization.
|
||||
# Requires a script (e.g., cpu_utilization.sh or cpu_utilization.ps1)
|
||||
# that outputs the CPU utilization in a structured format (e.g., CSV, JSON).
|
||||
|
||||
[script://$SPLUNK_HOME/etc/apps/<YOUR_APP_NAME>/bin/cpu_utilization.sh]
|
||||
# Run every 60 seconds
interval = 60
|
||||
sourcetype = cpu_utilization
|
||||
# Consider a dedicated metrics index
index = metrics
|
||||
disabled = false
|
||||
|
||||
# ==============================================================================
|
||||
# Transformations (Optional)
|
||||
# ==============================================================================
|
||||
|
||||
# Use transformations to modify events before they are indexed (transform stanzas are defined in transforms.conf and referenced from props.conf).
|
||||
# Example: Masking sensitive data.
|
||||
|
||||
# [transform-null]
|
||||
# REGEX = (.*)<SENSITIVE_FIELD>.*
|
||||
# DEST_KEY = _raw
|
||||
# FORMAT = $1<SENSITIVE_FIELD>MASKED
|
||||
|
||||
# ==============================================================================
|
||||
# Index Configuration (Optional)
|
||||
# ==============================================================================
|
||||
|
||||
# Configure index-specific settings (index stanzas belong in indexes.conf).
|
||||
|
||||
# [<YOUR_INDEX_NAME>]
|
||||
# homePath = $SPLUNK_DB/<YOUR_INDEX_NAME>/db
|
||||
# coldPath = $SPLUNK_DB/<YOUR_INDEX_NAME>/colddb
|
||||
# thawedPath = $SPLUNK_DB/<YOUR_INDEX_NAME>/thaweddb
|
||||
# maxDataSize = auto
|
||||
# 90 days retention (the value must be given in seconds)
# frozenTimePeriodInSecs = 7776000
|
||||
|
||||
# ==============================================================================
|
||||
# Notes
|
||||
# ==============================================================================
|
||||
|
||||
# * Replace placeholders with actual values.
|
||||
# * Ensure proper permissions are set for log files and scripts.
|
||||
# * Test configurations thoroughly before deploying to production.
|
||||
# * Consider using Splunk's monitoring console for health checks and troubleshooting.
|
||||
# * Review Splunk documentation for detailed information on configuration options.
|
||||
Reference in New Issue
Block a user