From b9da7b3a23ee2cd4232a91114caeed1222c542d5 Mon Sep 17 00:00:00 2001 From: Zhongwei Li Date: Sun, 30 Nov 2025 08:46:08 +0800 Subject: [PATCH] Initial commit --- .claude-plugin/plugin.json | 14 + README.md | 3 + commands/analyze-node-tuning.md | 116 ++ commands/generate-tuned-profile.md | 200 ++++ plugin.lock.json | 61 + skills/scripts/SKILL.md | 183 +++ skills/scripts/analyze_node_tuning.py | 1292 ++++++++++++++++++++++ skills/scripts/generate_tuned_profile.py | 414 +++++++ 8 files changed, 2283 insertions(+) create mode 100644 .claude-plugin/plugin.json create mode 100644 README.md create mode 100644 commands/analyze-node-tuning.md create mode 100644 commands/generate-tuned-profile.md create mode 100644 plugin.lock.json create mode 100644 skills/scripts/SKILL.md create mode 100644 skills/scripts/analyze_node_tuning.py create mode 100644 skills/scripts/generate_tuned_profile.py diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json new file mode 100644 index 0000000..a57aabf --- /dev/null +++ b/.claude-plugin/plugin.json @@ -0,0 +1,14 @@ +{ + "name": "node-tuning", + "description": "Automatically create and apply tuned profile", + "version": "1.0.0", + "author": { + "name": "github.com/openshift-eng" + }, + "skills": [ + "./skills" + ], + "commands": [ + "./commands" + ] +} \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..fc8d574 --- /dev/null +++ b/README.md @@ -0,0 +1,3 @@ +# node-tuning + +Automatically create and apply tuned profile diff --git a/commands/analyze-node-tuning.md b/commands/analyze-node-tuning.md new file mode 100644 index 0000000..6373a46 --- /dev/null +++ b/commands/analyze-node-tuning.md @@ -0,0 +1,116 @@ +--- +description: Analyze kernel/sysctl tuning from a live node or sosreport snapshot and propose NTO recommendations +argument-hint: "[--sosreport PATH] [--format json|markdown] [--max-irq-samples N]" +--- + +## Name +node-tuning:analyze-node-tuning + +## Synopsis +```text +/node-tuning:analyze-node-tuning [--sosreport PATH] [--collect-sosreport|--no-collect-sosreport] [--sosreport-output PATH] [--node NODE] [--kubeconfig PATH] [--oc-binary PATH] [--format json|markdown] [--max-irq-samples N] [--keep-snapshot] +``` + +## Description +The `node-tuning:analyze-node-tuning` command inspects kernel tuning signals gathered from either a live OpenShift node (`/proc`, `/sys`), an `oc debug node/` snapshot captured via KUBECONFIG, or an extracted sosreport directory. It parses CPU isolation parameters, IRQ affinity, huge page allocation, critical sysctl settings, and networking counters before compiling actionable recommendations that can be enforced through Tuned profiles or MachineConfig updates. + +Use this command when you need to: +- Audit a node for tuning regressions after upgrades or configuration changes. +- Translate findings into remediation steps for the Node Tuning Operator. +- Produce JSON or Markdown reports suitable for incident response, CI gates, or documentation. + +## Implementation +1. **Establish data source** + - Live (local) analysis: the helper script defaults to `/proc` and `/sys`. Ensure the command runs on the target node (or within an SSH session / debug pod). + - Remote analysis via `oc debug`: provide `--node ` (plus optional `--kubeconfig` and `--oc-binary`). 
The helper defaults to entering the RHCOS `toolbox` (backed by the `registry.redhat.io/rhel9/support-tools` image) via `oc debug node/`, running `sosreport --batch --quiet -e openshift -e openshift_ovn -e openvswitch -e podman -e crio -k crio.all=on -k crio.logs=on -k podman.all=on -k podman.logs=on -k networking.ethtool-namespaces=off --all-logs --plugin-timeout=600`, streaming the archive locally (respecting `--sosreport-output` when set), and analyzing the extracted data. Use `--toolbox-image` (or `TOOLBOX_IMAGE`) to point at a mirrored support-tools image, `--sosreport-arg` to append extra flags (repeat per flag), or `--skip-default-sosreport-flags` to take full control. Host HTTP(S) proxy variables are forwarded when present but entirely optional. Add `--no-collect-sosreport` to skip sosreport generation entirely, and `--keep-snapshot` if you want to retain the downloaded files. + - Offline analysis: provide `--sosreport /path/to/sosreport-` pointing to an extracted sosreport directory; the script auto-discovers embedded `proc/` and `sys/` trees. + - Override non-standard layouts with `--proc-root` or `--sys-root` as needed. + +2. **Prepare workspace** + - Create `.work/node-tuning//` to store generated reports (remote snapshots and sosreport captures may reuse this path or default to a temporary directory). + - Decide whether you want Markdown (human-readable) or JSON (automation-ready) output. Set `--format json` and `--output` for machine consumption. + +3. **Invoke the analysis helper** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py \ + --sosreport "$SOS_DIR" \ + --format markdown \ + --max-irq-samples 10 \ + --output ".work/node-tuning/${HOSTNAME}/analysis.md" + ``` + - Omit `--sosreport` and `--node` to evaluate the local environment. + - Lower `--max-irq-samples` to cap the number of IRQ affinity overlaps listed in the report. + +4. **Interpret results** + - **System Overview**: Validates kernel release, NUMA nodes, and kernel cmdline flags (isolcpus, nohz_full, tuned.non_isolcpus). + - **CPU & Isolation**: Highlights SMT detection, isolated CPU masks, and mismatches between default IRQ affinity and isolated cores. + - **Huge Pages**: Summarizes global and per-NUMA huge page pools, reserved counts, and sysctl targets. + - **Sysctl Highlights**: Surfaces values for tuning-critical keys (e.g., `net.core.netdev_max_backlog`, `vm.swappiness`, THP state) with recommendations when thresholds are missed. + - **Network Signals**: Examines `TcpExt` counters and sockstat data for backlog drops, syncookie failures, or orphaned sockets. + - **IRQ Affinity**: Lists IRQs overlapping isolated CPUs so you can adjust tuned profiles or irqbalance policies. + - **Process Snapshot**: When available in sosreport snapshots, shows top CPU consumers and flags irqbalance presence. + +5. **Apply remediation** + - Feed the recommendations into `/node-tuning:generate-tuned-profile` or MachineConfig workflows. + - For immediate live tuning, adjust sysctls or interrupt affinities manually, then rerun the analysis to confirm remediation. + +## Return Value +- **Success**: Returns a Markdown or JSON report summarizing findings and recommended actions. +- **Failure**: Reports descriptive errors (e.g., missing `proc/` or `sys/` directories, unreadable sosreport path) and exits non-zero. + +## Examples + +1. **Analyze a live node and print Markdown** + ```text + /node-tuning:analyze-node-tuning --format markdown + ``` + +2. 
**Capture `/proc` and `/sys` via `oc debug` (sosreport by default) and analyze remotely** + ```text + /node-tuning:analyze-node-tuning \ + --node worker-rt-0 \ + --kubeconfig ~/.kube/prod \ + --format markdown + ``` + +3. **Collect a sosreport via `oc debug` (custom image + flags) and analyze it locally** + ```text + /node-tuning:analyze-node-tuning \ + --node worker-rt-0 \ + --toolbox-image registry.example.com/support-tools:latest \ + --sosreport-arg "--case-id=01234567" \ + --sosreport-output .work/node-tuning/sosreports \ + --format json + ``` + +4. **Inspect an extracted sosreport and save JSON to disk** + ```text + /node-tuning:analyze-node-tuning \ + --sosreport ~/Downloads/sosreport-worker-001 \ + --format json \ + --max-irq-samples 20 + ``` + +5. **Limit the recommendation set to a handful of IRQ overlaps** + ```text + /node-tuning:analyze-node-tuning --sosreport /tmp/sosreport --max-irq-samples 5 + ``` + +## Arguments: +- **--sosreport**: Path to an extracted sosreport directory to analyze instead of the live filesystem. +- **--format**: Output format (`markdown` default or `json` for structured data). +- **--output**: Optional file path where the helper writes the report. +- **--max-irq-samples**: Maximum number of IRQ affinity overlaps to include in the output (default 15). +- **--proc-root**: Override path to the procfs tree when auto-detection is insufficient. +- **--sys-root**: Override path to the sysfs tree when auto-detection is insufficient. +- **--node**: OpenShift node name to analyze via `oc debug node/` when direct access is not possible. +- **--kubeconfig**: Path to the kubeconfig file used for `oc debug`; relies on the current oc context when omitted. +- **--oc-binary**: Path to the `oc` binary (defaults to `$OC_BIN` or `oc`). +- **--keep-snapshot**: Preserve the temporary directory produced from `oc debug` (snapshots or sosreports) for later inspection. +- **--collect-sosreport**: Trigger `sosreport` via `oc debug node/`, download the archive, and analyze the extracted contents automatically (default behavior whenever `--node` is supplied and no other source is chosen). +- **--no-collect-sosreport**: Disable the default sosreport workflow when `--node` is supplied, falling back to the raw `/proc`/`/sys` snapshot. +- **--sosreport-output**: Directory where downloaded sosreport archives and their extraction should be placed (defaults to a temporary directory). +- **--toolbox-image**: Override the container image that toolbox pulls when collecting sosreport (defaults to `registry.redhat.io/rhel9/support-tools:latest` or `TOOLBOX_IMAGE` env). +- **--sosreport-arg**: Append an additional argument to the sosreport command (repeatable). +- **--skip-default-sosreport-flags**: Do not include the default OpenShift-focused sosreport plugins/collectors; only use values supplied via `--sosreport-arg`. + diff --git a/commands/generate-tuned-profile.md b/commands/generate-tuned-profile.md new file mode 100644 index 0000000..afafd86 --- /dev/null +++ b/commands/generate-tuned-profile.md @@ -0,0 +1,200 @@ +--- +description: Generate a Tuned (tuned.openshift.io/v1) profile manifest for the Node Tuning Operator +argument-hint: "[profile-name] [--summary ...] [--sysctl ...] [options]" +--- + +## Name +node-tuning:generate-tuned-profile + +## Synopsis +```text +/node-tuning:generate-tuned-profile [profile-name] [--summary TEXT] [--include VALUE ...] [--sysctl KEY=VALUE ...] [--match-label KEY[=VALUE] ...] 
[options]
```

## Description
The `node-tuning:generate-tuned-profile` command streamlines creation of `tuned.openshift.io/v1` manifests for the OpenShift Node Tuning Operator. It captures the desired Tuned profile metadata, tuned daemon configuration blocks (e.g. `[sysctl]`, `[variables]`, `[bootloader]`), and recommendation rules, then invokes the helper script at `plugins/node-tuning/skills/scripts/generate_tuned_profile.py` to render a ready-to-apply YAML file.

Use this command whenever you need to:
- Bootstrap a new Tuned custom profile targeting selected nodes or machine config pools
- Generate manifests that can be version-controlled alongside other automation
- Iterate on sysctl, bootloader, or service parameters without hand-editing multi-line YAML

The generated manifest follows the structure expected by the cluster Node Tuning Operator:
```
apiVersion: tuned.openshift.io/v1
kind: Tuned
metadata:
  name: <profile-name>
  namespace: openshift-cluster-node-tuning-operator
spec:
  profile:
  - data: |
      [main]
      summary=...
      include=...
      ...
    name: <profile-name>
  recommend:
  - machineConfigLabels: {...}
    match:
    - label: ...
      value: ...
    priority: <priority>
    profile: <profile-name>
```

## Implementation
1. **Collect inputs**
   - Confirm Python 3.8+ is available (`python3 --version`).
   - Gather the Tuned profile name, summary, optional include chain, sysctl values, variables, and any additional section lines (e.g. `[bootloader]`, `[service]`).
   - Determine targeting rules: either `--match-label` entries (node labels) or `--machine-config-label` entries (MachineConfigPool selectors).
   - Decide whether an accompanying MachineConfigPool (MCP) workflow is required for kernel boot arguments (see **Advanced Workflow** below).
   - Use the helper's `--list-nodes` and `--label-node` flags when you need to inspect or label nodes prior to manifest generation.

2. **Build execution workspace**
   - Create or reuse `.work/node-tuning/<profile-name>/`.
   - Decide on the manifest filename (default `tuned.yaml` inside the workspace) or provide `--output` to override.

3. **Invoke the generator script**
   - Run the helper with the collected switches:
   ```bash
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \
     --profile-name "$PROFILE_NAME" \
     --summary "$SUMMARY" \
     --include openshift-node \
     --sysctl net.core.netdev_max_backlog=16384 \
     --variable isolated_cores=1 \
     --section bootloader:cmdline_ocp_realtime=+systemd.cpu_affinity=${not_isolated_cores_expanded} \
     --machine-config-label machineconfiguration.openshift.io/role=worker-rt \
     --match-label tuned.openshift.io/elasticsearch="" \
     --priority 25 \
     --output ".work/node-tuning/$PROFILE_NAME/tuned.yaml"
   ```
   - Use `--dry-run` to print the manifest to stdout before writing, if desired.

4. **Validate output**
   - Inspect the generated YAML (`yq e . .work/node-tuning/$PROFILE_NAME/tuned.yaml` or open in an editor).
   - Optionally run `oc apply --dry-run=client -f .work/node-tuning/$PROFILE_NAME/tuned.yaml` to confirm schema compatibility.

5. **Apply or distribute**
   - Apply to a cluster with `oc apply -f .work/node-tuning/$PROFILE_NAME/tuned.yaml`.
   - Commit the manifest to Git or attach to automated pipelines as needed.

## Advanced Workflow: Huge Pages with a Dedicated MachineConfigPool
Use this workflow when enabling huge pages or other kernel boot parameters that require coordinating the Node Tuning Operator with the Machine Config Operator while minimizing reboots.

1. 
**Label target nodes**
   - Preview candidates: `python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py --list-nodes --node-selector "node-role.kubernetes.io/worker" --skip-manifest`.
   - Label workers with the helper (repeat per node):
   ```bash
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \
     --label-node ip-10-0-1-23.ec2.internal:node-role.kubernetes.io/worker-hp= \
     --overwrite-labels \
     --skip-manifest
   ```
   - Alternatively run `oc label node <node-name> node-role.kubernetes.io/worker-hp=` directly if you prefer the CLI.

2. **Generate the Tuned manifest**
   - Include bootloader arguments via the helper script:
   ```bash
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \
     --profile-name "openshift-node-hugepages" \
     --summary "Boot time configuration for hugepages" \
     --include openshift-node \
     --section bootloader:cmdline_openshift_node_hugepages="hugepagesz=2M hugepages=50" \
     --machine-config-label machineconfiguration.openshift.io/role=worker-hp \
     --priority 30 \
     --output .work/node-tuning/openshift-node-hugepages/hugepages-tuned-boottime.yaml
   ```
   - Review the `[bootloader]` section to ensure the kernel arguments match the desired configuration (e.g. `kernel-rt`, huge pages, additional sysctls).

3. **Author the MachineConfigPool manifest**
   - Create `.work/node-tuning/openshift-node-hugepages/hugepages-mcp.yaml` with:
   ```yaml
   apiVersion: machineconfiguration.openshift.io/v1
   kind: MachineConfigPool
   metadata:
     name: worker-hp
     labels:
       worker-hp: ""
   spec:
     machineConfigSelector:
       matchExpressions:
         - key: machineconfiguration.openshift.io/role
           operator: In
           values:
             - worker
             - worker-hp
     nodeSelector:
       matchLabels:
         node-role.kubernetes.io/worker-hp: ""
   ```

4. **Apply manifests (optional `--dry-run`)**
   - `oc apply -f .work/node-tuning/openshift-node-hugepages/hugepages-tuned-boottime.yaml`
   - `oc apply -f .work/node-tuning/openshift-node-hugepages/hugepages-mcp.yaml`
   - Watch progress: `oc get mcp worker-hp -w`

5. **Verify results**
   - Confirm huge page allocation after the reboot: `oc get node <node-name> -o jsonpath="{.status.allocatable.hugepages-2Mi}"`
   - Inspect kernel arguments: `oc debug node/<node-name> -q -- chroot /host cat /proc/cmdline`

## Return Value
- **Success**: Path to the generated manifest and the profile name are returned to the caller.
- **Failure**: Script exits non-zero with stderr diagnostics (e.g. invalid `KEY=VALUE` pair, missing labels, unwritable output path).

## Examples

1. **Realtime worker profile targeting worker-rt MCP**
   ```text
   /node-tuning:generate-tuned-profile openshift-realtime \
     --summary "Custom realtime tuned profile" \
     --include openshift-node --include realtime \
     --variable isolated_cores=1 \
     --section bootloader:cmdline_ocp_realtime=+systemd.cpu_affinity=${not_isolated_cores_expanded} \
     --machine-config-label machineconfiguration.openshift.io/role=worker-rt \
     --output .work/node-tuning/openshift-realtime/realtime.yaml
   ```

2. **Sysctl-only profile matched by node label**
   ```text
   /node-tuning:generate-tuned-profile custom-net-tuned \
     --summary "Increase conntrack table" \
     --sysctl net.netfilter.nf_conntrack_max=262144 \
     --match-label tuned.openshift.io/custom-net \
     --priority 18
   ```

3. 
**Preview manifest without writing to disk**
   ```text
   /node-tuning:generate-tuned-profile pidmax-test \
     --summary "Raise pid max" \
     --sysctl kernel.pid_max=131072 \
     --match-label tuned.openshift.io/pidmax="" \
     --dry-run
   ```

## Arguments:
- **$1** (`profile-name`): Name for the Tuned profile and manifest resource.
- **--summary**: Required summary string placed in the `[main]` section.
- **--include**: Optional include chain entries (multiple allowed).
- **--main-option**: Additional `[main]` section key/value pairs (`KEY=VALUE`).
- **--variable**: Add entries to the `[variables]` section (`KEY=VALUE`).
- **--sysctl**: Add sysctl settings to the `[sysctl]` section (`KEY=VALUE`).
- **--section**: Add lines to arbitrary sections using `SECTION:KEY=VALUE`.
- **--machine-config-label**: MachineConfigPool selector labels (`key=value`) applied under `machineConfigLabels`.
- **--match-label**: Node selector labels for the `recommend[].match[]` block; omit `=value` to match existence only.
- **--priority**: Recommendation priority (integer, default 20).
- **--namespace**: Override the manifest namespace (default `openshift-cluster-node-tuning-operator`).
- **--output**: Destination file path; defaults to `<profile-name>.yaml` in the current directory.
- **--dry-run**: Print manifest to stdout instead of writing to a file.
- **--skip-manifest**: Skip manifest generation; useful when only listing or labeling nodes.
- **--list-nodes**: List nodes via `oc get nodes` (works with `--node-selector`).
- **--node-selector**: Label selector applied when `--list-nodes` is used.
- **--label-node**: Apply labels to nodes using `NODE:KEY[=VALUE]` notation; repeatable.
- **--overwrite-labels**: Allow overwriting existing labels when labeling nodes.
- **--oc-binary**: Path to the `oc` executable (defaults to `$OC_BIN` or `oc`).
+ diff --git a/plugin.lock.json b/plugin.lock.json new file mode 100644 index 0000000..af1335d --- /dev/null +++ b/plugin.lock.json @@ -0,0 +1,61 @@ +{ + "$schema": "internal://schemas/plugin.lock.v1.json", + "pluginId": "gh:openshift-eng/ai-helpers:plugins/node-tuning", + "normalized": { + "repo": null, + "ref": "refs/tags/v20251128.0", + "commit": "fef29390caa6cbea53442961962a2c708c15ed47", + "treeHash": "7845a42d4cc3371458e0d08eeb3238859cdd7ca540e0715e933622281dbb1735", + "generatedAt": "2025-11-28T10:27:31.563422Z", + "toolVersion": "publish_plugins.py@0.2.0" + }, + "origin": { + "remote": "git@github.com:zhongweili/42plugin-data.git", + "branch": "master", + "commit": "aa1497ed0949fd50e99e70d6324a29c5b34f9390", + "repoRoot": "/Users/zhongweili/projects/openmind/42plugin-data" + }, + "manifest": { + "name": "node-tuning", + "description": "Automatically create and apply tuned profile", + "version": "1.0.0" + }, + "content": { + "files": [ + { + "path": "README.md", + "sha256": "26494cca3deb591b796ec73b9a3f6d378789efcbbc047fc85bf5a1f512d88bf0" + }, + { + "path": ".claude-plugin/plugin.json", + "sha256": "2b8d7a8e0767f06b1146b67ba4aa6ba83e0936588cdd692c0ae6c443d672d39c" + }, + { + "path": "commands/analyze-node-tuning.md", + "sha256": "6c3fc4379ffaca1f44fdfbe6232d03289c50f522dd97287f365d9e4aabd30351" + }, + { + "path": "commands/generate-tuned-profile.md", + "sha256": "5656e219f18ed3ddcd95b386f1f7e147ba1cb69e1878114aa62d76e4c39fa2c5" + }, + { + "path": "skills/scripts/generate_tuned_profile.py", + "sha256": "64ff03b8b2c05b08bbff33e2c601cf7c53c02c55d54f39a9e9ffb7bb822cc72f" + }, + { + "path": "skills/scripts/analyze_node_tuning.py", + "sha256": "3769a2edcf48cc2113214f47e2a91c3d6a8b0494b3b955bf94ac3adcc34d613a" + }, + { + "path": "skills/scripts/SKILL.md", + "sha256": "d1afc55c0edeb5d8272ca792f13b86148c66bc00192fbbd561b067f5c7448fd3" + } + ], + "dirSha256": "7845a42d4cc3371458e0d08eeb3238859cdd7ca540e0715e933622281dbb1735" + }, + "security": { + "scannedAt": null, + "scannerVersion": null, + "flags": [] + } +} \ No newline at end of file diff --git a/skills/scripts/SKILL.md b/skills/scripts/SKILL.md new file mode 100644 index 0000000..050a983 --- /dev/null +++ b/skills/scripts/SKILL.md @@ -0,0 +1,183 @@ +--- +name: Node Tuning Helper Scripts +description: Generate tuned manifests and evaluate node tuning snapshots +--- + +# Node Tuning Helper Scripts + +Detailed instructions for invoking the helper utilities that back `/node-tuning` commands: +- `generate_tuned_profile.py` renders Tuned manifests (`tuned.openshift.io/v1`). +- `analyze_node_tuning.py` inspects live nodes or sosreports for tuning gaps. + +## When to Use These Scripts +- Translate structured command inputs into Tuned manifests for the Node Tuning Operator. +- Iterate on generated YAML outside the assistant or integrate the generator into automation. +- Analyze CPU isolation, IRQ affinity, huge pages, sysctl values, and networking counters from live clusters or archived sosreports. + +## Prerequisites +- Python 3.8 or newer (`python3 --version`). +- Repository checkout so the scripts under `plugins/node-tuning/skills/scripts/` are accessible. +- Optional: `oc` CLI when validating or applying manifests. +- Optional: Extracted sosreport directory when running the analysis script offline. +- Optional (remote analysis): `oc` CLI access plus a valid `KUBECONFIG` when capturing `/proc`/`/sys` or sosreport via `oc debug node/`. 
The sosreport workflow pulls the `registry.redhat.io/rhel9/support-tools` image (override with `--toolbox-image` or `TOOLBOX_IMAGE`) and requires registry access. HTTP(S) proxy env vars from the host are forwarded automatically when present, but using a proxy is optional.

---

## Script: `generate_tuned_profile.py`

### Implementation Steps
1. **Collect Inputs**
   - `--profile-name`: Tuned resource name.
   - `--summary`: `[main]` section summary.
   - Repeatable options: `--include`, `--main-option`, `--variable`, `--sysctl`, `--section` (`SECTION:KEY=VALUE`).
   - Target selectors: `--machine-config-label key=value`, `--match-label key[=value]`.
   - Optional: `--priority` (default 20), `--namespace`, `--output`, `--dry-run`.
   - Use `--list-nodes`/`--node-selector` to inspect nodes and `--label-node NODE:KEY[=VALUE]` (plus `--overwrite-labels`) to tag machines.

2. **Inspect or Label Nodes (optional)**
   ```bash
   # List all worker nodes
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py --list-nodes --node-selector "node-role.kubernetes.io/worker" --skip-manifest

   # Label a specific node for the worker-hp pool
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \
     --label-node ip-10-0-1-23.ec2.internal:node-role.kubernetes.io/worker-hp= \
     --overwrite-labels \
     --skip-manifest
   ```

3. **Render the Manifest**
   ```bash
   python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \
     --profile-name "$PROFILE" \
     --summary "$SUMMARY" \
     --sysctl net.core.netdev_max_backlog=16384 \
     --match-label tuned.openshift.io/custom-net \
     --output .work/node-tuning/$PROFILE/tuned.yaml
   ```
   - Omit `--output` to write `<profile-name>.yaml` in the current directory.
   - Add `--dry-run` to print the manifest to stdout.

4. **Review Output**
   - Inspect the generated YAML for accuracy.
   - Optionally format with `yq` or open in an editor for readability.

5. **Validate and Apply**
   - Dry-run: `oc apply --dry-run=client -f <manifest-path>`.
   - Apply: `oc apply -f <manifest-path>`.

### Error Handling
- Missing required options raise `ValueError` with descriptive messages.
- The script exits non-zero when no target selectors (`--machine-config-label` or `--match-label`) are supplied.
- Invalid key/value or section inputs identify the failing argument explicitly.
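
For reference, the `KEY=VALUE` and `SECTION:KEY=VALUE` notations above can be validated with a few lines of standard-library Python. This is only a minimal sketch of the parsing behaviour described here, not the plugin's actual implementation; the helper names are hypothetical.

```python
from typing import Tuple


def parse_key_value(raw: str) -> Tuple[str, str]:
    """Split 'KEY=VALUE', naming the offending argument on failure."""
    key, sep, value = raw.partition("=")
    if not sep or not key:
        raise ValueError(f"expected KEY=VALUE, got '{raw}'")
    return key, value


def parse_section_entry(raw: str) -> Tuple[str, str, str]:
    """Split 'SECTION:KEY=VALUE'; the value may itself contain '='."""
    section, sep, rest = raw.partition(":")
    if not sep or not section:
        raise ValueError(f"expected SECTION:KEY=VALUE, got '{raw}'")
    key, value = parse_key_value(rest)
    return section, key, value


# Example: 'bootloader:cmdline_openshift_node_hugepages=hugepagesz=2M hugepages=50'
# -> ('bootloader', 'cmdline_openshift_node_hugepages', 'hugepagesz=2M hugepages=50')
```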
+ +### Examples +```bash +python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \ + --profile-name realtime-worker \ + --summary "Realtime tuned profile" \ + --include openshift-node --include realtime \ + --variable isolated_cores=1 \ + --section bootloader:cmdline_ocp_realtime=+systemd.cpu_affinity=${not_isolated_cores_expanded} \ + --machine-config-label machineconfiguration.openshift.io/role=worker-rt \ + --priority 25 \ + --output .work/node-tuning/realtime-worker/tuned.yaml +``` +```bash +python3 plugins/node-tuning/skills/scripts/generate_tuned_profile.py \ + --profile-name openshift-node-hugepages \ + --summary "Boot time configuration for hugepages" \ + --include openshift-node \ + --section bootloader:cmdline_openshift_node_hugepages="hugepagesz=2M hugepages=50" \ + --machine-config-label machineconfiguration.openshift.io/role=worker-hp \ + --priority 30 \ + --output .work/node-tuning/openshift-node-hugepages/hugepages-tuned-boottime.yaml +``` + +--- + +## Script: `analyze_node_tuning.py` + +### Purpose +Inspect either a live node (`/proc`, `/sys`) or an extracted sosreport snapshot for tuning signals (CPU isolation, IRQ affinity, huge pages, sysctl state, networking counters) and emit actionable recommendations. + +### Usage Patterns +- **Live node analysis** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py --format markdown + ``` +- **Remote analysis via oc debug** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py \ + --node worker-rt-0 \ + --kubeconfig ~/.kube/prod \ + --format markdown + ``` +- **Collect sosreport via oc debug and analyze locally** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py \ + --node worker-rt-0 \ + --toolbox-image registry.example.com/support-tools:latest \ + --sosreport-arg "--case-id=01234567" \ + --sosreport-output .work/node-tuning/sosreports \ + --format json + ``` +- **Offline sosreport analysis** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py \ + --sosreport /path/to/sosreport-2025-10-20 + ``` +- **Automation-friendly JSON** + ```bash + python3 plugins/node-tuning/skills/scripts/analyze_node_tuning.py \ + --sosreport /path/to/sosreport \ + --format json --output .work/node-tuning/node-analysis.json + ``` + +### Implementation Steps +1. **Select data source** + - Provide `--node ` (with optional `--kubeconfig` / `--oc-binary`). By default the helper runs `sosreport` remotely from inside the RHCOS toolbox container (`registry.redhat.io/rhel9/support-tools`). Override the image with `--toolbox-image`, extend the sosreport command with `--sosreport-arg`, or disable the curated OpenShift flags via `--skip-default-sosreport-flags`. Pass `--no-collect-sosreport` to fall back to the direct `/proc` snapshot mode. + - Provide `--sosreport ` for archived diagnostics; detection finds embedded `proc/` and `sys/`. + - Omit both switches to query the live filesystem (defaults to `/proc` and `/sys`). + - Override paths with `--proc-root` or `--sys-root` when the layout differs. +2. **Run analysis** + - The script parses `cpuinfo`, kernel cmdline parameters (`isolcpus`, `nohz_full`, `tuned.non_isolcpus`), default IRQ affinities, huge page counters, sysctl values (net, vm, kernel), transparent hugepage settings, `netstat`/`sockstat` counters, and `ps` snapshots (when available in sosreport). +3. 
**Review the report** + - Markdown output groups findings by section (System Overview, CPU & Isolation, Huge Pages, Sysctl Highlights, Network Signals, IRQ Affinity, Process Snapshot) and lists recommendations. + - JSON output contains the same information in structured form for pipelines or dashboards. +4. **Act on recommendations** + - Apply Tuned profiles, MachineConfig updates, or manual sysctl/irqbalance adjustments. + - Feed actionable items back into `/node-tuning:generate-tuned-profile` to codify desired state. + +### Error Handling +- Missing `proc/` or `sys/` directories trigger descriptive errors. +- Unreadable files are skipped gracefully and noted in observations where relevant. +- Non-numeric sysctl values are flagged for manual investigation. + +### Example Output (Markdown excerpt) +``` +# Node Tuning Analysis + +## System Overview +- Hostname: worker-rt-1 +- Kernel: 4.18.0-477.el8 +- NUMA nodes: 2 +- Kernel cmdline: `BOOT_IMAGE=... isolcpus=2-15 tuned.non_isolcpus=0-1` + +## CPU & Isolation +- Logical CPUs: 32 +- Physical cores: 16 across 2 socket(s) +- SMT detected: yes +- Isolated CPUs: 2-15 +... + +## Recommended Actions +- Configure net.core.netdev_max_backlog (>=32768) to accommodate bursty NIC traffic. +- Transparent Hugepages are not disabled (`[never]` not selected). Consider setting to `never` for latency-sensitive workloads. +- 4 IRQs overlap isolated CPUs. Relocate interrupt affinities using tuned profiles or irqbalance. +``` + +### Follow-up Automation Ideas +- Persist JSON results in `.work/node-tuning//analysis.json` for historical tracing. +- Gate upgrades by comparing recommendations across nodes. +- Integrate with CI jobs that validate cluster tuning post-change. diff --git a/skills/scripts/analyze_node_tuning.py b/skills/scripts/analyze_node_tuning.py new file mode 100644 index 0000000..7a0d15e --- /dev/null +++ b/skills/scripts/analyze_node_tuning.py @@ -0,0 +1,1292 @@ +""" +Analyze kernel and node tuning state from a live OpenShift node or an extracted +Linux sosreport directory. The script inspects procfs/sysfs snapshots for +signals related to CPU isolation, IRQ affinity, huge pages, and networking +queues, then emits actionable tuning recommendations. + +The implementation remains dependency-free so it can run anywhere Python 3.8+ +is available (CI, developer workstations, or automation pipelines). 
+""" + +from __future__ import annotations + +import argparse +import atexit +import json +import os +import shlex +import shutil +import subprocess +import sys +import tarfile +import tempfile +import textwrap +from dataclasses import dataclass +from pathlib import Path +from typing import Dict, Iterable, List, Optional, Sequence, Tuple + + +@dataclass(frozen=True) +class EnvironmentPaths: + base: Path + proc: Path + sys: Path + sos_commands: Optional[Path] + + +DEFAULT_OC_BINARY = os.environ.get("OC_BIN", "oc") +DEFAULT_TOOLBOX_IMAGE = os.environ.get("TOOLBOX_IMAGE", "registry.redhat.io/rhel9/support-tools:latest") + +DEFAULT_SOSREPORT_FLAGS: List[str] = [ + "-e", + "openshift", + "-e", + "openshift_ovn", + "-e", + "openvswitch", + "-e", + "podman", + "-e", + "crio", + "-k", + "crio.all=on", + "-k", + "crio.logs=on", + "-k", + "podman.all=on", + "-k", + "podman.logs=on", + "-k", + "networking.ethtool-namespaces=off", + "--all-logs", + "--plugin-timeout=600", +] + +SNAPSHOT_ITEMS = [ + "proc/cmdline", + "proc/cpuinfo", + "proc/meminfo", + "proc/net", + "proc/irq", + "proc/sys", + "proc/uptime", + "proc/version", + "proc/sys/kernel", + "proc/sys/vm", + "proc/sys/net", + "proc/net/netstat", + "proc/net/snmp", + "proc/net/sockstat", + "sys/devices/system/node", + "sys/kernel/mm/transparent_hugepage", +] + + +def parse_arguments(argv: Iterable[str]) -> argparse.Namespace: + parser = argparse.ArgumentParser( + description=( + "Analyze kernel tuning signals from a live node (/proc, /sys) or an " + "extracted sosreport directory." + ), + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + parser.add_argument( + "--sosreport", + help=( + "Path to an extracted sosreport directory. The script will locate the " + "embedded proc/ and sys/ trees automatically." + ), + ) + parser.add_argument( + "--root", + default="", + help=( + "Root path of a filesystem snapshot containing proc/ and sys/ " + "(defaults to the live '/' filesystem when unset)." + ), + ) + parser.add_argument( + "--proc-root", + help="Explicit path to the procfs tree. Overrides auto-detection.", + ) + parser.add_argument( + "--sys-root", + help="Explicit path to the sysfs tree. Overrides auto-detection.", + ) + parser.add_argument( + "--node", + help=( + "OpenShift node name to inspect via `oc debug node/`. " + "The script captures relevant /proc and /sys data using the provided KUBECONFIG." + ), + ) + parser.add_argument( + "--kubeconfig", + help="Path to the kubeconfig file used for oc debug commands (defaults to current oc context).", + ) + parser.add_argument( + "--oc-binary", + default=DEFAULT_OC_BINARY, + help="Path to the oc CLI binary.", + ) + parser.add_argument( + "--keep-snapshot", + action="store_true", + help="Keep temporary artifacts (oc-debug snapshots or sosreports) instead of deleting them on exit.", + ) + parser.add_argument( + "--collect-sosreport", + dest="collect_sosreport", + action="store_true", + help=( + "Use `oc debug node/` to run sosreport on the target node, download the archive, " + "and analyze it as an extracted sosreport." + ), + ) + parser.add_argument( + "--no-collect-sosreport", + dest="collect_sosreport", + action="store_false", + help="Disable automatic sosreport collection when targeting a live cluster via --node.", + ) + parser.set_defaults(collect_sosreport=True) + parser.add_argument( + "--sosreport-output", + help=( + "Optional directory to store downloaded sosreport archives and their extraction. " + "Defaults to a temporary directory when omitted." 
+ ), + ) + parser.add_argument( + "--toolbox-image", + default=DEFAULT_TOOLBOX_IMAGE, + help="Container image used by toolbox when collecting sosreport (default: %(default)s).", + ) + parser.add_argument( + "--sosreport-arg", + action="append", + default=[], + metavar="ARG", + help="Additional argument to pass to the sosreport command (repeatable).", + ) + parser.add_argument( + "--skip-default-sosreport-flags", + action="store_true", + help="Do not include the default OpenShift-focused sosreport flags; only use custom --sosreport-arg values.", + ) + parser.add_argument( + "--format", + choices=("markdown", "json"), + default="markdown", + help="Output format.", + ) + parser.add_argument( + "--output", + help="Optional path to write the report. Defaults to stdout when omitted.", + ) + parser.add_argument( + "--max-irq-samples", + type=int, + default=15, + help="Limit how many IRQ affinity mismatches are included in the report.", + ) + return parser.parse_args(argv) + + +def resolve_environment(args: argparse.Namespace) -> EnvironmentPaths: + collect_pref = args.collect_sosreport + if collect_pref and not args.node: + # Cannot collect sosreport without a target node; defer to other sources. + collect_pref = False + + if collect_pref: + if args.sosreport: + raise ValueError("Cannot combine --collect-sosreport with --sosreport.") + if not args.node: + raise ValueError("Sosreport collection requires --node.") + sos_dir = collect_sosreport_via_oc_debug( + node=args.node, + oc_binary=args.oc_binary, + kubeconfig=args.kubeconfig, + keep_snapshot=args.keep_snapshot, + output_base=args.sosreport_output, + toolbox_image=args.toolbox_image, + proxy_exports_script=_build_proxy_exports_script(), + sosreport_flag_string=_build_sosreport_flag_string( + use_defaults=not args.skip_default_sosreport_flags, + extra_args=args.sosreport_arg, + ), + ) + return _resolve_sosreport_dir(sos_dir) + + if args.sosreport: + return _resolve_sosreport_dir(Path(args.sosreport)) + + if args.node: + return capture_node_snapshot( + node=args.node, + oc_binary=args.oc_binary, + kubeconfig=args.kubeconfig, + keep_snapshot=args.keep_snapshot, + ) + + root = Path(args.root or "/").expanduser().resolve() + if not root.exists(): + raise FileNotFoundError(f"root path '{root}' does not exist") + proc_root = Path(args.proc_root).expanduser().resolve() if args.proc_root else root / "proc" + sys_root = Path(args.sys_root).expanduser().resolve() if args.sys_root else root / "sys" + if not proc_root.is_dir(): + raise FileNotFoundError(f"proc path '{proc_root}' does not exist or is not a directory") + if not sys_root.is_dir(): + raise FileNotFoundError(f"sys path '{sys_root}' does not exist or is not a directory") + return EnvironmentPaths(base=root, proc=proc_root, sys=sys_root, sos_commands=None) + + +def _safe_extract_tar(archive: Path, destination: Path) -> None: + with tarfile.open(archive, "r:*") as tar: + for member in tar.getmembers(): + member_path = destination / member.name + if not _is_within_directory(destination, member_path): + raise ValueError("Archive extraction attempted path traversal.") + tar.extractall(destination) + + +def _is_within_directory(directory: Path, target: Path) -> bool: + directory = directory.resolve() + if not directory.exists(): + directory.mkdir(parents=True, exist_ok=True) + try: + target.resolve(strict=False).relative_to(directory) + return True + except ValueError: + return False + + +def _create_artifact_dir(base_dir: Optional[str], prefix: str) -> Path: + if base_dir: + base = 
Path(base_dir).expanduser().resolve() + base.mkdir(parents=True, exist_ok=True) + path = Path(tempfile.mkdtemp(prefix=f"{prefix}-", dir=str(base))) + else: + path = Path(tempfile.mkdtemp(prefix=f"{prefix}-")) + return path + + +def _resolve_sosreport_dir(path: Path) -> EnvironmentPaths: + base = Path(path).expanduser().resolve() + if not base.exists(): + raise FileNotFoundError(f"sosreport path '{base}' does not exist") + if base.is_file(): + raise ValueError(f"sosreport path '{base}' is a file; provide an extracted directory") + root_candidates = [base] + [child for child in base.iterdir() if child.is_dir()] + proc_root: Optional[Path] = None + sys_root: Optional[Path] = None + sos_commands: Optional[Path] = None + selected_base = base + for candidate in root_candidates: + candidate_proc = candidate / "proc" + if candidate_proc.is_dir(): + proc_root = candidate_proc + candidate_sys = candidate / "sys" + if candidate_sys.is_dir(): + sys_root = candidate_sys + if (candidate / "sos_commands").is_dir(): + sos_commands = candidate / "sos_commands" + if proc_root and sys_root: + selected_base = candidate + break + if proc_root is None: + raise FileNotFoundError(f"Unable to locate a proc/ directory under '{base}'") + if sys_root is None: + possible_sys = proc_root.parent / "sys" + if possible_sys.is_dir(): + sys_root = possible_sys + else: + raise FileNotFoundError(f"Unable to locate a sys/ directory under '{base}'") + return EnvironmentPaths(base=selected_base, proc=proc_root, sys=sys_root, sos_commands=sos_commands) + + +def _build_proxy_exports_script() -> str: + proxy_vars = [ + "HTTP_PROXY", + "http_proxy", + "HTTPS_PROXY", + "https_proxy", + "NO_PROXY", + "no_proxy", + ] + lines = [] + for var in proxy_vars: + value = os.environ.get(var) + if value: + lines.append(f"export {var}={shlex.quote(value)}") + return "\n".join(lines) + + +def _build_sosreport_flag_string(*, use_defaults: bool, extra_args: Sequence[str]) -> str: + flags: List[str] = [] + if use_defaults: + flags.extend(DEFAULT_SOSREPORT_FLAGS) + flags.extend(extra_args) + if not flags: + return "" + return " ".join(shlex.quote(flag) for flag in flags) + + +def collect_sosreport_via_oc_debug( + *, + node: str, + oc_binary: str, + kubeconfig: Optional[str], + keep_snapshot: bool, + output_base: Optional[str], + toolbox_image: str, + proxy_exports_script: str, + sosreport_flag_string: str, +) -> Path: + safe_node = node.replace("/", "-") + artifact_dir = _create_artifact_dir(output_base, f"node-tuning-sosreport-{safe_node}") + if not keep_snapshot: + atexit.register(lambda: shutil.rmtree(artifact_dir, ignore_errors=True)) + + archive_path = artifact_dir / "sosreport.tar" + extract_dir = artifact_dir / "extracted" + extract_dir.mkdir(parents=True, exist_ok=True) + + archive_host_path = "/tmp/node-tuning-sosreport.tar.xz" + payload_host_path = "/tmp/node-tuning-toolbox.sh" + proxy_block = "" + if proxy_exports_script.strip(): + proxy_block = textwrap.dedent( + f""" + PROXY_EXPORTS=$(cat <<'__NTO_PROXY__' + {proxy_exports_script} + __NTO_PROXY__ + ) + eval "$PROXY_EXPORTS" + """ + ).strip() + + sosreport_flag_string = sosreport_flag_string or "" + remote_script = textwrap.dedent( + f""" + set -euo pipefail + TOOLBOX_IMAGE={shlex.quote(toolbox_image)} + ARCHIVE_PATH="{archive_host_path}" + PAYLOAD="{payload_host_path}" + TOOLBOX_LOG="/tmp/node-tuning-toolbox.log" + {proxy_block} + cat <<'__NTO_PAYLOAD__' > "$PAYLOAD" + set -euo pipefail + TMPDIR=$(mktemp -d /var/tmp/node-tuning-sos.XXXX) + cleanup() {{ rm -rf "$TMPDIR"; }} + trap cleanup 
EXIT + SOSREPORT_FLAGS="{sosreport_flag_string}" + sosreport --batch --quiet --tmp-dir "$TMPDIR" $SOSREPORT_FLAGS >/dev/null + LATEST=$(ls -1tr "$TMPDIR"/sosreport-* 2>/dev/null | tail -1) + if [ -z "$LATEST" ]; then + echo "Unable to locate sosreport archive" >&2 + exit 1 + fi + mkdir -p "$(dirname "/host{archive_host_path}")" + cp "$LATEST" "/host{archive_host_path}" + __NTO_PAYLOAD__ + + remove_existing() {{ + podman rm -f toolbox- >/dev/null 2>&1 || true + toolbox rm -f node-tuning-sos >/dev/null 2>&1 || true + }} + + remove_existing + + run_toolbox() {{ + local status=0 + if command -v script >/dev/null 2>&1; then + script -q -c "toolbox --container node-tuning-sos --image $TOOLBOX_IMAGE -- /bin/bash /host$PAYLOAD" /dev/null >> "$TOOLBOX_LOG" 2>&1 || status=$? + else + toolbox --container node-tuning-sos --image "$TOOLBOX_IMAGE" -- /bin/bash "/host$PAYLOAD" >> "$TOOLBOX_LOG" 2>&1 || status=$? + fi + return "$status" + }} + + if ! run_toolbox; then + echo "toolbox execution failed; falling back to host sosreport (inspect $TOOLBOX_LOG)" >&2 + if ! bash "/host$PAYLOAD" >> "$TOOLBOX_LOG" 2>&1; then + echo "host sosreport fallback failed; inspect $TOOLBOX_LOG" >&2 + exit 1 + fi + fi + + rm -f "$PAYLOAD" + + if [ ! -s "{archive_host_path}" ]; then + echo "sosreport archive missing after execution; inspect $TOOLBOX_LOG" >&2 + exit 1 + fi + + cat "{archive_host_path}" + rm -f "{archive_host_path}" "$TOOLBOX_LOG" + """ + ).strip() + + cmd: List[str] = [oc_binary] + if kubeconfig: + cmd.extend(["--kubeconfig", kubeconfig]) + cmd.extend( + [ + "debug", + f"node/{node}", + "--quiet", + "--", + "/bin/bash", + "-c", + f"chroot /host /bin/bash -c {shlex.quote(remote_script)}", + ] + ) + + try: + with archive_path.open("wb") as archive_handle: + result = subprocess.run( + cmd, + check=False, + stdout=archive_handle, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise FileNotFoundError(f"Unable to execute oc binary '{oc_binary}': {exc}") from exc + + if result.returncode != 0: + stderr_output = result.stderr.strip() if result.stderr else "unknown error" + raise RuntimeError(f"`oc debug node/{node}` sosreport capture failed: {stderr_output}") + + _safe_extract_tar(archive_path, extract_dir) + + # Choose the first directory that contains proc/. 
+ candidates = [p for p in extract_dir.rglob("proc") if p.is_dir()] + if not candidates: + raise FileNotFoundError("Downloaded sosreport archive did not contain a proc/ directory.") + sos_base = candidates[0].parent + return sos_base + + +def capture_node_snapshot( + *, + node: str, + oc_binary: str, + kubeconfig: Optional[str], + keep_snapshot: bool, +) -> EnvironmentPaths: + tmp_dir = Path(tempfile.mkdtemp(prefix="node-tuning-")) + if not keep_snapshot: + atexit.register(lambda: shutil.rmtree(tmp_dir, ignore_errors=True)) + + tar_path = tmp_dir / "snapshot.tar" + include_args = " ".join(shlex.quote(item) for item in SNAPSHOT_ITEMS) + remote_cmd = ( + "chroot /host /bin/bash -c " + f"'cd / && tar --ignore-failed-read --warning=no-file-changed -cf - {include_args}'" + ) + + cmd: List[str] = [oc_binary] + if kubeconfig: + cmd.extend(["--kubeconfig", kubeconfig]) + cmd.extend(["debug", f"node/{node}", "--quiet", "--", "/bin/bash", "-c", remote_cmd]) + + try: + with tar_path.open("wb") as tar_handle: + result = subprocess.run( + cmd, + check=False, + stdout=tar_handle, + stderr=subprocess.PIPE, + text=True, + ) + except FileNotFoundError as exc: + raise FileNotFoundError( + f"Unable to execute oc binary '{oc_binary}': {exc}" + ) from exc + + if result.returncode != 0: + stderr_output = result.stderr.strip() if result.stderr else "unknown error" + raise RuntimeError( + f"`oc debug node/{node}` failed (exit {result.returncode}): {stderr_output}" + ) + + _safe_extract_tar(tar_path, tmp_dir) + if not keep_snapshot: + tar_path.unlink(missing_ok=True) # type: ignore[arg-type] + + proc_path = tmp_dir / "proc" + sys_path = tmp_dir / "sys" + if not proc_path.exists(): + raise FileNotFoundError("Captured snapshot is missing proc/ data from the node.") + if not sys_path.exists(): + raise FileNotFoundError("Captured snapshot is missing sys/ data from the node.") + return EnvironmentPaths(base=tmp_dir, proc=proc_path, sys=sys_path, sos_commands=None) + + +def _safe_read_text(path: Path) -> Optional[str]: + try: + return path.read_text(encoding="utf-8", errors="ignore") + except (FileNotFoundError, IsADirectoryError, PermissionError): + return None + + +def _safe_read_int(path: Path) -> Optional[int]: + text = _safe_read_text(path) + if text is None: + return None + text = text.strip() + if not text: + return None + try: + return int(text) + except ValueError: + return None + + +def _parse_kernel_cmdline(raw_cmdline: Optional[str]) -> Tuple[str, Dict[str, List[str]]]: + if not raw_cmdline: + return "", {} + cmdline = raw_cmdline.replace("\x00", " ").strip() + params: Dict[str, List[str]] = {} + for token in cmdline.split(): + if "=" in token: + key, value = token.split("=", 1) + else: + key, value = token, "" + params.setdefault(key, []).append(value) + return cmdline, params + + +def _parse_cpu_list(expression: str) -> List[int]: + cpus: List[int] = [] + for part in expression.split(","): + part = part.strip() + if not part: + continue + if "-" in part: + start_str, end_str = part.split("-", 1) + try: + start = int(start_str) + end = int(end_str) + except ValueError: + continue + cpus.extend(range(min(start, end), max(start, end) + 1)) + else: + try: + cpus.append(int(part)) + except ValueError: + continue + return sorted(set(cpus)) + + +def _parse_cpu_mask(mask: str) -> List[int]: + cleaned = mask.strip().replace(",", "") + if not cleaned: + return [] + try: + value = int(cleaned, 16) + except ValueError: + return [] + cpus: List[int] = [] + bit = 0 + while value: + if value & 1: + cpus.append(bit) + 
value >>= 1 + bit += 1 + return cpus + + +def gather_system_info(env: EnvironmentPaths) -> Dict[str, object]: + hostname = _safe_read_text(env.proc / "sys/kernel/hostname") + kernel_release = _safe_read_text(env.proc / "sys/kernel/osrelease") + kernel_version = _safe_read_text(env.proc / "version") + uptime_text = _safe_read_text(env.proc / "uptime") + if uptime_text: + try: + uptime_seconds = float(uptime_text.split()[0]) + except (ValueError, IndexError): + uptime_seconds = None + else: + uptime_seconds = None + cmdline_raw = _safe_read_text(env.proc / "cmdline") + cmdline, cmd_params = _parse_kernel_cmdline(cmdline_raw) + num_nodes = 0 + nodes_path = env.sys / "devices/system/node" + if nodes_path.is_dir(): + num_nodes = sum(1 for entry in nodes_path.iterdir() if entry.name.startswith("node")) + return { + "hostname": (hostname or "").strip(), + "kernel_release": (kernel_release or "").strip(), + "kernel_version": (kernel_version or "").strip(), + "uptime_seconds": uptime_seconds, + "kernel_cmdline": cmdline, + "kernel_cmdline_params": cmd_params, + "numa_nodes": num_nodes, + } + + +def gather_cpu_info(env: EnvironmentPaths, cmd_params: Dict[str, List[str]]) -> Dict[str, object]: + cpuinfo_text = _safe_read_text(env.proc / "cpuinfo") + logical_cpus = 0 + sockets: List[str] = [] + cores: List[Tuple[str, str]] = [] + smt_possible = False + if cpuinfo_text: + block: Dict[str, str] = {} + for line in cpuinfo_text.splitlines(): + if not line.strip(): + if block: + logical_cpus += 1 + physical_id = block.get("physical id", str(block.get("processor", logical_cpus - 1))) + core_id = block.get("core id", str(block.get("processor", logical_cpus - 1))) + sockets.append(physical_id) + cores.append((physical_id, core_id)) + siblings = block.get("siblings") + core_count = block.get("cpu cores") + if siblings and core_count: + try: + if int(siblings) > int(core_count): + smt_possible = True + except ValueError: + pass + block = {} + continue + if ":" in line: + key, value = line.split(":", 1) + block[key.strip()] = value.strip() + if block: + logical_cpus += 1 + physical_id = block.get("physical id", str(block.get("processor", logical_cpus - 1))) + core_id = block.get("core id", str(block.get("processor", logical_cpus - 1))) + sockets.append(physical_id) + cores.append((physical_id, core_id)) + siblings = block.get("siblings") + core_count = block.get("cpu cores") + if siblings and core_count: + try: + if int(siblings) > int(core_count): + smt_possible = True + except ValueError: + pass + + unique_sockets = sorted(set(sockets)) + unique_cores = sorted(set(cores)) + isolated_params = cmd_params.get("isolcpus", []) + cmd_params.get("tuned.isolcpus", []) + isolated_cpus: List[int] = [] + for value in isolated_params: + isolated_cpus.extend(_parse_cpu_list(value)) + nohz_full = [] + for value in cmd_params.get("nohz_full", []): + nohz_full.extend(_parse_cpu_list(value)) + tuned_non_isol = [] + for value in cmd_params.get("tuned.non_isolcpus", []): + tuned_non_isol.extend(_parse_cpu_list(value)) + + default_irq_affinity = _parse_cpu_mask(_safe_read_text(env.proc / "irq/default_smp_affinity") or "") + + recommendations: List[str] = [] + observations: List[str] = [] + if logical_cpus: + observations.append(f"Detected {logical_cpus} logical CPUs across {len(unique_sockets)} socket(s).") + if smt_possible: + observations.append("Hyper-Threading/SMT appears to be enabled (siblings > cpu cores).") + if isolated_cpus: + observations.append(f"Kernel cmdline isolates CPUs: {','.join(str(cpu) for cpu in 
isolated_cpus)}.") + else: + if logical_cpus >= 8: + recommendations.append( + "Configure `isolcpus` (or `tuned.non_isolcpus`) to reserve dedicated cores for workload isolation." + ) + if nohz_full and not isolated_cpus: + recommendations.append( + "`nohz_full` specified without matching `isolcpus`; verify scheduler isolation covers intended CPUs." + ) + if tuned_non_isol: + observations.append(f"Tuned non-isolated CPU mask: {','.join(str(cpu) for cpu in sorted(set(tuned_non_isol)))}.") + if default_irq_affinity and isolated_cpus: + overlap = sorted(set(default_irq_affinity) & set(isolated_cpus)) + if overlap: + recommendations.append( + f"Default IRQ affinity includes isolated CPUs ({','.join(map(str, overlap))}); adjust " + "`/proc/irq/default_smp_affinity` and tuned profiles to keep interrupts off dedicated cores." + ) + return { + "logical_cpus": logical_cpus, + "sockets": len(unique_sockets), + "physical_cores": len(unique_cores), + "smt_detected": smt_possible, + "isolated_cpus": sorted(set(isolated_cpus)), + "nohz_full": sorted(set(nohz_full)), + "tuned_non_isolcpus": sorted(set(tuned_non_isol)), + "default_irq_affinity": default_irq_affinity, + "observations": observations, + "recommendations": recommendations, + } + + +def gather_hugepage_info(env: EnvironmentPaths) -> Dict[str, object]: + meminfo_text = _safe_read_text(env.proc / "meminfo") + hugepages_total = None + hugepages_free = None + hugepages_rsvd = None + hugepages_surp = None + hugepage_size_kb = None + mem_total_kb = None + if meminfo_text: + for line in meminfo_text.splitlines(): + if line.startswith("HugePages_Total:"): + hugepages_total = int(line.split()[1]) + elif line.startswith("HugePages_Free:"): + hugepages_free = int(line.split()[1]) + elif line.startswith("HugePages_Rsvd:"): + hugepages_rsvd = int(line.split()[1]) + elif line.startswith("HugePages_Surp:"): + hugepages_surp = int(line.split()[1]) + elif line.startswith("Hugepagesize:"): + hugepage_size_kb = int(line.split()[1]) + elif line.startswith("MemTotal:"): + mem_total_kb = int(line.split()[1]) + sysctl_nr_hugepages = _safe_read_int(env.proc / "sys/vm/nr_hugepages") + sysctl_overcommit_huge = _safe_read_int(env.proc / "sys/vm/nr_overcommit_hugepages") + + per_node: Dict[str, Dict[str, int]] = {} + nodes_dir = env.sys / "devices/system/node" + if nodes_dir.is_dir(): + for node_dir in sorted(nodes_dir.iterdir()): + if not node_dir.name.startswith("node"): + continue + node_info: Dict[str, int] = {} + hugepages_dir = node_dir / "hugepages" + if hugepages_dir.is_dir(): + for hp_dir in hugepages_dir.iterdir(): + nr_path = hp_dir / "nr_hugepages" + free_path = hp_dir / "free_hugepages" + if nr_path.exists(): + node_info["total"] = node_info.get("total", 0) + int(nr_path.read_text().strip()) + if free_path.exists(): + node_info["free"] = node_info.get("free", 0) + int(free_path.read_text().strip()) + if node_info: + per_node[node_dir.name] = node_info + + recommendations: List[str] = [] + observations: List[str] = [] + if hugepages_total is not None: + observations.append(f"HugePages_Total={hugepages_total} (size={hugepage_size_kb or 'unknown'} KB).") + if hugepages_total == 0: + recommendations.append( + "Huge pages are disabled. Configure `vm.nr_hugepages` or MachineConfig/Tuned profiles if workloads require pinned memory." + ) + elif hugepages_free is not None and hugepages_free / max(hugepages_total, 1) < 0.1: + recommendations.append( + "Huge pages are nearly exhausted (free <10%). Increase the allocation cap or investigate consumption." 
+ ) + if hugepages_rsvd: + observations.append(f"HugePages_Rsvd={hugepages_rsvd}.") + if mem_total_kb and hugepages_total and hugepage_size_kb: + provisioned_percent = (hugepages_total * hugepage_size_kb) / mem_total_kb * 100 + if provisioned_percent < 1: + recommendations.append( + "Huge page pool is <1% of system memory. Verify sizing matches workload requirements." + ) + if sysctl_nr_hugepages and hugepages_total and sysctl_nr_hugepages != hugepages_total: + observations.append( + f"Runtime HugePages_Total ({hugepages_total}) differs from sysctl target ({sysctl_nr_hugepages})." + ) + + return { + "hugepages_total": hugepages_total, + "hugepages_free": hugepages_free, + "hugepages_reserved": hugepages_rsvd, + "hugepages_surplus": hugepages_surp, + "hugepage_size_kb": hugepage_size_kb, + "sysctl_nr_hugepages": sysctl_nr_hugepages, + "sysctl_nr_overcommit": sysctl_overcommit_huge, + "per_node": per_node, + "observations": observations, + "recommendations": recommendations, + } + + +SYSCTL_CHECKS: List[Dict[str, object]] = [ + { + "path": "kernel/sched_rt_runtime_us", + "comparison": "eq", + "value": -1, + "message": "Set `kernel.sched_rt_runtime_us=-1` to allow realtime workloads full CPU bandwidth.", + }, + { + "path": "kernel/nmi_watchdog", + "comparison": "eq", + "value": 0, + "message": "Disable NMI watchdog (`kernel.nmi_watchdog=0`) on isolated/latency-sensitive nodes.", + }, + { + "path": "vm/swappiness", + "comparison": "lte", + "value": 10, + "message": "Lower `vm.swappiness` (<=10) to reduce swap pressure on performance nodes.", + }, + { + "path": "vm/zone_reclaim_mode", + "comparison": "eq", + "value": 0, + "message": "Ensure `vm.zone_reclaim_mode=0` unless targeting NUMA-local reclaim.", + }, + { + "path": "net/core/netdev_max_backlog", + "comparison": "gte", + "value": 32768, + "message": "Increase `net.core.netdev_max_backlog` (>=32768) to accommodate bursty NIC traffic.", + }, + { + "path": "net/core/somaxconn", + "comparison": "gte", + "value": 1024, + "message": "Increase `net.core.somaxconn` (>=1024) to avoid listen queue overflows.", + }, + { + "path": "net/ipv4/tcp_tw_reuse", + "comparison": "eq", + "value": 1, + "message": "Enable `net.ipv4.tcp_tw_reuse=1` for faster TIME-WAIT socket reuse.", + }, + { + "path": "net/ipv4/tcp_fin_timeout", + "comparison": "lte", + "value": 30, + "message": "Reduce `net.ipv4.tcp_fin_timeout` (<=30) to shorten FIN-WAIT-2 linger.", + }, + { + "path": "net/ipv4/tcp_rmem", + "comparison": "triplet_min", + "value": (4096, 87380, 16777216), + "message": "Grow `net.ipv4.tcp_rmem` (recommended min/def/max >= 4096/87380/16777216).", + }, + { + "path": "net/ipv4/tcp_wmem", + "comparison": "triplet_min", + "value": (4096, 65536, 16777216), + "message": "Grow `net.ipv4.tcp_wmem` (recommended min/def/max >= 4096/65536/16777216).", + }, +] + + +def gather_sysctl_info(env: EnvironmentPaths) -> Dict[str, object]: + results: Dict[str, Dict[str, object]] = {} + recommendations: List[str] = [] + observations: List[str] = [] + for check in SYSCTL_CHECKS: + path = env.proc / "sys" / Path(str(check["path"])) + value_text = _safe_read_text(path) + if value_text is None: + continue + normalized = value_text.strip() + results[str(check["path"])] = {"value": normalized} + comparison = str(check["comparison"]) + target = check["value"] + try: + if comparison == "eq": + actual_int = int(normalized) + if actual_int != int(target): + recommendations.append(str(check["message"])) + elif comparison == "lte": + actual_int = int(normalized) + if actual_int > int(target): 
+                    recommendations.append(str(check["message"]))
+            elif comparison == "gte":
+                actual_int = int(normalized)
+                if actual_int < int(target):
+                    recommendations.append(str(check["message"]))
+            elif comparison == "triplet_min":
+                actual_parts = [int(part) for part in normalized.split()]
+                target_parts = list(target) if isinstance(target, (list, tuple)) else []
+                if len(actual_parts) >= 3 and len(target_parts) >= 3:
+                    for idx in range(3):
+                        if actual_parts[idx] < target_parts[idx]:
+                            recommendations.append(str(check["message"]))
+                            break
+            else:
+                observations.append(f"Unhandled comparison type '{comparison}' for {check['path']}.")
+        except ValueError:
+            observations.append(f"Non-integer sysctl value for {check['path']}: '{normalized}'.")
+
+    thp_enabled = _safe_read_text(env.sys / "kernel/mm/transparent_hugepage/enabled")
+    if thp_enabled:
+        results["sys.kernel.mm.transparent_hugepage.enabled"] = {"value": thp_enabled.strip()}
+        if "[never]" not in thp_enabled:
+            recommendations.append(
+                "Transparent Hugepages are not disabled (`[never]` not selected). Consider setting to `never` for latency-sensitive workloads."
+            )
+
+    thp_defrag = _safe_read_text(env.sys / "kernel/mm/transparent_hugepage/defrag")
+    if thp_defrag:
+        results["sys.kernel.mm.transparent_hugepage.defrag"] = {"value": thp_defrag.strip()}
+        if "[never]" not in thp_defrag and "[madvise]" not in thp_defrag:
+            recommendations.append(
+                "Transparent Hugepage defrag is aggressive. Set to `never` or `madvise` to reduce allocation jitter."
+            )
+
+    return {
+        "values": results,
+        "observations": observations,
+        "recommendations": recommendations,
+    }
+
+
+def _parse_netstat_file(path: Path) -> Dict[str, Dict[str, int]]:
+    text = _safe_read_text(path)
+    if not text:
+        return {}
+    lines = [line.strip() for line in text.splitlines() if line.strip()]
+    parsed: Dict[str, Dict[str, int]] = {}
+    idx = 0
+    while idx + 1 < len(lines):
+        header = lines[idx].split()
+        values = lines[idx + 1].split()
+        if not header or not values:
+            idx += 2
+            continue
+        section = header[0].rstrip(":")
+        metrics: Dict[str, int] = {}
+        for key, value in zip(header[1:], values[1:]):
+            try:
+                metrics[key] = int(value)
+            except ValueError:
+                continue
+        parsed[section] = metrics
+        idx += 2
+    return parsed
+
+
+def _parse_sockstat(path: Path) -> Dict[str, Dict[str, int]]:
+    text = _safe_read_text(path)
+    if not text:
+        return {}
+    parsed: Dict[str, Dict[str, int]] = {}
+    for line in text.splitlines():
+        if ":" not in line:
+            continue
+        section, rest = line.split(":", 1)
+        metrics: Dict[str, int] = {}
+        parts = rest.split()
+        for idx in range(0, len(parts), 2):
+            key = parts[idx]
+            if idx + 1 >= len(parts):
+                break
+            value = parts[idx + 1]
+            try:
+                metrics[key] = int(value)
+            except ValueError:
+                continue
+        parsed[section.strip()] = metrics
+    return parsed
+
+
+def gather_network_info(env: EnvironmentPaths) -> Dict[str, object]:
+    netstat_data = _parse_netstat_file(env.proc / "net/netstat")
+    snmp_data = _parse_netstat_file(env.proc / "net/snmp")
+    sockstat_data = _parse_sockstat(env.proc / "net/sockstat")
+    recommendations: List[str] = []
+    observations: List[str] = []
+
+    tcp_ext = netstat_data.get("TcpExt", {})
+    listen_drops = tcp_ext.get("ListenDrops")
+    backlog_drops = tcp_ext.get("TCPBacklogDrop")
+    aborted_listens = tcp_ext.get("TCPAbortOnListen")
+    syncookies_failed = tcp_ext.get("SyncookiesFailed")
+    if listen_drops and listen_drops > 0:
+        recommendations.append(
+            f"Detected {listen_drops} TCP listen drops. Increase `net.core.somaxconn` and review application accept loops."
+        )
+    if backlog_drops and backlog_drops > 0:
+        recommendations.append(
+            f"Detected {backlog_drops} TCP backlog drops. Increase `net.core.netdev_max_backlog` / `somaxconn` and tune application backlog."
+        )
+    if aborted_listens and aborted_listens > 0:
+        observations.append(f"{aborted_listens} connections aborted on listen; investigate SYN flood or backlog exhaustion.")
+    if syncookies_failed and syncookies_failed > 0:
+        recommendations.append(
+            f"Syncookies failures observed ({syncookies_failed}); validate NIC offload settings and SYN cookie limits."
+        )
+
+    tcp_sockstat = sockstat_data.get("TCP", {})
+    if tcp_sockstat:
+        in_use = tcp_sockstat.get("inuse")
+        orphan = tcp_sockstat.get("orphan")
+        if orphan and in_use and orphan > max(1, in_use // 10):
+            recommendations.append(
+                f"High orphaned TCP socket count ({orphan}) relative to in-use sockets ({in_use}). Tune FIN timeout and monitor retransmits."
+            )
+
+    return {
+        "netstat": netstat_data,
+        "snmp": snmp_data,
+        "sockstat": sockstat_data,
+        "observations": observations,
+        "recommendations": recommendations,
+    }
+
+
+def gather_irq_affinity_info(
+    env: EnvironmentPaths,
+    isolated_cpus: Sequence[int],
+    *,
+    max_samples: int,
+) -> Dict[str, object]:
+    isolated_set = set(isolated_cpus)
+    irq_dir = env.proc / "irq"
+    mismatches: List[Dict[str, object]] = []
+    total_irqs_checked = 0
+    if irq_dir.is_dir():
+        for entry in sorted(irq_dir.iterdir(), key=lambda p: p.name):
+            if not entry.name.isdigit():
+                continue
+            total_irqs_checked += 1
+            effective = _safe_read_text(entry / "effective_affinity_list")
+            if effective is None:
+                effective = _safe_read_text(entry / "smp_affinity_list")
+            if effective is None:
+                continue
+            effective_cpus = _parse_cpu_list(effective.strip())
+            if isolated_set and any(cpu in isolated_set for cpu in effective_cpus):
+                desc = _safe_read_text(entry / "actions") or _safe_read_text(entry / "spurious")
+                if desc:
+                    desc = desc.strip().splitlines()[0]
+                mismatches.append(
+                    {
+                        "irq": entry.name,
+                        "cpus": effective_cpus,
+                        "detail": desc,
+                    }
+                )
+    recommendations: List[str] = []
+    if mismatches:
+        sample_count = min(len(mismatches), max_samples)
+        recommendations.append(
+            f"{len(mismatches)} IRQs overlap isolated CPUs. Relocate interrupt affinities using tuned profiles or `irqbalance` (showing {sample_count})."
+        )
+    return {
+        "total_irqs_checked": total_irqs_checked,
+        "isolated_cpu_overlaps": mismatches[:max_samples],
+        "recommendations": recommendations,
+    }
+
+
+def gather_process_summary(env: EnvironmentPaths) -> Dict[str, object]:
+    # Prefer sosreport process snapshot for richer context.
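+    # Only sosreport captures ship a ps snapshot; live /proc analysis returns an empty summary.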
+    ps_snapshot: Optional[Path] = None
+    if env.sos_commands:
+        candidates = [
+            env.sos_commands / "process/ps_auxwww",
+            env.sos_commands / "process" / "ps_auxwww",
+            env.sos_commands / "process" / "ps_auxwww_-www",
+        ]
+        for candidate in candidates:
+            if candidate.exists():
+                ps_snapshot = candidate
+                break
+    if ps_snapshot is None:
+        return {"top_processes": [], "recommendations": []}
+    text = _safe_read_text(ps_snapshot)
+    if not text:
+        return {"top_processes": [], "recommendations": []}
+    lines = [line for line in text.splitlines() if line.strip()]
+    _header = lines[0]  # Header row, not needed for parsing
+    processes: List[Dict[str, str]] = []
+    for line in lines[1:]:
+        parts = line.split(None, 10)
+        if len(parts) < 11:
+            continue
+        _user, pid, cpu, mem, _vsz, _rss, _tty, _stat, _start, _time, command = parts
+        processes.append(
+            {
+                "pid": pid,
+                "cpu_percent": cpu,
+                "mem_percent": mem,
+                "command": command.strip(),
+            }
+        )
+    processes.sort(key=lambda entry: float(entry.get("cpu_percent", "0") or "0"), reverse=True)
+    top_processes = processes[:10]
+    recommendations: List[str] = []
+    for proc in top_processes:
+        if "irqbalance" in proc["command"]:
+            recommendations.append(
+                "Verify irqbalance configuration excludes isolated CPUs (saw irqbalance among top processes)."
+            )
+            break
+    return {
+        "top_processes": top_processes,
+        "recommendations": recommendations,
+    }
+
+
+def assemble_report(env: EnvironmentPaths, max_irq_samples: int) -> Dict[str, object]:
+    system_info = gather_system_info(env)
+    cpu_info = gather_cpu_info(env, system_info.get("kernel_cmdline_params", {}))
+    hugepage_info = gather_hugepage_info(env)
+    sysctl_info = gather_sysctl_info(env)
+    network_info = gather_network_info(env)
+    irq_info = gather_irq_affinity_info(
+        env,
+        cpu_info.get("isolated_cpus", []),
+        max_samples=max_irq_samples,
+    )
+    process_info = gather_process_summary(env)
+
+    recommendations: List[str] = []
+    sections = [cpu_info, hugepage_info, sysctl_info, network_info, irq_info, process_info]
+    for section in sections:
+        recommendations.extend(section.get("recommendations", []))  # type: ignore[arg-type]
+    unique_recommendations = sorted(set(rec.strip() for rec in recommendations if rec.strip()))
+
+    return {
+        "system": system_info,
+        "cpu": cpu_info,
+        "hugepages": hugepage_info,
+        "sysctl": sysctl_info,
+        "network": network_info,
+        "irq_affinity": irq_info,
+        "processes": process_info,
+        "recommendations": unique_recommendations,
+    }
+
+
+def format_markdown(report: Dict[str, object]) -> str:
+    lines: List[str] = []
+    system = report["system"]  # type: ignore[assignment]
+    cpu = report["cpu"]  # type: ignore[assignment]
+    hugepages = report["hugepages"]  # type: ignore[assignment]
+    sysctl = report["sysctl"]  # type: ignore[assignment]
+    network = report["network"]  # type: ignore[assignment]
+    irq = report["irq_affinity"]  # type: ignore[assignment]
+    processes = report["processes"]  # type: ignore[assignment]
+
+    lines.append("# Node Tuning Analysis")
+    lines.append("")
+    lines.append("## System Overview")
+    lines.append(f"- Hostname: {system.get('hostname') or 'unknown'}")
+    lines.append(f"- Kernel: {system.get('kernel_release') or 'unknown'}")
+    lines.append(f"- NUMA nodes: {system.get('numa_nodes')}")
+    cmdline = system.get("kernel_cmdline") or ""
+    if cmdline:
+        lines.append(f"- Kernel cmdline: `{cmdline}`")
+    uptime = system.get("uptime_seconds")
+    if uptime is not None:
+        lines.append(f"- Uptime: {uptime:.0f} seconds")
+
+    lines.append("")
+    lines.append("## CPU & Isolation")
+    lines.append(f"- Logical CPUs: {cpu.get('logical_cpus')}")
+    lines.append(f"- Physical cores: {cpu.get('physical_cores')} across {cpu.get('sockets')} socket(s)")
+    lines.append(f"- SMT detected: {'yes' if cpu.get('smt_detected') else 'no'}")
+    if cpu.get("isolated_cpus"):
+        lines.append(f"- Isolated CPUs: {','.join(str(v) for v in cpu['isolated_cpus'])}")  # type: ignore[index]
+    if cpu.get("nohz_full"):
+        lines.append(f"- nohz_full CPUs: {','.join(str(v) for v in cpu['nohz_full'])}")  # type: ignore[index]
+    if cpu.get("tuned_non_isolcpus"):
+        lines.append(
+            f"- tuned.non_isolcpus: {','.join(str(v) for v in cpu['tuned_non_isolcpus'])}"  # type: ignore[index]
+        )
+    for obs in cpu.get("observations", []):
+        lines.append(f"- {obs}")
+
+    lines.append("")
+    lines.append("## Huge Pages")
+    lines.append(f"- Total: {hugepages.get('hugepages_total')} (size={hugepages.get('hugepage_size_kb')} KB)")
+    lines.append(f"- Free: {hugepages.get('hugepages_free')}, Reserved: {hugepages.get('hugepages_reserved')}")
+    if hugepages.get("per_node"):
+        per_node = hugepages["per_node"]  # type: ignore[assignment]
+        node_summaries = []
+        for node, values in per_node.items():
+            node_summaries.append(f"{node}:total={values.get('total',0)}/free={values.get('free',0)}")
+        lines.append(f"- Per NUMA node: {', '.join(node_summaries)}")
+    for obs in hugepages.get("observations", []):
+        lines.append(f"- {obs}")
+
+    lines.append("")
+    lines.append("## Sysctl Highlights")
+    for key, info in sorted(sysctl.get("values", {}).items()):  # type: ignore[call-arg]
+        lines.append(f"- {key}: {info.get('value')}")
+    for obs in sysctl.get("observations", []):
+        lines.append(f"- {obs}")
+
+    lines.append("")
+    lines.append("## Network Signals")
+    tcp_ext = network.get("netstat", {}).get("TcpExt", {})  # type: ignore[index]
+    if tcp_ext:
+        lines.append(
+            "- TcpExt counters: "
+            + ", ".join(f"{key}={value}" for key, value in list(tcp_ext.items())[:8])
+        )
+    tcp_sock = network.get("sockstat", {}).get("TCP", {})  # type: ignore[index]
+    if tcp_sock:
+        lines.append("- Sockstat TCP: " + ", ".join(f"{k}={v}" for k, v in tcp_sock.items()))
+    for obs in network.get("observations", []):
+        lines.append(f"- {obs}")
+
+    lines.append("")
+    lines.append("## IRQ Affinity")
+    lines.append(f"- IRQs inspected: {irq.get('total_irqs_checked')}")
+    overlaps = irq.get("isolated_cpu_overlaps", [])
+    if overlaps:
+        lines.append(f"- IRQs overlapping isolated CPUs: {len(overlaps)}")
+        for entry in overlaps:
+            lines.append(
+                f"  - IRQ {entry.get('irq')}: CPUs {','.join(str(cpu) for cpu in entry.get('cpus', []))}"
+            )
+    else:
+        lines.append("- No IRQ affinity overlaps with isolated CPUs detected.")
+
+    process_list = processes.get("top_processes", [])
+    if process_list:
+        lines.append("")
+        lines.append("## Process Snapshot (top by %CPU)")
+        for proc in process_list[:5]:
+            lines.append(
+                f"- PID {proc['pid']}: {proc['cpu_percent']}% CPU, {proc['mem_percent']}% MEM, cmd='{proc['command']}'"
+            )
+
+    recommendations = report.get("recommendations", [])
+    if recommendations:
+        lines.append("")
+        lines.append("## Recommended Actions")
+        for rec in recommendations:
+            lines.append(f"- {rec}")
+
+    return "\n".join(lines) + "\n"
+
+
+def main(argv: Sequence[str]) -> int:
+    args = parse_arguments(argv)
+    try:
+        env = resolve_environment(args)
+        report = assemble_report(env, max_irq_samples=args.max_irq_samples)
+    except (FileNotFoundError, ValueError) as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 1
+
+    if args.format == "json":
+        output = json.dumps(report, indent=2)
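+        # Machine-readable output; the Markdown branch below targets human review.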
+ else: + output = format_markdown(report) + + if args.output: + output_path = Path(args.output).expanduser() + if output_path.parent and not output_path.parent.exists(): + output_path.parent.mkdir(parents=True, exist_ok=True) + output_path.write_text(output, encoding="utf-8") + print(f"Wrote analysis report to {output_path}") + else: + sys.stdout.write(output) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) + + diff --git a/skills/scripts/generate_tuned_profile.py b/skills/scripts/generate_tuned_profile.py new file mode 100644 index 0000000..fd4f3d8 --- /dev/null +++ b/skills/scripts/generate_tuned_profile.py @@ -0,0 +1,414 @@ +""" +Utility script to generate tuned.openshift.io/v1 Tuned manifests. + +The script is intentionally dependency-free so it can run anywhere Python 3.8+ +is available (CI, developer workstations, or automation pipelines). +""" + +from __future__ import annotations + +import argparse +import json +import os +import subprocess +import sys +from collections import OrderedDict +from typing import Iterable, List, Optional, Sequence, Tuple + + +def _parse_key_value_pairs( + raw_values: Sequence[str], + *, + parameter: str, + allow_empty_value: bool = False, +) -> List[Tuple[str, str]]: + """Split KEY=VALUE (or KEY when allow_empty_value=True) pairs.""" + parsed: List[Tuple[str, str]] = [] + for raw in raw_values: + if "=" in raw: + key, value = raw.split("=", 1) + elif allow_empty_value: + key, value = raw, "" + else: + raise ValueError(f"{parameter} expects KEY=VALUE entries, got '{raw}'") + key = key.strip() + value = value.strip() + if not key: + raise ValueError(f"{parameter} entries must include a non-empty key (got '{raw}')") + parsed.append((key, value)) + return parsed + + +def _parse_section_entries(raw_values: Sequence[str]) -> List[Tuple[str, str, str]]: + """ + Parse SECTION:KEY=VALUE entries for arbitrary tuned.ini sections. 
+ + Examples: + bootloader:cmdline_ocp_realtime=+nohz_full=1-3 + service:service.stalld=start,enable + """ + parsed: List[Tuple[str, str, str]] = [] + for raw in raw_values: + if ":" not in raw: + raise ValueError( + f"--section expects SECTION:KEY=VALUE entries, got '{raw}'" + ) + section, remainder = raw.split(":", 1) + section = section.strip() + if not section: + raise ValueError(f"--section requires a section name before ':', got '{raw}'") + key_value = _parse_key_value_pairs([remainder], parameter="--section") + parsed.append((section, key_value[0][0], key_value[0][1])) + return parsed + + +def _build_profile_ini( + *, + summary: str, + includes: Sequence[str], + main_options: Sequence[Tuple[str, str]], + variables: Sequence[Tuple[str, str]], + sysctls: Sequence[Tuple[str, str]], + extra_sections: Sequence[Tuple[str, str, str]], +) -> str: + sections: "OrderedDict[str, List[str]]" = OrderedDict() + sections["main"] = [f"summary={summary}"] + if includes: + sections["main"].append(f"include={','.join(includes)}") + for key, value in main_options: + sections["main"].append(f"{key}={value}") + + if variables: + sections["variables"] = [f"{key}={value}" for key, value in variables] + if sysctls: + sections["sysctl"] = [f"{key}={value}" for key, value in sysctls] + + for section, key, value in extra_sections: + section = section.strip() + if not section: + continue + if section not in sections: + sections[section] = [] + sections[section].append(f"{key}={value}") + + rendered_sections: List[str] = [] + non_empty_sections = [(name, lines) for name, lines in sections.items() if lines] + for idx, (name, lines) in enumerate(non_empty_sections): + rendered_sections.append(f"[{name}]") + rendered_sections.extend(lines) + if idx != len(non_empty_sections) - 1: + rendered_sections.append("") + return "\n".join(rendered_sections) + + +def _json_string(value: str) -> str: + """Return a JSON-encoded string (adds surrounding quotes, escapes).""" + return json.dumps(value) + + +def _render_manifest( + *, + profile_name: str, + namespace: str, + profile_ini: str, + machine_config_labels: Sequence[Tuple[str, str]], + match_labels: Sequence[Tuple[str, str]], + priority: int, +) -> str: + lines: List[str] = [ + "apiVersion: tuned.openshift.io/v1", + "kind: Tuned", + "metadata:", + f" name: {profile_name}", + ] + if namespace: + lines.append(f" namespace: {namespace}") + lines.extend( + [ + "spec:", + " profile:", + " - data: |", + ] + ) + profile_lines = profile_ini.splitlines() + if not profile_lines: + raise ValueError("Profile contents may not be empty") + for entry in profile_lines: + # Preserve blank lines for readability inside the literal block. 
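+        # Indentation keeps each entry inside the "data: |" literal block of the manifest.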
+        if entry:
+            lines.append(f"      {entry}")
+        else:
+            lines.append("      ")
+    lines.append(f"    name: {profile_name}")
+
+    if not machine_config_labels and not match_labels:
+        raise ValueError("At least one --machine-config-label or --match-label must be provided")
+
+    lines.append("  recommend:")
+
+    if machine_config_labels:
+        lines.append("  - machineConfigLabels:")
+        for key, value in machine_config_labels:
+            lines.append(f"      {key}: {_json_string(value)}")
+        start_written = True
+    else:
+        start_written = False
+
+    if match_labels:
+        prefix = "    match:" if start_written else "  - match:"
+        lines.append(prefix)
+        item_indent = "    - " if start_written else "    - "
+        value_indent = "      " if start_written else "      "
+        for label, value in match_labels:
+            lines.append(f"{item_indent}label: {_json_string(label)}")
+            if value != "":
+                lines.append(f"{value_indent}value: {_json_string(value)}")
+        start_written = True
+
+    priority_prefix = "    priority" if start_written else "  - priority"
+    lines.append(f"{priority_prefix}: {priority}")
+
+    profile_prefix = "    profile" if start_written else "  - profile"
+    lines.append(f"{profile_prefix}: {_json_string(profile_name)}")
+
+    return "\n".join(lines) + "\n"
+
+
+def _run_oc_command(command: Sequence[str]) -> subprocess.CompletedProcess:
+    """Execute an oc command and return the completed process."""
+    try:
+        result = subprocess.run(
+            command,
+            check=True,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+        )
+    except FileNotFoundError as exc:
+        raise RuntimeError(
+            "Unable to locate the 'oc' binary. Install the OpenShift CLI or set --oc-binary."
+        ) from exc
+    except subprocess.CalledProcessError as exc:
+        message = exc.stderr.strip() or exc.stdout.strip() or str(exc)
+        raise RuntimeError(f"Command '{' '.join(command)}' failed: {message}") from exc
+    return result
+
+
+def list_nodes(*, oc_binary: str, selector: Optional[str]) -> List[str]:
+    """List nodes using the oc CLI and return their names."""
+    command: List[str] = [oc_binary, "get", "nodes", "-o", "name"]
+    if selector:
+        command.extend(["-l", selector])
+    result = _run_oc_command(command)
+    nodes = [line.strip() for line in result.stdout.splitlines() if line.strip()]
+    if nodes:
+        for node in nodes:
+            print(node)
+    else:
+        print("No nodes matched the provided selector.")
+    return nodes
+
+
+def label_nodes(
+    *,
+    oc_binary: str,
+    entries: Sequence[str],
+    overwrite: bool,
+) -> None:
+    """Label nodes via oc CLI using NODE:label entries."""
+    if not entries:
+        return
+    for raw in entries:
+        if ":" not in raw:
+            raise ValueError(
+                f"--label-node expects NODE:KEY[=VALUE] format (e.g. node1:node-role.kubernetes.io/worker-hp=) - got '{raw}'"
+            )
+        node_name, label = raw.split(":", 1)
+        node_name = node_name.strip()
+        label = label.strip()
+        if not node_name or not label:
+            raise ValueError(f"--label-node entry must include both node name and label (got '{raw}')")
+        command: List[str] = [oc_binary, "label", "node", node_name, label]
+        if overwrite:
+            command.append("--overwrite")
+        _run_oc_command(command)
+        print(f"Labeled {node_name} with {label}")
+
+
+def generate_manifest(args: argparse.Namespace) -> str:
+    includes = [value.strip() for value in args.include or [] if value.strip()]
+
+    main_options = _parse_key_value_pairs(args.main_option or [], parameter="--main-option")
+    variables = _parse_key_value_pairs(args.variable or [], parameter="--variable")
+    sysctls = _parse_key_value_pairs(args.sysctl or [], parameter="--sysctl")
+    extra_sections = _parse_section_entries(args.section or [])
+
+    match_labels = _parse_key_value_pairs(
+        args.match_label or [],
+        parameter="--match-label",
+        allow_empty_value=True,
+    )
+    machine_config_labels = _parse_key_value_pairs(
+        args.machine_config_label or [],
+        parameter="--machine-config-label",
+        allow_empty_value=True,
+    )
+
+    profile_ini = _build_profile_ini(
+        summary=args.summary,
+        includes=includes,
+        main_options=main_options,
+        variables=variables,
+        sysctls=sysctls,
+        extra_sections=extra_sections,
+    )
+
+    manifest = _render_manifest(
+        profile_name=args.profile_name,
+        namespace=args.namespace,
+        profile_ini=profile_ini,
+        machine_config_labels=machine_config_labels,
+        match_labels=match_labels,
+        priority=args.priority,
+    )
+    return manifest
+
+
+def parse_arguments(argv: Iterable[str]) -> argparse.Namespace:
+    parser = argparse.ArgumentParser(
+        description="Generate tuned.openshift.io/v1 Tuned manifests for the Node Tuning Operator.",
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument("--profile-name", help="Name of the Tuned profile and resource")
+    parser.add_argument("--summary", help="Summary placed inside the [main] section")
+    parser.add_argument(
+        "--namespace",
+        default="openshift-cluster-node-tuning-operator",
+        help="Namespace to place in metadata.namespace",
+    )
+    parser.add_argument(
+        "--include",
+        action="append",
+        help="Append an entry to the 'include=' list (multiple flags allowed)",
+    )
+    parser.add_argument(
+        "--main-option",
+        action="append",
+        help="Add KEY=VALUE to the [main] section beyond summary/include",
+    )
+    parser.add_argument(
+        "--variable",
+        action="append",
+        help="Add KEY=VALUE to the [variables] section",
+    )
+    parser.add_argument(
+        "--sysctl",
+        action="append",
+        help="Add KEY=VALUE to the [sysctl] section",
+    )
+    parser.add_argument(
+        "--section",
+        action="append",
+        help="Add arbitrary SECTION:KEY=VALUE lines (e.g. bootloader:cmdline=...)",
+    )
+    parser.add_argument(
+        "--machine-config-label",
+        action="append",
+        help="Add a MachineConfigPool selector (key=value) under machineConfigLabels",
+    )
+    parser.add_argument(
+        "--match-label",
+        action="append",
+        help="Add a node label entry (key[=value]) under recommend[].match[]",
+    )
+    parser.add_argument(
+        "--priority",
+        type=int,
+        default=20,
+        help="Recommendation priority",
+    )
+    parser.add_argument(
+        "--output",
+        help="Output file path; defaults to <profile-name>.yaml in the current directory",
+    )
+    parser.add_argument(
+        "--dry-run",
+        action="store_true",
+        help="Print manifest to stdout instead of writing to disk",
+    )
+    parser.add_argument(
+        "--skip-manifest",
+        action="store_true",
+        help="Skip manifest generation; useful when only listing or labeling nodes",
+    )
+    parser.add_argument(
+        "--list-nodes",
+        action="store_true",
+        help="List nodes via 'oc get nodes' before other actions",
+    )
+    parser.add_argument(
+        "--node-selector",
+        help="Label selector to filter nodes when using --list-nodes",
+    )
+    parser.add_argument(
+        "--label-node",
+        action="append",
+        help="Label nodes using NODE:KEY[=VALUE] entries (repeat for multiple nodes)",
+    )
+    parser.add_argument(
+        "--overwrite-labels",
+        action="store_true",
+        help="Allow overwriting existing labels when using --label-node",
+    )
+    parser.add_argument(
+        "--oc-binary",
+        default=os.environ.get("OC_BIN", "oc"),
+        help="Path to the oc binary to execute",
+    )
+    return parser.parse_args(argv)
+
+
+def main(argv: Sequence[str]) -> int:
+    args = parse_arguments(argv)
+    try:
+        if args.list_nodes:
+            list_nodes(oc_binary=args.oc_binary, selector=args.node_selector)
+
+        if args.label_node:
+            label_nodes(
+                oc_binary=args.oc_binary,
+                entries=args.label_node,
+                overwrite=args.overwrite_labels,
+            )
+
+        if args.skip_manifest:
+            return 0
+
+        if not args.profile_name:
+            raise ValueError("--profile-name is required unless --skip-manifest is set")
+        if not args.summary:
+            raise ValueError("--summary is required unless --skip-manifest is set")
+
+        manifest = generate_manifest(args)
+    except (ValueError, RuntimeError) as exc:
+        print(f"error: {exc}", file=sys.stderr)
+        return 1
+
+    if args.dry_run:
+        sys.stdout.write(manifest)
+        return 0
+
+    output_path = args.output or f"{args.profile_name}.yaml"
+    output_dir = os.path.dirname(output_path)
+    if output_dir and not os.path.exists(output_dir):
+        os.makedirs(output_dir, exist_ok=True)
+
+    with open(output_path, "w", encoding="utf-8") as handle:
+        handle.write(manifest)
+    print(f"Wrote Tuned manifest to {output_path}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main(sys.argv[1:]))
+