Files
2025-11-30 08:47:38 +08:00

5.5 KiB

Ansible Troubleshooting Reference

Common Errors

| Error | Cause | Solution |
|---|---|---|
| SSH connection failed | Wrong host/key/user | Check ansible_host, ansible_user, key |
| Permission denied | Need sudo/wrong user | Add become: true, check sudo config |
| Module not found | Collection not installed | ansible-galaxy collection install <namespace.collection> |
| Variable undefined | Missing var/typo | Check var name, define in vars |
| Syntax error | YAML/Jinja2 issue | Run ansible-playbook --syntax-check |
| Host unreachable | Network/SSH issue | ansible host -m ping, check firewall |

Debug Commands

# Test connectivity
ansible all -m ping
ansible host -m ping -vvv

# Syntax check
ansible-playbook playbook.yml --syntax-check

# Dry run (check mode)
ansible-playbook playbook.yml --check

# Diff mode (show changes)
ansible-playbook playbook.yml --diff

# Verbose output
ansible-playbook playbook.yml -v     # Minimal
ansible-playbook playbook.yml -vv    # More
ansible-playbook playbook.yml -vvv   # Connection debug
ansible-playbook playbook.yml -vvvv  # Full debug

# List tasks without running
ansible-playbook playbook.yml --list-tasks

# List hosts
ansible-playbook playbook.yml --list-hosts

# Start at specific task
ansible-playbook playbook.yml --start-at-task="Task name"

# Step through tasks
ansible-playbook playbook.yml --step

Connection Issues

Test SSH

# Direct SSH test
ssh -i ~/.ssh/key user@host

# Ansible ping
ansible host -m ping -vvv

# Check SSH config
ansible host -m debug -a "var=ansible_ssh_private_key_file"

Common SSH Fixes

# In inventory (host or group vars)
ansible_ssh_private_key_file: ~/.ssh/mykey
ansible_user: ubuntu
ansible_host: 192.168.1.10
ansible_host_key_checking: false  # Only for testing

# Equivalent setting in ansible.cfg:
# [defaults]
# host_key_checking = False

SSH Connection Options

# In inventory
host1:
  ansible_host: 192.168.1.10
  # StrictHostKeyChecking=no disables host key verification - only for testing
  ansible_ssh_common_args: '-o StrictHostKeyChecking=no'
  ansible_ssh_extra_args: '-o ConnectTimeout=10'

Permission Issues

Sudo Not Working

# Enable become (in the playbook)
- hosts: all
  become: true
  become_method: sudo
  become_user: root

# On target host, check sudoers
sudo visudo

# For passwordless sudo, the connecting user needs an entry like:
# ubuntu ALL=(ALL) NOPASSWD: ALL

Ask for Sudo Password

ansible-playbook playbook.yml --ask-become-pass

Variable Issues

Debug Variables

- name: Print all vars
  ansible.builtin.debug:
    var: vars

- name: Print specific var
  ansible.builtin.debug:
    var: my_var

- name: Print hostvars
  ansible.builtin.debug:
    var: hostvars[inventory_hostname]

- name: Print facts
  ansible.builtin.debug:
    var: ansible_facts

Check Variable Precedence

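# Show the final merged inventory variables for a host
# (inventory, group_vars, host_vars only; play and role vars are not included,
# and the output does not say which source a value came from)
ansible-inventory --host hostname --yaml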

Undefined Variable

# Provide default
value: "{{ my_var | default('fallback') }}"

# Check if defined
- name: Task
  when: my_var is defined

# Fail early if required
- name: Validate
  ansible.builtin.assert:
    that: my_var is defined
    fail_msg: "my_var must be set"

Module Issues

Module Not Found

# Install collection
ansible-galaxy collection install community.docker

# Check installed
ansible-galaxy collection list

# Update collections
ansible-galaxy collection install -r requirements.yml --force

Module Arguments

# Get module documentation
ansible-doc ansible.builtin.copy
ansible-doc community.docker.docker_compose_v2

Idempotency Issues

Task Always Shows "changed"

# Bad - always changed
- name: Run script
  ansible.builtin.command: /bin/script.sh

# Good - check first
- name: Run script
  ansible.builtin.command: /bin/script.sh
  args:
    creates: /opt/app/.installed

# Good - explicit changed_when
- name: Run script
  ansible.builtin.command: /bin/script.sh
  register: result
  changed_when: "'Created' in result.stdout"

Test Idempotency

# Run twice, second should show all "ok"
ansible-playbook playbook.yml
ansible-playbook playbook.yml  # Should show "changed=0"

Handler Issues

Handler Not Running

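  • Handlers only run if the notifying task reports "changed"
  • Handlers run at end of play, not immediately after the notifying task
  • If a later task fails, already-notified handlers are skipped; run them anyway with: ansible-playbook playbook.yml --force-handlers

# Force handler to run immediately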
- name: Config change
  ansible.builtin.template:
    src: config.j2
    dest: /etc/app/config
  notify: Restart app

- name: Flush handlers
  ansible.builtin.meta: flush_handlers

- name: Continue with restarted service
  ansible.builtin.uri:
    url: http://localhost:8080/health

Performance Issues

Slow Playbook

# Disable fact gathering if not needed
- hosts: all
  gather_facts: false

# Or gather specific facts (the minimal fact set is still collected
# unless '!min' is also listed)
- hosts: all
  gather_facts: true
  gather_subset:
    - network

# Increase parallelism
ansible-playbook playbook.yml -f 20  # 20 forks

# Use pipelining (add to ansible.cfg)
# [ssh_connection]
# pipelining = True

Callback Timer

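# ansible.cfg
# callbacks_enabled needs ansible-core 2.11+ (older releases: callback_whitelist);
# timer and profile_tasks are provided by the ansible.posix collection
[defaults]
callbacks_enabled = timer, profile_tasks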

Recovery

Failed Playbook

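# Retry failed hosts
# (retry files are off by default; set retry_files_enabled = True under
# [defaults] in ansible.cfg so that playbook.retry is written on failure)
ansible-playbook playbook.yml --limit @playbook.retry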

# Start at failed task
ansible-playbook playbook.yml --start-at-task="Failed Task Name"

Cleanup After Failure

- name: Risky operation
  block:
    - name: Do something
      ansible.builtin.command: /bin/risky
  rescue:
    - name: Cleanup on failure
      ansible.builtin.file:
        path: /tmp/incomplete
        state: absent
  always:
    - name: Always cleanup
      ansible.builtin.file:
        path: /tmp/lock
        state: absent