Initial commit
This commit is contained in:
86
skills/toolkit/bash/system/check-disk.meta.yml
Normal file
86
skills/toolkit/bash/system/check-disk.meta.yml
Normal file
@@ -0,0 +1,86 @@
|
||||
tool_id: BASH-CHECK-DISK-003
|
||||
tool_name: "磁盘健康检查器"
|
||||
|
||||
基本信息:
|
||||
语言: bash
|
||||
文件: check-disk.sh
|
||||
复杂度: level-2
|
||||
创建日期: 2025-11-14
|
||||
作者: CodeConscious
|
||||
|
||||
用途分类:
|
||||
- MONITOR # 监控诊断
|
||||
|
||||
功能描述:
|
||||
简介: "检查磁盘空间、inode使用和健康状态,提供详细的分析和建议"
|
||||
详细: |
|
||||
支持功能:
|
||||
- 磁盘空间使用检查(总容量、已使用、可用)
|
||||
- Inode使用检查(总数、已使用、可用)
|
||||
- 使用阈值警报(可配置阈值,默认80%)
|
||||
- 文件系统类型检测(ext4, xfs等)
|
||||
- 挂载点和访问权限检查
|
||||
- 性能指标分析(大文件 vs 小文件占比)
|
||||
- 智能建议生成(基于检查结果)
|
||||
|
||||
使用场景:
|
||||
- "监控服务器磁盘空间,预防空间不足导致的服务中断"
|
||||
- "检查日志目录磁盘使用情况,规划日志轮转策略"
|
||||
- '诊断"No space left on device"错误的根本原因'
|
||||
- "定期检查系统健康状态,识别潜在问题"
|
||||
- "在扩容前评估当前磁盘使用情况"
|
||||
- "排查inode耗尽问题(大量小文件导致)"
|
||||
|
||||
使用方法:
|
||||
命令: "bash check-disk.sh [路径] [阈值]"
|
||||
参数:
|
||||
路径: "要检查的磁盘路径(默认: /)"
|
||||
阈值: "空间使用警报阈值百分比(默认: 80%,1-100之间)"
|
||||
示例:
|
||||
- "检查根目录(阈值80%): bash check-disk.sh"
|
||||
- "检查/home(阈值85%): bash check-disk.sh /home 85"
|
||||
- "检查/var/log(阈值90%): bash check-disk.sh /var/log 90"
|
||||
|
||||
依赖要求:
|
||||
系统命令:
|
||||
- bash: 4.0+
|
||||
- df: 磁盘使用情况
|
||||
- du: 文件大小
|
||||
- find: 文件查找
|
||||
- awk: 文本处理
|
||||
- sort: 排序
|
||||
|
||||
输入输出:
|
||||
输入:
|
||||
- 无(从文件系统自动获取)
|
||||
输出:
|
||||
- stdout: 彩色格式化报告(空间使用、inode、警报、建议)
|
||||
- 退出码:
|
||||
- 0: 磁盘健康
|
||||
- 1: 有警报
|
||||
- 2: 严重问题
|
||||
|
||||
上次使用:
|
||||
时间: 2025-11-14 11:30:00
|
||||
用途: "检查服务器根目录磁盘空间,发现/var/log占用过大"
|
||||
结果: "成功识别日志目录占用85%,建议清理旧日志后立即恢复20%空间"
|
||||
满意度: 0.93
|
||||
|
||||
相关工具:
|
||||
- 前置工具: 无
|
||||
- 互补工具:
|
||||
- toolkit/bash/analysis/analyze-logs.sh(日志分析)
|
||||
- toolkit/bash/system/check-service.sh(服务健康)
|
||||
- 替代工具:
|
||||
- df -h(手动检查)
|
||||
- ncdu(交互式磁盘使用分析)
|
||||
|
||||
维护记录:
|
||||
2025-11-14:
|
||||
- 初始创建
|
||||
- 支持空间和inode检查
|
||||
- 添加性能指标分析
|
||||
TODO:
|
||||
- 支持磁盘健康度检测(S.M.A.R.T.)
|
||||
- 添加磁盘I/O性能测试
|
||||
- 支持磁盘温度监测
|
||||
246
skills/toolkit/bash/system/check-disk.sh
Executable file
246
skills/toolkit/bash/system/check-disk.sh
Executable file
@@ -0,0 +1,246 @@
|
||||
#!/bin/bash
# Disk health checker - inspects disk space, inode usage and health status.
#
# Usage: check-disk.sh [path] [threshold]
#   path       directory to inspect (default: /)
#   threshold  usage alert threshold in percent (default: 80)

# Abort on the first unhandled command failure.
set -e

# Positional arguments with their defaults.
DISK_PATH="${1:-/}"
THRESHOLD="${2:-80}"

# Color output.
# The values are literal backslash sequences, so they only turn into real
# escape codes when printed with `echo -e` or `printf '%b'`.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly CYAN='\033[0;36m'
readonly NC='\033[0m' # No Color

# Help text.
#
# Prints the usage summary (title, synopsis, parameters and examples) to
# stdout. "$0" is expanded inside the here-doc so the examples show the name
# the script was invoked with.
usage() {
  cat <<EOF
💾 磁盘健康检查器

使用: $0 [路径] [阈值]

参数:
 路径 要检查的磁盘路径 (默认: /)
 阈值 空间使用警报阈值百分比 (默认: 80%)

示例:
 $0 # 检查根目录,阈值80%
 $0 /home 85 # 检查/home目录,阈值85%
 $0 /var/log 90 # 检查日志目录,阈值90%

EOF
}

# Argument checks: help flag, path existence and threshold range.

# Return 0 when $1 is an integer between 1 and 100 (inclusive), 1 otherwise.
is_valid_threshold() {
  local value=$1
  # At most three digits. This also rejects values that are too large for the
  # shell's integer comparison, which would otherwise raise an error instead
  # of failing the range test and let the value slip through.
  [[ "$value" =~ ^[0-9]{1,3}$ ]] || return 1
  # 10# forces base 10 so inputs such as "08" are not parsed as octal.
  ((10#$value >= 1 && 10#$value <= 100))
}

if [[ "${1:-}" == "--help" || "${1:-}" == "-h" ]]; then
  usage
  exit 0
fi

# Apply the documented defaults if the variables have not been set yet;
# already-set values are left untouched.
: "${DISK_PATH:=/}" "${THRESHOLD:=80}"

# Make sure the path exists.
if [[ ! -d "$DISK_PATH" ]]; then
  echo "❌ 错误: 路径不存在: $DISK_PATH" >&2
  exit 1
fi

# Numeric validation.
if ! is_valid_threshold "$THRESHOLD"; then
  echo "❌ 错误: 阈值必须是1-100之间的数字" >&2
  exit 1
fi

# Normalize (e.g. "080" -> "80") so later comparisons see a plain decimal.
THRESHOLD=$((10#$THRESHOLD))

# Print the report header: title, inspected path and alert threshold.
# Reads DISK_PATH, THRESHOLD and the BLUE/NC color variables.
print_banner() {
  echo "💾 磁盘健康检查器"
  echo "=========================================="
  # %b expands the escape sequences stored in the color variables (a plain
  # `echo` would print them literally); the path goes through %s so any
  # backslashes in it are left untouched.
  printf '检查路径: %b%s%b\n' "$BLUE" "$DISK_PATH" "$NC"
  echo "警报阈值: ${THRESHOLD}%"
  echo "=========================================="
  echo ""
}

print_banner

# Collect disk usage figures for DISK_PATH and print the overview.
# Sets DISK_DEVICE, DISK_SIZE, DISK_USED, DISK_AVAIL and DISK_USAGE_PERCENT
# (bare number, no "%"), plus USAGE_COLOR and STATUS for display.

# -P selects the POSIX output format, which keeps each filesystem on a single
# line even when the device name is long; -h keeps the sizes human readable.
if ! DF_OUTPUT=$(df -Ph -- "$DISK_PATH"); then
  echo "❌ 错误: 无法获取磁盘信息: $DISK_PATH" >&2
  exit 1
fi

# The last output line is the data row; split it once instead of spawning
# awk for every column.
DISK_INFO=${DF_OUTPUT##*$'\n'}
read -r DISK_DEVICE DISK_SIZE DISK_USED DISK_AVAIL DISK_USAGE_PERCENT _ <<<"$DISK_INFO"
DISK_USAGE_PERCENT=${DISK_USAGE_PERCENT%\%}

# NOTE(review): df may print "-" instead of a percentage for some filesystems
# - confirm on the target platforms. Fall back to 0 so the numeric tests in
# this script keep working.
if ! [[ "$DISK_USAGE_PERCENT" =~ ^[0-9]+$ ]]; then
  echo "⚠️ 无法解析磁盘使用率 (df 输出: ${DISK_USAGE_PERCENT:-空}),按 0% 处理" >&2
  DISK_USAGE_PERCENT=0
fi

echo "📊 磁盘使用概况:"
echo "=========================================="
echo -e "设备: ${CYAN}$DISK_DEVICE${NC}"
echo -e "总容量: ${BLUE}$DISK_SIZE${NC}"
echo -e "已使用: ${YELLOW}$DISK_USED${NC}"
echo -e "可用: ${GREEN}$DISK_AVAIL${NC}"

# Color and label for the usage percentage: above 90 is critical, above the
# configured threshold is a warning, anything else is normal.
if [ "$DISK_USAGE_PERCENT" -gt 90 ]; then
  USAGE_COLOR="$RED"
  STATUS="🔴 严重"
elif [ "$DISK_USAGE_PERCENT" -gt "$THRESHOLD" ]; then
  USAGE_COLOR="$YELLOW"
  STATUS="🟡 警告"
else
  USAGE_COLOR="$GREEN"
  STATUS="🟢 正常"
fi

echo -e "使用率: ${USAGE_COLOR}${DISK_USAGE_PERCENT}%${NC} $STATUS"
echo ""

# Inode check: collect inode figures for DISK_PATH and print the overview.
# Sets INODE_TOTAL, INODE_USED, INODE_AVAIL and INODE_USED_PERCENT (bare
# number, no "%"), plus INODE_COLOR and INODE_STATUS for display.
echo "=========================================="
echo "📂 Inode使用概况:"
echo "=========================================="

# -P keeps the data row on a single line; -i switches df to inode counts.
if ! INODE_DF_OUTPUT=$(df -Pi -- "$DISK_PATH"); then
  echo "❌ 错误: 无法获取Inode信息: $DISK_PATH" >&2
  exit 1
fi

# Last line is the data row: device, total, used, free, use%, mount point.
INODE_INFO=${INODE_DF_OUTPUT##*$'\n'}
read -r _ INODE_TOTAL INODE_USED INODE_AVAIL INODE_USED_PERCENT _ <<<"$INODE_INFO"
INODE_USED_PERCENT=${INODE_USED_PERCENT%\%}

# NOTE(review): filesystems without fixed inode accounting appear to report
# "-" here - confirm on the target platforms. Fall back to 0 so the numeric
# tests in this script keep working.
if ! [[ "$INODE_USED_PERCENT" =~ ^[0-9]+$ ]]; then
  echo "⚠️ 无法解析Inode使用率 (df 输出: ${INODE_USED_PERCENT:-空}),按 0% 处理" >&2
  INODE_USED_PERCENT=0
fi

echo -e "总数: ${BLUE}$INODE_TOTAL${NC}"
echo -e "已使用: ${YELLOW}$INODE_USED${NC}"
echo -e "可用: ${GREEN}$INODE_AVAIL${NC}"

# Color and label for the inode percentage, using the same limits as the
# disk space check: above 90 critical, above THRESHOLD warning.
if [ "$INODE_USED_PERCENT" -gt 90 ]; then
  INODE_COLOR="$RED"
  INODE_STATUS="🔴 严重"
elif [ "$INODE_USED_PERCENT" -gt "$THRESHOLD" ]; then
  INODE_COLOR="$YELLOW"
  INODE_STATUS="🟡 警告"
else
  INODE_COLOR="$GREEN"
  INODE_STATUS="🟢 正常"
fi

echo -e "使用率: ${INODE_COLOR}${INODE_USED_PERCENT}%${NC} $INODE_STATUS"
echo ""

# Detailed information: filesystem type, mount point and write access.
# Sets FS_TYPE, MOUNT_POINT and RW_STATUS.
echo "=========================================="
echo "🔍 详细信息:"
echo "=========================================="

# One df call yields both values. With -P -T the data row is
#   device type blocks used available capacity mount-point
# and -P keeps it on a single line. `read` puts everything after the sixth
# field into MOUNT_POINT, so a mount point containing spaces stays intact.
read -r _ FS_TYPE _ _ _ _ MOUNT_POINT <<<"$(df -PT -- "$DISK_PATH" | tail -n 1)"

# Filesystem type.
echo -e "文件系统类型: ${CYAN}$FS_TYPE${NC}"

# Mount point.
echo -e "挂载点: ${BLUE}$MOUNT_POINT${NC}"

# Read/write check.
# NOTE(review): -w tests whether the current user may write to DISK_PATH, so
# an unprivileged user can see "read-only" on a filesystem that is mounted
# read-write - confirm this is the intended meaning.
if [ -w "$DISK_PATH" ]; then
  RW_STATUS="${GREEN}可读写${NC}"
else
  RW_STATUS="${RED}只读${NC}"
fi
echo -e "访问权限: $RW_STATUS"

echo ""

# Alert checks.
echo "=========================================="
echo "🚨 警报检查:"
echo "=========================================="

ALERT_COUNT=0

# Return 0 when $1 and $2 are both plain non-negative integers and $1 > $2.
# Non-numeric input (e.g. "-" or an empty string) simply yields "false".
exceeds() {
  [[ "$1" =~ ^[0-9]+$ && "$2" =~ ^[0-9]+$ ]] && [ "$1" -gt "$2" ]
}

# Evaluate every alert condition, print one line per finding and add the
# number of affected areas (disk space, inodes, write access) to ALERT_COUNT.
# Reads DISK_USAGE_PERCENT, INODE_USED_PERCENT, THRESHOLD and DISK_PATH.
#
# The counter is updated with a plain assignment. The previous
# ((ALERT_COUNT++)) returns a non-zero status when the counter is still 0,
# which under `set -e` terminated the script at its first alert.
check_alerts() {
  local disk_alert=0 inode_alert=0 access_alert=0

  if exceeds "$DISK_USAGE_PERCENT" 90; then
    echo -e "⚠️ 磁盘使用率过高: ${RED}${DISK_USAGE_PERCENT}%${NC}"
    disk_alert=1
  fi

  # A threshold breach counts as an alert too, but only once per area.
  if exceeds "$DISK_USAGE_PERCENT" "$THRESHOLD"; then
    echo -e "⚠️ 磁盘使用率超过阈值: ${DISK_USAGE_PERCENT}% > ${THRESHOLD}%"
    disk_alert=1
  fi

  if exceeds "$INODE_USED_PERCENT" 90; then
    echo -e "⚠️ Inode使用率过高: ${RED}${INODE_USED_PERCENT}%${NC}"
    inode_alert=1
  fi

  if exceeds "$INODE_USED_PERCENT" "$THRESHOLD"; then
    echo -e "⚠️ Inode使用率超过阈值: ${INODE_USED_PERCENT}% > ${THRESHOLD}%"
    inode_alert=1
  fi

  # NOTE(review): -w reflects the current user's permission on DISK_PATH,
  # not the mount's ro/rw flag - confirm this is the intended check.
  if [ ! -w "$DISK_PATH" ]; then
    echo -e "⚠️ ${YELLOW}磁盘为只读状态${NC}"
    access_alert=1
  fi

  ALERT_COUNT=$((ALERT_COUNT + disk_alert + inode_alert + access_alert))
}

check_alerts

echo ""

echo "=========================================="
echo "📈 性能指标:"
echo "=========================================="

# Print the five largest entries among directory $1 and its immediate
# subdirectories, largest first.
# --max-depth=1 limits the listing to the top level (the totals still include
# everything below); -x keeps du on the filesystem being inspected.
print_largest_dirs() {
  local root=$1 size path
  du -xh --max-depth=1 -- "$root" 2>/dev/null \
    | sort -hr \
    | head -n 5 \
    | while IFS=$'\t' read -r size path; do
      printf ' %b%s%b\t%s\n' "$YELLOW" "$size" "$NC" "$path"
    done
}

# Print up to five directories at most two levels below $1 that directly
# contain more than 100 regular files, highest count first.
# A single find lists the parent directory of every file (-printf '%h');
# `sort | uniq -c` turns that into per-directory counts, so no extra find is
# started per directory. Paths containing newlines are not supported.
print_busiest_dirs() {
  local root=$1 count dir
  find "$root" -maxdepth 3 -xdev -type f -printf '%h\n' 2>/dev/null \
    | sort \
    | uniq -c \
    | sort -rn \
    | awk '$1 > 100' \
    | head -n 5 \
    | while read -r count dir; do
      printf ' %b%s%b个文件\t%s\n' "$YELLOW" "$count" "$NC" "$dir"
    done
}

# Is the disk filled by a few large directories or by many small files?
echo "按大小排序的顶级目录:"
print_largest_dirs "$DISK_PATH"

echo ""
echo "按数量排序的顶级目录:"
print_busiest_dirs "$DISK_PATH"

echo ""

echo "=========================================="
echo "💡 建议:"
echo "=========================================="

# Numeric helpers, defined here so this section is self-contained.
# Both return "false" for non-numeric input instead of raising an error.
exceeds() {
  [[ "$1" =~ ^[0-9]+$ && "$2" =~ ^[0-9]+$ ]] && [ "$1" -gt "$2" ]
}
is_below() {
  [[ "$1" =~ ^[0-9]+$ && "$2" =~ ^[0-9]+$ ]] && [ "$1" -lt "$2" ]
}

# Print recommendations based on ALERT_COUNT, DISK_USAGE_PERCENT,
# INODE_USED_PERCENT, THRESHOLD and the writability of DISK_PATH.
# With alerts: targeted advice per problem area. Without: a short
# confirmation that the disk is healthy.
print_recommendations() {
  if [ "${ALERT_COUNT:-0}" -gt 0 ]; then
    echo "🚨 发现 $ALERT_COUNT 个问题需要处理:"
    echo ""

    if exceeds "$DISK_USAGE_PERCENT" 90; then
      echo " 磁盘使用率超过90%:"
      echo " - 建议立即清理日志文件(/var/log)"
      echo " - 检查临时文件(/tmp)"
      echo " - 考虑扩容或迁移数据"
      echo ""
    elif exceeds "$DISK_USAGE_PERCENT" "$THRESHOLD"; then
      # Above the configured threshold but not yet critical.
      echo " 磁盘使用率超过阈值(${THRESHOLD}%):"
      echo " - 关注增长趋势,提前清理或规划扩容"
      echo ""
    fi

    if exceeds "$INODE_USED_PERCENT" 90; then
      echo " Inode使用率高:"
      echo " - 通常由大量小文件导致"
      echo " - 检查并清理临时文件"
      echo " - 查找并删除空文件"
      echo " - 可能是邮件队列或缓存文件过多"
      echo ""
    elif exceeds "$INODE_USED_PERCENT" "$THRESHOLD"; then
      echo " Inode使用率超过阈值(${THRESHOLD}%):"
      echo " - 检查是否存在大量小文件或缓存文件"
      echo ""
    fi

    if [ ! -w "$DISK_PATH" ]; then
      echo " 磁盘只读:"
      echo " - 检查文件系统错误(fsck)"
      echo " - 可能是磁盘故障或挂载问题"
      echo " - 需要root权限检查和修复"
      echo ""
    fi
  else
    echo "✅ 磁盘健康状况良好"
    echo ""

    if is_below "$DISK_USAGE_PERCENT" 70; then
      echo " - 使用率充足,暂无扩容需求"
    fi

    if is_below "$INODE_USED_PERCENT" 70; then
      echo " - Inode充足,无小文件问题"
    fi

    echo ""
    echo " 💡 建议定期运行此工具监控磁盘状态"
  fi
}

print_recommendations

echo ""
echo "=========================================="
echo "磁盘检查完成"
echo "=========================================="

# Return 0 when $1 and $2 are both plain non-negative integers and $1 > $2.
# Defined here so this section is self-contained.
exceeds() {
  [[ "$1" =~ ^[0-9]+$ && "$2" =~ ^[0-9]+$ ]] && [ "$1" -gt "$2" ]
}

# Exit status, matching what the tool's metadata file documents:
#   0 - disk healthy
#   1 - at least one alert (threshold exceeded, or ALERT_COUNT > 0)
#   2 - critical: disk or inode usage above 90%
if exceeds "$DISK_USAGE_PERCENT" 90 || exceeds "$INODE_USED_PERCENT" 90; then
  exit 2
elif [ "${ALERT_COUNT:-0}" -gt 0 ] \
  || exceeds "$DISK_USAGE_PERCENT" "$THRESHOLD" \
  || exceeds "$INODE_USED_PERCENT" "$THRESHOLD"; then
  exit 1
fi

exit 0

80
skills/toolkit/bash/system/check-service.meta.yml
Normal file
80
skills/toolkit/bash/system/check-service.meta.yml
Normal file
@@ -0,0 +1,80 @@
|
||||
tool_id: SERVICE-CHECK-001
|
||||
tool_name: "服务健康检查器"
|
||||
|
||||
基本信息:
|
||||
语言: bash
|
||||
文件: check-service.sh
|
||||
复杂度: level-1
|
||||
创建日期: 2025-11-14
|
||||
作者: CodeConscious
|
||||
|
||||
用途分类:
|
||||
- MONITOR # 监控诊断
|
||||
|
||||
功能描述:
|
||||
简介: "检查服务健康状态(HTTP/数据库/Redis)"
|
||||
详细: |
|
||||
支持多种服务类型的健康检查:
|
||||
- HTTP服务:检查 /health 端点
|
||||
- 数据库:PostgreSQL连接检查
|
||||
- Redis:连接和ping测试
|
||||
|
||||
使用场景:
|
||||
- "部署后验证服务是否正常启动"
|
||||
- "诊断服务不可访问问题"
|
||||
- "CI/CD流程中的健康检查"
|
||||
- "定期监控服务状态"
|
||||
|
||||
使用方法:
|
||||
命令: "bash check-service.sh [服务名] [检查类型] [超时时间]"
|
||||
参数:
|
||||
服务名: "要检查的服务名称(用于显示)"
|
||||
检查类型:
|
||||
- http: "HTTP健康检查"
|
||||
- db/database: "数据库连接检查"
|
||||
- redis: "Redis连接检查"
|
||||
超时时间: "秒(默认:5秒)"
|
||||
示例:
|
||||
- "检查HTTP服务: bash check-service.sh auth-service http"
|
||||
- "检查数据库: bash check-service.sh db-service db"
|
||||
- "检查Redis: bash check-service.sh cache redis"
|
||||
|
||||
依赖要求:
|
||||
系统命令:
|
||||
- curl: "HTTP客户端"
|
||||
- jq: "JSON解析(可选)"
|
||||
- pg_isready: "PostgreSQL客户端"
|
||||
- redis-cli: "Redis客户端"
|
||||
环境变量:
|
||||
- DB_HOST: "数据库主机(默认:localhost)"
|
||||
- DB_PORT: "数据库端口(默认:5432)"
|
||||
- DB_NAME: "数据库名"
|
||||
- DB_USER: "数据库用户"
|
||||
- REDIS_HOST: "Redis主机(默认:localhost)"
|
||||
- REDIS_PORT: "Redis端口(默认:6379)"
|
||||
|
||||
输入输出:
|
||||
输入:
|
||||
- 无(从环境变量读取配置)
|
||||
输出:
|
||||
- stdout: "人类可读的检查结果"
|
||||
- 退出码:
|
||||
- 0: 所有检查通过
|
||||
- 1: 检查失败
|
||||
- 其他: 参数错误
|
||||
|
||||
上次使用:
|
||||
时间: 2025-11-14 16:45:00
|
||||
用途: "验证auth-service修复后状态"
|
||||
结果: "服务健康,响应时间45ms"
|
||||
满意度: 0.9
|
||||
|
||||
相关工具:
|
||||
- 前置工具: 无
|
||||
- 互补工具:
|
||||
- toolkit/python/analysis/analyze_logs.py(详细日志分析)
|
||||
- toolkit/bash/system/check-disk.sh(磁盘空间检查)
|
||||
|
||||
维护记录:
|
||||
2025-11-14:
|
||||
- 初始创建
|
||||
76
skills/toolkit/bash/system/check-service.sh
Normal file
76
skills/toolkit/bash/system/check-service.sh
Normal file
@@ -0,0 +1,76 @@
|
||||
#!/bin/bash
# Service Health Checker
# Checks the health of a service; supports HTTP, database and Redis checks.
#
# Usage: check-service.sh [service-name] [check-type] [timeout-seconds]
#   service-name  name shown in the report (default: auth-service)
#   check-type    http | db | database | redis (default: http)
#   timeout       timeout in seconds (default: 5)

# Abort on the first unhandled command failure.
set -e

SERVICE_NAME="${1:-auth-service}"
CHECK_TYPE="${2:-http}"
TIMEOUT="${3:-5}"

# Return 0 when $1 is a non-negative number (digits with an optional decimal
# fraction), 1 otherwise.
is_valid_timeout() {
  [[ "$1" =~ ^[0-9]+([.][0-9]+)?$ ]]
}

# The timeout is handed to the probe commands as a number of seconds. Reject
# anything else up front so a typo is reported as a parameter error rather
# than as a failed health check.
if ! is_valid_timeout "$TIMEOUT"; then
  echo "❌ 超时时间必须是数字(秒): $TIMEOUT" >&2
  exit 2
fi

echo "🔍 检查服务健康状态"
echo "服务: $SERVICE_NAME"
echo "检查类型: $CHECK_TYPE"
echo "超时: ${TIMEOUT}s"
echo "=========================================="

# HTTP health check.
# Requests the health endpoint once and reports status code and body.
# The URL defaults to http://localhost:3000/health and can be overridden
# through the HEALTH_URL environment variable.
# Returns 0 when the endpoint answers with status 200, 1 otherwise.
check_http() {
  local url="${HEALTH_URL:-http://localhost:3000/health}"
  local response status body

  # -w appends the status code on a line of its own after the body, so one
  # request provides both. When curl itself fails (connection refused,
  # timeout, ...) the status is reported as "000"; previously curl's own
  # "000" and the fallback "000" were concatenated into "000000".
  if response=$(curl -s --max-time "$TIMEOUT" -w '\n%{http_code}' "$url" 2>/dev/null); then
    status=${response##*$'\n'}
    body=${response%$'\n'*}
  else
    status="000"
    body=""
  fi

  if [ "$status" != "200" ]; then
    echo "❌ HTTP服务异常 (状态码: $status)"
    return 1
  fi

  echo "✅ HTTP服务正常 (状态码: 200)"
  # jq is optional: if it is missing or the body is not JSON, say so.
  printf '%s\n' "$body" | jq '.' 2>/dev/null || echo " (无法解析JSON响应)"
}

# Database connection check (PostgreSQL, via pg_isready).
# Connection settings come from DB_HOST, DB_PORT, DB_NAME and DB_USER, with
# an optional .env file in the current directory loaded first.
# Returns 0 when pg_isready succeeds, 1 otherwise.
check_database() {
  # NOTE(review): .env is executed as shell code - only run this script from
  # a directory whose .env file is trusted.
  if [ -f ".env" ]; then
    # The explicit "./" stops `source` from searching PATH for ".env".
    # shellcheck disable=SC1091
    source ./.env
  fi

  DB_HOST="${DB_HOST:-localhost}"
  DB_PORT="${DB_PORT:-5432}"
  DB_NAME="${DB_NAME:-myapp}"
  DB_USER="${DB_USER:-postgres}"

  if ! pg_isready -h "$DB_HOST" -p "$DB_PORT" -U "$DB_USER" -d "$DB_NAME" \
    -t "$TIMEOUT" >/dev/null 2>&1; then
    echo "❌ 数据库连接失败"
    return 1
  fi

  echo "✅ 数据库连接正常"
  echo " Host: $DB_HOST:$DB_PORT"
  echo " Database: $DB_NAME"
}

# Redis connection check.
# Sends PING to REDIS_HOST:REDIS_PORT and expects the reply "PONG".
# Returns 0 on PONG, 1 otherwise (no connection, error reply, timeout).
check_redis() {
  REDIS_HOST="${REDIS_HOST:-localhost}"
  REDIS_PORT="${REDIS_PORT:-6379}"

  local -a probe=(redis-cli -h "$REDIS_HOST" -p "$REDIS_PORT" ping)
  # Bound the probe with the `timeout` utility when it is installed, so an
  # unreachable host cannot stall the check beyond TIMEOUT seconds.
  # NOTE(review): redis-cli's own timeout option is assumed not to be
  # available on every installed version - confirm.
  if command -v timeout >/dev/null 2>&1; then
    probe=(timeout "$TIMEOUT" "${probe[@]}")
  fi

  local reply
  reply=$("${probe[@]}" 2>/dev/null) || reply=""

  # Compare the reply instead of trusting the exit status alone, so an error
  # reply from the server is not reported as healthy.
  if [ "$reply" != "PONG" ]; then
    echo "❌ Redis连接失败"
    return 1
  fi

  echo "✅ Redis连接正常"
  echo " Host: $REDIS_HOST:$REDIS_PORT"
}

# Dispatch on the requested check type. A failed check exits with 1; an
# unknown type is a parameter error and exits with 2.
case "$CHECK_TYPE" in
  http)
    check_http || exit 1
    ;;
  db | database)
    check_database || exit 1
    ;;
  redis)
    check_redis || exit 1
    ;;
  *)
    echo "❌ 未知的检查类型: $CHECK_TYPE" >&2
    echo "支持的类型: http, db, redis" >&2
    exit 2
    ;;
esac

# Reached only when the selected check succeeded; every failure path exits
# earlier with a non-zero status.
echo "=========================================="
echo "✅ 所有检查通过"
exit 0
Reference in New Issue
Block a user