一、软件及系统信息
redis:redis-6.2.6
redis_exporter:redis_exporter-v1.50.0.linux-amd64.tar.gz
# cat /etc/anolis-release
Anolis OS release 8.9
grafana: 7.5.3
二、下载地址
https://github.com/oliver006/redis_exporter/releases?page=3
https://github.com/oliver006/redis_exporter/releases/download/v1.50.0/redis_exporter-v1.50.0.linux-amd64.tar.gz
三、安装
# tar -zxf redis_exporter-v1.50.0.linux-amd64.tar.gz
# cat /usr/lib/systemd/system/redis_exporter.service
# systemd unit that runs redis_exporter as the unprivileged "monitor" user
# and serves Prometheus metrics on TCP port 9121.
[Unit]
Description=redis_exporter
Documentation=https://github.com/oliver006/redis_exporter
After=network.target

[Service]
Type=simple
User=monitor
# NOTE(review): --redis.password on the command line is visible to every local
# user through the process list. redis_exporter also accepts the password via
# the REDIS_PASSWORD environment variable, which can be supplied from a
# root-only file through EnvironmentFile= instead.
ExecStart=/opt/softs/redis_exporter-v1.50.0.linux-amd64/redis_exporter \
--redis.addr=redis://192.168.8.147:6379 \
--redis.password=ge********z!e68 \
--web.listen-address=0.0.0.0:9121
Restart=on-failure

[Install]
WantedBy=multi-user.target
# useradd -r -s /usr/sbin/nologin monitor
-r:创建系统账户,通常用于运行服务。
-s /usr/sbin/nologin:指定用户的登录 Shell 为 /usr/sbin/nologin,这是一个特殊的 Shell,不允许交互式登录
Note: the unit file stays owned by root with mode 0644. It must not be writable by the service account, and systemd logs a warning when a unit file is marked executable.
# chown root:root /usr/lib/systemd/system/redis_exporter.service
# chown monitor:monitor /opt/softs/redis_exporter-v1.50.0.linux-amd64/redis_exporter
# chmod 644 /usr/lib/systemd/system/redis_exporter.service
# systemctl daemon-reload
Note: "enable --now" starts the service immediately and also enables it at boot.
# systemctl enable --now redis_exporter.service
# firewall-cmd --zone=public --add-port=9121/tcp --permanent
# firewall-cmd --reload
四、prometheus配置
# vi /usr/local/prometheus/prometheus.yml
# Scrape job for the redis_exporter instances; this list item belongs under
# the top-level `scrape_configs:` key of prometheus.yml.
# The `role` label is static target metadata and does not follow a failover.
- job_name: 'redis'
  metrics_path: /metrics
  static_configs:
    - targets: ["192.168.8.146:9121"]
      labels:
        env: 'prod'
        group: 'redis-cluster'
        role: 'master'
    - targets: ["192.168.8.147:9121"]
      labels:
        env: 'prod'
        group: 'redis-cluster'
        role: 'slave'
# /usr/local/prometheus/promtool check config /usr/local/prometheus/prometheus.yml
# curl -XPOST 127.0.0.1:9090/-/reload
五、测试
六、看板下载:
https://grafana.com/grafana/dashboards/
Grafana dashboards | Grafana Labs
搜索模板: redis_exporter for redis
七、告警配置
# cat redis-exporter-alert-rules.yml
# Prometheus alerting rules for Redis instances scraped through redis_exporter.
# A rules file must wrap its rules in `groups:` -> `name:` -> `rules:`.
groups:
  - name: redis-exporter-alerts
    rules:
      # Replication link between a replica and its master is down.
      # `role` here is the static label attached to the scrape target.
      - alert: RedisReplicationBroken
        expr: redis_master_link_up{role="slave"} == 0
        for: 2m  # fire only after the link has been down for 2 minutes
        labels:
          severity: critical
          env: "{{ $labels.env }}"
        annotations:
          summary: "Redis 主从复制中断 (实例: {{ $labels.instance }})"
          description: "从节点 {{ $labels.instance }} 超过2分钟未连接到主节点,请检查网络或主节点状态。"

      # Sustained high number of connected clients.
      - alert: RedisTooManyConnections
        expr: redis_connected_clients > 1000  # tune to the workload size
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: "Redis 客户端连接数激增"
          description: "实例 {{ $labels.instance }} 当前连接数 {{ $value }},可能遭受攻击或配置不合理。"

      # Clients waiting on blocking commands (e.g. BLPOP).
      - alert: RedisBlockedClients
        expr: redis_blocked_clients > 0
        for: 1m
        labels:
          severity: warning
        annotations:
          summary: "Redis 阻塞客户端存在"
          description: "实例 {{ $labels.instance }} 有 {{ $value }} 个客户端被阻塞,检查慢查询或大键操作。"

      # Keyspace hit ratio below 50% over a 5-minute rate window.
      - alert: RedisKeyspaceHitRateLow
        expr: (rate(redis_keyspace_hits_total[5m]) / (rate(redis_keyspace_hits_total[5m]) + rate(redis_keyspace_misses_total[5m]))) < 0.5
        for: 30m
        labels:
          severity: warning
        annotations:
          summary: "Redis 缓存命中率过低"
          # $value is a 0..1 ratio. Go templates have no arithmetic operators,
          # so the ratio is rendered as a percentage with humanizePercentage.
          description: "实例 {{ $labels.instance }} 缓存命中率仅 {{ $value | humanizePercentage }},需优化缓存策略或扩容。"