1 Deploying the Cluster
1.1 Installation
# Create a namespace for middleware
kubectl create namespace middleware
# Create the ConfigMap that holds the RabbitMQ configuration files
kubectl apply -f rabbitmq-configmap.yaml
# Create the PersistentVolume (PV) and PersistentVolumeClaim (PVC) that store RabbitMQ's data, so the cluster's data survives a node failure
kubectl apply -f rabbitmq-persistentvolume.yaml
kubectl apply -f rabbitmq-persistentvolumeclaim.yaml
# A StatefulSet gives every instance a unique identity and a stable network identifier
kubectl apply -f rabbitmq-statefulset.yaml
# Create the Services
kubectl apply -f rabbitmq-service.yaml
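Once all of the manifests above have been applied (they are given in the sections below), a quick sanity check confirms the Pods come up:
# All three replicas (rabbitmq-0, rabbitmq-1, rabbitmq-2) should reach Running
kubectl get pods -n middleware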
1.2 rabbitmq-configmap
apiVersion: v1
kind: ConfigMap
metadata:
  name: rabbitmq-config
  namespace: middleware
data:
  enabled_plugins: |
    [rabbitmq_management,rabbitmq_peer_discovery_k8s,rabbitmq_delayed_message_exchange].
  rabbitmq.conf: |
    cluster_formation.peer_discovery_backend = rabbit_peer_discovery_k8s
    cluster_formation.k8s.host = kubernetes.default.svc.cluster.local
    cluster_formation.k8s.address_type = hostname
    cluster_formation.node_cleanup.interval = 10
    cluster_formation.node_cleanup.only_log_warning = true
    cluster_partition_handling = autoheal
    queue_master_locator = min-masters
    loopback_users.guest = false
    cluster_formation.randomized_startup_delay_range.min = 0
    cluster_formation.randomized_startup_delay_range.max = 2
    # service_name must be set, otherwise the Pods cannot start properly; with it set here, the K8S_SERVICE_NAME env variable in the StatefulSet can be omitted
    cluster_formation.k8s.service_name = rabbitmq-headless
    # hostname_suffix must be set, otherwise the nodes cannot form a cluster
    cluster_formation.k8s.hostname_suffix = .rabbitmq-headless.middleware.svc.cluster.local
    # Memory high watermark
    vm_memory_high_watermark.absolute = 1.6GB
    # Free disk limit
    disk_free_limit.absolute = 2G
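To confirm a node actually picked up this configuration, the mounted files and the plugin list can be inspected in a running Pod. Note that rabbitmq_delayed_message_exchange is a community plugin that is not bundled with the official rabbitmq image, so its plugin file must already be present in the image's plugins directory:
# Check the mounted configuration and the enabled plugins
kubectl exec rabbitmq-0 -n middleware -- cat /etc/rabbitmq/rabbitmq.conf
kubectl exec rabbitmq-0 -n middleware -- rabbitmq-plugins list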
1.3 rabbitmq-headless
apiVersion: v1
kind: Service
metadata:
  name: rabbitmq-headless
  namespace: middleware
spec:
  clusterIP: None
  ports:
    - name: amqp
      port: 5672
    - name: management
      port: 15672
    - name: cluster
      port: 25672
  selector:
    app: rabbitmq
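A headless Service publishes one DNS A record per ready Pod; the peer discovery and hostname_suffix settings in section 1.2 depend on these records resolving. A quick check from a throwaway Pod (busybox image assumed):
# One address per ready rabbitmq Pod should be returned
kubectl run dns-check --rm -it --image=busybox --restart=Never -n middleware -- \
  nslookup rabbitmq-headless.middleware.svc.cluster.local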
1.4 rabbitmq-log-pvc
The managed-nfs-storage StorageClass here reuses the one set up for the nacos cluster deployment.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rabbitmq-log-pvc
  namespace: middleware
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 5Gi
  storageClassName: managed-nfs-storage
1.5 rabbitmq-persistentvolume
apiVersion: v1
kind: PersistentVolume
metadata:
  name: rabbitmq-pv
  annotations:
    pv.kubernetes.io/provisioned-by: fuseim.pri/ifs
spec:
  capacity:
    storage: 10Gi
  accessModes:
    - ReadWriteMany
  nfs:
    server: 10.101.12.95
    path: /data/k8s_storage/rabbitmq
  persistentVolumeReclaimPolicy: Retain
  storageClassName: managed-nfs-storage
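Before applying the PV it is worth verifying that the NFS export is actually reachable from the nodes; a sketch, assuming showmount (nfs-utils) is installed on the node:
# The export list should include /data/k8s_storage/rabbitmq
showmount -e 10.101.12.95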
1.6 rabbitmq-persistentvolumeclaim
# PersistentVolumeClaim example
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: rabbitmq-pvc
  namespace: middleware
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      storage: 10Gi
  storageClassName: managed-nfs-storage
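Both this claim and the log claim from section 1.4 must end up Bound before the StatefulSet is started; a minimal check:
# rabbitmq-pvc and rabbitmq-log-pvc should both show STATUS Bound
kubectl get pvc -n middleware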
1.7 rabbitmq-service
apiVersion: v1
kind: Service
metadata:
  name: rabbitmq-web-service
  namespace: middleware
spec:
  type: NodePort
  ports:
    - name: http
      port: 15672
      targetPort: 15672
      nodePort: 31567
  selector:
    app: rabbitmq
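With the NodePort in place, the management UI should answer on port 31567 of any node; a quick probe, where <node-ip> stands for the address of any cluster node:
# An HTTP 200 means the management listener is reachable through the NodePort
curl -s -o /dev/null -w '%{http_code}\n' http://<node-ip>:31567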
1.8 rabbitmq-statefulset
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: rabbitmq
  namespace: middleware
spec:
  replicas: 3
  serviceName: rabbitmq-headless
  selector:
    matchLabels:
      app: rabbitmq
  template:
    metadata:
      labels:
        app: rabbitmq
    spec:
      containers:
        - name: rabbitmq
          image: rabbitmq:3.8.17-management
          ports:
            - containerPort: 5672
            - containerPort: 15672
          volumeMounts:
            - name: data
              mountPath: /var/lib/rabbitmq
            - name: conf
              mountPath: /etc/rabbitmq
            - name: log
              mountPath: /var/log/rabbitmq
          env:
            - name: RABBITMQ_NODENAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: RABBITMQ_ERLANG_COOKIE
              value: "nrB35tmJx5sfE3EaaYTMW7S8DxpajMrP"
            - name: RABBITMQ_LOG_BASE
              value: "/var/log/rabbitmq"
      volumes:
        - name: data
          persistentVolumeClaim:
            claimName: rabbitmq-pvc
        - name: conf
          configMap:
            name: rabbitmq-config
        - name: log
          persistentVolumeClaim:
            claimName: rabbitmq-log-pvc
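Once the StatefulSet is up, the three nodes should have discovered each other through the headless Service; a minimal check:
kubectl rollout status statefulset/rabbitmq -n middleware
# All three nodes should be listed under running_nodes
kubectl exec rabbitmq-0 -n middleware -- rabbitmqctl cluster_status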
2 Troubleshooting
2.1 PVC creation failure
[root@master1 rabbitmq]# kubectl get pvc -n middleware
NAME                       STATUS    VOLUME        CAPACITY   ACCESS MODES   STORAGECLASS   AGE
rabbitmq-data-rabbitmq-0   Pending                                                          15m
rabbitmq-pvc               Bound     rabbitmq-pv   5Gi        RWO            standard       15m
Checking the name of the StorageClass shows that storageClassName in the PV and PVC should be managed-nfs-storage:
[root@master1 rabbitmq]# kubectl get storageclass
NAME                  PROVISIONER      RECLAIMPOLICY   VOLUMEBINDINGMODE   ALLOWVOLUMEEXPANSION   AGE
managed-nfs-storage   fuseim.pri/ifs   Delete          Immediate           false                  3h7m
After changing rabbitmq-persistentvolume.yaml and rabbitmq-persistentvolumeclaim.yaml:
# Check for stale PVCs; PVCs are namespaced
kubectl get pvc -n middleware
# Delete the PVCs so they are unbound from the PV
kubectl delete pvc rabbitmq-data-rabbitmq-0 -n middleware
kubectl delete pvc rabbitmq-pvc -n middleware
# List the PVs
kubectl get pv
# Delete the stale PV; a bound PV requires its PVC to be deleted first, and PVs are not namespaced
kubectl delete pv rabbitmq-pv
cd /appdata/download/rabbitmq
kubectl apply -f rabbitmq-persistentvolume.yaml
kubectl apply -f rabbitmq-persistentvolumeclaim.yaml
# A StatefulSet gives every instance a unique identity and a stable network identifier
kubectl apply -f rabbitmq-statefulset.yaml
# Create the Services
kubectl apply -f rabbitmq-service.yaml
After these commands finish, the old Pods have to be deleted; the StatefulSet then recreates them automatically, as shown below.
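A label-based delete keeps this to one command; a sketch, relying on the app=rabbitmq label from the StatefulSet:
# The StatefulSet controller recreates the Pods with the new PVC bindings
kubectl delete pod -l app=rabbitmq -n middleware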
2.2 Management port 15672 not up
The following log entries appeared. The {bad_header,<<"GET / HT">>} error means the first bytes of an HTTP request ("GET / HTTP/1.1") arrived on the AMQP port 5672, i.e. HTTP traffic meant for the management UI was hitting the wrong port:
2024-07-16 06:47:18.299 [info] <0.1754.0> Closing all channels from connection '10.42.7.3:33078 -> 10.42.6.3:5672' because it has been closed
2024-07-16 06:49:17.462 [info] <0.1783.0> accepting AMQP connection <0.1783.0> (10.42.7.3:33480 -> 10.42.6.3:5672)
2024-07-16 06:49:17.462 [error] <0.1783.0> closing AMQP connection <0.1783.0> (10.42.7.3:33480 -> 10.42.6.3:5672):
{bad_header,<<"GET / HT">>}
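The same error can be reproduced by pointing an HTTP client at the AMQP port; a sketch, assuming the curlimages/curl image:
# Sending HTTP bytes to 5672 makes RabbitMQ log {bad_header,<<"GET / HT">>}
kubectl run http-probe --rm -it --image=curlimages/curl --restart=Never -n middleware --command -- \
  curl -m 3 http://rabbitmq-headless.middleware.svc.cluster.local:5672/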
Inside the rabbitmq container, neither ss -tuln nor netstat -tul can be executed, because the image ships without these tools. A temporary Pod works around this, in an Ubuntu or an Alpine variant:
kubectl run ss-checker --rm -it --image=ubuntu --restart=Never -- sh -c 'apt-get update && apt-get install -y iproute2 && ss -tuln'
kubectl run ss-checker --rm=true -it --image=alpine --restart=Never -- /bin/sh -c 'apk add --no-cache iproute2 && ss -tuln'
Checking the base OS of the images shows that rabbitmq is built on Ubuntu, while nacos uses CentOS 7:
[root@master1 rabbitmq]# kubectl exec -it rabbitmq-0 -n middleware -- cat /etc/os-release
NAME="Ubuntu"
VERSION="20.04.2 LTS (Focal Fossa)"
ID=ubuntu
ID_LIKE=debian
PRETTY_NAME="Ubuntu 20.04.2 LTS"
VERSION_ID="20.04"
HOME_URL="https://www.ubuntu.com/"
SUPPORT_URL="https://help.ubuntu.com/"
BUG_REPORT_URL="https://bugs.launchpad.net/ubuntu/"
PRIVACY_POLICY_URL="https://www.ubuntu.com/legal/terms-and-policies/privacy-policy"
VERSION_CODENAME=focal
UBUNTU_CODENAME=focal
Because the rabbitmq image itself cannot inspect ports, a temporary container is needed for troubleshooting. Note that ss run in a separate Pod only shows that Pod's own network namespace; to see RabbitMQ's listeners, the tools must be installed inside the rabbitmq container itself, while the temporary Pod is useful for testing with nc whether RabbitMQ's ports accept connections.
# Create a temporary Ubuntu Pod that stays running
kubectl run ss-checker --image=ubuntu --restart=Never -- sleep 3600
# Wait for the Pod to become ready
kubectl wait --for=condition=Ready pod/ss-checker
# Get a shell in the temporary Pod
kubectl exec -it ss-checker -- /bin/bash
# Inside the temporary Pod, install ss and nc
apt-get update && apt-get install -y iproute2 netcat-openbsd
# ss only lists listeners in this Pod's own network namespace
ss -tuln
# To check whether a specific port on the service Pod accepts connections, use nc
nc -zv <service-pod-name-or-ip> <port>
# Clean up the temporary Pod afterwards
kubectl delete pod ss-checker
Alternatively, install the tools directly inside the container:
# Ubuntu (the rabbitmq image)
apt-get update && apt-get install -y iproute2
# CentOS (the nacos image)
yum install -y iproute
# Alpine
apk add --no-cache iproute2
Running ss -tuln inside the rabbitmq container revealed that port 15672 was not listening, while 25672 (the inter-node cluster port) and 5672 (the AMQP client port) worked normally; that usually means the management UI's configuration or exposure is wrong. The root cause was the ConfigMap: the earlier version was too minimal, and since the ConfigMap mounted over /etc/rabbitmq replaces the image's own enabled_plugins file, it had effectively switched the management plugin off. The complete configuration is given in section 1.2 above.
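After updating the ConfigMap, the Pods must be restarted to pick up the new enabled_plugins, and the listeners can then be confirmed with RabbitMQ's own tooling:
kubectl apply -f rabbitmq-configmap.yaml
kubectl delete pod -l app=rabbitmq -n middleware
# Once the Pods are back, port 15672 (HTTP API) should appear in the listener list
kubectl exec rabbitmq-0 -n middleware -- rabbitmq-diagnostics listeners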
2.3 Usage
Applications inside the cluster connect to RabbitMQ at:
rabbitmq-headless.middleware.svc.cluster.local:5672
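Reachability of that address can be verified from a throwaway Pod; a sketch, assuming the alpine image and its netcat-openbsd package:
# nc -zv only opens and closes the connection, confirming the AMQP port answers
kubectl run amqp-check --rm -it --image=alpine --restart=Never -n middleware -- \
  sh -c 'apk add --no-cache netcat-openbsd && nc -zv rabbitmq-headless.middleware.svc.cluster.local 5672'
The management UI is served separately at http://<node-ip>:31567 through the NodePort Service from section 1.7; because loopback_users.guest = false is set in section 1.2, the default guest account may log in from remote hosts.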