一、部署flink-1.16.3、jdk-11.0.19、zookeeper-3.4.13、kafka_2.12-2.2.2
#软件下载
https://archive.apache.org/dist/kafka/2.2.2/kafka_2.12-2.2.2.tgz
https://archive.apache.org/dist/zookeeper/zookeeper-3.4.13/zookeeper-3.4.13.tar.gz
https://archive.apache.org/dist/flink/flink-1.16.3/flink-1.16.3-bin-scala_2.12.tgz
https://www.oracle.com/java/technologies/javase/jdk11-archive-downloads.html
#环境信息:
bcLinux For Euler 21.10 LTS
#4台机器部署flink
10.xx.xx.207、10.xx.xx.208、10.xx.xx.209、10.xx.xx.210
#3台机器部署zookeeper 、kafka
10.xx.xx.207、10.xx.xx.208、10.xx.xx.209
#使用普通用户nwom部署及启动上述程序。
#部署路径:
都放在/data/software下
cd /data/software
[nwom@gx-087 software]$ ls
flink-1.16.3 jdk-11.0.19 kafka_2.12-2.2.2 kafka-logs tmpflink zookeeper-3.4.13
#配置hosts信息
[nwom@gx-085 software]$ cat /etc/hosts
10.xx.xx.207 gx-085
10.xx.xx.208 gx-086
10.xx.xx.209 gx-087
10.xx.xx.210 gx-088
#内存情况
[nwom@gx-085 conf]$ free -mh
total used free shared buff/cache available
Mem: 753Gi 33Gi 714Gi 686Mi 5.5Gi 716Gi
Swap: 4.0Gi 0B 4.0Gi
### 一、部署flink-1.16.3、jdk-11.0.19
#### 1.1 修改flink配置文件
```shell
#注意! 4台机器的配置文件内容都是一样的即可。
cd /data/software
tar xf jdk-11.0.19_linux-x64_bin.tar.gz
tar xf flink-1.16.3-bin-scala_2.12.tgz
cd /data/software/flink-1.16.3/conf
#编辑文件 masters 、 workers 、 flink-conf.yaml
masters文件
[nwom@gx-085 conf]$ cat masters
gx-085:8081
workers文件
[nwom@gx-085 conf]$ cat workers
gx-085
gx-086
gx-087
gx-088
flink-conf.yaml
#全部内容:
[nwom@gx-085 conf]$ cat flink-conf.yaml | egrep -v "^$|^#"
jobmanager.rpc.address: gx-085
jobmanager.rpc.port: 6123
jobmanager.bind-host: 0.0.0.0
jobmanager.memory.process.size: 16384m
taskmanager.bind-host: 0.0.0.0
taskmanager.memory.process.size: 163840m
taskmanager.numberOfTaskSlots: 100
parallelism.default: 1
jobmanager.execution.failover-strategy: region
rest.port: 8081
rest.address: localhost
rest.bind-address: 0.0.0.0
web.cancel.enable: true
io.tmp.dirs: /data/software/tmpflink
classloader.resolve-order: parent-first
taskmanager.memory.network.fraction: 0.3
#添加jdk相关配置
env.java.home: /data/software/jdk-11.0.19
akka.ask.timeout: 120s
heartbeat.timeout: 300000
cluster.evenly-spread-out-slots: true
task.cancellation.timeout: 0
taskmanager.network.memory.buffer-debloat.enabled: true
1.2 启动flink
#nwom用户启动。 仅登录10.xx.xx.207操作即可。
cd /data/software/flink-1.16.3/bin
./start-cluster.sh
(过程需要输入其他3台服务器的nwom密码;建议提前为nwom用户配置SSH免密登录,避免每次启动都要交互输入密码)
1.3 访问flink
http://10.xx.xx.207:8081/#/overview
1.4 部署自研的flink-lte.jar包
#将flink-lte-1.0.jar上传到/data/software/
#如果有新包,则点击上图的“Cancel Job”,然后替换新包启动。
cd /data/software/flink-1.16.3/bin
./flink run -c com.inspur.KafkaStreamingJob /data/software/flink-lte-1.0.jar
flink part end
二、部署zookeeper-3.4.13
#3台机器部署zookeeper 、kafka
10.xx.xx.207、10.xx.xx.208、10.xx.xx.209
cd /data/software
tar xf zookeeper-3.4.13.tar.gz
cd zookeeper-3.4.13/
2.1 修改zoo.cfg文件 (有区别项)
cd /data/software/zookeeper-3.4.13/conf
#10.xx.xx.207
[nwom@gx-085 conf]$ cat zoo.cfg | grep -v "^#"
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/software/zookeeper-3.4.13/data
clientPort=2181
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
server.1=gx-085:2888:3888
server.2=gx-086:2888:3888
server.3=gx-087:2888:3888
#10.xx.xx.208
[nwom@gx-086 conf]$ cat zoo.cfg | grep -v "^#"
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/software/zookeeper-3.4.13/data
clientPort=2181
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
server.1=gx-085:2888:3888
server.2=gx-086:2888:3888
server.3=gx-087:2888:3888
#10.xx.xx.209
[nwom@gx-087 conf]$ cat zoo.cfg | grep -v "^#"
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/data/software/zookeeper-3.4.13/data
clientPort=2181
autopurge.snapRetainCount=3
autopurge.purgeInterval=1
server.1=gx-085:2888:3888
server.2=gx-086:2888:3888
server.3=gx-087:2888:3888
2.2 有区别的地方 myid文件内容:
#10.xx.xx.207
[nwom@gx-085 data]$ cat /data/software/zookeeper-3.4.13/data/myid
1
#10.xx.xx.208
[nwom@gx-086 zookeeper-3.4.13]$ cat /data/software/zookeeper-3.4.13/data/myid
2
#10.xx.xx.209
[nwom@gx-087 software]$ cat /data/software/zookeeper-3.4.13/data/myid
3
2.3 启动zookeeper
#上述三台xx.207/208/209 都执行启动。
cd /data/software/zookeeper-3.4.13
./bin/zkServer.sh start
#检查状态
./bin/zkServer.sh status
三、部署kafka_2.12-2.2.2
#3台机器部署zookeeper 、kafka
10.xx.xx.207、10.xx.xx.208、10.xx.xx.209
cd /data/software
tar xf kafka_2.12-2.2.2.tgz
3.1 修改server.properties文件 (有区别:)
cd /data/software/kafka_2.12-2.2.2/config
#10.xx.xx.207
[nwom@gx-085 config]$ cat server.properties | grep -v "^#" | grep -v "^$"
#注意这里区别
broker.id=0
listeners=PLAINTEXT://gx-085:9092
advertised.listeners=PLAINTEXT://gx-085:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
#注意这里区别
log.dirs=/data/software/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=6
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
#注意这里区别
zookeeper.connect=gx-085:2181,gx-086:2181,gx-087:2181/kafka
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
#10.xx.xx.208
[nwom@gx-086 config]$ cat server.properties | grep -v "^#" | grep -v "^$"
#注意这里区别
broker.id=1
listeners=PLAINTEXT://gx-086:9092
advertised.listeners=PLAINTEXT://gx-086:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
#注意这里区别
log.dirs=/data/software/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=6
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
#注意这里区别
zookeeper.connect=gx-085:2181,gx-086:2181,gx-087:2181/kafka
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
#10.xx.xx.209
[nwom@gx-087 config]$ cat server.properties | grep -v "^#" | grep -v "^$"
#注意这里区别
broker.id=2
listeners=PLAINTEXT://gx-087:9092
advertised.listeners=PLAINTEXT://gx-087:9092
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
#注意这里区别
log.dirs=/data/software/kafka-logs
num.partitions=1
num.recovery.threads.per.data.dir=1
offsets.topic.replication.factor=1
transaction.state.log.replication.factor=1
transaction.state.log.min.isr=1
log.retention.hours=6
log.segment.bytes=1073741824
log.retention.check.interval.ms=300000
#注意这里区别
zookeeper.connect=gx-085:2181,gx-086:2181,gx-087:2181/kafka
zookeeper.connection.timeout.ms=6000
group.initial.rebalance.delay.ms=0
3.2 启动kafka
#kafka启动: (3台服务器都启动)
cd /data/software/kafka_2.12-2.2.2
nohup sh ./bin/kafka-server-start.sh ./config/server.properties &
#查看kafka日志状态
[nwom@gx-085 kafka_2.12-2.2.2]$ tail -f logs/server.log
#创建测试topic data
[nwom@gx-085 kafka_2.12-2.2.2]$ bin/kafka-topics.sh --create --zookeeper gx-085:2181/kafka --replication-factor 1 --partitions 1 --topic data
Created topic data.
#创建测试topic city_cell
bin/kafka-topics.sh --create --zookeeper gx-085:2181/kafka --replication-factor 1 --partitions 1 --topic city_cell
#查看目前已有topic
[nwom@gx-085 bin]$ ./kafka-topics.sh --list --zookeeper gx-085:2181/kafka
3.3 推送测试数据到kafka (具体jar还是要找开发要)
[nwom@gx-085 config]$ crontab -l
*/5 * * * * bash /data/software/shellDir/pushDataKafka.sh
#测试脚本内容:
[nwom@gx-085 config]$ cat /data/software/shellDir/pushDataKafka.sh
#!/bin/bash
# Push every line of a local CSV file to a Kafka topic as one message each,
# then print the number of lines sent.
set -euo pipefail

KAFKA_BROKER=gx-085:9092,gx-086:9092,gx-087:9092
TOPIC_NAME=data
local_path=/data/software/shellDir/data.csv

# Fail fast with a clear message instead of silently pushing nothing.
[[ -r "$local_path" ]] || { echo "data file not readable: $local_path" >&2; exit 1; }

# Feed the whole file to a SINGLE producer process. kafka-console-producer
# sends each stdin line as a separate message, so the result is identical to
# the old per-line loop — but the old version forked one JVM per CSV line,
# which is orders of magnitude slower and hammers the host every 5 minutes
# from cron.
/data/software/kafka_2.12-2.2.2/bin/kafka-console-producer.sh \
  --broker-list "$KAFKA_BROKER" --topic "$TOPIC_NAME" < "$local_path"

# Report how many lines were pushed (same count the original loop printed).
# NOTE(review): like the original `while read` loop, a final line without a
# trailing newline is not counted by `wc -l`.
count=$(wc -l < "$local_path")
echo "$count"
#测试数据内容:
[nwom@gx-085 config]$ head -3 /data/software/shellDir/data.csv
8004,CEP_admin_20240530113917019,1719817881,13535300261,4.60005E+14,3.56714E+14,46000C2D9297,28074,782,,,,,,UE Triggered Service Request(4G),6,,,,,,,,,,,,,,xx家园12、14、16#楼室分-HLW-1,高铁-南广线,,,,,,,,,,,,,,274
8004,CEP_admin_20240530113917019,1719817856,18376062109,4.60026E+14,8.67939E+14,46000E01EE6F,0,771,,,,,,Create Session,6,,,,,,,,,,,,,,高铁3-1FDD18-HLH-5,高铁-南广线,,,,,,,,,,,,,,273
END