1. Set environment variables
export JAVA_HOME=/cluster/jdk
export CLASSPATH=.:$JAVA_HOME/lib/tools.jar:$JAVA_HOME/lib/dt.jar
export HIVE_HOME=/cluster/hive   # $HIVE_HOME/bin is referenced in PATH below
export MYSQL_HOME=/cluster/mysql
export HADOOP_HOME=/cluster/hadoop3
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_CLASSPATH=`hadoop classpath`
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
export FLINK_HOME=/cluster/flink
export SPARK_HOME=/cluster/spark
export ZK_HOME=/cluster/zookeeper
export NACOS_HOME=/cluster/nacos
export KAFKA_HOME=/cluster/kafka
export DATART_HOME=/cluster/datart
export HBASE_HOME=/cluster/hbase
export SEATUNNEL_HOME=/cluster/seatunnel
export STREAMPARK_HOME=/cluster/streampark
export KYUUBI_HOME=/cluster/kyuubi
export DINKY_HOME=/cluster/dinky
export INLONG_HOME=/cluster/inlong
export DORIS_HOME=/cluster/doris
export BE_HOME=$DORIS_HOME/be
export FE_HOME=$DORIS_HOME/fe
export M2_HOME=/cluster/maven
export PATH=$PATH:$M2_HOME/bin:$BE_HOME/bin:$FE_HOME/bin:$DINKY_HOME/bin:$INLONG_HOME/bin:$DATART_HOME/bin:$KYUUBI_HOME/bin:$HBASE_HOME/bin:$SEATUNNEL_HOME/bin:$STREAMPARK_HOME/bin:$FLINK_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin:$KAFKA_HOME/bin:$MYSQL_HOME/bin:$HIVE_HOME/bin:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$NACOS_HOME/bin:$ZK_HOME/bin
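These exports are typically appended to /etc/profile (or ~/.bashrc). One subtlety: the HADOOP_CLASSPATH line shells out to `hadoop classpath` at source time, so $HADOOP_HOME/bin must already be resolvable when that line runs (moving the PATH export above it, or using $HADOOP_HOME/bin/hadoop, avoids a "command not found" in a fresh shell). A minimal sketch to apply and spot-check the environment, assuming the block above lives in /etc/profile:

# Reload the profile in the current shell and verify a few variables.
source /etc/profile
echo "JAVA_HOME=$JAVA_HOME"
java -version        # should report the JDK under /cluster/jdk
hadoop version       # confirms $HADOOP_HOME/bin is on PATH
echo "$HADOOP_CLASSPATH" | tr ':' '\n' | head   # entries resolved by `hadoop classpath`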
2. Flink configuration file config.yaml
env:
  java:
    opts:
      all: --add-exports=java.base/sun.net.util=ALL-UNNAMED --add-exports=java.rmi/sun.rmi.registry=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.api=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.file=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.parser=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.tree=ALL-UNNAMED --add-exports=jdk.compiler/com.sun.tools.javac.util=ALL-UNNAMED --add-exports=java.security.jgss/sun.security.krb5=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.net=ALL-UNNAMED --add-opens=java.base/java.io=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/sun.nio.ch=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens=java.base/java.text=ALL-UNNAMED --add-opens=java.base/java.time=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED --add-opens=java.base/java.util.concurrent=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.atomic=ALL-UNNAMED --add-opens=java.base/java.util.concurrent.locks=ALL-UNNAMED
#==============================================================================
# Common
#==============================================================================
jobmanager:
  bind-host: 0.0.0.0
  rpc:
    address: 0.0.0.0
    port: 6123
  memory:
    process:
      size: 1600m
  execution:
    failover-strategy: region
  archive:
    fs:
      dir: hdfs://10.10.10.99:9000/flink/completed-jobs/

taskmanager:
  bind-host: 0.0.0.0
  host: 0.0.0.0
  numberOfTaskSlots: 100
  memory:
    process:
      size: 1728m
    network:
      fraction: 0.1
      min: 64mb
      max: 1gb

parallelism:
  default: 1

fs:
  default-scheme: hdfs://10.10.10.99:9000
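Since this configuration points several state locations at HDFS, it helps to pre-create the directories and confirm the NameNode at 10.10.10.99:9000 is reachable. A minimal sketch, using the paths from this config:

# Create the HDFS directories referenced by config.yaml.
hdfs dfs -mkdir -p /flink/completed-jobs \
                   /flink/ha \
                   /flink/flink-checkpoints \
                   /flink/flink-savepoints \
                   /flink/historyserver/completed-jobs
hdfs dfs -ls /flink    # verify they exist and are writable by the Flink user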
#==============================================================================
# High Availability (ZooKeeper authentication is not enabled here; TODO: look into how to enable ZooKeeper authentication)
#==============================================================================
high-availability:
  # The high-availability mode. Possible options are 'NONE' or 'zookeeper'.
  type: zookeeper
  # The path where metadata for master recovery is persisted. While ZooKeeper stores
  # the small ground truth for checkpoint and leader election, this location stores
  # the larger objects, like persisted dataflow graphs.
  #
  # Must be a durable file system that is accessible from all nodes
  # (like HDFS, S3, Ceph, NFS, ...)
  storageDir: hdfs:///flink/ha/
  zookeeper:
    # The list of ZooKeeper quorum peers that coordinate the high-availability
    # setup. This must be a list of the form:
    # "host1:clientPort,host2:clientPort,..." (default clientPort: 2181)
    quorum: localhost:2181
    client:
      # ACL options are based on https://zookeeper.apache.org/doc/r3.1.2/zookeeperProgrammers.html#sc_BuiltinACLSchemes
      # It can be either "creator" (ZOO_CREATE_ALL_ACL) or "open" (ZOO_OPEN_ACL_UNSAFE)
      # The default value is "open" and it can be changed to "creator" if ZK security is enabled
      acl: open
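Once the cluster is running in HA mode, the leader-election metadata should appear under ZooKeeper's /flink root (the default of high-availability.zookeeper.path.root). A quick check, assuming the zkCli.sh shipped under $ZK_HOME:

# Inspect the znodes Flink creates for leader election and job metadata.
$ZK_HOME/bin/zkCli.sh -server localhost:2181 ls /flink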
#==============================================================================
# Fault tolerance and checkpointing
#==============================================================================
# The backend that will be used to store operator state checkpoints if
# checkpointing is enabled. Checkpointing is enabled when execution.checkpointing.interval > 0.
# Execution checkpointing related parameters. Please refer to CheckpointConfig and CheckpointingOptions for more details.
execution:
  checkpointing:
    interval: 3min
    externalized-checkpoint-retention: DELETE_ON_CANCELLATION
    max-concurrent-checkpoints: 1
    min-pause: 0s
    mode: EXACTLY_ONCE
    timeout: 10min
    tolerable-failed-checkpoints: 0
    unaligned: false

state:
  backend:
    type: hashmap
    incremental: false
  checkpoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-checkpoints
  savepoints:
    dir: hdfs://10.10.10.99:9000/flink/flink-savepoints
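With checkpoints and savepoints retained on HDFS, a job can be stopped with a savepoint and resumed from it. A hedged sketch (the job ID and savepoint subdirectory below are placeholders):

# Take a savepoint and stop the job (job ID is hypothetical).
flink stop --savepointPath hdfs://10.10.10.99:9000/flink/flink-savepoints <jobId>
# Resume from the savepoint (or from an externalized checkpoint directory).
flink run -s hdfs://10.10.10.99:9000/flink/flink-savepoints/savepoint-xxxxxx \
  -c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
  /cluster/flink/examples/streaming/TopSpeedWindowing.jar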
#==============================================================================
# Rest & web frontend
#==============================================================================
rest:
  address: 0.0.0.0
  bind-address: 0.0.0.0

web:
  submit:
    enable: true
  cancel:
    enable: true
#==============================================================================
# Advanced
#==============================================================================
io:
  tmp:
    dirs: /tmp

classloader:
  resolve:
    order: child-first
#==============================================================================
# Flink Cluster Security Configuration
#==============================================================================
# Kerberos authentication for various components - Hadoop, ZooKeeper, and connectors -
# may be enabled in four steps:
# 1. configure the local krb5.conf file
# 2. provide Kerberos credentials (either a keytab or a ticket cache w/ kinit)
# 3. make the credentials available to various JAAS login contexts
# 4. configure the connector to use JAAS/SASL
#
# The below configures how Kerberos credentials are provided. A keytab will be used instead of
# a ticket cache if the keytab path and principal are set.
# security:
#   kerberos:
#     login:
#       use-ticket-cache: true
#       keytab: /path/to/kerberos/keytab
#       principal: flink-user
#       # The configuration below defines which JAAS login contexts
#       contexts: Client,KafkaClient
#==============================================================================
# ZK Security Configuration
#==============================================================================
# zookeeper:
#   sasl:
#     # Below configurations are applicable if ZK ensemble is configured for security
#     #
#     # Override below configuration to provide custom ZK service name if configured
#     # service-name: zookeeper
#     #
#     # The configuration below must match one of the values set in "security.kerberos.login.contexts"
#     login-context-name: Client
#==============================================================================
# HistoryServer
#==============================================================================
historyserver:
  web:
    address: 0.0.0.0
    port: 8082
  archive:
    fs:
      # Note: for completed jobs to show up in the HistoryServer, this directory
      # normally has to match jobmanager.archive.fs.dir configured above.
      dir: hdfs://10.10.10.99:9000/flink/historyserver/completed-jobs/
      refresh-interval: 10000
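The HistoryServer is a separate process; with the section above in place it can be started from the Flink distribution and checked on port 8082:

# Start the standalone HistoryServer and probe its job overview endpoint.
$FLINK_HOME/bin/historyserver.sh start
curl -s http://localhost:8082/jobs/overview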
3. Submitting and running jobs
I. Flink job submission modes and the corresponding commands
(1) Per-Job mode
Note: per-job mode has been deprecated in recent Flink releases in favor of application mode; the command below is kept for reference.
/cluster/flink/bin/flink run \
-t yarn-per-job \
-d \
-Dyarn.application.name=YarnPerJobTopSpeedWindowing \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar 3000
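After submission the job runs as its own YARN application. A quick way to verify, assuming the YARN CLI from $HADOOP_HOME is on PATH:

# The application should be listed with the name set via -Dyarn.application.name.
yarn application -list
# The job's Flink web UI is reachable through the ApplicationMaster link in the YARN UI.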
====================================================================================================================
(2) Session mode
This mode first starts a YARN session; jobs are then submitted against the session's application ID.
1. Start the YARN session. On startup it prints the application ID, e.g. application_1740741184000_0002
/cluster/flink/bin/yarn-session.sh \
-jm 2048 \
-tm 2048 \
-s 1 \
-nm yarn-session-app \
-d
Stop it with: echo "stop" | ./bin/yarn-session.sh -id application_1740741184000_0002
2. Submit a job to the YARN session
/cluster/flink/bin/flink run \
-Dyarn.application.name=YarnSessionAppTopSpeedWindowing \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
-yid application_1740741184000_0002 \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar
This brings up a new Flink cluster process on YARN, so its web UI port is not necessarily 8081.
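Jobs inside the session can be listed and cancelled against the same application ID. A brief sketch (the job ID is a placeholder):

# List jobs running in the session (application ID from step 1).
/cluster/flink/bin/flink list -yid application_1740741184000_0002
# Cancel a specific job by its Flink job ID.
/cluster/flink/bin/flink cancel -yid application_1740741184000_0002 <jobId>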
====================================================================================================================
(3) Application mode
/cluster/flink/bin/flink run-application \
-t yarn-application \
-Dparallelism.default=1 \
-Djobmanager.memory.process.size=2048m \
-Dtaskmanager.memory.process.size=2048m \
-Dyarn.application.name=RunApplicationTopSpeedWindowing \
-Dtaskmanager.numberOfTaskSlots=1 \
-c org.apache.flink.streaming.examples.windowing.TopSpeedWindowing \
/cluster/flink/examples/streaming/TopSpeedWindowing.jar 3000
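In application mode the cluster lives and dies with the job; status and cancellation go through the yarn-application target, following the pattern documented for the Flink YARN CLI (the application ID below is a placeholder):

# List the job running in the application cluster.
/cluster/flink/bin/flink list -t yarn-application -Dyarn.application.id=application_XXXX_YY
# Cancel it; the YARN application shuts down once the job is cancelled.
/cluster/flink/bin/flink cancel -t yarn-application -Dyarn.application.id=application_XXXX_YY <jobId>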
Issue 1: Caused by: org.apache.flink.configuration.IllegalConfigurationException:
The number of requested virtual cores for application master 1 exceeds
the maximum number of virtual cores 0 available in the Yarn Cluster
"0 available virtual cores" means no NodeManagers had registered with the ResourceManager. In this case YARN had failed to start cleanly (an issue involving the spark-3.5.4-yarn-shuffle.jar file, typically deployed as a NodeManager auxiliary shuffle service); restarting the Hadoop cluster resolved it.
YARN web UI: http://ip:18088/cluster
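To confirm the NodeManagers are back before resubmitting, a quick check with the standard YARN CLI and ResourceManager REST API:

# Show all NodeManagers and their state; an empty list explains "0 available virtual cores".
yarn node -list -all
# Cluster-wide totals should report vcores > 0 once the NodeManagers have registered.
curl -s http://ip:18088/ws/v1/cluster/metrics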