配置步骤
一、检查环境
- JDK
# 目前还是 JDK8 最适合 Hadoop
java -version
echo $JAVA_HOME
- Hadoop
hadoop version
echo $HADOOP_HOME
二、配置SSH免密登录
Hadoop需要通过SSH管理节点(即使在伪分布式模式下)
sudo apt install openssh-server # 安装SSH服务(如未安装)
cd ~/.ssh/
ssh-keygen -t rsa # 生成密钥对
cat ./id_rsa.pub >> ./authorized_keys # 添加公钥到授权列表
ssh localhost # 测试免密登录(首次可能需输入yes)
ssh-copy-id hadoop01 # 将公钥分发到目标主机(把 hadoop01 替换为实际主机名)
ssh hadoop01 # 验证到该主机的免密登录
三、修改Hadoop核心配置文件
进入配置文件目录:
cd $HADOOP_HOME/etc/hadoop
# 手动创建文件夹
mkdir -p /opt/software/hadoop/data/tmp
mkdir -p /opt/software/hadoop/data/logs
mkdir -p /opt/software/hadoop/data/namenode
mkdir -p /opt/software/hadoop/data/datanode
- hadoop-env.sh
vim hadoop-env.sh
# 在文件最后一行加上
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
export HADOOP_CONF_DIR=/opt/software/hadoop/etc/hadoop
export HADOOP_LOG_DIR=/opt/software/hadoop/data/logs
export HADOOP_PID_DIR=/opt/software/hadoop/data/tmp
- yarn-env.sh
vim yarn-env.sh
# 在文件最后一行加上
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
- mapred-env.sh
vim mapred-env.sh
# 在文件最后一行加上
export JAVA_HOME="/usr/lib/jvm/java-8-openjdk-amd64"
刷新环境变量,使当前 shell 会话立即生效(Hadoop 的启动脚本也会自动加载这些 env 文件)
source hadoop-env.sh
source yarn-env.sh
source mapred-env.sh
- core-site.xml
<configuration>
<!-- 指定 NameNode 通信的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop01:8020</value>
</property>
<!-- 指定 hadoop 运行过程中临时数据的存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/software/hadoop/data/tmp</value>
</property>
</configuration>
- hdfs-site.xml
<configuration>
<!-- NameNode 存放元数据的本地目录-->
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/software/hadoop/data/namenode</value>
</property>
<!-- NameNode web 端访问地址-->
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop01:9870</value>
</property>
<!-- Datanode 在本地存储 block 块的目录 -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/software/hadoop/data/datanode</value>
</property>
<!-- HDFS blocksize block 块大小 -->
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<!-- dfs.replication: block 的副本数(伪分布式单 DataNode 建议设为 1,否则会出现副本不足告警)-->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
- mapred-site.xml
<configuration>
<!-- 指定 MapReduce 程序运行在 Yarn 上 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
- yarn-site.xml
<configuration>
<!-- 指定 MR 走 shuffle -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 环境变量的继承 -->
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
</configuration>
四、初始化与启动Hadoop
- 格式化HDFS
hdfs namenode -format
# 成功标志:Storage directory ... has been successfully formatted
- 启动HDFS和YARN
start-dfs.sh # 启动NameNode和DataNode
start-yarn.sh # 启动ResourceManager和NodeManager
- jps 验证进程
jps
结束