Setting up a single-node Hadoop on deepin
I. Preparation
1. Update packages
sudo apt update
sudo apt -y dist-upgrade
2. Install common software
sudo apt -y install gcc make openssl libssl-dev libpcre3 libpcre3-dev libgd-dev \
rsync openssh-server vim man zip unzip net-tools tcpdump lrzsz tar wget
3. Enable SSH
sudo vim /etc/ssh/sshd_config
Append the following:
Port 22
Restart the SSH service
sudo systemctl restart ssh
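A quick check that the SSH server came back up and is listening on the configured port (a minimal sketch; ssh is the usual service name on Debian-based systems such as deepin):
sudo systemctl status ssh --no-pager
ss -lnt | grep ':22'   # should show a LISTEN socket on port 22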
4. Set the hostname
sudo hostnamectl set-hostname lihaozhe
5. Edit the hosts file
sudo vim /etc/hosts
Add the following entry:
192.168.10.10 lihaozhe
Reboot the system
sudo reboot
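After the reboot, the hostname and the hosts entry can be verified (a small sanity check, assuming the values used above):
hostnamectl --static        # should print lihaozhe
getent hosts lihaozhe       # should print 192.168.10.10 lihaozhe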
6. Upload software and configure environment variables
Create the software directory
mkdir -p /home/lhz/opt
Change into the software directory
cd /home/lhz/opt
Download the JDK
Download Hadoop
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.4.1/hadoop-3.4.1.tar.gz
Extract the JDKs and rename the directories
tar -zxvf jdk-8u421-linux-x64.tar.gz
tar -zxvf jdk-21_linux-x64_bin.tar.gz
mv jdk1.8.0_421 jdk-8
mv jdk-21.0.5 jdk-21
Extract Hadoop and rename the directory
tar -zxvf hadoop-3.4.1.tar.gz
mv hadoop-3.4.1 hadoop-3
Delete the downloaded archives (not recommended)
rm -f *.gz
Configure environment variables
vim /home/lhz/.bashrc
Append the following:
export JAVA_HOME=/home/lhz/opt/jdk-8
export HDFS_NAMENODE_USER=lhz
export HDFS_SECONDARYNAMENODE_USER=lhz
export HDFS_DATANODE_USER=lhz
export HDFS_ZKFC_USER=lhz
export HDFS_JOURNALNODE_USER=lhz
export HADOOP_SHELL_EXECNAME=lhz
export YARN_RESOURCEMANAGER_USER=lhz
export YARN_NODEMANAGER_USER=lhz
export HADOOP_HOME=/home/lhz/opt/hadoop-3
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
Verify the environment variables
printenv
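The new variables only apply to new shells; to load them into the current session and confirm both tools are on the PATH, something like the following should work:
source /home/lhz/.bashrc
java -version       # should report the JDK under /home/lhz/opt/jdk-8
hadoop version      # should report Hadoop 3.4.1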
7. Configure passwordless SSH login
Generate a local key pair and copy the public key into the authorized keys file
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
ssh-copy-id lhz@lihaozhe
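To confirm that passwordless login works (the Hadoop start scripts rely on it even on a single node):
ssh lhz@lihaozhe hostname   # should print lihaozhe without asking for a password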
II. Hadoop
1. Edit the configuration files
All of the following files live in $HADOOP_HOME/etc/hadoop:
hadoop-env.sh
core-site.xml
hdfs-site.xml
workers
mapred-site.xml
yarn-site.xml
hadoop-env.sh
Append the following at the end of the file:
export JAVA_HOME=/home/lhz/opt/jdk-8
export HDFS_NAMENODE_USER=lhz
export HDFS_SECONDARYNAMENODE_USER=lhz
export HDFS_DATANODE_USER=lhz
export HDFS_ZKFC_USER=lhz
export HDFS_JOURNALNODE_USER=lhz
export HADOOP_SHELL_EXECNAME=lhz
export YARN_RESOURCEMANAGER_USER=lhz
export YARN_NODEMANAGER_USER=lhz
export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://lihaozhe</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/lhz/opt/data/hadoop</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>lhz</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Allow the proxy user lhz to act from any host -->
<property>
<name>hadoop.proxyuser.lhz.hosts</name>
<value>*</value>
</property>
<!-- Allow the proxy user to impersonate users in any group -->
<property>
<name>hadoop.proxyuser.lhz.groups</name>
<value>*</value>
</property>
<!-- Allow the proxy user to impersonate any user -->
<property>
<name>hadoop.proxyuser.lhz.users</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
workers
Note: in Hadoop 2.x this file is named slaves; in Hadoop 3.x it is named workers.
lihaozhe
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*:$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value>
</property>
<!-- JobHistory server RPC address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>lihaozhe:10020</value>
</property>
<!-- JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>lihaozhe:19888</value>
</property>
</configuration>
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>
JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_HOME,PATH,LANG,TZ,HADOOP_MAPRED_HOME
</value>
</property>
<!-- Whether to enforce physical memory limits on containers -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Whether to enforce virtual memory limits on containers -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Log server URL (the JobHistory server) -->
<property>
<name>yarn.log.server.url</name>
<value>http://lihaozhe:19888/jobhistory/logs</value>
</property>
<!-- Retain aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- Minimum and maximum memory YARN may allocate per container -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>4096</value>
</property>
</configuration>
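Before initializing, it is worth checking that Hadoop actually picks up these files (a sketch; the checknative output depends on your build):
hdfs getconf -confKey fs.defaultFS   # should print hdfs://lihaozhe
hadoop checknative                   # reports whether the libraries under lib/native can be loaded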
2. Initialize the cluster
Format the NameNode
hdfs namenode -format
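If formatting succeeds, the NameNode metadata appears under hadoop.tmp.dir (assuming the default dfs.namenode.name.dir of ${hadoop.tmp.dir}/dfs/name):
ls /home/lhz/opt/data/hadoop/dfs/name/current   # expect VERSION and fsimage_* files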
Rename the start/stop scripts
mv $HADOOP_HOME/sbin/start-all.sh $HADOOP_HOME/sbin/start-hadoop.sh
mv $HADOOP_HOME/sbin/stop-all.sh $HADOOP_HOME/sbin/stop-hadoop.sh
Start the cluster
start-hadoop.sh
Stop the cluster
stop-hadoop.sh
Important:
# Before powering off, stop the services in order
mapred --daemon stop historyserver
stop-yarn.sh
stop-dfs.sh
# After booting, start the services in order
start-dfs.sh
start-yarn.sh
mapred --daemon start historyserver
Or:
# Stop the services before powering off
mapred --daemon stop historyserver
stop-hadoop.sh
# Start the services after booting
start-hadoop.sh
mapred --daemon start historyserver
# After starting or stopping, use jps to confirm the processes are in the expected state before doing anything else
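On this single node, with HDFS, YARN, and the history server all running, jps should list roughly the following daemons (PIDs will differ):
jps
# NameNode
# DataNode
# SecondaryNameNode
# ResourceManager
# NodeManager
# JobHistoryServer
# Jps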
3. Testing
3.1 Access Hadoop in a browser
Browse to: http://lihaozhe:9870 (HDFS NameNode)
Browse to: http://lihaozhe:9868 (SecondaryNameNode)
Browse to: http://lihaozhe:8088 (YARN ResourceManager)
Browse to: http://lihaozhe:19888 (JobHistory server)
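The same services can also be checked from the command line (a minimal sketch using tools installed earlier):
wget -q -O /dev/null http://lihaozhe:9870 && echo "NameNode UI reachable"
hdfs dfsadmin -report   # should show one live DataNode
yarn node -list         # should show one registered NodeManager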
3.2 Test HDFS
Create a test file wcdata.txt on the local filesystem
vim wcdata.txt
lihaozhe HBaseHive Flink
Storm Hadoop HBase lihaozheFlinkHBase
StormHBase Hadoop Hive
FlinkHBase Flink
Hive StormHive Flink HadoopHBase
HiveHadoop lihaozhe HBase StormHBase
Hadoop Hive FlinkHBase Flink Hive StormHive
Flink HadoopHBase Hive
(the block above is repeated many more times simply to make the test file larger)
Create the directory /wordcount/input on HDFS
hdfs dfs -mkdir -p /wordcount/input
Inspect the HDFS directory structure
hdfs dfs -ls /
hdfs dfs -ls /wordcount
hdfs dfs -ls /wordcount/input
Upload the local test file wcdata.txt to /wordcount/input on HDFS
hdfs dfs -put wcdata.txt /wordcount/input
Verify that the upload succeeded
hdfs dfs -ls /wordcount/input
hdfs dfs -cat /wordcount/input/wcdata.txt
3.3 Test MapReduce
Estimate the value of pi
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.1.jar pi 10 10
Word count
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.4.1.jar wordcount /wordcount/input/wcdata.txt /wordcount/result
hdfs dfs -ls /wordcount/result
hdfs dfs -cat /wordcount/result/part-r-00000
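Note that MapReduce refuses to start a job whose output directory already exists; to rerun the word count, delete the previous result first:
hdfs dfs -rm -r /wordcount/result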