HDFS Configuration and Usage
- I. HDFS Configuration
- II. HDFS Shell
  - 1. Default configuration notes
  - 2. Shell commands
- III. Reading and Writing HDFS from Java
  - 1. Java project configuration
  - 2. Testing
I. HDFS Configuration
## Building on the previous article, enter the configuration directory under HADOOP_HOME
cd $HADOOP_HOME/etc/hadoop
## Fix the file ownership
chown -R root:root /usr/local/hadoop/hadoop-3.3.6/*
## Edit the two configuration files: core-site.xml and hdfs-site.xml
vim core-site.xml
vim hdfs-site.xml
Add the following to core-site.xml:
<property>
    <name>hadoop.http.staticuser.user</name>
    <value>root</value>
</property>
<property>
    <name>fs.defaultFS</name>
    <value>hdfs://<your-ip>:9000</value>
</property>
Add the following to hdfs-site.xml (a single replica, since this is a one-node cluster):
<property>
    <name>dfs.replication</name>
    <value>1</value>
</property>
## Format the NameNode (first startup only)
hdfs namenode -format
## Edit the environment settings and add the following
vim hadoop-env.sh
export JAVA_HOME=/usr/local/java/jdk-11.0.19/
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root
## Stop and disable the firewall
systemctl stop firewalld
systemctl disable firewalld
## Start HDFS
start-dfs.sh
## Check the running processes (NameNode, DataNode, and SecondaryNameNode should all be listed)
jps
## Stop HDFS
stop-dfs.sh
Open the built-in HDFS web UI: http://192.168.1.6:9870/
II. HDFS Shell
1. Default configuration notes
hdfs-site.xml is where dfs.namenode.name.dir and dfs.datanode.data.dir are set (their older names, dfs.name.dir and dfs.data.dir, are deprecated in Hadoop 3).
dfs.namenode.name.dir defaults to file://${hadoop.tmp.dir}/dfs/name
dfs.datanode.data.dir defaults to file://${hadoop.tmp.dir}/dfs/data
If hadoop.tmp.dir is not set with the -D option or in a configuration file, it defaults to /tmp/hadoop-${user.name}
The default values can be looked up in the following files:
core-default.xml, hdfs-default.xml, and mapred-default.xml
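To see how these values resolve at runtime, a minimal sketch (the class name ShowDefaults is illustrative; it relies only on the hadoop-client dependency from section III) can print them. HdfsConfiguration loads core-default.xml and hdfs-default.xml, plus any site files found on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class ShowDefaults {
    public static void main(String[] args) {
        // Loads core-default.xml/core-site.xml and hdfs-default.xml/hdfs-site.xml
        Configuration conf = new HdfsConfiguration();
        // With no overrides these print the defaults described above,
        // e.g. file:///tmp/hadoop-root/dfs/name when running as root
        System.out.println("hadoop.tmp.dir        = " + conf.get("hadoop.tmp.dir"));
        System.out.println("dfs.namenode.name.dir = " + conf.get("dfs.namenode.name.dir"));
        System.out.println("dfs.datanode.data.dir = " + conf.get("dfs.datanode.data.dir"));
    }
}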
2. Shell commands
# List the files and subdirectories under the root directory / (absolute path)
hadoop fs -ls /
# Create a directory (absolute path)
hadoop fs -mkdir /testData
# Create a local test file
echo 123 >> /home/test.txt
# Upload the file
hadoop fs -put /home/test.txt /testData/
# Download the file
hadoop fs -get /testData/test.txt
# Print the file contents
hadoop fs -cat /testData/test.txt
Listing the directory: hadoop fs -ls /
Viewing the file through the web UI
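The same operations are available from the Java API used in section III. A minimal sketch (assuming the single-node cluster above at hdfs://192.168.1.6:9000; the class name is illustrative):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ShellEquivalents {
    public static void main(String[] args) throws Exception {
        System.setProperty("HADOOP_USER_NAME", "root");
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.6:9000");
        try (FileSystem fs = FileSystem.get(conf)) {
            fs.mkdirs(new Path("/testData"));                    // hadoop fs -mkdir /testData
            fs.copyFromLocalFile(new Path("/home/test.txt"),
                    new Path("/testData/"));                     // hadoop fs -put
            for (FileStatus st : fs.listStatus(new Path("/"))) { // hadoop fs -ls /
                System.out.println(st.getPath());
            }
        }
    }
}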
III. Reading and Writing HDFS from Java
When running the Hadoop client locally in IDEA, the console prints the error "HADOOP_HOME and hadoop.home.dir are unset"; the Hadoop environment variables must be configured on the local Windows system (restart IDEA afterwards):
Download the package (typically the winutils builds) from GitHub
Unzip it, take the folder for the matching version, and place it somewhere such as D:\hadoop-3.3.5\bin
Set the HADOOP_HOME environment variable (to D:\hadoop-3.3.5, the directory above bin)
Add %HADOOP_HOME%\bin to PATH
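If you would rather not change system-wide settings, the same hint can be given programmatically. A sketch (the class name is illustrative and the path is the example directory from above); the property must be set before the first Hadoop class is used:

public class WindowsBootstrap {
    public static void main(String[] args) {
        // Equivalent to setting HADOOP_HOME; Hadoop's Shell utility checks
        // the hadoop.home.dir system property before the environment variable
        System.setProperty("hadoop.home.dir", "D:\\hadoop-3.3.5");
        // ...then start the application, e.g. SpringApplication.run(HadoopApp.class, args);
    }
}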
1. Java project configuration
Maven pom.xml (note that spring-boot-starter-log4j2 normally requires excluding the default spring-boot-starter-logging from the other starters, otherwise the two logging implementations conflict):
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>org.example</groupId>
    <artifactId>HadoopDemo</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <maven.compiler.source>19</maven.compiler.source>
        <maven.compiler.target>19</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <spring.version>3.1.1</spring.version>
        <hadoop.version>3.3.6</hadoop.version>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-web</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-log4j2</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.springframework.boot</groupId>
            <artifactId>spring-boot-starter-aop</artifactId>
            <version>${spring.version}</version>
        </dependency>
        <dependency>
            <groupId>org.projectlombok</groupId>
            <artifactId>lombok</artifactId>
            <version>1.18.26</version>
        </dependency>
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.32</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
    </dependencies>
</project>
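A quick way to confirm which Hadoop client version actually ended up on the classpath is VersionInfo, which lives in hadoop-common (pulled in by hadoop-client); the class name here is illustrative:

import org.apache.hadoop.util.VersionInfo;

public class ClientVersionCheck {
    public static void main(String[] args) {
        // Prints e.g. "3.3.6" if the pom resolved as intended
        System.out.println(VersionInfo.getVersion());
    }
}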
Startup class
package org.example;

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;

/**
 * @author Administrator
 */
@SpringBootApplication
public class HadoopApp {
    public static void main(String[] args) {
        SpringApplication.run(HadoopApp.class, args);
    }
}
Read/write class
package org.example.controller;

import jakarta.annotation.PostConstruct;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;

import java.io.IOException;

/**
 * @author Administrator
 * @create 2023-07-13 23:19
 */
@RestController
@RequestMapping("/test")
public class TestController {

    @PostConstruct
    public void init() {
        // Act as the root user configured for HDFS above
        System.setProperty("HADOOP_USER_NAME", "root");
    }

    @GetMapping("/get")
    public String get() {
        String result = "";
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.6:9000");
        Path path = new Path("/testData/test.txt");
        // try-with-resources closes both the stream and the FileSystem
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream ins = fs.open(path)) {
            // Read the file byte by byte and collect its contents
            StringBuilder builder = new StringBuilder();
            int ch = ins.read();
            while (ch != -1) {
                builder.append((char) ch);
                ch = ins.read();
            }
            result = builder.toString();
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
        return result;
    }

    @GetMapping("/set")
    public void set() {
        // Point the client at the NameNode
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://192.168.1.6:9000");
        Path path = new Path("/testData/test.txt");
        // Open the existing file and append to it; try-with-resources
        // guarantees the stream is closed before the FileSystem
        try (FileSystem fs = FileSystem.get(conf);
             FSDataOutputStream ws = fs.append(path)) {
            ws.writeBytes("Hello World!");
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    }
}
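A directory-listing endpoint makes a handy third test. This sketch (the /list mapping is an addition, not part of the original project) goes inside the same TestController and needs one extra import, org.apache.hadoop.fs.FileStatus:

@GetMapping("/list")
public String list() {
    StringBuilder builder = new StringBuilder();
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://192.168.1.6:9000");
    try (FileSystem fs = FileSystem.get(conf)) {
        // Equivalent to: hadoop fs -ls /testData
        for (FileStatus status : fs.listStatus(new Path("/testData"))) {
            builder.append(status.getPath().getName())
                   .append('\t').append(status.getLen()).append(" bytes\n");
        }
    } catch (IOException ioe) {
        ioe.printStackTrace();
    }
    return builder.toString();
}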
2. Testing
Reading data: GET /test/get returns the current contents of /testData/test.txt
Writing data: GET /test/set appends "Hello World!" to the file
Reading data again: GET /test/get now includes the appended text
The JDK version used on Windows is 19