源码下载:链接:https://pan.baidu.com/s/1D3yszkTzjwQz0vFRozQl2g?pwd=z6kb
提取码:z6kb
实现思路
1.搭建一个新的springboot项目,不会的请看我这篇博客:springboot项目搭建
2.添加maven依赖
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-core</artifactId>
<version>1.27</version>
</dependency>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>7.10.0</version>
</dependency>
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>elasticsearch-rest-high-level-client</artifactId>
<version>7.10.0</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.6</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>fastjson</artifactId>
<version>1.2.83</version>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.20</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.25</version>
</dependency>
3.创建一个类,复制以下代码,然后执行main方法
package com.demo.controller;
import com.demo.bean.FileBean;
import org.apache.http.HttpHost;
import org.apache.tika.Tika;
import org.apache.tika.mime.MediaType;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.RestClient;
import org.elasticsearch.client.RestClientBuilder;
import org.elasticsearch.client.RestHighLevelClient;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.XContentType;
import com.alibaba.fastjson.JSON;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Base64;
/**
 * Demo that Base64-encodes a local file and indexes it into Elasticsearch
 * through the {@code attachment} ingest pipeline.
 *
 * <p>All members are static; the class is driven from {@link #main(String[])}.
 */
public class EsController {

    /**
     * Detects the full media type of a local file with Tika.
     *
     * <p>The argument is treated as a plain filesystem path, the same way
     * {@link #fileToBase64(String)} treats it, so paths containing spaces or other
     * characters that are illegal in a URI work as well.
     *
     * @param filePath filesystem path of the file to inspect
     * @return the detected media type, never {@code null}
     * @throws IOException if the file cannot be read or the detected type cannot be parsed
     */
    private static MediaType detectMediaType(String filePath) throws IOException {
        String mime = new Tika().detect(new File(filePath));
        MediaType mediaType = MediaType.parse(mime);
        if (mediaType == null) {
            throw new IOException("Unparseable media type '" + mime + "' for file: " + filePath);
        }
        return mediaType;
    }

    /**
     * Tells whether a file of the given media type must be kept out of the index.
     *
     * <p>The decision uses the top-level type ({@code video}, {@code audio},
     * {@code image}) and the full base type ({@code application/zip}); the subtype
     * alone (e.g. {@code mp4}) does not carry that information.
     *
     * @param mediaType detected media type of the file
     * @return {@code true} for video, audio, image and plain zip files
     */
    private static boolean isExcluded(MediaType mediaType) {
        String topLevelType = mediaType.getType();
        return "video".equals(topLevelType)
                || "audio".equals(topLevelType)
                || "image".equals(topLevelType)
                || "application/zip".equals(mediaType.getBaseType().toString());
    }

    /**
     * Returns the MIME subtype of a file as detected by Tika (for example
     * {@code pdf} for {@code application/pdf}).
     *
     * @param filePathUrl filesystem path of the file to inspect
     * @return the subtype part of the detected media type
     * @throws IOException if the file cannot be read
     * @throws URISyntaxException never thrown by this implementation; still declared
     *         so that existing callers that catch it keep compiling
     */
    public static String getFileTypeByDefaultTika(String filePathUrl) throws IOException, URISyntaxException {
        return detectMediaType(filePathUrl).getSubtype();
    }

    /**
     * Reads a whole file into memory and encodes it as Base64.
     *
     * @param filePath filesystem path of the file to encode
     * @return the Base64 representation of the file's bytes
     * @throws IOException if the file cannot be read
     */
    public static String fileToBase64(String filePath) throws IOException {
        byte[] fileContent = Files.readAllBytes(Paths.get(filePath));
        return Base64.getEncoder().encodeToString(fileContent);
    }

    /**
     * Returns the Base64 content of a file unless its type is excluded from indexing.
     *
     * <p>Video, audio, image and plain zip files are skipped. This is best-effort:
     * read or detection failures are reported on stderr and yield an empty string
     * instead of propagating.
     *
     * @param pathUrl filesystem path of the file
     * @return the Base64-encoded file content, or {@code ""} when the file is
     *         excluded or cannot be read
     */
    public static String fileFilterate(String pathUrl) {
        try {
            if (isExcluded(detectMediaType(pathUrl))) {
                return "";
            }
            return fileToBase64(pathUrl);
        } catch (IOException e) {
            System.err.println("Could not detect or read file: " + pathUrl);
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Indexes one hard-coded sample document into the {@code file_data} index.
     *
     * @param args unused
     * @throws IOException if the index request or closing the client fails
     */
    public static void main(String[] args) throws IOException {
        // Elasticsearch is expected on localhost:9200 over plain HTTP.
        RestClientBuilder builder = RestClient.builder(new HttpHost("localhost", 9200, "http"));

        // try-with-resources closes the client even when the index call throws.
        try (RestHighLevelClient client = new RestHighLevelClient(builder)) {
            // Encode the file as Base64 so it can be stored in the document.
            String path = "C:\\Users\\83677\\Desktop\\测试4.docx";
            String fileBase64 = fileFilterate(path);

            // Populate the document with the sample metadata and the encoded content.
            FileBean filebean = new FileBean();
            filebean.setFile_id("1");
            filebean.setFile_name("测试4.docx");
            filebean.setFile_url("http://文件存储地址:8080/xxx/docs/raw/master/性能分析与内存问题排查思考.pdf");
            filebean.setFile_type("docx");
            filebean.setContent(fileBase64);
            filebean.setFile_size("33");
            filebean.setFile_dir_name("yryy");
            filebean.setFile_suffix(".docx");
            filebean.setGroup_file_id("1234653");

            // Serialize the bean to a JSON string for the request body.
            String body = JSON.toJSONString(filebean);

            // Route the document through the "attachment" ingest pipeline on its way
            // into the file_data index; allow up to 10 minutes for the request.
            IndexRequest indexRequest = new IndexRequest().index("file_data")
                    .source(body, XContentType.JSON)
                    .setPipeline("attachment")
                    .timeout(TimeValue.timeValueMinutes(10));
            client.index(indexRequest, RequestOptions.DEFAULT);
        }
    }
}
4.实体类对象,我的示例
package com.demo.bean;
import lombok.Data;
/**
 * Plain data holder describing one file document to be indexed.
 *
 * <p>Lombok's {@code @Data} generates the accessors, so the snake_case field
 * names determine the accessor names used by callers (e.g. {@code setFile_id}).
 * EsController.main serializes an instance with fastjson; the field names
 * presumably become the JSON keys of the indexed document - confirm against
 * the index mapping.
 */
@Data
public class FileBean {
// Identifier of the file document.
private String file_id;
// File name including its extension (the sample uses "测试4.docx").
private String file_name;
// URL under which the original file is stored.
private String file_url;
// Short type label of the file (the sample uses "docx").
private String file_type;
// Base64-encoded file bytes; EsController.main fills this before indexing
// through the "attachment" pipeline.
private String content;
// Identifier of a file group - NOTE(review): exact meaning is not shown here.
private String group_file_id;
// File extension including the leading dot (the sample uses ".docx").
private String file_suffix;
// File size kept as a string - NOTE(review): unit is not shown here, confirm.
private String file_size;
// Name of the directory the file belongs to.
private String file_dir_name;
}