文件上传
先简单说下文件上传的步骤吧
控制层简单做下转发
/**
 * Receives a multipart upload and delegates storing and parsing to the service layer.
 *
 * @param file the uploaded file bound from the "file" request part
 * @return a success result carrying the stored file's details; a failure result when the
 *         upload content cannot be read or when the service returns no file data
 */
@PostMapping("uploadAndParsing")
public ResponseResult uploadAndParsing(@RequestParam("file") MultipartFile file){
    try {
        String accessToken = Header.getAuthorization(request.getHeader("Authorization"));
        FileData fileData = fileDataService.uploadAndParsing(file.getOriginalFilename(), file.getBytes(),
                file.getContentType(), file.getSize(), accessToken);
        // A null result means the service could not store the file; do not report it as success.
        if (fileData == null) {
            return ResponseResult.failure();
        }
        return ResponseResult.success(fileData);
    } catch (IOException e) {
        // Reading the multipart content failed.
        e.printStackTrace();
        return ResponseResult.failure();
    }
}
通过MultipartFile对象获取文件名file.getOriginalFilename()、文件字节数组file.getBytes()、文件类型file.getContentType()和文件大小file.getSize()。
实现层
所做的操作就是将文件的详细信息返回:
// Split the file name at its LAST dot. A name without any dot has no extension, and a name
// such as "a.txt.txt" keeps "a.txt" as its base name.
int dotIndex = fileName.lastIndexOf('.');
// Extension including the leading dot, e.g. ".pdf" (empty when there is no dot).
String suffixPoint = dotIndex >= 0 ? fileName.substring(dotIndex) : "";
// Extension without the dot, e.g. "pdf" (empty when there is no dot).
String suffix = dotIndex >= 0 ? fileName.substring(dotIndex + 1) : "";
// Everything before the last dot (the whole name when there is no dot).
String name = dotIndex >= 0 ? fileName.substring(0, dotIndex) : fileName;
获得文件名的前缀后缀,简单不多说
// Start the result object with the extension derived from the file name.
FileData fileData = new FileData();
fileData.setFileSuffix(suffix);
// formatFileSize yields a map with a numeric "value" and its "unit"; read both as strings.
HashMap<String, Object> map = fileUtil.formatFileSize(size);
String sizeValue = String.valueOf(map.get("value"));
String sizeUnit = String.valueOf(map.get("unit"));
fileData.setFileSize(BigDecimal.valueOf(Double.parseDouble(sizeValue)));
fileData.setUnit(sizeUnit);
FileData为返回的文件详细信息的对象,formatFileSize() 方法为将文件的大小转换为指定的单位:简单看下这个方法吧
/**
 * Converts a byte count into a value/unit pair using 1024-based units from B up to PB.
 *
 * @param size the size in bytes
 * @return a map with two entries: "unit" (one of B, KB, MB, GB, TB, PB) and "value". For a size
 *         of zero the value is the Integer 0; otherwise it is a String with exactly two
 *         decimals and a '.' decimal separator regardless of the JVM's default locale
 */
public HashMap<String, Object> formatFileSize(long size) {
    HashMap<String, Object> hashMap = new HashMap<>();
    if (size == 0) {
        hashMap.put("value", 0);
        hashMap.put("unit", "B");
        return hashMap;
    }
    // Pin the symbols to Locale.ROOT: callers parse the value back with Double.valueOf, which
    // only understands '.' as the decimal separator.
    DecimalFormat df = new DecimalFormat("#.00",
            java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
    String[] units = {"B", "KB", "MB", "GB", "TB", "PB"};
    long divisor = 1L;
    int unitIndex = 0;
    // Climb one unit at a time while the size reaches the next unit's threshold; stop at PB.
    while (unitIndex < units.length - 1 && size >= divisor * 1024L) {
        divisor *= 1024L;
        unitIndex++;
    }
    hashMap.put("value", df.format((double) size / divisor));
    hashMap.put("unit", units[unitIndex]);
    return hashMap;
}
之后进行文件的上传:
// Hand the raw bytes to the MinIO service; the result is null-checked before it is used.
MinioUploadDto minioUploadDto = minioService.upload(fileName, bytes, contentType);
文件上传方面,公司使用的技术是MinIO,将文件存储在服务器的桶(bucket)中
对于上传的工具类可以给大家看一下:
public class MinioServiceImpl implements IMinioService {
@Autowired
private MinioClient minioClient;
@Value("${minio.endpoint}")
private String ENDPOINT;
@Value("${minio.bucketName}")
private String BUCKET_NAME;
@Value("${minio.accessKey}")
private String ACCESS_KEY;
@Value("${minio.secretKey}")
private String SECRET_KEY;
@Override
public MinioUploadDto upload(String fileName, byte[] bytes, String contentType) {
//创建一个MinIO的Java客户端
InputStream inputStream = new ByteArrayInputStream(bytes);
try {
String filename = fileName;
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
// 设置存储对象名称
String objectName = sdf.format(new Date()) + "/" + SnowflakeIdWorker.generateOpenId()+"/" +filename;
// 使用putObject上传一个文件到存储桶中
try {
minioClient.putObject(PutObjectArgs.builder()
.bucket(BUCKET_NAME)
.object(objectName)
.stream(inputStream, inputStream.available(), -1)
.contentType(contentType)
.build()
);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
log.info("文件上传成功!");
MinioUploadDto minioUploadDto = new MinioUploadDto();
minioUploadDto.setName(filename);
minioUploadDto.setUrl(ENDPOINT + "/" + BUCKET_NAME + "/" + objectName);
//minioUploadDto.setUrl(minioClient.getObjectUrl(BUCKET_NAME,objectName));
return minioUploadDto;
} catch (Exception e) {
delete(fileName);
}
return null;
}
@Override
public MinioUploadDto upload(String fileName, InputStream inputStream, String contentType) {
try {
String filename = fileName;
SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
// 设置存储对象名称
String objectName = sdf.format(new Date()) + "/" + SnowflakeIdWorker.generateOpenId()+"/" +filename;
// 使用putObject上传一个文件到存储桶中
try {
minioClient.putObject(PutObjectArgs.builder()
.bucket(BUCKET_NAME)
.object(objectName)
.stream(inputStream, inputStream.available(), -1)
.contentType(contentType)
.build()
);
} catch (Exception e) {
e.printStackTrace();
} finally {
if (inputStream != null) {
try {
inputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
log.info("文件上传成功!");
MinioUploadDto minioUploadDto = new MinioUploadDto();
minioUploadDto.setName(filename);
minioUploadDto.setUrl(ENDPOINT + "/" + BUCKET_NAME + "/" + objectName);
//minioUploadDto.setUrl(minioClient.getObjectUrl(BUCKET_NAME,objectName));
return minioUploadDto;
} catch (Exception e) {
delete(fileName);
}
return null;
}
@Override
public boolean delete(String objectName) {
try {
minioClient.removeObject(RemoveObjectArgs.builder()
.bucket(BUCKET_NAME)
.object(objectName)
.build());
return true;
} catch (Exception e) {
e.printStackTrace();
}
return false;
}
}
上传完之后返回文件的名字和url:
代码简单,自己理解哈~。
上传之后后面进行文件类型解析等操作:
if (minioUploadDto == null) {
    // The upload failed, so there is nothing to parse or persist.
    return null;
} else {
    fileData.setFileUrl(minioUploadDto.getUrl());
    // Parse from a fresh in-memory stream over the uploaded bytes.
    InputStream inputStream = new ByteArrayInputStream(bytes);
    String text = "";
    int pages = 0;
    try {
        // Structured result ("text" + "pageCount") of the parser chosen by extension, if any.
        HashMap<String, Object> parsed = null;
        // Match the extension case-insensitively so that e.g. "REPORT.PDF" is parsed as well.
        switch (suffix.toLowerCase(java.util.Locale.ROOT)) {
            case "doc":
                parsed = fileUtil.getTextFromDoc(inputStream);
                break;
            case "docx":
                parsed = fileUtil.getTextFromDocx(inputStream);
                break;
            case "pdf":
                parsed = fileUtil.getTextFromPDF(inputStream);
                break;
            case "ppt":
                parsed = fileUtil.getTextFromPPT(inputStream);
                break;
            case "pptx":
                parsed = fileUtil.getTextFromPPTX(inputStream);
                break;
            case "xls":
                parsed = fileUtil.getTextFromxls(inputStream);
                break;
            case "xlsx":
                parsed = fileUtil.getTextFromxlsx(inputStream);
                break;
            case "txt":
                // Plain text has no page count; only the content is returned.
                text = fileUtil.getTextFromTxt(inputStream);
                break;
            default:
                // Unsupported type: stored without extracted text.
                break;
        }
        if (parsed != null) {
            Object parsedText = parsed.get("text");
            // Avoid String.valueOf(null), which would index the literal string "null".
            text = parsedText == null ? "" : parsedText.toString();
            pages = Integer.parseInt(String.valueOf(parsed.get("pageCount")));
        }
    } catch (Exception e) {
        // A parse failure must not prevent the record from being saved below.
        e.printStackTrace();
    }
    try {
        LoginInfo info = tokenService.get(token);
        fileData.setPages(pages);
        setFileData(null, fileData, info);
        this.saveOrUpdate(fileData);
        // Index into ES whether or not any text was extracted.
        saveEs(fileData, text, info);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
解析:获取到文件的后缀之后根据文件的后缀去读取文件:getTextFromDoc(inputStream)
此类方法用于解析doc、docx、ppt等类型文件的详细内容,获得文件的内容及页数
工具类列一下:
import com.huaweicloud.sdk.core.auth.BasicCredentials;
import com.huaweicloud.sdk.core.auth.ICredential;
import com.huaweicloud.sdk.core.exception.ConnectionException;
import com.huaweicloud.sdk.core.exception.RequestTimeoutException;
import com.huaweicloud.sdk.core.exception.ServiceResponseException;
import com.huaweicloud.sdk.ocr.v1.OcrClient;
import com.huaweicloud.sdk.ocr.v1.model.GeneralTableRequestBody;
import com.huaweicloud.sdk.ocr.v1.model.RecognizeGeneralTableRequest;
import com.huaweicloud.sdk.ocr.v1.model.RecognizeGeneralTableResponse;
import com.huaweicloud.sdk.ocr.v1.region.OcrRegion;
import com.youming.shuiku.archive.api.IMinioService;
import lombok.RequiredArgsConstructor;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFRow;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.openxmlformats.schemas.drawingml.x2006.main.CTRegularTextRun;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextBody;
import org.openxmlformats.schemas.drawingml.x2006.main.CTTextParagraph;
import org.openxmlformats.schemas.presentationml.x2006.main.CTGroupShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTShape;
import org.openxmlformats.schemas.presentationml.x2006.main.CTSlide;
import org.springframework.stereotype.Component;
import javax.crypto.interfaces.PBEKey;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectOutputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
@Component
@RequiredArgsConstructor
public class FileUtil {
private final IMinioService minioService;
/**
 * Extracts the text and the page count from a legacy Word (.doc) document.
 *
 * @param filePath stream over the .doc content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (the text of the document's range) and "pageCount" (taken from the
 *         document's summary information, or 0 when the document has none)
 * @throws Exception if the stream cannot be read or parsed as a .doc document
 */
public HashMap<String,Object> getTextFromDoc(InputStream filePath) throws Exception {
    try (InputStream in = filePath; HWPFDocument doc = new HWPFDocument(in)) {
        // Documents may lack a summary information stream; treat that as "page count unknown".
        int pageCount = doc.getSummaryInformation() == null
                ? 0
                : doc.getSummaryInformation().getPageCount();
        Range range = doc.getRange();
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", range.text());
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Extracts the text and the page count from a Word (.docx) document.
 *
 * @param filePath stream over the .docx content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (the extractor's text) and "pageCount" (the page count recorded in
 *         the document's extended properties)
 * @throws IOException if the stream cannot be read or parsed as a .docx document
 */
public HashMap<String,Object> getTextFromDocx(InputStream filePath) throws IOException {
    // Close both the stream and the document on every path, including parse failures.
    try (InputStream in = filePath; XWPFDocument doc = new XWPFDocument(in)) {
        int pageCount = doc.getProperties().getExtendedProperties().getUnderlyingProperties().getPages();
        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
        String text = extractor.getText();
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", text);
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Extracts the text and the page count from a PDF document.
 *
 * @param filePath stream over the PDF content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (all text, sorted by position) and "pageCount" (number of pages)
 * @throws IOException if the stream cannot be read or parsed as a PDF
 */
public HashMap<String,Object> getTextFromPDF(InputStream filePath) throws IOException {
    // Close both the stream and the document on every path, including parse failures.
    try (InputStream in = filePath; PDDocument document = PDDocument.load(in)) {
        int pageCount = document.getPages().getCount();
        PDFTextStripper tStripper = new PDFTextStripper();
        // Sort text by its position on the page so the output follows a regular order.
        tStripper.setSortByPosition(true);
        String text = tStripper.getText(document);
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", text);
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Renders every page of a PDF to an image and runs Tesseract OCR over it.
 *
 * <p>The recognised text of each page is also printed to standard output, as before. Earlier
 * versions collected the text but always returned {@code null}; the collected text is now
 * returned. A disabled Huawei Cloud OCR prototype that used to follow the loop as commented-out
 * code has been dropped as dead code.
 *
 * @param bytes the PDF content
 * @return the recognised text of all pages, concatenated in page order
 * @throws IOException        if the PDF cannot be loaded or a page cannot be rendered
 * @throws TesseractException if OCR fails on a page
 */
public String getTextFromImg(byte[] bytes) throws IOException, ExecutionException, InterruptedException, TesseractException {
    try (PDDocument document = PDDocument.load(bytes)) {
        PDFRenderer renderer = new PDFRenderer(document);
        // One OCR engine configuration is enough for all pages.
        // NOTE(review): the data path is a hard-coded local Windows directory — confirm for deployment.
        Tesseract tesseract = new Tesseract();
        tesseract.setDatapath("D:/tesseract-ocr/tessdata");
        tesseract.setLanguage("chi_sim");
        int pageSize = document.getNumberOfPages();
        StringBuilder builder = new StringBuilder();
        for (int i = 0; i < pageSize; i++) {
            // Render the page to an image and recognise it once (OCR is the expensive step).
            BufferedImage image = renderer.renderImage(i);
            String pageText = tesseract.doOCR(image);
            System.out.println(pageText);
            builder.append(pageText);
        }
        return builder.toString();
    }
}
/**
 * Extracts the text and the page count from a legacy PowerPoint (.ppt) file.
 *
 * @param filePath stream over the .ppt content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (the extractor's text) and "pageCount" (taken from the file's
 *         summary information, or 0 when the file has none)
 * @throws IOException if the stream cannot be read or parsed as a .ppt file
 */
public HashMap<String, Object> getTextFromPPT(InputStream filePath) throws IOException {
    try (InputStream in = filePath; PowerPointExtractor extractor = new PowerPointExtractor(in)) {
        // Files may lack a summary information stream; treat that as "page count unknown".
        int pageCount = extractor.getSummaryInformation() == null
                ? 0
                : extractor.getSummaryInformation().getPageCount();
        String content = extractor.getText();
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", content);
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Extracts text runs and the slide count from a PowerPoint (.pptx) file.
 *
 * <p>Text is collected from the regular text runs of the shapes directly inside each slide's
 * shape tree. Parsing is best-effort: a failure is printed and whatever was collected up to
 * that point is returned, so "text" is never null.
 *
 * @param filePath stream over the .pptx content (a stream, despite the parameter name); it is
 *                 closed before this method returns
 * @return a map with "text" (the concatenated text runs) and "pageCount" (number of slides, or
 *         0 if the file could not be opened)
 * @throws IOException declared for interface compatibility; parse failures are not propagated
 */
public HashMap<String,Object> getTextFromPPTX(InputStream filePath) throws IOException {
    StringBuilder sb = new StringBuilder();
    int pageNum = 0;
    try (InputStream in = filePath; XMLSlideShow xmlSlideShow = new XMLSlideShow(in)) {
        List<XSLFSlide> slides = xmlSlideShow.getSlides();
        pageNum = slides.size();
        for (XSLFSlide slide : slides) {
            CTSlide rawSlide = slide.getXmlObject();
            CTGroupShape gs = rawSlide.getCSld().getSpTree();
            for (CTShape shape : gs.getSpArray()) {
                CTTextBody tb = shape.getTxBody();
                if (null == tb) {
                    // Shape without a text body.
                    continue;
                }
                for (CTTextParagraph textParagraph : tb.getPArray()) {
                    for (CTRegularTextRun textRun : textParagraph.getRArray()) {
                        sb.append(textRun.getT());
                    }
                }
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    HashMap<String,Object> map = new HashMap<>();
    map.put("text", sb.toString());
    map.put("pageCount", pageNum);
    return map;
}
/**
 * Extracts the cell text and a page count from a legacy Excel (.xls) workbook.
 *
 * <p>Every existing cell of every sheet is converted to a string cell and its text appended,
 * followed by a single space. Cells that do not exist in a row are skipped.
 *
 * @param filePath stream over the .xls content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (the concatenated cell text) and "pageCount" (number of row breaks
 *         on the first sheet plus one, or 0 for a workbook without sheets)
 * @throws IOException if the stream cannot be read or parsed as an .xls workbook
 */
public HashMap<String,Object> getTextFromxls(InputStream filePath) throws IOException {
    StringBuilder content = new StringBuilder();
    try (InputStream in = filePath; HSSFWorkbook workbook = new HSSFWorkbook(in)) {
        int sheetCount = workbook.getNumberOfSheets();
        int pageCount = sheetCount;
        if (sheetCount > 0) {
            pageCount = workbook.getSheetAt(0).getRowBreaks().length + 1;
        }
        for (int sheetIndex = 0; sheetIndex < sheetCount; sheetIndex++) {
            HSSFSheet sheet = workbook.getSheetAt(sheetIndex);
            for (int rowIndex = 0; rowIndex <= sheet.getLastRowNum(); rowIndex++) {
                HSSFRow row = sheet.getRow(rowIndex);
                if (row == null) {
                    continue;
                }
                for (int cellnum = 0; cellnum < row.getLastCellNum(); cellnum++) {
                    HSSFCell cell = row.getCell(cellnum);
                    // Check for a missing cell BEFORE touching it; gaps in a row yield null.
                    if (cell == null) {
                        continue;
                    }
                    // Force the cell to string type so its value can be read as text.
                    cell.setCellType(CellType.STRING);
                    content.append(cell.getRichStringCellValue().getString()).append(" ");
                }
            }
        }
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", content.toString());
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Extracts the cell text and a page count from an Excel (.xlsx) workbook.
 *
 * <p>For every cell index of every existing row, the converted cell text (if the cell exists
 * and its text is non-empty) is appended, followed by a single space.
 *
 * @param filePath stream over the .xlsx content (a stream, despite the parameter name); it is
 *                 closed before this method returns, also when parsing fails
 * @return a map with "text" (the concatenated cell text) and "pageCount" (number of row breaks
 *         on the first sheet plus one, or 0 for a workbook without sheets)
 * @throws IOException if the stream cannot be read or parsed as an .xlsx workbook
 */
public HashMap<String,Object> getTextFromxlsx(InputStream filePath) throws IOException {
    StringBuilder content = new StringBuilder();
    try (InputStream in = filePath; XSSFWorkbook workbook = new XSSFWorkbook(in)) {
        int sheetCount = workbook.getNumberOfSheets();
        int pageCount = sheetCount;
        if (sheetCount > 0) {
            pageCount = workbook.getSheetAt(0).getRowBreaks().length + 1;
        }
        for (int sheet = 0; sheet < sheetCount; sheet++) {
            XSSFSheet aSheet = workbook.getSheetAt(sheet);
            if (null == aSheet) {
                continue;
            }
            for (int row = 0; row <= aSheet.getLastRowNum(); row++) {
                XSSFRow aRow = aSheet.getRow(row);
                if (null == aRow) {
                    continue;
                }
                for (int cell = 0; cell < aRow.getLastCellNum(); cell++) {
                    XSSFCell aCell = aRow.getCell(cell);
                    if (null != aCell) {
                        // Convert once and reuse the result.
                        String cellText = convertCell(aCell);
                        if (cellText.length() > 0) {
                            content.append(cellText);
                        }
                    }
                    // A separator is written for every cell position, including gaps.
                    content.append(" ");
                }
            }
        }
        HashMap<String,Object> map = new HashMap<>();
        map.put("text", content.toString());
        map.put("pageCount", pageCount);
        return map;
    }
}
/**
 * Reads a plain-text stream completely and returns its content as a string.
 *
 * <p>The bytes are decoded once, after the whole stream has been read, using the platform's
 * default charset. Reading is best-effort: an I/O error while reading is printed and the
 * content read up to that point is returned.
 *
 * @param filePath stream over the text content (a stream, despite the parameter name); it is
 *                 always closed by this method
 * @return the decoded content; empty if nothing could be read
 * @throws Exception if closing the stream fails
 */
public String getTextFromTxt(InputStream filePath) throws Exception {
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    try {
        // Use a fixed-size buffer: available() may legitimately be 0 while data is still
        // pending, and reading into a zero-length array would return 0 forever.
        byte[] buffer = new byte[8192];
        int len;
        while ((len = filePath.read(buffer)) != -1) {
            collected.write(buffer, 0, len);
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (filePath != null) {
            filePath.close();
        }
    }
    // Decode in one go so multi-byte characters are never split across chunk boundaries.
    return collected.toString();
}
/**
 * Serializes an object to a byte array using Java object serialization.
 *
 * @param obj the object to serialize
 * @return the serialized bytes, or {@code null} if serialization failed (the error is printed)
 */
public static byte[] toByteArray(Object obj) {
    byte[] serialized = null;
    try (ByteArrayOutputStream buffer = new ByteArrayOutputStream();
         ObjectOutputStream writer = new ObjectOutputStream(buffer)) {
        writer.writeObject(obj);
        // Push everything into the buffer before taking the snapshot.
        writer.flush();
        serialized = buffer.toByteArray();
    } catch (IOException ex) {
        ex.printStackTrace();
    }
    return serialized;
}
/**
 * Downloads the resource behind an HTTP(S) URL with a GET request.
 *
 * @param url the URL to fetch; its connection must be an {@link HttpURLConnection}
 * @return a map with "size" (the content length reported by the connection, an Integer) and
 *         "bytes" (the complete response body as a byte array)
 * @throws IOException if connecting or reading fails
 */
public static Map<String,Object> getFileByte(URL url) throws IOException {
    HttpURLConnection conn=(HttpURLConnection) url.openConnection();
    // Plain GET request.
    conn.setRequestMethod("GET");
    conn.setDoInput(true);
    // NOTE(review): no request body is ever written, so enabling output looks unnecessary — confirm before removing.
    conn.setDoOutput(true);
    // Bypass caches.
    conn.setUseCaches(false);
    conn.connect();
    ByteArrayOutputStream swapStream = new ByteArrayOutputStream();
    int size;
    // Close the response stream on every path, including read failures.
    try (InputStream inStream = conn.getInputStream()) {
        size = conn.getContentLength();
        // Read in chunks instead of one byte per call.
        byte[] chunk = new byte[8192];
        int read;
        while ((read = inStream.read(chunk)) != -1) {
            swapStream.write(chunk, 0, read);
        }
    }
    Map<String,Object> map=new HashMap<>();
    map.put("size",size);
    map.put("bytes",swapStream.toByteArray());
    return map;
}
/**
 * Renders a cell's value as trimmed text.
 *
 * <p>Numeric cells are formatted without grouping separators, string and blank cells use their
 * string value, boolean cells use "true"/"false", and error cells use their error code. Any
 * other cell type, and a null cell, yields an empty string.
 *
 * @param cell the cell to convert; may be null
 * @return the trimmed text, never null
 */
private String convertCell(Cell cell) {
    if (cell == null) {
        return "";
    }
    String raw;
    switch (cell.getCellType()) {
        case HSSFCell.CELL_TYPE_NUMERIC:
            NumberFormat plainNumbers = NumberFormat.getInstance();
            plainNumbers.setGroupingUsed(false);
            raw = plainNumbers.format(cell.getNumericCellValue());
            break;
        case HSSFCell.CELL_TYPE_STRING:
        case HSSFCell.CELL_TYPE_BLANK:
            raw = cell.getStringCellValue();
            break;
        case HSSFCell.CELL_TYPE_BOOLEAN:
            raw = Boolean.valueOf(cell.getBooleanCellValue()).toString();
            break;
        case HSSFCell.CELL_TYPE_ERROR:
            raw = String.valueOf(cell.getErrorCellValue());
            break;
        default:
            raw = "";
    }
    return raw.trim();
}
/**
 * Converts a byte count into a value/unit pair using 1024-based units from B up to PB.
 *
 * @param size the size in bytes
 * @return a map with two entries: "unit" (one of B, KB, MB, GB, TB, PB) and "value". For a size
 *         of zero the value is the Integer 0; otherwise it is a String with exactly two
 *         decimals and a '.' decimal separator regardless of the JVM's default locale
 */
public HashMap<String, Object> formatFileSize(long size) {
    HashMap<String, Object> hashMap = new HashMap<>();
    if (size == 0) {
        hashMap.put("value", 0);
        hashMap.put("unit", "B");
        return hashMap;
    }
    // Pin the symbols to Locale.ROOT: callers parse the value back with Double.valueOf, which
    // only understands '.' as the decimal separator.
    DecimalFormat df = new DecimalFormat("#.00",
            java.text.DecimalFormatSymbols.getInstance(java.util.Locale.ROOT));
    String[] units = {"B", "KB", "MB", "GB", "TB", "PB"};
    long divisor = 1L;
    int unitIndex = 0;
    // Climb one unit at a time while the size reaches the next unit's threshold; stop at PB.
    while (unitIndex < units.length - 1 && size >= divisor * 1024L) {
        divisor *= 1024L;
        unitIndex++;
    }
    hashMap.put("value", df.format((double) size / divisor));
    hashMap.put("unit", units[unitIndex]);
    return hashMap;
}
}
返回得到文件的内容及页数
最后一步就是保存到ES代码
// Always save to ES, whether or not any text was parsed from the file
saveEs(fileData, text, info);
保存ES
具体保存代码:
/**
 * Indexes a file record, together with its extracted text, into the "file_data" index.
 *
 * @param fileData the file record; its properties are copied onto the indexed document
 * @param text     the text extracted from the file
 * @param info     the login info whose name is used for the creator/updater display names
 */
public void saveEs(FileData fileData, String text, LoginInfo info) {
    FileDataEsVO esDoc = new FileDataEsVO();
    BeanUtils.copyProperties(fileData, esDoc);
    esDoc.setText(text);

    // Display names are only filled in when the corresponding person field is set.
    if (fileData.getCreatePeople() != null) {
        esDoc.setCreatePeopleName(info.getName());
    }
    if (fileData.getUpdatePeople() != null) {
        esDoc.setUpdatePeopleName(info.getName());
    }
    // The title comes from the record's group, when it has one.
    if (fileData.getGroupId() != null) {
        esDoc.setTitle(fileTitleService.getById(fileData.getGroupId()).getTitle());
    }

    // Suggestion entries: the extracted text and the file name, skipping blank ones.
    List<String> suggestions = new ArrayList<>();
    for (String candidate : new String[] {text, fileData.getFileName()}) {
        if (StringUtils.isNotBlank(candidate)) {
            suggestions.add(candidate);
        }
    }
    esDoc.setSuggestion(suggestions);

    String payload = JSON.toJSONString(esDoc);
    IndexRequest indexRequest = new IndexRequest("file_data").id(esDoc.getId().toString());
    indexRequest.source(payload, XContentType.JSON);
    try {
        client.index(indexRequest, RequestOptions.DEFAULT);
    } catch (IOException e) {
        e.printStackTrace();
    }
}