excel导入/导出导致内存溢出问题,以及解决方案
- 1、内存溢出问题
- 导入功能重新修正,采用SAX的流式解析数据。并结合业务流程。
- 导出功能:由于精细化了业务流程,实际代码比较冗杂,这里只放出最简单的案例。
1、内存溢出问题
dump日志查看
org.apache.xmlbeans.impl.store.Xobj$AttrXobj 占用了大量的内存
模拟复现
原因查明:XSSF 会把 excel 的全部内容加载到内存中并且一直不释放,从而导致内存溢出;无论导入还是导出,原理都是一样的。
导入功能重新修正,采用SAX的流式解析数据。并结合业务流程。
难点应该在对不同类型数据的处理。这需要使用到StylesTable styles;对数据格式进行处理。例如时间,浮点
excel数据是 分别是
文本日期, 日期,
文本数字,浮点数字,
数字,文本,
公式=A4+1,文本浮点
最终导出结果 符合预期
package com.bookm.service.excel3;
import com.alibaba.fastjson.JSONObject;
import com.bookm.bean.ImportColumnInfo;
import com.bookm.bean.ImportStatusDTO;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
/**
* 数据处理
*/
public class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private StylesTable styles;
private String lastContents;
private boolean nextIsString;
private String cellPosition;
private int cellStyleIndex = -1;
private LinkedHashMap<String, JSONObject> rowContents = new LinkedHashMap();
private HashMap<String, ImportColumnInfo> icHash = new HashMap<>();
private HashMap<String, String> columnMap = new HashMap<>();
private DataFormatter dataFormatter = new DataFormatter();
protected ImportStatusDTO imp_status = new ImportStatusDTO();
public SheetHandler(SharedStringsTable sst, StylesTable styles) {
this.sst = sst;
this.styles = styles;
setImportColumn(icHash);
}
public LinkedHashMap<String, JSONObject> getRowContents() {
return rowContents;
}
public void setRowContents(LinkedHashMap<String, JSONObject> rowContents) {
this.rowContents = rowContents;
setImportColumn(icHash);
}
public SheetHandler(SharedStringsTable sst) {
this.sst = sst;
setImportColumn(icHash);
}
// 其他方法如setImportColumn、isExistProperty、isExistColumn保持不变
public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {
if (name.equals("c")) {
cellPosition = attributes.getValue("r");
String cellType = attributes.getValue("t");
String cellStyleStr = attributes.getValue("s");
//日期类型 cellType=null,cellStyleStr=4
//文本 数字/浮点类型 cellType=s,cellStyleStr=1
//浮点/数字类型 cellType=null,cellStyleStr=3
//公式类型 cellType=null,cellStyleStr=null
cellStyleIndex = cellStyleStr != null ? Integer.parseInt(cellStyleStr) : -1;
nextIsString = "s".equals(cellType);
}
lastContents = "";
}
public void endElement(String uri, String localName, String name) throws SAXException {
if (name.equals("v")) {
String value = lastContents.trim();
if (nextIsString) {
try {
int idx = Integer.parseInt(value);
value = new XSSFRichTextString(sst.getEntryAt(idx)).toString();
} catch (NumberFormatException e) {
// 保持原值
}
} else {
if (styles != null && cellStyleIndex != -1) {
try {
CellStyle style = styles.getStyleAt(cellStyleIndex);
short formatIndex = style.getDataFormat();
String formatString = style.getDataFormatString();
double numericValue = Double.parseDouble(value);
if (DateUtil.isADateFormat(formatIndex, formatString)) {
if (DateUtil.isValidExcelDate(numericValue)) {
Date date = DateUtil.getJavaDate(numericValue, false);
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
value = sdf.format(date);
}
} else {
value = dataFormatter.formatRawCellContents(numericValue, formatIndex, formatString);
}
} catch (NumberFormatException e) {
// 非数值,保持原样
}
}
}
// 处理列映射和数据存储
String positionAlpha = cellPosition.replaceAll("[0-9]", "").trim();
String positionNum = cellPosition.replaceAll("[^0-9]", "").trim();
if ("1".equals(positionNum)) {
if (!isExistProperty(value, icHash)) {
imp_status.imp_rows_nomatch += value + ",";
}
for (String key : icHash.keySet()) {
ImportColumnInfo ic = icHash.get(key);
if (ic.ici_xls_title.equalsIgnoreCase(value)) {
columnMap.put(positionAlpha, ic.ici_column);
break;
}
}
} else if (isExistColumn(positionAlpha, columnMap)) {
JSONObject rowData = rowContents.getOrDefault(positionNum, new JSONObject());
rowData.put(columnMap.get(positionAlpha), value);
rowContents.put(positionNum, rowData);
}
}
}
public void characters(char[] ch, int start, int length) {
lastContents += new String(ch, start, length);
}
/**
* 当前的列信息,是否在配置表内
*
* @param clm
* @param iciHash
* @return
*/
protected boolean isExistColumn(String clm, HashMap<String, String> iciHash) {
return iciHash.get(clm) == null ? false : true;
}
/**
* 当前的第一行的名称是否在我的配置表里面 name : name
*
* @param clm
* @param iciHash
* @return
*/
protected boolean isExistProperty(String clm, HashMap<String, ImportColumnInfo> iciHash) {
for (String key : iciHash.keySet()) {
ImportColumnInfo ic = iciHash.get(key);
if (ic.ici_xls_title.equalsIgnoreCase(clm)) {
return true;
}
}
return false;
}
// todo 为了结合过去的接口 ,创建handler必须实现这个接口
void setImportColumn(HashMap<String, ImportColumnInfo> icHash) {
icHash = new HashMap<>();
ImportColumnInfo impinfo = null;
// List<GenColumn> genColumnList = getGenColumnByTabName(tableName);
impinfo = new ImportColumnInfo();
impinfo.ici_column = "type";
impinfo.ici_xls_title = "type";
impinfo.ici_xls_type = "String";
icHash.put(impinfo.ici_column, impinfo);
impinfo = new ImportColumnInfo();
impinfo.ici_column = "name";
impinfo.ici_xls_title = "name";
impinfo.ici_xls_type = "String";
icHash.put(impinfo.ici_column, impinfo);
this.icHash = icHash;
}
}
package com.bookm.service.excel3;
import cn.hutool.json.JSONUtil;
import com.alibaba.fastjson.JSONObject;
import com.bookm.bean.Book;
import com.bookm.bean.ImportColumnInfo;
import com.bookm.service.excel2.SheetHandler;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.gson.Gson;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Streaming (SAX based) reader for large .xlsx files; the original author targets files
 * with more than 80,000 rows. Each sheet is turned into a map of row number to row data
 * without loading the whole workbook. Read-only.
 *
 * <p>{@link #processOneSheet(String)} and {@link #processAllSheets(String)} publish their
 * result through {@link #getRowContents()}; {@link #processSheetByRId(InputStream, Integer)}
 * returns one map per sheet.
 *
 * <p>NOTE(review): results and the last handler are kept in static fields, so concurrent
 * imports would overwrite each other's published state.
 */
public class ExcelUtil {
    private static LinkedHashMap<String, JSONObject> rowContents = new LinkedHashMap<>();
    public static SheetHandler sheetHandler;

    /** @return the rows published by the most recent {@code processOneSheet}/{@code processAllSheets} call */
    public LinkedHashMap<String, JSONObject> getRowContents() {
        return rowContents;
    }

    /** @param rc the rows to publish as the current result */
    public static void setRowContents(LinkedHashMap<String, JSONObject> rc) {
        rowContents = rc;
    }

    /** @return the handler created by the most recent {@link #fetchSheetParser} call */
    public SheetHandler getSheetHandler() {
        return sheetHandler;
    }

    /** @param sh the handler to expose through {@link #getSheetHandler()} */
    public static void setSheetHandler(SheetHandler sh) {
        sheetHandler = sh;
    }

    /**
     * Parses the first {@code count} sheets, addressed by the relationship ids
     * {@code rId1 .. rId<count>}, each with its own handler.
     *
     * <p>NOTE(review): this presumes the workbook's sheet relationships are numbered
     * {@code rId1..rIdN}; confirm for files not written by Excel.
     *
     * @param in    stream of the .xlsx file; not closed by this method
     * @param count number of sheets to read
     * @return one row map per sheet, in sheet order
     * @throws Exception if the package cannot be opened or a sheet cannot be parsed
     */
    public static List<LinkedHashMap<String, JSONObject>> processSheetByRId(InputStream in, Integer count) throws Exception {
        int sheetCount = Objects.requireNonNull(count, "count");
        List<LinkedHashMap<String, JSONObject>> results = new ArrayList<>();
        try (OPCPackage pkg = OPCPackage.open(in)) {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            StylesTable stylesTable = r.getStylesTable();
            for (int i = 0; i < sheetCount; i++) {
                // Close every sheet stream, not only the last one that was opened.
                try (InputStream sheet = r.getSheet("rId" + (i + 1))) {
                    HashMap<String, ImportColumnInfo> columns = setImportColumn(new HashMap<>());
                    XMLReader parser = fetchSheetParser(sst, stylesTable, columns);
                    parser.parse(new InputSource(sheet));
                    results.add(handlerOf(parser).getRowContents());
                }
            }
        }
        return results;
    }

    /**
     * Parses the sheet with relationship id {@code rId1} and publishes its rows through
     * {@link #getRowContents()}.
     *
     * @param filename path of the .xlsx file
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void processOneSheet(String filename) throws Exception {
        try (OPCPackage pkg = OPCPackage.open(filename)) {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            StylesTable stylesTable = r.getStylesTable();
            HashMap<String, ImportColumnInfo> columns = setImportColumn(new HashMap<>());
            XMLReader parser = fetchSheetParser(sst, stylesTable, columns);
            try (InputStream sheet = r.getSheet("rId1")) {
                parser.parse(new InputSource(sheet));
            }
            setRowContents(handlerOf(parser).getRowContents());
        }
    }

    /**
     * Parses every sheet of the workbook with one shared handler and publishes the
     * accumulated rows through {@link #getRowContents()}.
     *
     * <p>Because a single handler is reused, its rows accumulate across sheets.
     * NOTE(review): how rows with the same row number on different sheets combine depends
     * on the handler implementation — confirm before relying on multi-sheet results.
     *
     * @param filename path of the .xlsx file
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void processAllSheets(String filename) throws Exception {
        try (OPCPackage pkg = OPCPackage.open(filename)) {
            XSSFReader r = new XSSFReader(pkg);
            SharedStringsTable sst = r.getSharedStringsTable();
            StylesTable stylesTable = r.getStylesTable();
            HashMap<String, ImportColumnInfo> columns = setImportColumn(new HashMap<>());
            XMLReader parser = fetchSheetParser(sst, stylesTable, columns);
            Iterator<InputStream> sheets = r.getSheetsData();
            while (sheets.hasNext()) {
                System.out.println("Processing new sheet:\n");
                try (InputStream sheet = sheets.next()) {
                    parser.parse(new InputSource(sheet));
                }
            }
            // The parsed rows were previously never published by this method.
            setRowContents(handlerOf(parser).getRowContents());
        }
    }

    /**
     * Creates an XML reader whose content handler is a new {@link SheetHandler}; the handler
     * is also stored via {@link #setSheetHandler(SheetHandler)}.
     *
     * <p>The reader comes from the JAXP {@code SAXParserFactory} rather than a hard-coded
     * JDK-internal parser class, and DOCTYPE declarations are rejected because worksheet
     * XML never needs them and the workbook may come from an untrusted upload.
     *
     * @param sst                 shared-strings table of the workbook
     * @param sharedStringsTable  despite its name, the workbook's <em>styles</em> table
     * @param importNeedColumnMap import-column configuration handed to the handler
     * @return a namespace-aware reader ready to parse one sheet stream
     * @throws SAXException if no parser can be created or the DOCTYPE restriction is unsupported
     */
    public static XMLReader fetchSheetParser(SharedStringsTable sst, StylesTable sharedStringsTable, HashMap importNeedColumnMap) throws SAXException {
        XMLReader parser;
        try {
            javax.xml.parsers.SAXParserFactory factory = javax.xml.parsers.SAXParserFactory.newInstance();
            // Same namespace handling as the SAX2 default of the reader used before.
            factory.setNamespaceAware(true);
            factory.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
            parser = factory.newSAXParser().getXMLReader();
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            throw new SAXException("Unable to create a SAX parser for the sheet XML", e);
        }
        final SheetHandler sheetHandler = new SheetHandler(sst, sharedStringsTable);
        sheetHandler.importNeedColumnMap = importNeedColumnMap;
        setSheetHandler(sheetHandler);
        ContentHandler handler = (ContentHandler) sheetHandler;
        parser.setContentHandler(handler);
        return parser;
    }

    /** Returns the handler attached by {@link #fetchSheetParser}, avoiding a re-read of the static field. */
    private static SheetHandler handlerOf(XMLReader parser) {
        return (SheetHandler) parser.getContentHandler();
    }

    /**
     * Builds the import-column configuration. Currently a hard-coded stub with the two
     * columns {@code type} and {@code name}.
     *
     * @param icHash ignored; a fresh map is always returned
     * @return the column configuration keyed by target column name
     */
    static HashMap<String, ImportColumnInfo> setImportColumn(HashMap<String, ImportColumnInfo> icHash) {
        HashMap<String, ImportColumnInfo> columns = new HashMap<>();
        for (String column : new String[] {"type", "name"}) {
            ImportColumnInfo info = new ImportColumnInfo();
            info.ici_column = column;
            info.ici_xls_title = column;
            info.ici_xls_type = "String";
            columns.put(info.ici_column, info);
        }
        return columns;
    }

    /** Manual entry point; runs {@link #test2()}. */
    public static void main(String[] args) throws Exception {
        // test();
        test2();
    }

    /** Demo: reads the first sheet of {@code data.xlsx} and prints each row mapped to a {@code Book}. */
    public static void test() throws Exception {
        ExcelUtil example = new ExcelUtil();
        ExcelUtil.processOneSheet("data.xlsx");
        LinkedHashMap<String, JSONObject> map = example.getRowContents();
        ObjectMapper mapper = new ObjectMapper();
        List<Book> books = map.values().stream().parallel()
                .map(jsonObject -> {
                    try {
                        return mapper.readValue(jsonObject.toString(), Book.class);
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                })
                .collect(Collectors.toList());
        books.forEach(item -> System.out.println(item));
    }

    /** Demo: reads the first two sheets of {@code data.xlsx} and prints each sheet's row map. */
    public static void test2() throws Exception {
        // The input stream is owned here, so it is closed here.
        try (InputStream in = new FileInputStream(new File("data.xlsx"))) {
            final List<LinkedHashMap<String, JSONObject>> linkedHashMaps = ExcelUtil.processSheetByRId(in, 2);
            linkedHashMaps.forEach(item -> {
                System.out.println(item);
            });
        }
    }
}
导出功能:由于精细化了业务流程,实际代码比较冗杂,这里只放出最简单的案例。
package com.bookm;
import org.apache.poi.xssf.streaming.SXSSFSheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
import java.io.FileOutputStream;
import java.io.OutputStream;
@SpringBootTest
public class ExportTest {
/**
* SXSSFWorkbook : 100w条数据写入Excel 消耗时间:8706
*/
@Test
public void test1() {
try {
long t1 = System.currentTimeMillis();
SXSSFWorkbook workbook = new SXSSFWorkbook();
workbook.createSheet("aaa");
SXSSFSheet aaa = workbook.getSheetAt(0);
for (int i = 0; i < 1000000; i++) {
aaa.createRow(i);
aaa.getRow(i).createCell(0).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(1).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(2).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(3).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(4).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
}
OutputStream outputStream = null;
// 打开目的输入流,不存在则会创建
outputStream = new FileOutputStream("out.xlsx");
workbook.write(outputStream);
outputStream.close();
long t2 = System.currentTimeMillis();
System.out.println("SXSSFWorkbook : 100w条数据写入Excel 消耗时间:" + (t2 - t1));
} catch (Exception e) {
e.printStackTrace();
}
}
/**
* SXSSFWorkbook : 1w条数据写入Excel 消耗时间:2151
* SXSSFWorkbook : 100w条数据写入Excel 消耗时间:215100, 如果说组织分配
*/
@Test
public void test2() {
try {
long t1 = System.currentTimeMillis();
XSSFWorkbook workbook = new XSSFWorkbook();
workbook.createSheet("aaa");
XSSFSheet aaa = workbook.getSheetAt(0);
for (int i = 0; i < 10000; i++) {
aaa.createRow(i);
aaa.getRow(i).createCell(0).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(1).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(2).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(3).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
aaa.getRow(i).createCell(4).setCellValue("aaaaaaaaaaaaaaaaaaaaaaa");
}
OutputStream outputStream = null;
// 打开目的输入流,不存在则会创建
outputStream = new FileOutputStream("out2.xlsx");
workbook.write(outputStream);
outputStream.close();
long t2 = System.currentTimeMillis();
System.out.println("XSSFWorkbook : 100w条数据写入Excel 消耗时间:" + (t2 - t1));
} catch (Exception e) {
e.printStackTrace();
}
}
}
引用: 感谢大佬们的付出
[1]: https://blog.csdn.net/u013803955/article/details/137792819
如果有需要练手,到时候有空再把代码传git吧
似乎代码仍然是不完全的。如果有需要再传吧
仓库地址
git@gitee.com:ssm785265/12.BookM_in_export_jvm.git