一.背景
供应商系统下载的csv文件不支持域控(主要是第三方wps服务不能对csv文件加密,但是可以对office系列产品进行权限访问的加密控制)。因此思路就改为先将csv文件转为excel文件,然后对excel文件进行加域控制。本文主要介绍如何将csv文件转为excel文件。
二.要求
- Csv文件可能比较大,达到40-60M,需要控制内存使用率;
- 考虑接口的并发,需要进行接口的限流
-
三.方案
- 采用alibaba的easyexcel,降低内存占用率,根据压测结果,设置合理的接口限流参数(限流本文不再介绍,可以使用java注解+redis+lua,或者nginx限流等)
-
四.代码
-
CsvController
-
package com.xxx.xxx.controller;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;

import javax.annotation.Resource;
import javax.servlet.http.HttpServletResponse;

import com.xxx.xxx.common.utils.EasyExcelUtil;
import com.xxx.xxx.common.utils.ObjectUtil;
import com.xxx.xxx.service.ExcelAnalysisService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.web.bind.annotation.PostMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

/**
 * REST endpoint that accepts an uploaded CSV file and streams it back as an XLSX workbook.
 *
 * @author: lgq
 * @create: 2024-04-16 11:06
 */
@Slf4j
@RestController
@RequestMapping("/csv")
public class CsvController {

    /** File-name suffix removed from the upload name to build the download name. */
    private static final String CSV_SUFFIX = ".csv";

    @Resource
    private ExcelAnalysisService excelAnalysisService;

    /**
     * Converts the uploaded CSV file to XLSX and writes the result to the HTTP response.
     *
     * <p>The conversion is delegated to {@link ExcelAnalysisService#csv2Excel}; this method
     * blocks on the returned {@link Future} so the response stream stays usable until the
     * conversion has finished. Failures are logged and not rethrown.
     *
     * @param file     the uploaded CSV file; its original file name (without a trailing
     *                 {@code .csv}) is handed to {@code EasyExcelUtil.setResponseParam}
     * @param response the servlet response whose output stream receives the workbook
     */
    @PostMapping("/uploadCsvAndImportExcel")
    public void uploadCsvAndImportExcel(@RequestParam("file") MultipartFile file, HttpServletResponse response) {
        String baseName = stripCsvSuffix(file.getOriginalFilename());
        if (ObjectUtil.isEmpty(baseName)) {
            // Nothing usable to name the download after; same silent return as before.
            log.warn("uploaded file has no usable name, skip csv -> excel conversion");
            return;
        }

        // NOTE(review): presumably sets the download headers/content type - confirm in EasyExcelUtil.
        EasyExcelUtil.setResponseParam(response, baseName);

        long startTime = System.currentTimeMillis();
        log.info("导出开始时间:{}", startTime);

        try {
            // The output stream could also be a local file, e.g. a FileOutputStream.
            OutputStream outputStream = response.getOutputStream();
            InputStream inputStream = file.getInputStream();
            Future<String> future = excelAnalysisService.csv2Excel(inputStream, outputStream);
            future.get();
        } catch (IOException ioException) {
            // Pass the exception itself so SLF4J logs the message and the stack trace.
            log.error("csv转为excel出错!", ioException);
        } catch (InterruptedException interruptedException) {
            // Restore the interrupt flag so code further up the stack can still observe it.
            Thread.currentThread().interrupt();
            log.error("csv转为excel出错!", interruptedException);
        } catch (ExecutionException executionException) {
            log.error("csv转为excel出错!", executionException);
        }

        long endTime = System.currentTimeMillis();
        log.info("导出结束时间:{}ms", endTime);
        log.info("导出所用时间:{}秒", (endTime - startTime) / 1000);
    }

    /**
     * Removes one trailing {@code .csv} (case-insensitive) from a file name.
     *
     * <p>Uses a literal suffix comparison instead of {@code String.split(".csv")}, whose
     * argument is a regular expression in which {@code .} matches any character.
     *
     * @param originalFilename the client-supplied file name, may be {@code null}
     * @return the name without the suffix, the unchanged name if it has no such suffix,
     *         or {@code null} if the input was {@code null}
     */
    private static String stripCsvSuffix(String originalFilename) {
        if (originalFilename == null) {
            return null;
        }
        int suffixStart = originalFilename.length() - CSV_SUFFIX.length();
        boolean hasSuffix = suffixStart >= 0
            && originalFilename.regionMatches(true, suffixStart, CSV_SUFFIX, 0, CSV_SUFFIX.length());
        return hasSuffix ? originalFilename.substring(0, suffixStart) : originalFilename;
    }
}
EasyExcelGeneralCsvListener
-
package com.xxx.xxx.listener;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.xxx.xxx.constants.ExcelConstants;

/**
 * Read listener that forwards rows parsed from a CSV source to an {@link ExcelWriter}
 * in batches of {@code ExcelConstants.PER_WRITE_EXCEL_ROW_COUNT} rows.
 *
 * @author: lgq
 * @create: 2024-04-16 11:25
 */
public class EasyExcelGeneralCsvListener extends AnalysisEventListener<Map<Integer, String>> {

    /** Rows read so far that have not yet been written to the workbook. */
    private List<Map<Integer, String>> dataList = new ArrayList<>();

    private ExcelWriter excelWriter;

    private WriteSheet writeSheet;

    public EasyExcelGeneralCsvListener() {
    }

    public EasyExcelGeneralCsvListener(ExcelWriter excelWriter, WriteSheet writeSheet) {
        this.excelWriter = excelWriter;
        this.writeSheet = writeSheet;
    }

    /**
     * Buffers one parsed row and writes the buffer out once it has reached the batch size.
     */
    @Override
    public void invoke(Map<Integer, String> data, AnalysisContext context) {
        dataList.add(data);
        if (dataList.size() < ExcelConstants.PER_WRITE_EXCEL_ROW_COUNT) {
            return;
        }
        save2Excel();
        // Drop the written rows so they become eligible for garbage collection.
        dataList.clear();
    }

    /**
     * Copies the header cells, by column index from 0, into the target sheet's head definition.
     */
    @Override
    public void invokeHeadMap(Map<Integer, String> headers, AnalysisContext context) {
        List<List<String>> head = new ArrayList<>();
        int column = 0;
        while (column < headers.size()) {
            head.add(Collections.singletonList(headers.get(column)));
            column++;
        }
        this.writeSheet.setHead(head);
    }

    /**
     * Writes all currently buffered rows to the workbook; does nothing when the buffer is empty.
     */
    private void save2Excel() {
        if (dataList.isEmpty()) {
            return;
        }
        List<List<String>> rows = new ArrayList<>();
        for (Map<Integer, String> source : dataList) {
            rows.add(toRow(source));
        }
        this.excelWriter.write(rows, writeSheet);
    }

    /**
     * Flattens one index-keyed row map into a list ordered by column index from 0.
     */
    private List<String> toRow(Map<Integer, String> source) {
        List<String> cells = new ArrayList<>();
        for (int column = 0; column < source.size(); column++) {
            cells.add(source.get(column));
        }
        return cells;
    }

    /**
     * Called after all data has been parsed; writes whatever is left in the buffer.
     */
    @Override
    public void doAfterAllAnalysed(AnalysisContext context) {
        save2Excel();
        dataList.clear();
    }
}
VisiableThreadPoolTaskExecutor
-
package com.xxx.xxx.task;

import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;

import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;
import org.springframework.util.concurrent.ListenableFuture;

/**
 * {@link ThreadPoolTaskExecutor} that logs pool statistics (task count, completed count,
 * active count, queue size) every time a task is handed to it.
 *
 * @author: lgq
 * @create: 2024-04-17 10:52
 */
@Slf4j
public class VisiableThreadPoolTaskExecutor extends ThreadPoolTaskExecutor {

    /**
     * Logs the current counters of the underlying pool.
     *
     * @param action label identifying which submission method triggered the log line
     */
    private void logPoolState(String action) {
        ThreadPoolExecutor pool = getThreadPoolExecutor();
        if (pool != null) {
            log.info("{}, {},taskCount [{}], completedTaskCount [{}], activeCount [{}], queueSize [{}]",
                this.getThreadNamePrefix(),
                action,
                pool.getTaskCount(),
                pool.getCompletedTaskCount(),
                pool.getActiveCount(),
                pool.getQueue().size());
        }
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public void execute(Runnable task) {
        logPoolState("1. do execute");
        super.execute(task);
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public void execute(Runnable task, long startTimeout) {
        logPoolState("2. do execute");
        super.execute(task, startTimeout);
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public Future<?> submit(Runnable task) {
        logPoolState("1. do submit");
        return super.submit(task);
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public <T> Future<T> submit(Callable<T> task) {
        logPoolState("2. do submit");
        return super.submit(task);
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public ListenableFuture<?> submitListenable(Runnable task) {
        logPoolState("1. do submitListenable");
        return super.submitListenable(task);
    }

    /** Logs the pool state, then delegates to the parent implementation. */
    @Override
    public <T> ListenableFuture<T> submitListenable(Callable<T> task) {
        logPoolState("2. do submitListenable");
        return super.submitListenable(task);
    }
}
ExcelAnalysisService
-
package com.xxx.xxx.service;

import java.io.InputStream;
import java.io.OutputStream;
import java.util.concurrent.Future;

/**
 * Service for analysing and converting Excel-family documents.
 *
 * @author: lgq
 * @create: 2024-04-17 11:42
 */
public interface ExcelAnalysisService {

    /**
     * Converts a CSV document into an Excel document.
     *
     * @param inputStream  source of the CSV content
     * @param outputStream destination that receives the generated Excel content
     * @return a future representing the conversion task
     */
    Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream);
}
ExcelAnalysisServiceImpl
-
package com.xxx.xxx.service.impl;

import java.io.Closeable;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.Future;

import com.alibaba.excel.EasyExcel;
import com.alibaba.excel.ExcelWriter;
import com.alibaba.excel.support.ExcelTypeEnum;
import com.alibaba.excel.write.metadata.WriteSheet;
import com.xxx.xxx.listener.EasyExcelGeneralCsvListener;
import com.xxx.xxx.service.ExcelAnalysisService;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.AsyncResult;
import org.springframework.stereotype.Service;

/**
 * Default {@link ExcelAnalysisService} implementation based on EasyExcel.
 *
 * @author: lgq
 * @create: 2024-04-17 14:53
 */
@Service
@Slf4j
public class ExcelAnalysisServiceImpl implements ExcelAnalysisService {

    /**
     * Reads UTF-8 CSV content from {@code inputStream} and writes it as an XLSX workbook to
     * {@code outputStream}. Rows are forwarded by an {@link EasyExcelGeneralCsvListener}.
     *
     * <p>Annotated to run on the executor bean named {@code asyncExcelAnalysisServiceExecutor}.
     * Both streams are closed before the method returns, whether or not the conversion succeeded.
     *
     * @param inputStream  source of the CSV content
     * @param outputStream destination of the generated workbook
     * @return a future that yields {@code "task complete!"} on success; if the conversion
     *         failed, {@link Future#get()} throws an {@code ExecutionException} wrapping the
     *         cause instead of reporting success
     */
    @Async("asyncExcelAnalysisServiceExecutor")
    @Override
    public Future<String> csv2Excel(InputStream inputStream, OutputStream outputStream) {
        ExcelWriter writer = null;
        boolean finishAttempted = false;
        Exception failure = null;
        try {
            writer = EasyExcel.write(outputStream).excelType(ExcelTypeEnum.XLSX).build();
            EasyExcel.read(inputStream, new EasyExcelGeneralCsvListener(writer, new WriteSheet()))
                .excelType(ExcelTypeEnum.CSV)
                .charset(StandardCharsets.UTF_8)
                .sheet()
                .doRead();
            // Mark before calling so a failing finish() is not retried in the finally block.
            finishAttempted = true;
            writer.finish();
            outputStream.flush();
        } catch (Exception e) {
            // Pass the exception itself so the message and stack trace reach the log.
            log.error("csv转为excel出错!", e);
            failure = e;
        } finally {
            if (writer != null && !finishAttempted) {
                // The read failed before finish() was reached: still finish the writer so it
                // is not left open.
                try {
                    writer.finish();
                } catch (Exception e) {
                    log.error("csv转为excel出错!", e);
                }
            }
            closeQuietly(outputStream, "outputStream.close() -> csv转为excel出错!");
            closeQuietly(inputStream, "inputStream.close() -> csv转为excel出错!");
        }

        if (failure != null) {
            // Surface the failure to whoever waits on the future instead of claiming success.
            return AsyncResult.forExecutionException(failure);
        }
        return new AsyncResult<>("task complete!");
    }

    /**
     * Closes a resource, logging (not propagating) any failure.
     *
     * @param resource       the resource to close, may be {@code null}
     * @param failureMessage message logged together with the exception if closing fails
     */
    private static void closeQuietly(Closeable resource, String failureMessage) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (Exception e) {
            log.error(failureMessage, e);
        }
    }
}
ExecutorConfig
-
package com.xxx.xxx.config;

import java.util.concurrent.Executor;
import java.util.concurrent.ThreadPoolExecutor;

import com.xxx.xxx.task.VisiableThreadPoolTaskExecutor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableAsync;
import org.springframework.scheduling.concurrent.ThreadPoolTaskExecutor;

/**
 * Thread pool configuration for the asynchronous Excel analysis service.
 *
 * @author: lgq
 * @create: 2024-04-17 10:28
 */
@Configuration
@Slf4j
@EnableAsync
public class ExecutorConfig {

    /** Core pool size: number of available processors plus one. */
    private static final int CORE_POOL_SIZE = Runtime.getRuntime().availableProcessors() + 1;

    /** Maximum pool size: number of available processors plus one. */
    private static final int MAX_POOL_SIZE = Runtime.getRuntime().availableProcessors() + 1;

    /** Capacity of the task queue. */
    private static final int QUEUE_CAPACITY = 100;

    /** Prefix of the names given to the pool's threads. */
    private static final String THREAD_NAME_PREFIX = "ExcelAnalysis";

    /**
     * Builds, initializes and exposes the executor bean named
     * {@code asyncExcelAnalysisServiceExecutor}.
     *
     * @return the initialized executor
     */
    @Bean(name = "asyncExcelAnalysisServiceExecutor")
    public Executor asyncExcelServiceExecutor() {
        log.info("start asyncExcelAnalysisServiceExecutor----------------");

        // Use the subclass that logs pool statistics on every submission.
        final ThreadPoolTaskExecutor pool = new VisiableThreadPoolTaskExecutor();
        pool.setThreadNamePrefix(THREAD_NAME_PREFIX);
        pool.setCorePoolSize(CORE_POOL_SIZE);
        pool.setMaxPoolSize(MAX_POOL_SIZE);
        pool.setQueueCapacity(QUEUE_CAPACITY);
        // Rejection policy: CALLER_RUNS executes a rejected task on the submitting thread
        // instead of on a pool thread.
        pool.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        pool.initialize();

        log.info("end asyncExcelAnalysisServiceExecutor------------");
        return pool;
    }
}
ExcelConstants
-
package com.xxx.xxx.constants; /** * description:线程池配置类 * * @author: lgq * @create: 2024-04-17 10:28 */ public class ExcelConstants { public static final Integer PER_SHEET_ROW_COUNT = 100*10000; public static final Integer PER_WRITE_ROW_COUNT = 20*10000; public static final Integer PER_WRITE_EXCEL_ROW_COUNT = 2 * 1000; public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_JDBC = 10*10000; public static final Integer GENERAL_ONCE_SAVE_TO_DB_ROWS_MYBATIS = 5*10000; }
配置文件
-
spring:
  servlet:
    multipart:
      enabled: true
      max-file-size: 100MB     # maximum size of a single uploaded file
      max-request-size: 100MB  # maximum total size of the files in one upload
pom依赖
-
<!-- Alibaba EasyExcel, used to read the CSV input and write the XLSX output -->
<dependency>
    <groupId>com.alibaba</groupId>
    <artifactId>easyexcel</artifactId>
    <version>3.3.2</version>
</dependency>
-
五.压测
- jvm参数(本地电脑,性能较差)
- -Xms2g -Xmx2g
- 导出日志
性能监控
压测结果