EasyExcel实现100w数据导入导出
他山之石,可以攻玉
参考文章链接:百万数据的导入导出解决方案
个人功能实现github代码地址:batchExcelData-import-export
前置准备
Java17 SpringBoot3
100w数据生成SQL
CREATE TABLE `student_info` (
`id` INT ( 11 ) NOT NULL AUTO_INCREMENT,
`student_id` INT NOT NULL,
`name` VARCHAR ( 20 ) DEFAULT NULL,
`course_id` INT NOT NULL,
`class_id` INT ( 11 ) DEFAULT NULL,
`create_time` DATETIME DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
PRIMARY KEY ( `id` )
) ENGINE = INNODB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;
CREATE TABLE `course` (
`id` INT ( 11 ) NOT NULL AUTO_INCREMENT,
`course_id` INT NOT NULL,
`course_name` VARCHAR ( 40 ) DEFAULT NULL,
PRIMARY KEY ( `id` )
) ENGINE = INNODB AUTO_INCREMENT = 1 DEFAULT CHARSET = utf8;
DELIMITER //
CREATE FUNCTION rand_string ( n INT ) RETURNS VARCHAR (255) #该函数会返回一个字符串
BEGIN
DECLARE
chars_str VARCHAR ( 100 ) DEFAULT 'abcdefghijklmnopqrstuvwxyzABCDEFJHIJKLMNOPQRSTUVWXYZ';
DECLARE
return_str VARCHAR ( 255 ) DEFAULT '';
DECLARE
i INT DEFAULT 0;
WHILE i < n DO
SET return_str = CONCAT(return_str,SUBSTRING( chars_str, FLOOR(1+RAND()* 52), 1 ));
SET i = i + 1;
END WHILE;
RETURN return_str;
END //
DELIMITER;
#函数2:创建随机数函数
DELIMITER //
CREATE FUNCTION rand_num (from_num INT ,to_num INT) RETURNS INT(11)
BEGIN
DECLARE i INT DEFAULT 0;
SET i = FLOOR(from_num +RAND()*(to_num - from_num+1)) ;
RETURN i;
END //
DELIMITER ;
show variables like 'log_bin_trust_function_creators';
set global log_bin_trust_function_creators=1;
# 存储过程1:创建插入课程表存储过程
DELIMITER //
CREATE PROCEDURE insert_course( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO course (course_id, course_name ) VALUES
(rand_num(10000,10100),rand_string(6));
UNTIL i = max_num END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;
# 存储过程2:创建插入学生信息表存储过程
DELIMITER //
CREATE PROCEDURE insert_stu( max_num INT )
BEGIN
DECLARE i INT DEFAULT 0;
SET autocommit = 0; #设置手动提交事务
REPEAT #循环
SET i = i + 1; #赋值
INSERT INTO student_info (course_id, class_id ,student_id ,NAME ) VALUES
(rand_num(10000,10100),rand_num(10000,10200),rand_num(1,200000),rand_string(6));
UNTIL i = max_num
END REPEAT;
COMMIT; #提交事务
END //
DELIMITER ;
CALL insert_course(100); -- 0.038
CALL insert_stu(1000000); -- 341.03s
pom
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>com.baomidou</groupId>
<artifactId>mybatis-plus-spring-boot3-starter</artifactId>
<version>3.5.7</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.47</version>
</dependency>
<dependency>
<groupId>com.alibaba</groupId>
<artifactId>easyexcel</artifactId>
<version>3.1.0</version>
</dependency>
<!-- <dependency>-->
<!-- <groupId>org.mybatis.spring.boot</groupId>-->
<!-- <artifactId>mybatis-spring-boot-starter</artifactId>-->
<!-- <version>3.0.0</version>-->
<!-- </dependency>-->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
</dependency>
</dependencies>
yml
spring:
datasource:
url: jdbc:mysql://localhost:3306/mysql_study_db?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true
username: root
password: root123
driver-class-name: com.mysql.cj.jdbc.Driver
type: com.alibaba.druid.pool.DruidDataSource
druid:
# 初始连接数
initialSize: 5
# 最小连接池数量
minIdle: 10
# 最大连接池数量
maxActive: 20
# 配置获取连接等待超时的时间
maxWait: 60000
# 配置连接超时时间
connectTimeout: 30000
# 配置网络超时时间
socketTimeout: 60000
# 配置间隔多久才进行一次检测,检测需要关闭的空闲连接,单位是毫秒
timeBetweenEvictionRunsMillis: 60000
# 配置一个连接在池中最小生存的时间,单位是毫秒
minEvictableIdleTimeMillis: 300000
# 配置一个连接在池中最大生存的时间,单位是毫秒
maxEvictableIdleTimeMillis: 900000
# 配置检测连接是否有效
validationQuery: SELECT 1 FROM DUAL
# 增加上传文件的大小限制
servlet:
multipart:
max-file-size: 50MB
max-request-size: 50MB
mybatis-plus:
configuration:
# log-impl: org.apache.ibatis.logging.stdout.StdOutImpl # 配置MyBatis日志
mapper-locations: classpath:mapper/*.xml
server:
port: 7777
导出:
思路:不能写到一个Sheet就写多个,不能一次性读取100w就分批次读取,不能一行行写Excel就达到一定数量就写
写100w数据到excel中 导出所用时间:41秒
写10个sheet 每个10w 每次写2w
业务代码
@GetMapping("/export")
public String exportExcel(HttpServletResponse response) {
{
OutputStream outputStream = null;
try {
long startTime = System.currentTimeMillis();
System.out.println("导出开始时间:" + startTime);
outputStream = response.getOutputStream();
String fileName = new String(("excel100w.xlsx").getBytes(), "UTF-8");
// 创建ExcelWriter
// ExcelWriter excelWriter = EasyExcel.write(fileName, StudentInfo.class).build();//本地文件
ExcelWriter excelWriter = EasyExcel.write(outputStream, StudentInfo.class).build();//浏览器
//模拟统计查询的数据数量这里模拟100w
//记录总数:实际中需要根据查询条件进行统计即可
Integer totalCount = Math.toIntExact(service.findCount());
//每一个Sheet存放10w条数据
// Integer sheetDataRows = ExcelConstants.PER_SHEET_ROW_COUNT;
Integer sheetDataRows = 100000;
//每次写入的数据量2w
// Integer writeDataRows = ExcelConstants.PER_WRITE_ROW_COUNT;
Integer writeDataRows = 20000;
//计算需要的Sheet数量
Integer sheetNum = totalCount % sheetDataRows == 0 ? (totalCount / sheetDataRows) : (totalCount / sheetDataRows + 1);
//计算一般情况下每一个Sheet需要写入的次数(一般情况不包含最后一个sheet,因为最后一个sheet不确定会写入多少条数据)
Integer oneSheetWriteCount = sheetDataRows / writeDataRows;
//计算最后一个sheet需要写入的次数
Integer lastSheetWriteCount = totalCount % sheetDataRows == 0 ? oneSheetWriteCount : (totalCount % sheetDataRows % writeDataRows == 0 ? (totalCount / sheetDataRows / writeDataRows) : (totalCount / sheetDataRows / writeDataRows + 1));
//开始分批查询分次写入
//注意这次的循环就需要进行嵌套循环了,外层循环是Sheet数目,内层循环是写入次数
for (int sheetIndex = 0; sheetIndex < sheetNum; sheetIndex++) {
long s1 = System.currentTimeMillis();
log.info("sheet:{} start", sheetIndex + 1);
// 创建新的 sheet
WriteSheet writeSheet = EasyExcel.writerSheet("Sheet" + (sheetIndex + 1)).build();
//循环写入次数: j的自增条件是当不是最后一个Sheet的时候写入次数为正常的每个Sheet写入的次数,如果是最后一个就需要使用计算的次数lastSheetWriteCount
for (int j = 0; j < (sheetIndex != sheetNum - 1 ? oneSheetWriteCount : lastSheetWriteCount); j++) {
//分页查询一次2w
List<StudentInfo> stuList = service.findByPage(j + 1 + oneSheetWriteCount * sheetIndex, writeDataRows);
if (!CollectionUtils.isEmpty(stuList)) {
//写数据
excelWriter.write(stuList, writeSheet);
}
}
long s2 = System.currentTimeMillis();
log.info("sheet:{} end {} ms", sheetIndex + 1, s2 - s1);
}
// 下载EXCEL
response.setContentType("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
response.setCharacterEncoding("utf-8");
response.setHeader("Content-Disposition", "attachment;filename=" + fileName);
excelWriter.finish();
outputStream.flush();
//导出时间结束
long endTime = System.currentTimeMillis();
System.out.println("导出结束时间:" + endTime + "ms");
System.out.println("导出所用时间:" + (endTime - startTime) / 1000 + "秒");
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (outputStream != null) {
try {
outputStream.close();
} catch (Exception e) {
e.printStackTrace();
}
}
}
}
return "success";
}
结果
导出开始时间:1723191448429
2024-08-09T16:17:28.744+08:00 INFO 18552 --- [nio-7777-exec-1] com.alibaba.druid.pool.DruidDataSource : {dataSource-1} inited
2024-08-09T16:17:29.114+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:1 start
2024-08-09T16:17:32.155+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:1 end 3041 ms
2024-08-09T16:17:32.155+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:2 start
2024-08-09T16:17:34.787+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:2 end 2632 ms
2024-08-09T16:17:34.787+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:3 start
2024-08-09T16:17:37.695+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:3 end 2908 ms
2024-08-09T16:17:37.695+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:4 start
2024-08-09T16:17:40.722+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:4 end 3027 ms
2024-08-09T16:17:40.722+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:5 start
2024-08-09T16:17:44.782+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:5 end 4060 ms
2024-08-09T16:17:44.782+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:6 start
2024-08-09T16:17:50.179+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:6 end 5397 ms
2024-08-09T16:17:50.179+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:7 start
2024-08-09T16:17:56.353+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:7 end 6174 ms
2024-08-09T16:17:56.354+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:8 start
2024-08-09T16:18:02.744+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:8 end 6391 ms
2024-08-09T16:18:02.744+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:9 start
2024-08-09T16:18:09.716+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:9 end 6972 ms
2024-08-09T16:18:09.716+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:10 start
2024-08-09T16:18:16.214+08:00 INFO 18552 --- [nio-7777-exec-1] c.c.controller.StudentInfoController : sheet:10 end 6498 ms
导出结束时间:1723191507222ms
导出所用时间:58秒
ps:使用分页查询的时候要注意引入MyBatisPlus的分页插件
导入:
思路:分批读取+JDBC分批插入+手动事务控制ps:JDBC连接MySQL时,如果要使用批处理功能,需要在url 中加入参数?rewriteBatchedStatements=true
业务代码
/**
* the request was rejected because its size (45665808) exceeds the configured maximum (10485760)
* <p>
* spring.servlet.multipart.max-file-size=50MB
* spring.servlet.multipart.max-request-size=50MB
*
*
* ΔJDBC连接MySQL时,如果要使用批处理功能,需要在url 中加入参数?rewriteBatchedStatements=true
* 1 没加 ------总耗时:159188ms------
* 2 加了 43533ms
*
* 3.65倍
*
* @param file
* @return
*/
@PostMapping("/import")
public String importExcel(@RequestParam("file") MultipartFile file) {
//记录开始读取Excel时间,也是导入程序开始时间
long startReadTime = System.currentTimeMillis();
System.out.println("------开始读取Excel的Sheet时间(包括导入数据过程):" + startReadTime + "ms------");
//读取所有Sheet的数据.每次读完一个Sheet就会调用这个方法
try {
EasyExcel.read(file.getInputStream(), StudentInfo.class, new EasyExceGeneralDatalListener(service)).doReadAll();
} catch (IOException e) {
throw new RuntimeException(e);
}
long endReadTime = System.currentTimeMillis();
System.out.println("------结束读取Excel的Sheet时间(包括导入数据过程):" + endReadTime + "ms------");
System.out.println("------总耗时:" + (endReadTime-startReadTime) + "ms------");
return "success";
}
EasyExcel监听器
package com.coding.entity.listener;
import com.alibaba.excel.context.AnalysisContext;
import com.alibaba.excel.event.AnalysisEventListener;
import com.coding.entity.StudentInfo;
import com.coding.service.StudentInfoService;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
// 事件监听
public class EasyExceGeneralDatalListener extends AnalysisEventListener<StudentInfo> {
/**
* 处理业务逻辑的Service,也可以是Mapper
*/
private StudentInfoService service;
/**
* 用于存储读取的数据
*/
private List<StudentInfo> dataList = new ArrayList<StudentInfo>();
public EasyExceGeneralDatalListener() {
}
public EasyExceGeneralDatalListener(StudentInfoService service) {
this.service = service;
}
/**
* easyexcel invoke 报错 class java.util.LinkedHashMap cannot be cast to class com.coding.entity.StudentInfo (java.util.LinkedHashMap is in module java.base of loader 'bootstrap'; com.coding.entity.StudentInfo is in unnamed module of loader 'app')
EasyExcel.read(fileName, StudentInfo.class, listener).sheet().doRead();
*/
@Override
public void invoke(StudentInfo data, AnalysisContext context) {
//数据add进入集合
dataList.add(data);
//size是否为100000条:这里其实就是分批.当数据等于10w的时候执行一次插入
if (dataList.size() >= 100000) {
//存入数据库:数据小于1w条使用Mybatis的批量插入即可;
saveData();
//清理集合便于GC回收
dataList.clear();
}
}
/**
* 保存数据到DB
*
* @param
* @MethodName: saveData
* @return: void
*/
private void saveData() {
service.import2DBFromExcel10w(dataList);
dataList.clear();
}
/**
* Excel中所有数据解析完毕会调用此方法
*
* @param: context
* @MethodName: doAfterAllAnalysed
* @return: void
*/
@Override
public void doAfterAllAnalysed(AnalysisContext context) {
saveData();
dataList.clear();
}
}
JDBC批处理 事务
@Override
public void import2DBFromExcel10w(List<StudentInfo> dataList) {
//结果集中数据为0时,结束方法.进行下一次调用
if (dataList.size() == 0) {
return ;
}
//JDBC分批插入+事务操作完成对10w数据的插入
Connection conn = null;
PreparedStatement ps = null;
try {
long startTime = System.currentTimeMillis();
System.out.println(dataList.size() + "条,开始导入到数据库时间:" + startTime + "ms");
conn = JDBCDruidUtils.getConnection();
//控制事务:默认不提交
conn.setAutoCommit(false);
String sql = "insert into student_info (student_id,name,course_id,class_id,create_time) values(?,?,?,?,?)";
ps = conn.prepareStatement(sql);
//循环结果集:这里循环不支持"烂布袋"表达式
for (int i = 0; i < dataList.size(); i++) {
StudentInfo item = dataList.get(i);
ps.setInt(1, item.getStudentId());
ps.setString(2, item.getName());
ps.setInt(3, item.getCourseId());
ps.setInt(4, item.getClassId());
ps.setDate(5, new Date(System.currentTimeMillis()));
//将一组参数添加到此 PreparedStatement 对象的批处理命令中。
ps.addBatch();
}
//执行批处理
ps.executeBatch();
//手动提交事务
conn.commit();
long endTime = System.currentTimeMillis();
System.out.println(dataList.size() + "条,结束导入到数据库时间:" + endTime + "ms");
System.out.println(dataList.size() + "条,导入用时:" + (endTime - startTime) + "ms");
} catch (Exception e) {
e.printStackTrace();
} finally {
//关连接
JDBCDruidUtils.close(conn, ps);
}
}
JDBCDruidUtils
package com.coding.util;
import com.alibaba.druid.pool.DruidDataSourceFactory;
import javax.sql.DataSource;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Properties;
/**
* @description <JDBC工具类>
* @author: zhouchaoyu
* @Date: 2024-08-08
*/
//JDBC工具类
public class JDBCDruidUtils {
private static DataSource dataSource;
/*
创建数据Properties集合对象加载加载配置文件
*/
static {
Properties pro = new Properties();
//加载数据库连接池对象
try {
//获取数据库连接池对象
pro.load(JDBCDruidUtils.class.getClassLoader().getResourceAsStream("druid.properties"));
// pro.forEach((key, value) -> System.out.println(key + ": " + value));
dataSource = DruidDataSourceFactory.createDataSource(pro);
} catch (Exception e) {
e.printStackTrace();
}
}
/*
获取连接
*/
public static Connection getConnection() throws SQLException {
return dataSource.getConnection();
}
/**
* 关闭conn,和 statement独对象资源
*
* @param connection
* @param statement
* @MethodName: close
* @return: void
*/
public static void close(Connection connection, Statement statement) {
if (connection != null) {
try {
connection.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
if (statement != null) {
try {
statement.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
/**
* 关闭 conn , statement 和resultset三个对象资源
*
* @param connection
* @param statement
* @param resultSet
* @MethodName: close
* @return: void
*/
public static void close(Connection connection, Statement statement, ResultSet resultSet) {
close(connection, statement);
if (resultSet != null) {
try {
resultSet.close();
} catch (SQLException e) {
e.printStackTrace();
}
}
}
/*
获取连接池对象
*/
public static DataSource getDataSource() {
return dataSource;
}
}
druid.properties
# Database connection settings
driverClassName=com.mysql.cj.jdbc.Driver
url=jdbc:mysql://localhost:3306/mysql_study_db?useSSL=false&serverTimezone=UTC&allowPublicKeyRetrieval=true&rewriteBatchedStatements=true
username=root
password=root123
# Druid connection pool settings
initialSize=5
minIdle=5
maxActive=20
maxWait=60000
timeBetweenEvictionRunsMillis=60000
minEvictableIdleTimeMillis=300000
validationQuery=SELECT 1
testWhileIdle=true
testOnBorrow=false
testOnReturn=false
poolPreparedStatements=true
maxPoolPreparedStatementPerConnectionSize=20
结果
------开始读取Excel的Sheet时间(包括导入数据过程):1723192568422ms------
100000条,开始导入到数据库时间:1723192572571ms
2024-08-09T16:36:12.611+08:00 INFO 18552 --- [nio-7777-exec-3] com.alibaba.druid.pool.DruidDataSource : {dataSource-2} inited
100000条,结束导入到数据库时间:1723192575060ms
100000条,导入用时:2489ms
100000条,开始导入到数据库时间:1723192577309ms
100000条,结束导入到数据库时间:1723192579305ms
100000条,导入用时:1996ms
100000条,开始导入到数据库时间:1723192582523ms
100000条,结束导入到数据库时间:1723192584428ms
100000条,导入用时:1905ms
100000条,开始导入到数据库时间:1723192586686ms
100000条,结束导入到数据库时间:1723192588795ms
100000条,导入用时:2109ms
100000条,开始导入到数据库时间:1723192590817ms
100000条,结束导入到数据库时间:1723192592851ms
100000条,导入用时:2034ms
100000条,开始导入到数据库时间:1723192594977ms
100000条,结束导入到数据库时间:1723192597116ms
100000条,导入用时:2139ms
100000条,开始导入到数据库时间:1723192599090ms
100000条,结束导入到数据库时间:1723192601174ms
100000条,导入用时:2084ms
100000条,开始导入到数据库时间:1723192603465ms
100000条,结束导入到数据库时间:1723192605340ms
100000条,导入用时:1875ms
100000条,开始导入到数据库时间:1723192607375ms
100000条,结束导入到数据库时间:1723192609421ms
100000条,导入用时:2046ms
100000条,开始导入到数据库时间:1723192611420ms
100000条,结束导入到数据库时间:1723192613705ms
100000条,导入用时:2285ms
2024-08-09T16:36:53.875+08:00 INFO 18552 --- [nio-7777-exec-3] org.ehcache.core.EhcacheManager : Cache 'a1eb40dd-8471-4d69-b6c5-5b7cca3a84b6' removed from EhcacheManager.
2024-08-09T16:36:53.875+08:00 INFO 18552 --- [nio-7777-exec-3] org.ehcache.core.EhcacheManager : Cache 'a1eb40dd-8471-4d69-b6c5-5b7cca3a84b6' removed from EhcacheManager.
------结束读取Excel的Sheet时间(包括导入数据过程):1723192613880ms------
------总耗时:45458ms------
本站资源均来自互联网,仅供研究学习,禁止违法使用和商用,产生法律纠纷本站概不负责!如果侵犯了您的权益请与我们联系!
转载请注明出处: 免费源码网-免费的源码资源网站 » EasyExcel实现100w数据导入导出
发表评论 取消回复