python_使用多进程来处理数据写入Excel文件
优势:与多线程相比,多进程写入速度要更快,12万多行数据处理用时3.52秒,比多线程快了1秒左右。
import pandas as pd
from io import BytesIO
import multiprocessing
import time
import math
# Sample data set for the benchmark: 120,001 rows and three integer columns
# (A, B, C), each holding the sequence 0..120000.
data = pd.DataFrame({column: range(120001) for column in ('A', 'B', 'C')})
def write_to_excel(df, filename):
    """Save ``df`` as an Excel workbook at ``filename``, omitting the index column.

    Args:
        df: The DataFrame to export.
        filename: Destination path of the workbook.
    """
    excel_writer = pd.ExcelWriter(filename)
    # Leaving the ``with`` block closes the writer, which saves the workbook.
    with excel_writer:
        df.to_excel(excel_writer, index=False)
def process_write(df, filename):
    """Start a child process that writes ``df`` to ``filename`` and return it.

    The process is returned already started and not yet joined; the caller
    is responsible for calling ``join()`` on it.

    Args:
        df: The DataFrame chunk to export.
        filename: Destination path handed to ``write_to_excel``.

    Returns:
        The started ``multiprocessing.Process``.
    """
    worker = multiprocessing.Process(
        target=write_to_excel,
        args=(df, filename),
    )
    worker.start()
    return worker
def test_multiprocessing(data, split_size=30000):
    """Export ``data`` to several Excel files in parallel and print the elapsed time.

    The table is cut into consecutive chunks of at most ``split_size`` rows.
    Each chunk is passed to ``process_write``, which starts one child process
    per chunk; this function then waits for all of them and prints the
    wall-clock time spent between starting the first process and joining the
    last. Output files are named ``process_<i>.xlsx`` in the hard-coded output
    directory, where ``<i>`` is the zero-based chunk index.

    Args:
        data: Sliceable table supporting ``len()`` (a pandas DataFrame in this
            script).
        split_size: Maximum number of rows per output file. Defaults to 30000.

    Raises:
        ValueError: If ``split_size`` is less than 1.
    """
    if split_size < 1:
        raise ValueError(f"split_size must be a positive integer, got {split_size!r}")

    # Split the data. The number of chunks (and therefore processes) is taken
    # from the actual length of ``data`` rather than a fixed row count, so the
    # chunk list and the file-name list always line up — including for empty
    # input, which simply starts no processes.
    data_splits = [data[i:i + split_size] for i in range(0, len(data), split_size)]
    filenames = [f'D:\\desktop\\new\\process_{i}.xlsx' for i in range(len(data_splits))]

    start_time = time.time()
    # Start every writer first so they run concurrently, then wait for all.
    processes = [
        process_write(chunk, filename)
        for chunk, filename in zip(data_splits, filenames)
    ]
    for process in processes:
        process.join()
    print(f"Multiprocessed execution took {time.time() - start_time:.2f} seconds")
# Script entry point. The guard keeps the benchmark from running when this
# module is imported instead of executed directly (multiprocessing may
# re-import the main module in child processes under the "spawn" start method).
if __name__ == '__main__':
    test_multiprocessing(data)