python-docx把dataframe表格添加到word文件中思路较为简单:
- 先把 `dataframe` 格式转变为 `table`
- 新建一个段落:`document.add_paragraph()`
- 把 `table` 添加到这个段落下方
效果图
示例代码
from docx import Document, oxml
import pandas as pd
import numpy as np
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
import random
def _insert_before_successors(parent, child, successor_tags):
    """Insert ``child`` into ``parent`` ahead of the first successor tag present.

    Falls back to appending when none of ``successor_tags`` exists under
    ``parent``. Used to keep children in their schema-defined sequence.
    """
    for successor in successor_tags:
        anchor = parent.find(qn(successor))
        if anchor is not None:
            anchor.addprevious(child)
            return
    parent.append(child)


# Set the borders of a table; usage mirrors the per-cell border helper.
def set_table_boarder(table, **kwargs):
    """
    Set the table's borders.

    Each keyword argument names an edge (``top``, ``left``, ``bottom``,
    ``right``, ``insideH``, ``insideV``) and maps to a dict that may contain
    the keys ``sz``, ``val``, ``color``, ``space`` and ``shadow``. Edges that
    are missing or falsy are left untouched.

    Usage:
    set_table_boarder(
        table,
        top={"sz": 12, "val": "single", "color": "#FF0000"},
        bottom={"sz": 12, "color": "#00FF00", "val": "single"},
        left={"sz": 24, "val": "dashed"},
        right={"sz": 12, "val": "dashed"},
    )

    :param table: table object exposing ``_tbl.tblPr``
    :return: the same ``table`` object, to allow chaining
    """
    # Edge elements in the sequence the w:tblBorders schema type declares.
    edge_tags = ('top', 'left', 'bottom', 'right', 'insideH', 'insideV')
    tbl_pr = table._tbl.tblPr

    # Reuse an existing w:tblBorders so repeated calls update the borders
    # instead of stacking duplicate elements under w:tblPr.
    borders = tbl_pr.find(qn('w:tblBorders'))
    if borders is None:
        borders = OxmlElement('w:tblBorders')
        # w:tblPr children are an ordered sequence; w:tblBorders must come
        # before these siblings rather than simply at the end.
        _insert_before_successors(
            tbl_pr,
            borders,
            (
                'w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
                'w:tblCaption', 'w:tblDescription', 'w:tblPrChange',
            ),
        )

    for position, tag in enumerate(edge_tags):
        edge_data = kwargs.get(tag)
        if not edge_data:
            continue
        edge = borders.find(qn(f'w:{tag}'))
        if edge is None:
            edge = OxmlElement(f'w:{tag}')
            later_edges = [f'w:{name}' for name in edge_tags[position + 1:]]
            _insert_before_successors(borders, edge, later_edges)
        for key in ("sz", "val", "color", "space", "shadow"):
            if key in edge_data:
                value = str(edge_data[key])
                if key == "color":
                    # w:color takes a bare hex RGB value (or "auto"); drop a
                    # CSS-style leading '#'.
                    value = value.lstrip('#')
                edge.set(qn(f'w:{key}'), value)
    return table
def set_table_singleBoard(table):
    """Apply a thin black single-line border to every edge of the table.

    The outer edges and the inner horizontal/vertical grid lines all get the
    same specification. Returns whatever ``set_table_boarder`` returns.
    """
    line_spec = {"sz": 4, "val": "single", "color": "#000000"}
    edge_names = ("top", "bottom", "left", "right", "insideV", "insideH")
    # Give each edge its own copy so no two keyword values share one dict.
    per_edge = {edge: dict(line_spec) for edge in edge_names}
    return set_table_boarder(table, **per_edge)
def convert_df_to_table(document, dataframe: pd.DataFrame, index_list=None, column_list=None):
    """Convert a DataFrame into a table added to the document.

    The table is created with ``document.add_table`` and every cell is filled
    with ``str(value)``. When both ``index_list`` and ``column_list`` are
    given, the top-left corner cell is left empty.

    :param document: document object; must provide ``add_table(rows=, cols=)``
    :param dataframe: the data to render
    :param index_list: labels for the leftmost column, one per data row
        (``None`` means no label column)
    :param column_list: labels for the first (header) row
        (``None`` means no header row)
    :return: the table returned by ``document.add_table``
    :raises ValueError: if ``index_list`` does not have one label per data row
    """
    n_rows, n_cols = dataframe.shape
    has_header = column_list is not None
    index_labels = None if index_list is None else list(index_list)
    if index_labels is not None and len(index_labels) != n_rows:
        raise ValueError(
            f"index_list has {len(index_labels)} items but dataframe has {n_rows} rows"
        )

    # One extra row for the header and one extra column for the row labels.
    row_offset = 1 if has_header else 0
    col_offset = 0 if index_labels is None else 1
    table = document.add_table(rows=n_rows + row_offset, cols=n_cols + col_offset)

    if has_header:
        header_cells = table.rows[0].cells
        for position, label in enumerate(column_list):
            header_cells[col_offset + position].text = str(label)

    # itertuples keeps each column's own dtype; iterrows would upcast a mixed
    # int/float row to float and render integers as "1.0".
    records = dataframe.itertuples(index=False, name=None)
    for row_number, record in enumerate(records):
        row_cells = table.rows[row_offset + row_number].cells
        if index_labels is not None:
            row_cells[0].text = str(index_labels[row_number])
        for position, value in enumerate(record):
            row_cells[col_offset + position].text = str(value)
    return table
def main():
    """Build a small random DataFrame and write it into a Word file as a bordered table."""

    def random_column():
        # Three random values in [0, 1), rounded to two decimals.
        return [round(random.random(), 2) for _ in range(3)]

    # 1. Arrange the data to insert as a DataFrame.
    frame = pd.DataFrame({
        "列1": random_column(),
        "列2": random_column(),
        "列3": random_column(),
    })
    doc = Document()

    # 2. Create the table, give it borders, then move it below a new paragraph.
    header = frame.columns.tolist()
    bordered = set_table_singleBoard(
        convert_df_to_table(doc, frame, column_list=header)
    )
    anchor_paragraph = doc.add_paragraph("下面插入表格:")
    anchor_paragraph._p.addnext(bordered._tbl)

    # 3. Save the resulting document.
    doc.save('测试_添加表格.docx')


if __name__ == '__main__':
    main()
上述代码会得到如下效果图: