WPSoffice环境下,需要批量替换doc文档及xls表格某些内容,在windows下,可以用VBA宏实现,先建一个标签表格,然后按标签批量替换。但在Linux下,WPS表格宏不能跨文档操作WPS文字,于是想用python实现。
1、标签文件与模板文件格式:
标签文件A列为标签名,B列为值。
模板文件关键词用#包围,如下:
2、基本功能
读入标签内容,然后打开模板文件,替换关键词为标签值,保存到新文件。
用到的python库:xlrd、xlutils、tkinter。读入的标签及文件列表使用Treeview展示,并提供修改、增加项、删除项功能。
3、几个重要代码片段
3.1 读xls内容
# 读入标签表
# 打开一个 Excel 文件
workbook = xlrd.open_workbook(fname)
# 通过索引获取第一个工作表
sheet = workbook.sheet_by_index(0)
da = []
# 遍历所有行和列输出
for i in range(sheet.nrows):
# 键值分布在1、2列
key = str(sheet.cell_value(i, 0)).replace(' ', '') # 删除空格
val = sheet.cell_value(i, 1)
date_format = sheet.cell_type(i, 1)
# 日期类型
if date_format == xlrd.XL_CELL_DATE:
date_tuple = xlrd.xldate_as_tuple(val, workbook.datemode)
val = '' + str(date_tuple[0]) + "年" + str(date_tuple[1]) + "月" + str(date_tuple[2]) + "日"
# 文本类型
if date_format == xlrd.XL_CELL_TEXT:
val = '' + sheet.cell_value(i, 1)
if str(key) != '':
da.append((key, val))
# print(da)
workbook.release_resources()
del workbook
3.2 替换docx文档关键词
def procdocx(self, fname, lbitems, foutname):
document = Document(fname) # 打开文档
for paragraph in document.paragraphs:
runs = paragraph.runs # 得到所有run
for i, run in enumerate(runs):
key = run.text
# 当前run不包含key值
if key.count('#') == 0:
continue
counter = i # 记录起始位置
if key.count('#') == 1: # 如先导符与key值分在多个run中
while counter < len(runs): # 将先导符与后续run组成key后再查找
counter = counter + 1
if counter >= len(runs):
break
key += runs[counter].text
tmp = runs[counter].text
runs[counter].clear() # 清空当前runs,本算法会改变文档内容格式
if tmp.count('#') > 0: # 注:如果有多个连续#,且第二个key分在多个run中,会有可能出现替换错误
break
# 开始替换
for item in lbitems:
key1 = '#' + self.labeltree.item(item)['values'][0].replace(' ', '') + '#'
if key.find(key1) >= 0:
# for j in range(i, counter + 1):
# runs[j].clear()
key = key.replace(key1, self.labeltree.item(item)['values'][1])
runs[i].text = key
# 另存到输出文件夹
document.save(foutname)
3.3 替换xls表格关键词
def procxls(self, fname, lbitems, foutname):
modxls = xlrd.open_workbook(fname, formatting_info=True)
# 将操作文件对象拷贝,变成可写
newxls = copy(modxls)
# 获取所有sheet页及数量
sheets = modxls.sheet_names()
for shn in range(len(sheets)):
# 通过索引获取第shn个工作表
modsheet = modxls.sheet_by_index(shn)
newsheet = newxls.get_sheet(shn)
# 遍历mod所有行和列输出
for i in range(modsheet.nrows):
for j in range(modsheet.ncols):
val = modsheet.cell_value(i, j)
val_format = modsheet.cell_type(i, j)
if val_format != xlrd.XL_CELL_TEXT:
continue
# 开始替换
for item in lbitems:
key1 = '#' + self.labeltree.item(item)['values'][0].replace(' ', '') + '#'
if val.find(key1) >= 0:
newval = val.replace(key1, self.labeltree.item(item)['values'][1])
newsheet.write(i, j, newval)
newxls.save(foutname)
3.4 Treeview代码
1) 创建一个treeview列表
# 创建 self.labeltree 并添加到 self.mainframe 中
columns1 = ("label", "value")
self.labeltree = ttk.Treeview(frame51, show="headings", columns=columns1)
self.labeltree.column('label', width=100, anchor='w')
self.labeltree.heading('label', text="标签名")
self.labeltree.column('value', width=300, anchor='w')
self.labeltree.heading('value', text="标签值")
self.labelscrollx = ttk.Scrollbar(frame51, orient="horizontal", command=self.labeltree.xview)
self.labelscrolly = ttk.Scrollbar(frame51, orient="vertical", command=self.labeltree.yview)
self.labeltree.configure(xscrollcommand=self.labelscrollx.set)
self.labeltree.configure(yscrollcommand=self.labelscrolly.set)
self.labelscrollx.pack(side="bottom", fill="x")
self.labelscrolly.pack(side="right", fill="y")
self.labeltree.pack(expand=1, fill='both', padx='1', pady='1')
2) 双击treeview编辑
# 双击标签库列表
def set_lbcell_value2(self, event):
# 获取选中的项目
col = self.labeltree.identify_column(event.x) # 列
row = self.labeltree.identify_row(event.y) # 行
if col == '' or row == '':
return
selected_item = self.labeltree.selection()[0]
cn = int(str(col).replace('#', ''))
rn = int(str(row).replace('I', ''))
x, y, w, h = self.labeltree.bbox(selected_item, col)
txt = self.labeltree.item(selected_item, 'values')[cn-1]
edstr = StringVar()
edstr.set(txt)
entryedit = ttk.Entry(self.labeltree, width=w//8, textvariable=edstr)
entryedit.place(x=x, y=y)
def saveedit(even):
self.labeltree.set(selected_item, column=col, value=edstr.get())
entryedit.destroy()
entryedit.bind("<FocusOut>", saveedit)
# 按下时触发
entryedit.bind('<Return>', saveedit) # 不是Enter而是Return
3) 右击treeview,弹出操作菜单
# 右击标签库
def set_lbcel_popup(self, event):
# 创建右键菜单
self.right_click_menu = Menu(self.labeltree, tearoff=False)
self.right_click_menu.add_command(label="增加", command=lambda: self.on_menu_click_add(event))
if self.labeltree.selection():
self.right_click_menu.add_command(label="删除", command=lambda: self.on_menu_click_del(event))
self.right_click_menu.post(event.x_root, event.y_root)
# 新增标签
def on_menu_click_add(self, event):
self.labeltree.insert('', 'end', values=('新标签', '值'))
self.labeltree.update()
# 删除标签
def on_menu_click_del(self, event):
if self.labeltree.selection():
self.labeltree.delete(self.labeltree.selection())
4、几个不足之处:
1)python-docx库只能操作docx文件,对doc文档支持不好。
2)对模板文档中标签格式有一定要求,但基本功能已实现。
5、源代码已上传,链接:https://download.csdn.net/download/zhoury/89612390