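# Compare the header (field) columns of two spreadsheets; both .xlsx and .xls files are supported.
# Fields that appear in both spreadsheets are written out with a green background.
# Fields that differ are written to two new spreadsheet files, with the differing fields given a red background.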
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
import os
def convert_xls_to_xlsx(file):
    """Convert a legacy ``.xls`` workbook to ``.xlsx`` and return the new path.

    Paths that do not end with ``.xls`` are returned unchanged and nothing is
    read or written. For ``.xls`` paths the file is read with pandas (xlrd
    engine) and written back, without the DataFrame index, to the same path
    with the extension changed to ``.xlsx``.

    Args:
        file: Path of the workbook to convert.

    Returns:
        The path of the ``.xlsx`` file, or ``file`` itself when no conversion
        was needed.
    """
    suffix = '.xls'
    if not file.endswith(suffix):
        return file

    df = pd.read_excel(file, engine='xlrd')
    # Swap only the trailing extension. ``str.replace`` would also rewrite any
    # earlier ".xls" inside the path (e.g. "old.xls_dump/a.xls"), sending the
    # output to a directory that may not exist.
    xlsx_file = file[:-len(suffix)] + '.xlsx'
    df.to_excel(xlsx_file, index=False)
    return xlsx_file
def read_excel_file(file):
    """Load an Excel workbook into a DataFrame via ``pd.read_excel``.

    Args:
        file: Path whose name must end in ``.xlsx`` or ``.xls``.

    Returns:
        Whatever ``pd.read_excel(file)`` returns for the given path.

    Raises:
        ValueError: If ``file`` ends with any other extension.
    """
    supported_suffixes = ('.xlsx', '.xls')
    if not file.endswith(supported_suffixes):
        raise ValueError("Unsupported file format: {}".format(file))
    return pd.read_excel(file)
def load_workbook_file(file):
    """Open an ``.xlsx`` workbook with openpyxl's ``load_workbook``.

    Args:
        file: Path of the workbook to open.

    Returns:
        The workbook object returned by ``load_workbook(file)``.

    Raises:
        ValueError: For ``.xls`` paths (they must be converted to ``.xlsx``
            first) and for any other extension.
    """
    if file.endswith('.xlsx'):
        return load_workbook(file)

    # Everything below is a rejection; only the message differs.
    if file.endswith('.xls'):
        message = "openpyxl does not support .xls files directly. Convert to .xlsx first."
    else:
        message = "Unsupported file format: {}".format(file)
    raise ValueError(message)
def _paint_header_row(sheet, headers, shared, shared_fill, unique_fill):
    """Fill each row-1 cell of ``sheet`` according to whether its header is in ``shared``."""
    for column, header in enumerate(headers, start=1):
        fill = shared_fill if header in shared else unique_fill
        sheet.cell(row=1, column=column).fill = fill


def compare_headers(file1, file2, *, output_file1='output1.xlsx', output_file2='output2.xlsx'):
    """Compare the header rows of two Excel files and colour-code the result.

    Both inputs are read with pandas to obtain their column names. Copies of
    the two workbooks are then saved with the first-row cells filled green
    when the column name occurs in both files and red when it occurs in only
    one of them. A summary of shared and differing names is printed.

    Failures (missing input, read error, load error, save error) are reported
    with ``print`` and end the function early; nothing is raised to the caller.

    Args:
        file1: Path of the first workbook (``.xlsx`` or ``.xls``).
        file2: Path of the second workbook (``.xlsx`` or ``.xls``).
        output_file1: Where the colour-coded copy of ``file1`` is saved.
        output_file2: Where the colour-coded copy of ``file2`` is saved.

    Returns:
        None.
    """
    # Bail out early when either input is missing.
    for path in (file1, file2):
        if not os.path.exists(path):
            print(f"文件 {path} 不存在")
            return

    try:
        # Convert legacy .xls inputs first: openpyxl can only open .xlsx.
        # Conversion reads the file too, so its errors are reported the same
        # way as any other read failure instead of escaping as a traceback.
        file1 = convert_xls_to_xlsx(file1)
        file2 = convert_xls_to_xlsx(file2)
        df1 = read_excel_file(file1)
        df2 = read_excel_file(file2)
    except Exception as e:
        print(f"读取文件时出错: {e}")
        return

    # Column names of each file, in sheet order.
    headers1 = df1.columns.tolist()
    headers2 = df2.columns.tolist()

    # Split the names into shared ones and those unique to each file.
    same_headers = [h for h in headers1 if h in headers2]
    diff_headers1 = [h for h in headers1 if h not in headers2]
    diff_headers2 = [h for h in headers2 if h not in headers1]

    green_fill = PatternFill(fill_type='solid', fgColor='00FF00')
    red_fill = PatternFill(fill_type='solid', fgColor='FF0000')

    try:
        wb1 = load_workbook_file(file1)
        wb2 = load_workbook_file(file2)
    except Exception as e:
        print(f"加载工作簿时出错: {e}")
        return

    # pandas reads the first worksheet by default, so colour that same sheet.
    # ``wb.active`` may point at a different sheet, and the header positions
    # computed above would then be painted onto the wrong cells.
    ws1 = wb1.worksheets[0]
    ws2 = wb2.worksheets[0]

    _paint_header_row(ws1, headers1, same_headers, green_fill, red_fill)
    _paint_header_row(ws2, headers2, same_headers, green_fill, red_fill)

    try:
        wb1.save(output_file1)
        wb2.save(output_file2)
    except Exception as e:
        print(f"保存文件时出错: {e}")
        return

    # Report the comparison result.
    print('两表格文件对比后相同字段:', same_headers)
    print('表格文件1,存在差异字段:', diff_headers1)
    print('表格文件2,存在差异字段:', diff_headers2)
# Script entry point: compare two sample workbooks (.xlsx and .xls inputs are
# both accepted). Guarded so that importing this module to reuse its functions
# does not trigger a comparison as a side effect.
if __name__ == "__main__":
    compare_headers('file1.xls', 'file2.xls')