# Show the rows whose value in a chosen column does not have one of the expected lengths.
from os import path
from tkinter import (BOTH, BROWSE, EXTENDED, INSERT, Button, Frame, Label,
Text, Tk, filedialog, mainloop, messagebox)
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image, ImageTk
from tkinter import Entry, StringVar, messagebox, Tk, mainloop
import dataframe_image as dfi
class CleanCode():
    """Tkinter tool that previews a table and the rows with unexpected code lengths.

    The user opens an Excel/CSV file, names a column and a comma-separated
    list of valid lengths; rows whose first alphanumeric run in that column
    has any other length are rendered as an image next to the preview of the
    original table.

    State lives on the instance (``self.df``, ``self.filepath``) rather than
    in module globals, so every callback can check whether a file is loaded.
    """

    # Captures the first run of ASCII letters/digits found in a cell.
    _NORMAL_MODE = r'([0-9a-zA-Z]+)'

    def __init__(self) -> None:
        """Build the window, the control row and the two picture panes."""
        self.root = Tk()
        self.root.title("显示需要清洗代码列")
        self.root.geometry("500x300")
        self.width = 10
        self.col = StringVar()
        self.col_length = StringVar()
        self.filepath = None
        self.df = None
        # Known from the start so df2image works before any file is opened.
        self.screenwidth = self.root.winfo_screenwidth()
        self.screenheight = self.root.winfo_screenheight()

        # grid() returns None, so the frame must be created first and laid
        # out in a second statement; otherwise the attribute is None and the
        # children silently attach to the default root window.
        self.frame = Frame(self.root)
        self.frame.grid(row=0, column=0, columnspan=2, sticky="w")
        self.old_pic_frame = Frame(self.root)
        self.old_pic_frame.grid(row=1, column=0, sticky="nw")
        self.new_pic_frame = Frame(self.root)
        self.new_pic_frame.grid(row=1, column=1, sticky="nw")

        Button(self.frame, text="打开excel文件", width=self.width, height=1,
               command=self.open_xlsx_file).grid(row=0, column=0)
        Button(self.frame, text="重载该文件", width=self.width, height=1,
               command=self.re_load).grid(row=0, column=3, padx=15)
        Label(self.frame, text="输入清洗的列", height=1,
              width=self.width).grid(row=1, column=0)
        Entry(self.frame, textvariable=self.col).grid(row=1, column=1)
        Label(self.frame, text="列的正确长度", height=1,
              width=self.width).grid(row=1, column=2)
        Entry(self.frame, textvariable=self.col_length).grid(row=1, column=3)
        Button(self.frame, text="筛选", width=self.width, height=1,
               command=self.process_all).grid(row=2, column=3, padx=15)

        # One label per pane, reused on every refresh instead of stacking a
        # new Label on top of the previous one.
        self.old_pic_label = Label(self.old_pic_frame)
        self.old_pic_label.grid(row=0, column=0)
        self.new_pic_label = Label(self.new_pic_frame)
        self.new_pic_label.grid(row=0, column=0)

    @staticmethod
    def _read_table(filepath):
        """Load ``filepath`` as a DataFrame, choosing the reader by extension.

        ``.csv`` files go through ``pd.read_csv``; everything else is treated
        as an Excel workbook. Empty cells are kept as empty strings.
        """
        extension = path.splitext(filepath)[1].lower()
        if extension == ".csv":
            return pd.read_csv(filepath, keep_default_na=False)
        return pd.read_excel(filepath, keep_default_na=False)

    @staticmethod
    def _parse_lengths(text):
        """Split a comma-separated entry into a list of trimmed length strings.

        Both the ASCII comma and the full-width comma are accepted; blank
        items are dropped.
        """
        pieces = text.replace("，", ",").split(",")
        return [piece.strip() for piece in pieces if piece.strip()]

    @staticmethod
    def _find_suspect_rows(frame, col, allowed_lengths):
        """Return the rows of ``frame`` whose code length is not allowed.

        The code is the first run of ASCII letters/digits in ``frame[col]``
        (after converting the cell to text); a cell without such a run counts
        as length 0. The result is a new DataFrame carrying an extra
        ``code_length`` column (as strings); ``frame`` itself is not modified.

        Raises:
            KeyError: if ``col`` is not a column of ``frame``.
        """
        codes = frame[col].astype(str).str.extract(
            CleanCode._NORMAL_MODE, expand=False)
        lengths = codes.fillna('').str.len().astype(str)
        suspect = ~lengths.isin(list(allowed_lengths))
        return frame.assign(code_length=lengths)[suspect]

    @staticmethod
    def _show_image(label, photo):
        """Display ``photo`` in ``label`` and keep it alive.

        Tkinter does not hold a strong reference to a PhotoImage, so the
        image is also stored on the label to prevent garbage collection from
        blanking the widget.
        """
        label.configure(image=photo)
        label.image = photo

    def re_load(self):
        """Re-read the currently selected file and refresh the left preview."""
        if not self.filepath:
            messagebox.showwarning('提示', '请先打开excel文件')
            return
        self.df = self._read_table(self.filepath)
        self._show_image(self.old_pic_label, self.df2image(self.df))
        messagebox.showinfo('重载', '重载文件成功')

    def df2image(self, df):
        """Render the first 50 rows of ``df`` to a PhotoImage.

        The styled table is exported to ``./test.png`` and loaded back. When
        the picture is taller than half the screen it is resized to half the
        screen width, keeping the aspect ratio.
        """
        df_style = df.iloc[:50].style.background_gradient()
        table_filename = "./test.png"
        dfi.export(obj=df_style, filename=table_filename, fontsize=30)
        # convert() yields an independent in-memory image, so the file handle
        # can be released right away.
        with Image.open(table_filename) as rendered:
            picture = rendered.convert("RGB")
        self.img_width, self.img_height = picture.size
        self.rate = self.img_width / self.img_height
        if self.img_height > self.screenheight * 0.5:
            width = int(0.5 * self.screenwidth)
            # Never ask for a zero-pixel height on extremely wide tables.
            height = max(1, int(width / self.rate))
            picture = picture.resize(size=(width, height))
        return ImageTk.PhotoImage(picture)

    def open_xlsx_file(self):
        """Ask for a file, load it and show its preview in the left pane."""
        self.screenwidth = self.root.winfo_screenwidth()
        self.screenheight = self.root.winfo_screenheight()
        self.root.geometry(f"{self.screenwidth}x{self.screenheight}+0+0")
        chosen = filedialog.askopenfilename(title='选择文件', filetypes=[('文件', ['*.xlsx', '*.xls', '*.csv'])])
        if not chosen:
            # Dialog cancelled: keep whatever file was loaded before.
            return
        self.filepath = chosen
        self.df = self._read_table(chosen)
        self._show_image(self.old_pic_label, self.df2image(self.df))

    def process_all(self):
        """Show, in the right pane, the rows whose code length is unexpected."""
        if self.df is None:
            messagebox.showwarning('提示', '请先打开excel文件')
            return
        col = self.col.get()
        if col not in self.df.columns:
            messagebox.showerror('错误', f'找不到列: {col}')
            return
        allowed_lengths = self._parse_lengths(self.col_length.get())
        doubt_df = self._find_suspect_rows(self.df, col, allowed_lengths)
        self._show_image(self.new_pic_label, self.df2image(doubt_df))
if __name__ == "__main__":
    # Script entry point: initialise the module-level `df` and `new_img`
    # names to None, build the window, then hand control to the Tk event loop.
    df = new_img = None
    main = CleanCode()
    mainloop()