解析 wxPython 和 Pandas 实现的 XLSX 分析器和网页打开器

在本文中，我们将分析一个使用 wxPython 和 Pandas 库编写的 Python 应用程序，名为 “XLSX Analyzer and Web Opener”。该应用程序的核心功能是：从 Excel 文件中读取数据并显示在网格中，此外，还允许用户使用 Google Chrome 批量打开 Excel 文件中的 URL 列表。
示例代码的保存路径：C:\pythoncode\new\analysisxlsx.py

全部代码

import wx
import wx.grid
import pandas as pd
import subprocess
import os

# Absolute path of the Chrome executable. on_open_urls checks that this path
# exists and passes it to subprocess.Popen; edit it if Chrome lives elsewhere.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

class XlsxAnalyzerFrame(wx.Frame):
    """Main window: shows an .xlsx worksheet in a grid and opens its URLs in Chrome.

    The frame contains a file picker, a grid and an "Open URLs in Chrome"
    button. Picking a file loads the worksheet named 'sheet1' into the grid;
    the button launches Chrome for every non-empty cell of the column whose
    label contains "blog-list-box href", ten URLs at a time.
    """

    def __init__(self):
        """Build the widgets, bind the event handlers and show the frame."""
        super().__init__(parent=None, title='XLSX Analyzer and Web Opener', size=(1200, 800))
        panel = wx.Panel(self)

        main_sizer = wx.BoxSizer(wx.VERTICAL)

        # File picker restricted to .xlsx files; a new selection triggers a load.
        self.file_picker = wx.FilePickerCtrl(panel, wildcard="Excel files (*.xlsx)|*.xlsx")
        self.file_picker.Bind(wx.EVT_FILEPICKER_CHANGED, self.on_file_selected)
        main_sizer.Add(self.file_picker, 0, wx.ALL | wx.EXPAND, 10)

        # The grid takes all remaining vertical space (proportion 1).
        self.grid = wx.grid.Grid(panel)
        main_sizer.Add(self.grid, 1, wx.ALL | wx.EXPAND, 10)

        open_button = wx.Button(panel, label='Open URLs in Chrome')
        open_button.Bind(wx.EVT_BUTTON, self.on_open_urls)
        main_sizer.Add(open_button, 0, wx.ALL | wx.CENTER, 10)

        panel.SetSizer(main_sizer)
        self.Layout()
        self.Show()

        # update_grid uses this flag to call CreateGrid only on the first load
        # and to resize the existing grid on later loads.
        self.grid_created = False

    def on_file_selected(self, event):
        """Load the picked workbook, validate its columns and display it.

        Reads worksheet 'sheet1' of the selected file with pandas. Any failure
        (unreadable file, missing sheet, missing columns) is reported in an
        error dialog instead of being raised.

        Args:
            event: The file-picker event; not used.
        """
        file_path = self.file_picker.GetPath()
        if not file_path:
            return

        expected_columns = [
            "blog-list-box href", "course-img src", "blog-list-box-top",
            "blog-list-content", "article-type", "view-time-box", "view-num",
            "give-like-num", "comment-num", "comment-num 2", "btn-edit-article href"
        ]
        try:
            df = pd.read_excel(file_path, sheet_name='sheet1')
            # Name the absent columns so the user can see what to fix.
            missing = [col for col in expected_columns if col not in df.columns]
            if missing:
                raise ValueError(
                    "Excel file does not contain all expected columns; missing: "
                    + ", ".join(missing)
                )
            self.update_grid(df)
        except Exception as e:
            # Event-handler boundary: show every load failure to the user.
            wx.MessageBox(f'Error reading file: {str(e)}', 'Error', wx.OK | wx.ICON_ERROR)

    def update_grid(self, df):
        """Resize the grid to match ``df`` and copy its labels and values in.

        Missing values (NaN/NaT/None) are written as empty strings rather than
        the text "nan", so empty cells stay empty and get_urls skips them.

        Args:
            df: The pandas DataFrame to display.
        """
        n_rows, n_cols = df.shape

        if not self.grid_created:
            self.grid.CreateGrid(n_rows, n_cols)
            self.grid_created = True
        else:
            current_rows = self.grid.GetNumberRows()
            current_cols = self.grid.GetNumberCols()

            # Which rows/columns are removed does not matter: every remaining
            # cell and label is overwritten below.
            if current_rows < n_rows:
                self.grid.AppendRows(n_rows - current_rows)
            elif current_rows > n_rows:
                self.grid.DeleteRows(0, current_rows - n_rows)

            if current_cols < n_cols:
                self.grid.AppendCols(n_cols - current_cols)
            elif current_cols > n_cols:
                self.grid.DeleteCols(0, current_cols - n_cols)

        # Suppress repainting while the cells are filled one by one.
        self.grid.BeginBatch()
        try:
            for col_idx, label in enumerate(df.columns):
                self.grid.SetColLabelValue(col_idx, str(label))
                # Positional access always yields a single column.
                for row_idx, val in enumerate(df.iloc[:, col_idx]):
                    text = '' if pd.isna(val) else str(val)
                    self.grid.SetCellValue(row_idx, col_idx, text)
        finally:
            self.grid.EndBatch()

        self.grid.AutoSizeColumns()
        self.grid.ForceRefresh()
        self.Layout()

    def get_urls(self):
        """Return the non-empty values of the URL column, whitespace-stripped.

        The URL column is the first one whose label contains
        "blog-list-box href". An error dialog is shown and an empty list is
        returned when the grid has no rows or no such column exists.

        Returns:
            list[str]: The URLs in row order.
        """
        if self.grid.GetNumberRows() == 0:
            wx.MessageBox('No data loaded', 'Error', wx.OK | wx.ICON_ERROR)
            return []

        url_col_index = next(
            (i for i in range(self.grid.GetNumberCols())
             if "blog-list-box href" in self.grid.GetColLabelValue(i)),
            None,
        )
        if url_col_index is None:
            wx.MessageBox('Could not find "blog-list-box href" column', 'Error', wx.OK | wx.ICON_ERROR)
            return []

        urls = []
        for row in range(self.grid.GetNumberRows()):
            value = self.grid.GetCellValue(row, url_col_index).strip()
            if value:
                urls.append(value)
        return urls

    def on_open_urls(self, event):
        """Open the grid's URLs in Chrome, ten at a time, asking before each new batch.

        Launch failures within a batch are collected and shown in a single
        error dialog, so a broken setup does not produce one dialog per URL.

        Args:
            event: The button event; not used.
        """
        if not os.path.exists(CHROME_PATH):
            wx.MessageBox(f'Chrome executable not found at {CHROME_PATH}', 'Error', wx.OK | wx.ICON_ERROR)
            return

        urls = self.get_urls()
        if not urls:
            return

        batch_size = 10
        for start in range(0, len(urls), batch_size):
            failures = []
            for url in urls[start:start + batch_size]:
                # NOTE(review): the cell text is passed to Chrome as a
                # command-line argument; a value starting with '-' could
                # presumably be read as a switch rather than a URL - consider
                # validating the values.
                try:
                    subprocess.Popen([CHROME_PATH, url])
                except (OSError, ValueError) as e:
                    failures.append(f'Error opening URL {url}: {str(e)}')

            if failures:
                wx.MessageBox('\n'.join(failures), 'Error', wx.OK | wx.ICON_ERROR)

            remaining = len(urls) - (start + batch_size)
            if remaining <= 0:
                break

            # Ask with the real size of the next batch (it may be under 10).
            should_continue = wx.MessageBox(f'Open next {min(batch_size, remaining)} URLs?', 'Continue',
                                            wx.YES_NO | wx.ICON_QUESTION)
            if should_continue != wx.YES:
                break

if __name__ == '__main__':
    # Script entry point: create the application object before any window.
    app = wx.App()
    # The frame shows itself at the end of its constructor.
    frame = XlsxAnalyzerFrame()
    # Run the event loop.
    app.MainLoop()

核心功能概述

选择并解析 XLSX 文件：用户通过文件选择器选择一个 Excel 文件，程序读取其中的数据，并在网格中显示。
批量打开 URL：如果 Excel 文件包含一个 URL 列，用户可以点击按钮，程序会批量使用 Chrome 打开这些 URL。
错误处理：当文件不符合预期格式、Chrome 浏览器不可用或打开 URL 失败时，程序会显示相应的错误消息。

导入的库

import wx
import wx.grid
import pandas as pd
import subprocess
import os

wx 和 wx.grid：用于创建图形用户界面（GUI），包括窗口、文件选择器、按钮和数据网格。
pandas (pd)：用于从 Excel 文件中读取数据，并处理这些数据以显示在 GUI 网格中。
subprocess：用于以子进程的方式启动 Chrome 浏览器，并把要打开的 URL 作为命令行参数传给它。
os：用于检查 Chrome 浏览器的路径是否存在。

Google Chrome 路径

# Absolute path of the Chrome executable used to open the URLs; edit it if
# Chrome is installed somewhere else.
CHROME_PATH = r"C:\Program Files\Google\Chrome\Application\chrome.exe"

该常量存储了 Chrome 浏览器的路径，程序将使用这个路径来启动 Chrome。如果用户的系统上 Chrome 位于不同的路径，需要修改该值。

类 `XlsxAnalyzerFrame`

主框架类 XlsxAnalyzerFrame 继承自 wx.Frame，实现了应用的 GUI 和逻辑。下面是它的初始化部分：

class XlsxAnalyzerFrame(wx.Frame):
    """Main application window: a file picker, a data grid and an open-URLs button."""

    def __init__(self):
        """Build the widgets, bind the event handlers and show the frame."""
        super().__init__(parent=None, title='XLSX Analyzer and Web Opener', size=(1200, 800))
        panel = wx.Panel(self)

        # Widgets are stacked top to bottom in the order they are added.
        main_sizer = wx.BoxSizer(wx.VERTICAL)

        # File picker restricted to .xlsx files; a new selection calls on_file_selected.
        self.file_picker = wx.FilePickerCtrl(panel, wildcard="Excel files (*.xlsx)|*.xlsx")
        self.file_picker.Bind(wx.EVT_FILEPICKER_CHANGED, self.on_file_selected)
        main_sizer.Add(self.file_picker, 0, wx.ALL | wx.EXPAND, 10)

        # The grid is added with proportion 1, so it takes the remaining height.
        self.grid = wx.grid.Grid(panel)
        main_sizer.Add(self.grid, 1, wx.ALL | wx.EXPAND, 10)

        # Button that calls on_open_urls when clicked.
        open_button = wx.Button(panel, label='Open URLs in Chrome')
        open_button.Bind(wx.EVT_BUTTON, self.on_open_urls)
        main_sizer.Add(open_button, 0, wx.ALL | wx.CENTER, 10)

        panel.SetSizer(main_sizer)
        self.Layout()
        self.Show()

        # Records whether the grid has been created yet.
        self.grid_created = False

界面元素：

文件选择器 (self.file_picker)：允许用户选择 Excel 文件，并绑定 on_file_selected 事件处理函数。当用户选择文件时，该函数将解析并加载数据。
数据网格 (self.grid)：这是用于显示 Excel 文件数据的表格。wx.grid.Grid 是 wxPython 提供的网格控件，允许显示类似 Excel 的数据表。
打开 URL 按钮 (open_button)：该按钮用于批量打开 Excel 文件中的 URL。当用户点击按钮时，on_open_urls 事件处理函数会处理并打开这些 URL。

处理 Excel 文件

读取并加载 Excel 数据

当用户选择一个 Excel 文件时，触发 on_file_selected 事件：

def on_file_selected(self, event):
    """Load the picked workbook, validate its columns and display it.

    Reads worksheet 'sheet1' of the selected file with pandas. Any failure
    (unreadable file, missing sheet, missing columns) is reported in an error
    dialog instead of being raised.

    Args:
        event: The file-picker event; not used.
    """
    file_path = self.file_picker.GetPath()
    if not file_path:
        return

    expected_columns = [
        "blog-list-box href", "course-img src", "blog-list-box-top",
        "blog-list-content", "article-type", "view-time-box", "view-num",
        "give-like-num", "comment-num", "comment-num 2", "btn-edit-article href"
    ]
    try:
        df = pd.read_excel(file_path, sheet_name='sheet1')
        # Name the absent columns so the user can see what to fix.
        missing = [col for col in expected_columns if col not in df.columns]
        if missing:
            raise ValueError(
                "Excel file does not contain all expected columns; missing: "
                + ", ".join(missing)
            )
        self.update_grid(df)
    except Exception as e:
        # Event-handler boundary: show every load failure to the user.
        wx.MessageBox(f'Error reading file: {str(e)}', 'Error', wx.OK | wx.ICON_ERROR)

file_path = self.file_picker.GetPath()：获取用户选择的文件路径。
pd.read_excel()：使用 Pandas 从 Excel 文件中读取数据。程序假定数据位于名为 'sheet1' 的工作表中。
expected_columns：指定预期的列名。如果 Excel 文件不包含所有这些列，程序会抛出异常并显示错误消息。

更新数据网格

数据成功加载后，通过 update_grid 函数将数据更新到网格中：

def update_grid(self, df):
    """Resize the grid to match ``df`` and copy its labels and values in.

    Missing values (NaN/NaT/None) are written as empty strings rather than
    the text "nan", so empty cells stay empty in the grid.

    Args:
        df: The pandas DataFrame to display.
    """
    n_rows, n_cols = df.shape

    if not self.grid_created:
        self.grid.CreateGrid(n_rows, n_cols)
        self.grid_created = True
    else:
        current_rows = self.grid.GetNumberRows()
        current_cols = self.grid.GetNumberCols()

        # Which rows/columns are removed does not matter: every remaining
        # cell and label is overwritten below.
        if current_rows < n_rows:
            self.grid.AppendRows(n_rows - current_rows)
        elif current_rows > n_rows:
            self.grid.DeleteRows(0, current_rows - n_rows)

        if current_cols < n_cols:
            self.grid.AppendCols(n_cols - current_cols)
        elif current_cols > n_cols:
            self.grid.DeleteCols(0, current_cols - n_cols)

    # Suppress repainting while the cells are filled one by one.
    self.grid.BeginBatch()
    try:
        for col_idx, label in enumerate(df.columns):
            self.grid.SetColLabelValue(col_idx, str(label))
            # Positional access always yields a single column.
            for row_idx, val in enumerate(df.iloc[:, col_idx]):
                text = '' if pd.isna(val) else str(val)
                self.grid.SetCellValue(row_idx, col_idx, text)
    finally:
        self.grid.EndBatch()

    self.grid.AutoSizeColumns()
    self.grid.ForceRefresh()
    self.Layout()

该函数根据 DataFrame 的行数和列数动态调整网格大小（首次加载时创建网格，之后按需增删行列），然后逐列设置列标题并填充每个单元格。

批量打开 URL

用户点击按钮后，程序会在网格中查找列标题包含 "blog-list-box href" 的列，取出其中所有非空单元格作为 URL 并分批打开。每次打开 10 个 URL，然后询问用户是否继续：

def on_open_urls(self, event):
    """Open the grid's URLs in Chrome, ten at a time, asking before each new batch.

    Launch failures within a batch are collected and shown in a single error
    dialog, so a broken setup does not produce one dialog per URL.

    Args:
        event: The button event; not used.
    """
    if not os.path.exists(CHROME_PATH):
        wx.MessageBox(f'Chrome executable not found at {CHROME_PATH}', 'Error', wx.OK | wx.ICON_ERROR)
        return

    urls = self.get_urls()
    if not urls:
        return

    batch_size = 10
    for start in range(0, len(urls), batch_size):
        failures = []
        for url in urls[start:start + batch_size]:
            try:
                subprocess.Popen([CHROME_PATH, url])
            except (OSError, ValueError) as e:
                failures.append(f'Error opening URL {url}: {str(e)}')

        if failures:
            wx.MessageBox('\n'.join(failures), 'Error', wx.OK | wx.ICON_ERROR)

        remaining = len(urls) - (start + batch_size)
        if remaining <= 0:
            break

        # Ask with the real size of the next batch (it may be under 10).
        should_continue = wx.MessageBox(f'Open next {min(batch_size, remaining)} URLs?', 'Continue',
                                        wx.YES_NO | wx.ICON_QUESTION)
        if should_continue != wx.YES:
            break

核心步骤：

检查 Chrome 路径：首先检查 Chrome 浏览器是否存在于指定路径中。
获取 URL 列表：调用 get_urls 函数，提取网格中的 URL 列表。
分批打开 URL：使用 subprocess.Popen 启动 Chrome 并打开这些 URL。每次打开 10 个 URL，如果还有剩余，则询问用户是否继续打开接下来的 10 个 URL。

运行结果

（此处原为程序运行结果的截图，图片未能随文保留。）

总结

此程序实现了读取 Excel 文件并在网格中展示其数据，还能够批量打开其中的 URL。它结合了 wxPython 用于构建 GUI、Pandas 用于处理 Excel 数据，以及 subprocess 来启动外部程序。程序还包含基本的错误处理和用户交互提示，适合在需要从表格数据中提取和操作 URL 的场景下使用。