目录
1、项目效果
2、项目源码
3、技术实现
4、总结
前言
本项目是一个豆瓣电影爬虫,爬取了豆瓣电影 TOP 排行榜的数据,包括电影的名称、演员、评分、评价人数等。界面使用 TK 布局助手搭建,共 4 个界面:登录、注册、首页、详情。
注意:项目并没有连接数据库
一、项目效果
登录
注册
首页
详情
二、项目源码
登录
from tkinter import *
from tkinter.ttk import *
from tkinter import Button
import subprocess
from tkinter import messagebox
class WinGUI(Tk):
    """Login window with username/password entries and login/register buttons.

    Credentials are compared against the fixed values stored in ``name_var``
    and ``password_var``; no database is involved.
    """

    def __init__(self):
        super().__init__()
        self.__win()
        self.name_var = "root"  # fixed user name accepted by the login check
        self.password_var = "123"  # fixed password accepted by the login check
        self.tk_label_lwudgfpe = self.__tk_label_lwudgfpe(self)
        self.tk_label_lwudgqcp = self.__tk_label_lwudgqcp(self)
        self.tk_input_lwudhq1s = self.__tk_input_lwudhq1s(self)
        self.tk_input_lwudi5tc = self.__tk_input_lwudi5tc(self)
        self.tk_button_lwudit80 = self.__tk_button_lwudit80(self)
        self.tk_button_zc = self.__tk_button_zc(self)

    def __win(self):
        """Set the title and a fixed 400x300 geometry centred on the screen."""
        self.title("登录")
        width = 400
        height = 300
        screenwidth = self.winfo_screenwidth()
        screenheight = self.winfo_screenheight()
        geometry = '%dx%d+%d+%d' % (width, height, (screenwidth - width) / 2, (screenheight - height) / 2)
        self.geometry(geometry)
        self.resizable(width=False, height=False)

    def scrollbar_autohide(self, vbar, hbar, widget):
        """Lower the scrollbars behind ``widget`` until the pointer enters.

        :param vbar: vertical scrollbar, or None
        :param hbar: horizontal scrollbar, or None
        :param widget: the widget the scrollbars belong to
        """
        def show():
            if vbar: vbar.lift(widget)
            if hbar: hbar.lift(widget)

        def hide():
            if vbar: vbar.lower(widget)
            if hbar: hbar.lower(widget)

        hide()
        widget.bind("<Enter>", lambda e: show())
        if vbar: vbar.bind("<Enter>", lambda e: show())
        if vbar: vbar.bind("<Leave>", lambda e: hide())
        if hbar: hbar.bind("<Enter>", lambda e: show())
        if hbar: hbar.bind("<Leave>", lambda e: hide())
        widget.bind("<Leave>", lambda e: hide())

    def v_scrollbar(self, vbar, widget, x, y, w, h, pw, ph):
        """Connect ``vbar`` to ``widget``'s y-view and place it.

        The bar is anchored at its north-east corner using relative
        coordinates derived from x/y/w/h divided by pw/ph (presumably the
        parent's width and height -- confirm with callers).
        """
        widget.configure(yscrollcommand=vbar.set)
        vbar.config(command=widget.yview)
        vbar.place(relx=(w + x) / pw, rely=y / ph, relheight=h / ph, anchor='ne')

    def h_scrollbar(self, hbar, widget, x, y, w, h, pw, ph):
        """Connect ``hbar`` to ``widget``'s x-view and place it.

        The bar is anchored at its south-west corner using relative
        coordinates derived from x/y/w/h divided by pw/ph (presumably the
        parent's width and height -- confirm with callers).
        """
        widget.configure(xscrollcommand=hbar.set)
        hbar.config(command=widget.xview)
        hbar.place(relx=x / pw, rely=(y + h) / ph, relwidth=w / pw, anchor='sw')

    def create_bar(self, master, widget, is_vbar, is_hbar, x, y, w, h, pw, ph):
        """Create the requested scrollbars for ``widget`` and auto-hide them."""
        vbar, hbar = None, None
        if is_vbar:
            vbar = Scrollbar(master)
            self.v_scrollbar(vbar, widget, x, y, w, h, pw, ph)
        if is_hbar:
            hbar = Scrollbar(master, orient="horizontal")
            self.h_scrollbar(hbar, widget, x, y, w, h, pw, ph)
        self.scrollbar_autohide(vbar, hbar, widget)

    def __tk_label_lwudgfpe(self, parent):
        """Create the "用户名" (user name) caption."""
        label = Label(parent, text="用户名", anchor="center", )
        label.place(x=20, y=60, width=80, height=38)
        return label

    def __tk_label_lwudgqcp(self, parent):
        """Create the "密码" (password) caption."""
        label = Label(parent, text="密码", anchor="center", )
        label.place(x=20, y=140, width=78, height=38)
        return label

    def __tk_input_lwudhq1s(self, parent):
        """Create the user-name entry; also exposed as ``self.ipt1``."""
        ipt = Entry(parent, )
        ipt.place(x=117, y=60, width=263, height=39)
        self.ipt1 = ipt
        return ipt

    def __tk_input_lwudi5tc(self, parent):
        """Create the password entry; also exposed as ``self.ipt2``."""
        # Mask the typed characters so the password is not shown in clear text.
        ipt = Entry(parent, show="*")
        ipt.place(x=117, y=140, width=258, height=39)
        self.ipt2 = ipt
        return ipt

    def __tk_button_lwudit80(self, parent):
        """Create the login button and bind left-click to the login check."""
        btn = Button(parent, text="登录", takefocus=False, )
        btn.place(x=120, y=220, width=79, height=41)
        btn.bind("<Button-1>", self.on_login_click)  # <Button-1> is a left mouse click
        return btn

    def __tk_button_zc(self, parent):
        """Create the register button and bind left-click to the register page."""
        btn = Button(parent, text="注册", takefocus=False, )
        btn.place(x=250, y=220, width=79, height=41)
        btn.bind("<Button-1>", self.on_zc_click)  # <Button-1> is a left mouse click
        return btn

    def on_login_click(self, event):
        """Check the typed credentials and open the home page on success.

        :param event: the Tk event delivered by the button binding (unused)
        """
        # ipt1 is the user-name entry and ipt2 the password entry.
        entered_name = self.ipt1.get()
        entered_password = self.ipt2.get()
        if entered_name == self.name_var and entered_password == self.password_var:
            self.run_index_script()
        else:
            messagebox.showerror("登录失败", "账号或密码不正确,请重新输入。")

    def on_zc_click(self, event):
        """Open the registration page in a separate Python process.

        :param event: the Tk event delivered by the button binding (unused)
        """
        self._launch_script('register.py')

    def run_index_script(self):
        """Open the home page (index.py) in a separate Python process."""
        self._launch_script('index.py')

    def _launch_script(self, script_name):
        """Start ``script_name`` with the interpreter running this program.

        Failures are printed rather than raised so a missing script does not
        break the login window.
        """
        import sys

        # Prefer the current interpreter over whatever "python" is on PATH so
        # the child sees the same installed packages.
        interpreter = sys.executable or 'python'
        try:
            subprocess.Popen([interpreter, script_name])
        except (OSError, subprocess.SubprocessError) as e:
            print(f"Error occurred while running {script_name}: {e}")

    def __event_bind(self):
        """Placeholder for extra event bindings; nothing to bind yet."""
        pass

    def __style_config(self):
        """Placeholder for style configuration; nothing to configure yet."""
        pass
if __name__ == "__main__":
    # Script entry point: build the login window and hand control to Tk.
    app = WinGUI()
    app.mainloop()
注册(与登录页面相似,只是判断不相同)
# 添加一个新的方法作为回调函数
def on_register_click(self, event):
    """Validate the registration form and report the outcome in a dialog.

    Both entries are stripped of surrounding whitespace; registration is
    reported as successful only when neither is empty. Nothing is stored.

    :param event: the Tk event delivered by the button binding (unused)
    """
    # NOTE(review): on the login page ipt1 is the user-name entry and ipt2 the
    # password entry; confirm which way round the register layout wires them.
    username_text = self.ipt2.get().strip()
    password_text = self.ipt1.get().strip()
    if not (username_text and password_text):
        messagebox.showwarning("注册", "用户名或密码不能为空!")
        return
    messagebox.showinfo("注册", "注册成功!")
首页(布局是运用了TK助手 大家可自行设置 我这里只展示了部分功能性代码)
def thread_it(func, *args):
    """Run ``func(*args)`` on a new thread and return the started thread.

    The thread is a regular (non-daemon) thread, so the interpreter waits for
    it before exiting.

    :param func: callable to run in the background
    :param args: positional arguments passed to ``func``
    :return: the started ``Thread``, so callers may ``join()`` it if needed
    """
    worker = Thread(target=func, args=args)
    worker.start()
    return worker
class movie_ui():
    """Event handlers for the home page's movie table and search buttons.

    The handlers read widgets (``movie_tv``, ``btn_top``, ``btn_keyword``,
    ``movie_combo``, ``movie_keyword_entry``) that are expected to be attached
    to the instance by layout code not shown here.
    """

    def __init__(self):
        # Rows returned by the most recent successful search.
        self.jsonData = []

    def clear_treeview(self, tree):
        """Remove every row from the table.

        :param tree: the Treeview to empty
        """
        for row_id in tree.get_children():
            tree.delete(row_id)

    def add_treeview(self, rows, tree):
        """Append one table row per movie.

        :param rows: iterable of dicts with ``title``, ``rank``, ``score`` and
            ``vote_count`` keys
        :param tree: the Treeview to fill
        """
        for r in rows:
            tree.insert('', END, values=(r['title'], r['rank'], r['score'], r['vote_count']))

    def select_treeview(self, event):
        """Open the detail window for the selected table row.

        The movie is looked up in ``movies.json`` by title; the first entry
        whose title contains the selected title is shown. Nothing happens when
        no row is selected or no entry matches.

        :param event: the Tk event delivered by the binding (unused)
        """
        selected = self.movie_tv.selection()
        if not selected:
            # Nothing is selected, so there is no row to show.
            return
        values = self.movie_tv.item(selected[0], "values")
        if not values:
            return
        # Tk may hand back numeric-looking cells as numbers; the substring
        # test below needs a string.
        title = str(values[0])
        print(title)
        try:
            with open('movies.json', 'r', encoding='utf-8') as fp:
                movies = json.load(fp)
            # Stop at the first match so several partial matches are not all
            # written into the same text boxes.
            movie = next((m for m in movies if title in m['title']), None)
            if movie is None:
                return
            # Create the detail window only once there is something to show.
            d = WinGUI()
            d.mc.insert("1.0", movie["title"])
            d.pj.insert("1.0", movie["score"])
            d.rq.insert("1.0", movie["release_date"])
            d.lx.insert("1.0", movie["types"])
            d.yy.insert("1.0", movie["actors"])
        except Exception as e:
            print(f"Error occurred while opening the movie detail page: {e}")

    def do_search_top(self):
        """Handle the ranking-search button: query by the selected genre."""
        self.clear_treeview(self.movie_tv)
        # Grey the button out while the query runs.
        self.btn_top['state'] = DISABLED
        try:
            selected_title = self.movie_combo.get()
            # Map the combobox text to the genre id used by the query.
            movie_type = next(
                (entry['type'] for entry in loads(movieData) if entry['title'] == selected_title),
                None,
            )
            if movie_type is None:
                messagebox.showwarning('提示', '请选择电影类型!')
                return
            res = get_movie_top(movie_type)
            if res['code'] == 200:
                self.jsonData = res['movies']
                self.add_treeview(res['movies'], self.movie_tv)
            else:
                messagebox.showinfo('提示', res['msg'][:1000])
        finally:
            # Always re-enable the button, even if the query raised.
            self.btn_top['state'] = NORMAL

    def do_search_kw(self):
        """Handle the keyword-search button: query by the typed keyword."""
        self.clear_treeview(self.movie_tv)
        keyword = self.movie_keyword_entry.get().strip()
        if not keyword:
            # Reject an empty keyword before starting a search.
            messagebox.showwarning('提示', '请输入关键字!')
            return
        # Grey both search buttons out while the query runs.
        self.btn_top['state'] = DISABLED
        self.btn_keyword['state'] = DISABLED
        try:
            res = get_movie_kw(keyword)
            # An error result may carry no 'movies' key at all.
            movies = res.get('movies') or []
            if res['code'] == 200 and movies:
                self.jsonData = movies
                self.add_treeview(movies, self.movie_tv)
            elif res.get('msg'):
                messagebox.showinfo('提示', res['msg'][:1000])
            else:
                messagebox.showwarning('提示', '没有查询到相关电影。')
        finally:
            # Always re-enable both buttons, even if the query raised.
            self.btn_top['state'] = NORMAL
            self.btn_keyword['state'] = NORMAL
详情
from tkinter import *
from tkinter.ttk import *
class WinGUI(Tk):
    """Fixed-size detail window showing one movie's fields in a label frame.

    The five text boxes are exposed as ``mc`` (name), ``pj`` (rating),
    ``rq`` (date), ``lx`` (genres) and ``yy`` (actors).
    """

    def __init__(self):
        super().__init__()
        self.__win()
        frame = self.__tk_label_frame_lwym93kr(self)
        self.tk_label_frame_lwym93kr = frame
        self.tk_label_lwym9yej = self.__tk_label_lwym9yej(frame)
        self.tk_label_lwymcscf = self.__tk_label_lwymcscf(frame)
        self.tk_label_lwymdu0i = self.__tk_label_lwymdu0i(frame)
        self.tk_label_lwymfoy1 = self.__tk_label_lwymfoy1(frame)
        self.tk_label_lwymgb8y = self.__tk_label_lwymgb8y(frame)
        self.tk_text_lwymink1 = self.__tk_text_lwymink1(frame)
        self.tk_text_lwynp020 = self.__tk_text_lwynp020(frame)
        self.tk_text_lwynpnhj = self.__tk_text_lwynpnhj(frame)
        self.tk_text_lwynq5ny = self.__tk_text_lwynq5ny(frame)
        self.tk_text_lwynqjmm = self.__tk_text_lwynqjmm(frame)

    def __win(self):
        """Set the title and a fixed 730x370 geometry centred on the screen."""
        self.title("豆瓣电影TOP250")
        width, height = 730, 370
        left = int((self.winfo_screenwidth() - width) / 2)
        top = int((self.winfo_screenheight() - height) / 2)
        self.geometry(f"{width}x{height}+{left}+{top}")
        self.resizable(width=False, height=False)

    def scrollbar_autohide(self, vbar, hbar, widget):
        """Lower the scrollbars behind ``widget`` until the pointer enters.

        Either bar may be None; only the bars that exist are bound.
        """
        bars = [bar for bar in (vbar, hbar) if bar]

        def reveal(_event=None):
            for bar in bars:
                bar.lift(widget)

        def conceal(_event=None):
            for bar in bars:
                bar.lower(widget)

        conceal()
        for target in (widget, *bars):
            target.bind("<Enter>", reveal)
            target.bind("<Leave>", conceal)

    def v_scrollbar(self, vbar, widget, x, y, w, h, pw, ph):
        """Connect ``vbar`` to ``widget``'s y-view and place it by its NE corner.

        pw/ph are presumably the parent's width and height -- confirm with
        callers.
        """
        widget.configure(yscrollcommand=vbar.set)
        vbar.config(command=widget.yview)
        placement = {
            "relx": (w + x) / pw,
            "rely": y / ph,
            "relheight": h / ph,
            "anchor": 'ne',
        }
        vbar.place(**placement)

    def h_scrollbar(self, hbar, widget, x, y, w, h, pw, ph):
        """Connect ``hbar`` to ``widget``'s x-view and place it by its SW corner.

        pw/ph are presumably the parent's width and height -- confirm with
        callers.
        """
        widget.configure(xscrollcommand=hbar.set)
        hbar.config(command=widget.xview)
        placement = {
            "relx": x / pw,
            "rely": (y + h) / ph,
            "relwidth": w / pw,
            "anchor": 'sw',
        }
        hbar.place(**placement)

    def create_bar(self, master, widget, is_vbar, is_hbar, x, y, w, h, pw, ph):
        """Create the requested scrollbars for ``widget`` and auto-hide them."""
        vbar = None
        hbar = None
        if is_vbar:
            vbar = Scrollbar(master)
            self.v_scrollbar(vbar, widget, x, y, w, h, pw, ph)
        if is_hbar:
            hbar = Scrollbar(master, orient="horizontal")
            self.h_scrollbar(hbar, widget, x, y, w, h, pw, ph)
        self.scrollbar_autohide(vbar, hbar, widget)

    def __make_label(self, parent, caption, **geometry):
        """Create a centred caption label and place it at ``geometry``."""
        widget = Label(parent, text=caption, anchor="center")
        widget.place(**geometry)
        return widget

    def __make_text(self, parent, **geometry):
        """Create a text box and place it at ``geometry``."""
        widget = Text(parent)
        widget.place(**geometry)
        return widget

    def __tk_label_frame_lwym93kr(self, parent):
        """Create the "电影详情" frame that holds every other widget."""
        container = LabelFrame(parent, text="电影详情")
        container.place(x=18, y=11, width=698, height=346)
        return container

    def __tk_label_lwym9yej(self, parent):
        """Caption for the movie name."""
        return self.__make_label(parent, "影片名称", x=58, y=11, width=120, height=38)

    def __tk_label_lwymcscf(self, parent):
        """Caption for the movie rating."""
        return self.__make_label(parent, "电影评价", x=59, y=71, width=119, height=37)

    def __tk_label_lwymdu0i(self, parent):
        """Caption for the release date."""
        return self.__make_label(parent, "电影日期", x=60, y=131, width=119, height=39)

    def __tk_label_lwymfoy1(self, parent):
        """Caption for the genres."""
        return self.__make_label(parent, "电影类型", x=60, y=191, width=119, height=39)

    def __tk_label_lwymgb8y(self, parent):
        """Caption for the actors."""
        return self.__make_label(parent, "电影演员", x=60, y=251, width=119, height=39)

    def __tk_text_lwymink1(self, parent):
        """Text box for the actors, exposed as ``self.yy``."""
        self.yy = self.__make_text(parent, x=218, y=250, width=444, height=60)
        return self.yy

    def __tk_text_lwynp020(self, parent):
        """Text box for the movie name, exposed as ``self.mc``."""
        self.mc = self.__make_text(parent, x=279, y=10, width=299, height=41)
        return self.mc

    def __tk_text_lwynpnhj(self, parent):
        """Text box for the rating, exposed as ``self.pj``."""
        self.pj = self.__make_text(parent, x=280, y=72, width=299, height=37)
        return self.pj

    def __tk_text_lwynq5ny(self, parent):
        """Text box for the release date, exposed as ``self.rq``."""
        self.rq = self.__make_text(parent, x=280, y=131, width=298, height=36)
        return self.rq

    def __tk_text_lwynqjmm(self, parent):
        """Text box for the genres, exposed as ``self.lx``."""
        self.lx = self.__make_text(parent, x=280, y=191, width=298, height=38)
        return self.lx
class Win(WinGUI):
    """Detail window wired to a controller object.

    The controller is stored as ``ctl`` and its ``init`` method is called
    with this window once the widgets have been built.
    """

    def __init__(self, controller):
        # Store the controller before the base class builds the widgets.
        self.ctl = controller
        super().__init__()
        self.__event_bind()
        self.__style_config()
        self.ctl.init(self)

    def __event_bind(self):
        """Placeholder for event bindings; intentionally does nothing."""
        return None

    def __style_config(self):
        """Placeholder for style configuration; intentionally does nothing."""
        return None
if __name__ == "__main__":
    # Script entry point: show the bare detail window (no controller attached).
    detail_window = WinGUI()
    detail_window.mainloop()
爬取代码
import json
import urllib
from json import loads
from urllib import request
import requests
from requests_html import HTMLSession
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# JSON text listing the selectable genres: display title, the genre id passed
# as "type" to the ranking query, and the interval id.
movieData = (
    ' ['
    '{"title":"纪录片", "type":"1", "interval_id":"100:90"}, '
    ' {"title":"传记", "type":"2", "interval_id":"100:90"}, '
    ' {"title":"犯罪", "type":"3", "interval_id":"100:90"}, '
    ' {"title":"历史", "type":"4", "interval_id":"100:90"}, '
    ' {"title":"动作", "type":"5", "interval_id":"100:90"}, '
    ' {"title":"情色", "type":"6", "interval_id":"100:90"}, '
    ' {"title":"歌舞", "type":"7", "interval_id":"100:90"}, '
    ' {"title":"儿童", "type":"8", "interval_id":"100:90"}, '
    ' {"title":"悬疑", "type":"10", "interval_id":"100:90"}, '
    ' {"title":"剧情", "type":"11", "interval_id":"100:90"}, '
    ' {"title":"灾难", "type":"12", "interval_id":"100:90"}, '
    ' {"title":"爱情", "type":"13", "interval_id":"100:90"}, '
    ' {"title":"音乐", "type":"14", "interval_id":"100:90"}, '
    ' {"title":"冒险", "type":"15", "interval_id":"100:90"}, '
    ' {"title":"奇幻", "type":"16", "interval_id":"100:90"}, '
    ' {"title":"科幻", "type":"17", "interval_id":"100:90"}, '
    ' {"title":"运动", "type":"18", "interval_id":"100:90"}, '
    ' {"title":"惊悚", "type":"19", "interval_id":"100:90"}, '
    ' {"title":"恐怖", "type":"20", "interval_id":"100:90"}, '
    ' {"title":"战争", "type":"22", "interval_id":"100:90"}, '
    ' {"title":"短片", "type":"23", "interval_id":"100:90"}, '
    ' {"title":"喜剧", "type":"24", "interval_id":"100:90"}, '
    ' {"title":"动画", "type":"25", "interval_id":"100:90"}, '
    ' {"title":"同性", "type":"26", "interval_id":"100:90"}, '
    ' {"title":"西部", "type":"27", "interval_id":"100:90"}, '
    ' {"title":"家庭", "type":"28", "interval_id":"100:90"}, '
    ' {"title":"武侠", "type":"29", "interval_id":"100:90"}, '
    ' {"title":"古装", "type":"30", "interval_id":"100:90"}, '
    ' {"title":"黑色电影", "type":"31", "interval_id":"100:90"}'
    ']'
)
# 电影类型
def get_movie_top(m_type: str, num: int = 50, rating: float = 1.0, pj: int = 100000):
    """Query the Douban ranking list for one genre.

    Endpoint:
    https://movie.douban.com/j/chart/top_list?type=&interval_id=100:90&action=unwatched&start=0&limit=

    The filtered result is also written to ``movies.json`` in the current
    working directory.

    :param m_type: genre id (the ``type`` query parameter)
    :param num: maximum number of entries to request (default 50)
    :param rating: minimum score a movie must have to be kept (default 1.0)
    :param pj: minimum number of votes a movie must have to be kept
        (default 100000)
    :return: ``{'code': 200, 'movies': [...]}`` on success, otherwise
        ``{'code': 500, 'msg': <error text>}``
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36'
    }
    url = "https://movie.douban.com/j/chart/top_list"
    params = {
        "type": m_type,
        "interval_id": "100:90",
        "action": "unwatched",
        "start": "0",
        "limit": num,
    }
    try:
        # Without a timeout a stalled connection would block the caller forever.
        resp = requests.get(url, headers=headers, params=params, timeout=10)
        # Surface HTTP errors directly instead of as a JSON decoding failure.
        resp.raise_for_status()
        movies_list = []
        for subData in resp.json():
            if float(subData['score']) < float(rating) or float(subData['vote_count']) < float(pj):
                continue
            # An entry with no region must not abort the whole query.
            regions = subData.get("regions") or [""]
            movies_list.append({
                "title": subData["title"],
                "rank": subData["rank"],
                "cover_url": subData["cover_url"],
                "types": "/".join(subData["types"]),
                "regions": regions[0],
                "release_date": subData["release_date"],
                "vote_count": subData["vote_count"],
                "score": subData["score"],
                "actors": "/".join(subData["actors"]),
            })
        with open('movies.json', 'w', encoding='utf-8') as fp:
            json.dump(movies_list, fp, ensure_ascii=False, indent=2)
        return {'code': 200, 'movies': movies_list}
    except Exception as ex:
        # Boundary for the UI: report any failure as an error result.
        err_str = "出现未知异常:{}".format(ex)
        return {'code': 500, 'msg': err_str}
def get_movie_kw2(kw: str):
    """Search Douban movies by keyword using requests-html.

    Experimental alternative to the Selenium-based keyword search: the search
    page is rendered and each ``div.item-root`` result is turned into a movie
    dict. Only ``title`` and ``cover_url`` are filled in; the other fields
    stay empty.

    :param kw: search keyword
    :return: list of movie dicts (empty when nothing is found)
    """
    params = {
        "search_text": kw,
        "cat": "1002"
    }
    session = HTMLSession(
        browser_args=[
            '--no-sandbox',
            '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
        ]
    )
    movies_list = []
    try:
        resp = session.get("https://search.douban.com/movie/subject_search", params=params)
        # The result list is built by JavaScript, so the page must be rendered.
        resp.html.render()
        for div in resp.html.xpath("//div[@class='item-root']"):
            movie = {
                "title": '',
                "rank": '',
                "cover_url": '',
                "types": '',
                "regions": '',
                "release_date": '',
                "vote_count": '',
                "score": '',
                "actors": ''
            }
            # NOTE(review): assumes the cover image carries src/alt, as the
            # Selenium-based search in this file does -- confirm against the page.
            cover = div.find(".cover", first=True)
            if cover is not None:
                movie["cover_url"] = cover.attrs.get("src", '')
                movie["title"] = cover.attrs.get("alt", '')
            movies_list.append(movie)
    finally:
        # Release the session (and the browser it started for rendering).
        session.close()
    return movies_list
def get_movie_kw(kw: str):
    """Search Douban movies by keyword using headless Chrome.

    Page: https://movie.douban.com/subject_search?search_text=&cat=1002

    :param kw: search keyword
    :return: ``{'code': 200, 'movies': [...]}`` on success, otherwise
        ``{'code': 500, 'msg': <error text>}``
    """
    from urllib.parse import quote

    chrome_options = Options()
    # Headless mode: do not show a browser window.
    chrome_options.add_argument('--headless')
    chrome_options.add_argument(
        'user-agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36"')

    browser = None
    try:
        browser = webdriver.Chrome(options=chrome_options)
        # Give page loads and scripts at most 10 seconds each.
        browser.set_page_load_timeout(10)
        browser.set_script_timeout(10)
    except Exception as ex:
        if browser is not None:
            browser.quit()
        err_str = "加载chromedriver驱动失败,请下载chromedriver驱动并填写正确的路径。\n\n异常信息:{}".format(ex)
        return {'code': 500, 'msg': err_str}

    def first_match(element, selector):
        """Return the first descendant matching ``selector``, or None."""
        found = element.find_elements(By.CSS_SELECTOR, selector)
        return found[0] if found else None

    try:
        url = 'https://search.douban.com/movie/subject_search?search_text=' + quote(kw) + '&cat=1002'
        browser.get(url)
        movies_list = []
        for div in browser.find_elements(By.CSS_SELECTOR, "div.item-root"):
            movie = {
                "title": '',
                "rank": '',
                "cover_url": '',
                "types": '',
                "regions": '',
                "release_date": '',
                "vote_count": '',
                "score": '',
                "actors": ''
            }
            cover = first_match(div, ".cover")
            if cover is not None:
                movie["cover_url"] = cover.get_attribute("src")
                movie["title"] = cover.get_attribute("alt")
            rating = first_match(div, "span.rating_nums")
            if rating is not None:
                movie["score"] = rating.text
            pl = first_match(div, "span.pl")
            if pl is not None:
                movie["vote_count"] = pl.text.replace("(", "").replace(")", "").replace("人评价", "")
            regions = first_match(div, "div.abstract")
            if regions is not None:
                movie["regions"] = regions.text
            actors = first_match(div, "div.abstract_2")
            if actors is not None:
                movie["actors"] = actors.text
            movies_list.append(movie)
        return {'code': 200, 'movies': movies_list}
    except Exception as ex:
        err_str = "chromedriver驱动加载成功,但是Selenium获取数据出现其他未知异常:{}".format(ex)
        # Report failures with an error code so callers do not look for 'movies'.
        return {'code': 500, 'msg': err_str}
    finally:
        # Close the browser on every path so Chrome processes do not pile up.
        browser.quit()
if __name__ == "__main__":
    # Manual smoke test: run the requests-html keyword search and print
    # whatever it returns.
    result = get_movie_kw2("指环王")
    print(result)
三、技术实现
tkinter 布局
json 获取数据
thread 线程
requests 模块
selenium 爬取
四、总结
此次项目结合了许多之前学过的知识:有最基本的 Python 语法,也有界面的融合,还有 selenium 数据的爬取;字典、集合、列表、函数、方法也都有用到。
其实如果连接数据库,效果会更好,大家可以去尝试一下。