- 与爬虫的关联
可以便捷地获取网站中动态加载的数据
可以便捷地实现模拟登录
selenium是基于浏览器自动化的一个模块(类似按键精灵脚本)
- 使用流程
环境安装:pip install selenium
下载一个浏览器的驱动程序:谷歌浏览器驱动安装 - 琳达的博客 - 博客园 (cnblogs.com)
from selenium import webdriver
实例化一个浏览器对象
编写基于浏览器自动化的操作代码
- 一些自动化操作
发起请求:get(url)
标签定位:find系列的方法
标签交互:send_keys('xxx')
点击:click()
执行js程序:execute_script('jsCode')
后退、前进:back()、forward()
关闭浏览器:quit()
from selenium import webdriver
from selenium.webdriver.common.by import By
from lxml import etree
import time

# Demo script: open the QQ-zone landing page, fill in the login form that
# lives inside an iframe, submit it, and close the browser.

# Launch a Chrome instance driven by the local chromedriver binary.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    # Have the browser request the target URL.
    bro.get('https://i.qq.com/')
    # The elements located below are inside the 'login_frame' iframe, so the
    # lookup scope has to be switched to that frame first.
    bro.switch_to.frame('login_frame')
    # find_element(By.ID, ...) replaces the find_element_by_id helpers, which
    # were removed in Selenium 4.
    # Click the 'switcher_plogin' element (presumably toggles the page to the
    # account/password form -- confirm against the live page).
    a_tag = bro.find_element(By.ID, 'switcher_plogin')
    a_tag.click()
    userName_tag = bro.find_element(By.ID, 'u')
    passWord_tag = bro.find_element(By.ID, 'p')
    # The fixed pauses below only slow the script down so each step is visible.
    time.sleep(3)
    userName_tag.send_keys('2371964121')
    time.sleep(3)
    passWord_tag.send_keys('xxxxxx')
    time.sleep(3)
    btn = bro.find_element(By.ID, 'login_button')
    btn.click()
    time.sleep(3)
finally:
    # Always shut the browser and driver down, even if a step above raised.
    bro.quit()
- 处理iframe
如果要定位的标签位于iframe标签内,则必须先使用switch_to.frame(id)切换作用域,再进行标签定位
动作链
from selenium.webdriver import ActionChains
action=ActionChains(bro):实例化动作链
action.click_and_hold(指定标签):点击长按指定的标签
action.move_by_offset(x,y).perform():偏移一定像素,x是水平方向,y是竖直方向
perform():立即执行动作链操作
action.release():释放动作链
无可视化界面(无头浏览器)
from selenium import webdriver
# Options class used to configure how Chrome is started.
from selenium.webdriver.chrome.options import Options
# ChromeOptions is the same class exposed under another name; the import is
# kept so existing references to it keep working.
from selenium.webdriver import ChromeOptions

# Demo script: fetch a page with a headless Chrome and print its HTML.
#
# All settings go into ONE options object. Passing both `chrome_options=` and
# `options=` to webdriver.Chrome makes the deprecated `chrome_options` value
# win, which silently dropped the anti-detection setting (and newer Selenium
# releases reject the `chrome_options` keyword outright).
chrome_options = Options()
# Run without a visible browser window.
chrome_options.add_argument('headless')
chrome_options.add_argument('disable-gpu')
# Lower the risk of the site detecting that Selenium is driving the browser.
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
# Backward-compatible alias for the name the anti-detection options used to have.
option = chrome_options

bro = webdriver.Chrome(executable_path='chromedriver.exe', options=chrome_options)
try:
    bro.get('https://www.baidu.com/')
    print(bro.page_source)
finally:
    # A headless browser has no window to close by hand; always quit it so the
    # chrome/chromedriver processes do not linger.
    bro.quit()
案例
12306登录
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains # 动作链
# 实现规避检测
from selenium.webdriver import ChromeOptions
import time
def login():
    """Fill in the login form and drag the slider element to the right.

    Uses the module-level ``driver`` (created in the ``__main__`` section) and
    returns nothing. Element lookups raise Selenium's usual exceptions when an
    element is not found.
    """
    driver.find_element(By.ID, 'J-userName').send_keys('zh')
    driver.find_element(By.ID, 'J-password').send_keys('mm')
    driver.find_element(By.ID, 'J-login').click()
    # Fixed pause before looking up the slider element.
    time.sleep(2)
    # Slider handle.
    clock = driver.find_element(By.CLASS_NAME, 'nc_iconfont')
    action = ActionChains(driver)
    # Press and hold the slider handle.
    action.click_and_hold(clock).perform()
    for _ in range(5):
        # Perform every 60px step immediately. Without perform() the moves are
        # only queued, the sleep between them has no effect, and the whole
        # 300px drag is sent in one burst by the final perform().
        action.move_by_offset(60, 0).perform()
        time.sleep(0.1)
    # Let go of the mouse button.
    action.release().perform()
if __name__ == '__main__':
    # Configure Chrome before it is launched so that the page is less likely
    # to detect that Selenium is controlling the browser.
    options = ChromeOptions()
    options.add_experimental_option('excludeSwitches', ['enable-automation'])
    options.add_argument("--disable-blink-features=AutomationControlled")

    # Login page to open.
    url = 'https://kyfw.12306.cn/otn/resources/login.html'

    # `driver` is module-level on purpose: login() reads it as a global.
    driver = webdriver.Chrome(
        options=options,
        executable_path='./chromedriver.exe',
    )
    driver.get(url)
    login()