闲得无聊,趁着休息研究了一下极验4滑块验证码的安全性,看看它是否存在被机器识别、自动化拖拽的可能性。首先看一下效果:
如何识别验证码
1、下载图片
下载图片可以参考博客《采集极验4滑块验证码图片数据》
2、标记图片
3、标记滑动距离
实现代码
__author__ = "dengxinyan"
import io
import re
import time
import json
import base64
import random
import requests
import urllib
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver import ChromeOptions
from selenium.webdriver import FirefoxOptions
# Encode a PIL image as a base64 data URI
def PIL_base64(img, coding='utf-8'):
    """Serialize a PIL image into a ``data:image/<type>;base64,...`` string.

    PNG and GIF sources keep their format; every other source format
    (including images with no recorded format) is re-encoded as JPEG.
    Palette ("P") images are converted to RGB before saving, and RGBA
    images are always written as PNG.

    The MIME subtype in the returned header is taken from the format that
    is actually passed to ``img.save``, so header and payload always agree.
    Previously a source reported as e.g. BMP or WEBP produced a
    ``data:image/bmp`` header in front of JPEG bytes.

    Args:
        img: Image object exposing ``format``, ``mode``, ``convert`` and
            ``save`` (a ``PIL.Image.Image``).
        coding: Codec used to turn the base64 bytes into ``str``.

    Returns:
        The data URI as a string.
    """
    source_format = (img.format or 'JPEG').upper()
    # Only PNG and GIF are preserved; anything else is written as JPEG.
    format_str = source_format if source_format in ('PNG', 'GIF') else 'JPEG'

    if img.mode == "P":
        img = img.convert('RGB')
    if img.mode == "RGBA":
        # The original code routes RGBA images to PNG rather than JPEG.
        format_str = 'PNG'

    output_buffer = BytesIO()
    img.save(output_buffer, quality=100, format=format_str)
    encoded = base64.b64encode(output_buffer.getvalue()).decode(coding)
    return 'data:image/' + format_str.lower() + ';base64,' + encoded
# Captcha recognition API client
def shibie(img, timeout=30):
    """Send a captcha image to the recognition API and return its JSON reply.

    The image is converted to a base64 data URI with ``PIL_base64`` and
    posted as JSON. The raw response text is printed before the parsed
    body is returned.

    Args:
        img: Image to recognise, passed straight to ``PIL_base64``.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` blocks indefinitely when the server stalls.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        ValueError: If the response body is not valid JSON.
    """
    url = "http://www.detayun.cn/openapi/verify_code_identify/"
    # NOTE(review): the API key is hard-coded in the source; consider
    # loading it from configuration instead.
    data = {
        # The user's API key
        "key": "nWrzPFUgFuqXQrCJJUM6",
        # Captcha type identifier
        "verify_idf_id": "23",
        # The image, as a base64 data URI
        "img_base64": PIL_base64(img),
        "img_byte": None,
        # Text description for click-select / spatial-semantic captchas
        # (left as an empty string here)
        "words": "",
    }
    header = {"Content-Type": "application/json"}
    response = requests.post(url=url, json=data, headers=header, timeout=timeout)
    print(response.text)
    return response.json()
def _wait_for_xpath(driver, xpath, timeout=20):
    """Wait until an element matching *xpath* can be found and return it."""
    return WebDriverWait(driver, timeout).until(
        lambda drv: drv.find_element_by_xpath(xpath)
    )


def _background_image_url(driver, timeout=20):
    """Poll the captcha background element until its style yields an image URL."""
    from selenium.common.exceptions import StaleElementReferenceException

    def _extract(drv):
        node = drv.find_element_by_xpath('//div[contains(@class,"geetest_bg")]')
        style = node.get_attribute('style') or ''
        urls = re.findall('url[(]"(.+?)"[)]', style)
        # A falsy result tells WebDriverWait to keep polling.
        return urls[0] if urls else False

    waiter = WebDriverWait(
        driver, timeout, ignored_exceptions=(StaleElementReferenceException,)
    )
    return waiter.until(_extract)


def run(headless=False):
    """Solve the GeeTest v4 slide captcha on the public demo page five times.

    Each round selects the slide-puzzle demo, opens the captcha, downloads
    the background image, asks ``shibie`` for the slide distance and drags
    the slider by that distance minus 15 pixels.

    The function stops early (after printing the API reply) when the
    recognition API answers with a ``code`` other than 200. The browser is
    always closed on exit, including on errors.

    This uses the Selenium 3 style API (``find_element_by_xpath``,
    ``executable_path``).

    Args:
        headless: Start Firefox with ``--headless`` when true.

    Raises:
        selenium.common.exceptions.TimeoutException: If an expected element
            (or the background image URL) does not appear within 20 seconds.
        requests.exceptions.RequestException: If the image download fails.
    """
    import os

    # Browser configuration
    option = FirefoxOptions()
    if headless:
        option.add_argument('--headless')
    else:
        option.add_argument('--window-size=100,100')
    driver = webdriver.Firefox(executable_path=r'webdriver\geckodriver.exe', options=option)

    try:
        # Disguise the automated browser.
        # NOTE(review): these overrides run before driver.get(); they
        # presumably do not survive the navigation -- confirm.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => false,});")
        driver.execute_script("Object.defineProperty(navigator, 'plugins', {get: () => [1, 2, 3, 4, 5],});")

        driver.get('https://www.geetest.com/adaptive-captcha-demo')
        _wait_for_xpath(driver, '//button[contains(text(),"一点即过验证")]')

        # Press the DOWN key to scroll the page.
        # NOTE(review): the trailing 1000 is passed as an extra key argument,
        # not as a pixel distance -- confirm intent.
        action = ActionChains(driver)
        action.send_keys([Keys.DOWN, Keys.DOWN, Keys.DOWN, Keys.DOWN, Keys.DOWN, Keys.DOWN], 1000)
        action.perform()

        # Downloaded backgrounds are stored here; create it up front so
        # img.save() does not fail on a fresh checkout.
        os.makedirs('temp_img', exist_ok=True)

        for _ in range(5):
            # Click the parent of the "一点即过验证" button first ...
            _wait_for_xpath(driver, '//button[contains(text(),"一点即过验证")]')
            driver.find_element_by_xpath('//button[contains(text(),"一点即过验证")]/..').click()
            time.sleep(1)

            # ... then the parent of the "滑动拼图验证" (slide puzzle) button.
            _wait_for_xpath(driver, '//button[contains(text(),"滑动拼图验证")]')
            driver.find_element_by_xpath('//button[contains(text(),"滑动拼图验证")]/..').click()
            time.sleep(1)

            # Open the captcha.
            _wait_for_xpath(driver, '//div[@aria-label="点击按钮开始验证"]').click()

            # The background image URL lives in the element's inline style.
            _wait_for_xpath(driver, '//div[contains(@class,"geetest_bg")]')
            img_url = _background_image_url(driver)

            # Download the image and keep a copy on disk.
            response = requests.get(url=img_url, timeout=30)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            img.save('temp_img/{}.jpg'.format(int(time.time() * 1000)))

            # Ask the recognition API for the slide distance.
            result = shibie(img)
            if result['code'] != 200:
                print(result)
                return
            move_x = int(result['data']['res_str'].replace('滑动', '').replace('px', ''))
            print('结果:', move_x)
            time.sleep(2)

            # The slider handle is the second element matching "geetest_btn".
            _wait_for_xpath(driver, '//div[contains(@class,"geetest_btn")]')
            tag8 = driver.find_elements_by_xpath('//div[contains(@class,"geetest_btn")]')[1]
            print(tag8)

            # Drag the slider; the reported distance is reduced by a fixed
            # 15 px offset before moving.
            action = ActionChains(driver)
            action.click_and_hold(tag8).perform()
            move_x = move_x - 15
            action.move_by_offset(move_x, 0)
            action.release().perform()

            # Give the page time to evaluate the result.
            time.sleep(2)
    finally:
        # Always release the browser and the geckodriver process.
        driver.quit()
# Script entry point: run the demo with a visible (non-headless) browser.
if __name__ == "__main__":
    run(headless=False)
极验4滑块验证码识别我也封装成了API接口,可以提供给大家免费调用:得塔云
总结
1、和极验3相比,极验4大图的反爬能力变弱了,因为极验3的图片链接很快就会失效。
2、和极验3相比,极验4图片的解析难度变小了,因为极验3的图片是打乱的,还需要先拼图还原。
3、极验4的滑动过程也没有验证滑动轨迹。