效果如下:
实现流程:
一、Pyppeteer打开网址
import asyncio
from pyppeteer import launch
import pdb
import random
# 启动 Pyppeteer
browser = await launch({'headless': False})
page = await browser.newPage()
# 打开登录页面
await page.goto('http://localhost:8080/login.html')
二、调用后台 Spring Boot 接口,由 Spring Boot 调用验证码 OCR 识别接口,并返回识别结果
核心代码如下:
/**
 * Posts {@code body} to the image-URL OCR endpoint and returns the recognized text.
 *
 * <p>On HTTP 200 the text is cut out of the JSON response by locating the
 * {@code words":"} marker; if the response does not contain that marker an
 * empty string is returned. On any other status the value of the
 * {@code X-Ca-Error-Message} response header is returned (or
 * {@code "HTTP <code>"} when the header is absent). If an exception is thrown,
 * its message is returned instead.
 *
 * @param body the raw request body, sent UTF-8 encoded
 * @return the recognized text on success, otherwise an error description
 */
public static String getImgWord(String body) {
    // [1] Request host (original note: HTTP, HTTPS and WebSocket are supported).
    String host = "https://imgurlocr.market.alicloudapi.com";
    // [2] Request path.
    String path = "/urlimages";
    // [3] AppCode; per the original note it is shown in the buyer center after
    //     subscribing to the service, which includes 100 free calls.
    String appcode = "xxxxx";
    // [4]/[5] The request parameters arrive in body; build the full request URL.
    String urlSend = host + path;
    try {
        URL url = new URL(urlSend);
        HttpURLConnection httpURLCon = (HttpURLConnection) url.openConnection();
        httpURLCon.setRequestMethod("POST");
        // Header format required by the gateway: "APPCODE <appcode>".
        httpURLCon.setRequestProperty("Authorization", "APPCODE " + appcode);
        byte[] postDataBytes = body.getBytes("UTF-8");
        httpURLCon.setDoOutput(true);
        // try-with-resources closes the stream even when write() fails.
        try (OutputStream out = httpURLCon.getOutputStream()) {
            out.write(postDataBytes);
        }
        int httpCode = httpURLCon.getResponseCode();
        if (httpCode == 200) {
            String json = read(httpURLCon.getInputStream());
            System.out.println("正常请求计费(其他均不计费)");
            System.out.println("获取返回的json:");
            System.out.print(json);
            String marker = "words\":\"";
            int start = json.indexOf(marker);
            if (start < 0) {
                // No recognized text in the response; previously substring(-1)
                // threw and the exception text was returned as if it were the result.
                return "";
            }
            return json.substring(start).replace(marker, "").replace("\"}]}", "");
        }

        Map<String, List<String>> map = httpURLCon.getHeaderFields();
        List<String> errorValues = map.get("X-Ca-Error-Message");
        // The header may be missing (e.g. the error did not come from the gateway);
        // fall back to the status code instead of failing with a NullPointerException.
        String error = (errorValues == null || errorValues.isEmpty())
                ? "HTTP " + httpCode
                : errorValues.get(0);
        if (httpCode == 400 && error.equals("Invalid AppCode")) {
            System.out.println("AppCode错误 ");
        } else if (httpCode == 400 && error.equals("Invalid Url")) {
            System.out.println("请求的 Method、Path 或者环境错误");
        } else if (httpCode == 400 && error.equals("Invalid Param Location")) {
            System.out.println("参数错误");
        } else if (httpCode == 403 && error.equals("Unauthorized")) {
            System.out.println("服务未被授权(或URL和Path不正确)");
        } else if (httpCode == 403 && error.equals("Quota Exhausted")) {
            System.out.println("套餐包次数用完 ");
        } else if (httpCode == 403 && error.equals("Api Market Subscription quota exhausted")) {
            System.out.println("套餐包次数用完,请续购套餐");
        } else {
            System.out.println(httpCode);
            System.out.println("参数名错误 或 其他错误");
            System.out.println(error);
        }
        return error;
    } catch (MalformedURLException e) {
        System.out.println("URL格式错误");
        return e.getMessage();
    } catch (UnknownHostException e) {
        System.out.println("URL地址错误");
        return e.getMessage();
    } catch (Exception e) {
        // Uncomment to see the full stack trace while debugging.
        // e.printStackTrace();
        return e.getMessage();
    }
}
/**
 * Reads the whole stream as UTF-8 text and returns it as a single string.
 *
 * <p>The stream is read line by line and the lines are concatenated without
 * their line terminators. The stream is closed before returning, including
 * when reading fails.
 *
 * @param is the stream to read
 * @return the concatenated lines, or an empty string for an empty stream
 * @throws IOException if reading from the stream fails
 */
private static String read(InputStream is) throws IOException {
    StringBuilder sb = new StringBuilder();
    // Decode as UTF-8 directly. Decoding with the platform default charset and
    // then re-encoding/decoding each line corrupts non-ASCII text whenever the
    // default charset is not UTF-8.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"))) {
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
    }
    return sb.toString();
}
三、将验证码识别结果自动填充到input组件
# 执行JavaScript函数并传递参数,等待结果
response_text = await page.evaluate(postFunction, data)
print(response_text)
input_verify_code = await page.xpath("//input[@name='verifyCode']")
await input_verify_code[0].type(response_text, {'delay': random.randint(100, 151) - 50})
input_username = await page.xpath("//input[@name='username']")
await input_username[0].type('admin', {'delay': random.randint(100, 151) - 50})
input_password = await page.xpath("//input[@name='password']")
await input_password[0].type('123456', {'delay': random.randint(100, 151) - 50})
四、自动登录
# 自动点击"立即登录"按钮
button = await page.xpath('//button[@type="submit"]')
# 如果找到了button,则执行回车操作
if button:
await button[0].press('Enter')
Python 完整代码:
import asyncio
from pyppeteer import launch
import pdb
import random
async def main():
    """Open the login page, have the backend recognize the captcha, fill in the form and submit it."""
    # Start a visible (non-headless) browser and open a new tab.
    browser = await launch(headless=False)
    page = await browser.newPage()

    # Load the login page, then wait five seconds before touching it.
    await page.goto('http://localhost:8080/login.html')
    await asyncio.sleep(5)

    # Find the captcha image element and read its src property.
    captcha_nodes = await page.xpath("//*[@id='vCode']")
    src_handle = await captcha_nodes[0].getProperty('src')
    captcha_src = await src_handle.jsonValue()

    # JavaScript executed inside the page: POST the payload as JSON to the
    # backend endpoint and resolve with the response body as text.
    post_js = """(data) => {
// 这里使用fetch API发起POST请求
return fetch('http://localhost:8080/getImgWord', {
method: 'POST',
headers: {
'Content-Type': 'application/json',
},
body: JSON.stringify(data)
}).then(response => response.text())
}"""
    payload = {'img': captcha_src}

    # Run the function in the page and wait for the backend's answer.
    response_text = await page.evaluate(post_js, payload)
    print(response_text)

    # Type the recognized captcha, then the username and the password, into
    # their inputs; every call passes a random 'delay' option between 50 and 101.
    form_values = (
        ('verifyCode', response_text),
        ('username', 'admin'),
        ('password', '123456'),
    )
    for field_name, text in form_values:
        matches = await page.xpath(f"//input[@name='{field_name}']")
        await matches[0].type(text, {'delay': random.randint(50, 101)})

    # Submit the form: if a submit button exists, press Enter on the first one.
    submit_buttons = await page.xpath('//button[@type="submit"]')
    if submit_buttons:
        first_button = submit_buttons[0]
        await first_button.press('Enter')

    # Closing the browser is disabled here:
    # await browser.close()
# Run the crawler: drive main() to completion on the current event loop
event_loop = asyncio.get_event_loop()
event_loop.run_until_complete(main())
完整资源包:
https://download.csdn.net/download/svygh123/89254844