1.找开发去掉验证码或者使用万能验证码
2.使用OCR自动识别
使用OCR自动识别时,识别率一般不太高,但处理简单的验证码还是没问题的。
这里使用的是Tesseract-OCR,下载地址:https://github.com/A9T9/Free-Ocr-Windows-Desktop/releases
怎么使用呢?
进入安装后的目录:
tesseract.exe test.png test -1
准备一份网页,上面使用该验证码
<!-- Minimal test page that embeds the CAPTCHA image to be recognized. -->
<html>
<head>
<title>Table test by Young</title>
</head>
<body>
<!-- <br> is a void element: it has no end tag, so "</br>" is invalid markup. -->
<br>
<h1> Test </h1>
<img src="http://csujwc.its.csu.edu.cn/sys/ValidateCode.aspx?t=1">
<br>
</body>
</html>
要识别验证码,首先得取得验证码图片。这里采取对页面元素局部截图的方式:首先获取整个页面的截图,
然后根据页面元素的坐标和尺寸进行截取。
/**
 * Saves a screenshot of a single page element as a PNG file.
 *
 * <p>Takes a screenshot through the driver, crops it to the rectangle given
 * by the element's location and size, and copies the cropped image to
 * {@code path}. The crop rectangle is clamped to the bounds of the screenshot,
 * so an element that is only partly inside the captured area yields the part
 * that is inside.
 *
 * <p>I/O failures are reported on standard error and do not propagate, which
 * matches the method's original best-effort contract.
 *
 * @param driver  driver used to take the screenshot; it is cast to
 *                {@link TakesScreenshot}
 * @param element element whose location and size define the crop rectangle
 * @param path    destination file for the cropped PNG image
 * @throws InterruptedException declared for backward compatibility only; the
 *                              method no longer sleeps, so it is not thrown
 * @throws java.awt.image.RasterFormatException if the element lies completely
 *                              outside the captured screenshot
 */
public static void screenShotForElement(WebDriver driver,
        WebElement element, String path) throws InterruptedException {
    File scrFile = ((TakesScreenshot) driver)
            .getScreenshotAs(OutputType.FILE);
    try {
        BufferedImage img = ImageIO.read(scrFile);
        if (img == null) {
            // ImageIO.read returns null when no registered reader can decode the file.
            throw new IOException("Cannot decode screenshot file: " + scrFile);
        }

        Point p = element.getLocation();
        // Clamp the crop rectangle to the image so getSubimage cannot fail
        // merely because the element sticks out of the captured area.
        int left = Math.max(0, p.getX());
        int top = Math.max(0, p.getY());
        int right = Math.min(img.getWidth(),
                p.getX() + element.getSize().getWidth());
        int bottom = Math.min(img.getHeight(),
                p.getY() + element.getSize().getHeight());
        if (right <= left || bottom <= top) {
            throw new java.awt.image.RasterFormatException(
                    "Element is outside the screenshot: location=" + p
                            + ", screenshot=" + img.getWidth() + "x"
                            + img.getHeight());
        }

        BufferedImage dest = img.getSubimage(left, top,
                right - left, bottom - top);
        // Overwrite the temporary screenshot with the cropped image, then copy it out.
        ImageIO.write(dest, "png", scrFile);
        FileUtils.copyFile(scrFile, new File(path));
    } catch (IOException e) {
        System.err.println("Failed to save element screenshot to " + path);
        e.printStackTrace();
    }
}
截取完元素,就可以调用Tesseract-OCR生成text
// use Tesseract to get strings
// Start Tesseract and wait for it to exit, so the output text file is
// complete before anything tries to read it. inheritIO() forwards the
// child's stdout/stderr to this process so it cannot block on a full pipe.
// NOTE(review): "-1" is kept from the original command; it looks like a typo
// for "-l <lang>" - confirm against the Tesseract command-line usage.
Process tesseract = new ProcessBuilder("cmd.exe", "/C", "tesseract.exe",
        "D:\\Tesseract-OCR\\test.png", "D:\\Tesseract-OCR\\test", "-1")
        .inheritIO()
        .start();
int exitCode = tesseract.waitFor(); // may throw InterruptedException
if (exitCode != 0) {
    throw new IOException("tesseract exited with code " + exitCode);
}
接下来通过java读取txt
/**
 * Prints the content of a text file to standard output, line by line.
 *
 * <p>The file is decoded as GBK. If {@code filePath} is {@code null} or does
 * not denote an existing regular file, a "file not found" message is printed
 * instead. Read errors are reported on standard output together with a stack
 * trace; no exception propagates to the caller.
 *
 * @param filePath path of the text file to print
 */
public static void readTextFile(String filePath) {
    String encoding = "GBK";
    // isFile() is already false for a path that does not exist.
    if (filePath == null || !new File(filePath).isFile()) {
        System.out.println("找不到指定的文件");
        return;
    }
    // try-with-resources closes both streams even when reading fails midway.
    try (FileInputStream in = new FileInputStream(filePath);
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(in, encoding))) {
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
        }
    } catch (IOException e) {
        System.out.println("读取文件内容出错");
        e.printStackTrace();
    }
}
整体代码如下:
package com.dbyl.tests;

import java.awt.Rectangle;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.concurrent.TimeUnit;

import javax.imageio.ImageIO;

import org.apache.commons.io.FileUtils;
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.Point;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;

import com.dbyl.libarary.utils.DriverFactory;

/**
 * Demo that recognizes a CAPTCHA image with Tesseract-OCR: it opens a local
 * test page with Selenium, saves a screenshot of the {@code <img>} element,
 * runs the Tesseract command-line tool on that image and prints the text file
 * Tesseract produced.
 */
public class TesseractTest {

    /**
     * Runs the whole flow: capture the element, run OCR, print the result.
     *
     * @param args unused
     * @throws IOException          if Tesseract cannot be started or exits
     *                              with a non-zero code
     * @throws InterruptedException if the thread is interrupted while waiting
     *                              for Tesseract to finish
     */
    public static void main(String[] args) throws IOException,
            InterruptedException {

        WebDriver driver = DriverFactory.getChromeDriver();
        try {
            // Configure the page-load timeout before navigating so that it
            // applies to the driver.get() call below.
            driver.manage().timeouts().pageLoadTimeout(30, TimeUnit.SECONDS);
            driver.get("file:///C:/Users/validation.html");
            WebElement element = driver.findElement(By.xpath("//img"));

            // take screen shot for element
            screenShotForElement(driver, element, "D:\\Tesseract-OCR\\test.png");
        } finally {
            // Always release the browser, even when the capture step fails.
            driver.quit();
        }

        // use Tesseract to get strings
        runTesseract("D:\\Tesseract-OCR\\test.png", "D:\\Tesseract-OCR\\test");

        // Read text
        readTextFile("D:\\Tesseract-OCR\\test.txt");
    }

    /**
     * Runs the Tesseract command-line tool and waits until it has exited, so
     * the output text file is complete before it is read.
     *
     * @param imagePath  image file passed to Tesseract
     * @param outputBase output path passed to Tesseract, without extension
     * @throws IOException          if the process cannot be started or exits
     *                              with a non-zero code
     * @throws InterruptedException if interrupted while waiting for the process
     */
    private static void runTesseract(String imagePath, String outputBase)
            throws IOException, InterruptedException {
        // NOTE(review): "-1" is kept from the original command; it looks like
        // a typo for "-l <lang>" - confirm against the Tesseract usage text.
        Process tesseract = new ProcessBuilder("cmd.exe", "/C",
                "tesseract.exe", imagePath, outputBase, "-1")
                // Forward the child's output so it cannot block on a full pipe.
                .inheritIO()
                .start();
        int exitCode = tesseract.waitFor();
        if (exitCode != 0) {
            throw new IOException("tesseract exited with code " + exitCode);
        }
    }

    /**
     * Prints the content of a text file to standard output, line by line.
     *
     * <p>The file is decoded as GBK. If {@code filePath} is {@code null} or
     * does not denote an existing regular file, a "file not found" message is
     * printed instead. Read errors are reported on standard output together
     * with a stack trace; no exception propagates to the caller.
     *
     * @param filePath path of the text file to print
     */
    public static void readTextFile(String filePath) {
        String encoding = "GBK";
        // isFile() is already false for a path that does not exist.
        if (filePath == null || !new File(filePath).isFile()) {
            System.out.println("找不到指定的文件");
            return;
        }
        // try-with-resources closes both streams even when reading fails midway.
        try (FileInputStream in = new FileInputStream(filePath);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, encoding))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        } catch (IOException e) {
            System.out.println("读取文件内容出错");
            e.printStackTrace();
        }
    }

    /**
     * Saves a screenshot of a single page element as a PNG file.
     *
     * <p>Takes a screenshot through the driver, crops it to the rectangle
     * given by the element's location and size, and copies the cropped image
     * to {@code path}. The crop rectangle is clamped to the bounds of the
     * screenshot, so an element that is only partly inside the captured area
     * yields the part that is inside.
     *
     * <p>I/O failures are reported on standard error and do not propagate.
     *
     * @param driver  driver used to take the screenshot; it is cast to
     *                {@link TakesScreenshot}
     * @param element element whose location and size define the crop rectangle
     * @param path    destination file for the cropped PNG image
     * @throws InterruptedException declared for backward compatibility only;
     *                              the method no longer sleeps
     * @throws java.awt.image.RasterFormatException if the element lies
     *                              completely outside the captured screenshot
     */
    public static void screenShotForElement(WebDriver driver,
            WebElement element, String path) throws InterruptedException {
        File scrFile = ((TakesScreenshot) driver)
                .getScreenshotAs(OutputType.FILE);
        try {
            BufferedImage img = ImageIO.read(scrFile);
            if (img == null) {
                // ImageIO.read returns null when no reader can decode the file.
                throw new IOException("Cannot decode screenshot file: " + scrFile);
            }

            Point p = element.getLocation();
            // Clamp the crop rectangle to the image so getSubimage cannot fail
            // merely because the element sticks out of the captured area.
            int left = Math.max(0, p.getX());
            int top = Math.max(0, p.getY());
            int right = Math.min(img.getWidth(),
                    p.getX() + element.getSize().getWidth());
            int bottom = Math.min(img.getHeight(),
                    p.getY() + element.getSize().getHeight());
            if (right <= left || bottom <= top) {
                throw new java.awt.image.RasterFormatException(
                        "Element is outside the screenshot: location=" + p
                                + ", screenshot=" + img.getWidth() + "x"
                                + img.getHeight());
            }

            BufferedImage dest = img.getSubimage(left, top,
                    right - left, bottom - top);
            // Overwrite the temporary screenshot with the crop, then copy it out.
            ImageIO.write(dest, "png", scrFile);
            FileUtils.copyFile(scrFile, new File(path));
        } catch (IOException e) {
            System.err.println("Failed to save element screenshot to " + path);
            e.printStackTrace();
        }
    }

}