# -*- coding:utf-8 _*-
import os
import time

from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
# Open a web page in a visible (non-headless) Chrome window.
def seleniumOpenUrl(initUrl):
    """Start Chrome through a local chromedriver and load ``initUrl``.

    The chromedriver binary is expected at ``chromedriver.exe`` in the
    current working directory.

    Args:
        initUrl: URL to load once the browser has started.

    Returns:
        The ``webdriver.Chrome`` instance with ``initUrl`` loaded and the
        window maximized. The caller owns it and should call ``quit()``
        when finished.

    Raises:
        Exception: any error raised while loading the page or maximizing
            the window is re-raised after the browser has been shut down.
    """
    chrome_options = webdriver.ChromeOptions()
    # Two leading dashes: the original three-dash spelling is not a switch
    # name Chrome recognizes.
    chrome_options.add_argument('--ignore-certificate-errors-spki-list')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--ignore-ssl-error')
    chrome_options.add_argument('log-level=2')

    # Original author's note: works around a Chrome stack-overflow problem.
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # Original author's note: avoids anti-crawler detection that prevents
    # data from being fetched (hides Selenium's automation fingerprint).
    # See https://blog.csdn.net/huashao0602/article/details/124322123
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    # chromedriver.exe lives in the directory the script is run from.
    path = os.path.join(os.getcwd(), 'chromedriver.exe')
    print('chromePath:', path)

    service = Service(executable_path=path)
    driver = webdriver.Chrome(options=chrome_options, service=service)
    try:
        driver.get(initUrl)
        driver.maximize_window()
    except Exception:
        # Do not leave an orphaned browser/chromedriver behind on failure.
        driver.quit()
        raise
    return driver
# Open a web page in headless mode (no browser window is shown).
def seleniumOpenUrlHeadless(initUrl):
    """Start headless Chrome through a local chromedriver and load ``initUrl``.

    The chromedriver binary is expected at ``chromedriver.exe`` in the
    current working directory. The browser is started with ``--lang=fr``.

    Args:
        initUrl: URL to load once the browser has started.

    Returns:
        The ``webdriver.Chrome`` instance with ``initUrl`` loaded. The
        caller owns it and should call ``quit()`` when finished.

    Raises:
        Exception: any error raised while loading the page or maximizing
            the window is re-raised after the browser has been shut down.
    """
    chrome_options = webdriver.ChromeOptions()
    # Two leading dashes: the original three-dash spelling is not a switch
    # name Chrome recognizes.
    chrome_options.add_argument('--ignore-certificate-errors-spki-list')
    chrome_options.add_argument('--ignore-ssl-errors')
    chrome_options.add_argument('--ignore-ssl-error')
    chrome_options.add_argument('log-level=2')

    # Original author's note: works around a Chrome stack-overflow problem.
    chrome_options.add_experimental_option('useAutomationExtension', False)

    # Original author's note: avoids anti-crawler detection that prevents
    # data from being fetched (hides Selenium's automation fingerprint).
    # See https://blog.csdn.net/huashao0602/article/details/124322123
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")

    # chromedriver.exe lives in the directory the script is run from.
    path = os.path.join(os.getcwd(), 'chromedriver.exe')
    print('chromePath:', path)

    browser_locale = 'fr'
    chrome_options.add_argument("--lang={}".format(browser_locale))
    # The original also added a bare 'headless' argument; it names the same
    # switch, so a single '--headless' is sufficient.
    chrome_options.add_argument("--headless")

    service = Service(executable_path=path)
    driver = webdriver.Chrome(options=chrome_options, service=service)
    try:
        driver.get(initUrl)
        driver.maximize_window()
    except Exception:
        # Do not leave an orphaned browser/chromedriver behind on failure.
        driver.quit()
        raise
    return driver
测试打开百度 - test.py
# -*- coding:utf-8 _*-from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
# The star import supplies seleniumOpenUrl; presumably it also re-exports
# ``time`` for the script entry point that follows (verify against seleniumkit).
from seleniumkit import *


def openBaiDu():
    """Open the Baidu home page in a visible Chrome window.

    Returns:
        The driver returned by ``seleniumOpenUrl`` for
        ``https://www.baidu.com/``.
    """
    initUrl = "https://www.baidu.com/"
    driver = seleniumOpenUrl(initUrl)
    return driver
if __name__ == "__main__":
    # Imported explicitly so the script does not depend on ``time`` leaking
    # in through the star import.
    import time

    driver = openBaiDu()
    try:
        # Keep the browser open for ten seconds so the page can be inspected.
        time.sleep(10)
    finally:
        # Shut down the browser and chromedriver explicitly instead of
        # leaving cleanup to interpreter exit.
        driver.quit()
1、安装tesseract
Index of /tesseract
https://digi.bib.uni-mannheim.de/tesseract/tesseract-ocr-w64-setup-v5.3.0.20221214.exe
2、安装中文语言包
从 https://digi.bib.uni-mannheim.de/tesseract/tessdata_fast/ 下载中文语言包,并拷贝到 C:\Program Files\Tesseract-OCR\tessdata 目录下
3、注…