官网
https://github.com/hardkoded/puppeteer-sharp
安装
创建控制台项目,安装PuppeteerSharp 18.1.0
编写代码
安装chrome
/// <summary>
/// Downloads a fixed Chrome build so the later snippets have a browser to launch.
/// </summary>
async static Task Main(string[] args)
{
    // Fetcher used to download the browser when it is not installed yet.
    var fetcher = new BrowserFetcher();

    // To inspect the builds that are already installed:
    //var browsers = fetcher.GetInstalledBrowsers();

    // Download one specific build (a browser tag such as BrowserTag.Dev can be passed instead).
    const string chromeBuild = "127.0.6533.99";
    await fetcher.DownloadAsync(chromeBuild);

    Console.WriteLine("安装完毕");
}
执行完成之后,项目目录下会多出下载好的浏览器文件
浏览器设置
// Command-line switches handed to Chrome at startup.
var chromeArgs = new[]
{
    // Start in incognito mode.
    "--incognito",
    // Maximize the window on startup.
    "--start-maximized",
    // Sandbox switches, currently not applied:
    //"--no-sandbox",
    //"--disable-setuid-sandbox",
    // See https://blog.csdn.net/charon_0928/article/details/134051787
    // Do not expose the browser-automation marker. Pages commonly detect automation like this:
    /*
    if (window.navigator.webdriver) {
        // the browser was opened by WebDriver
    } else {
        // the browser was opened manually
    }
    var userAgent = window.navigator.userAgent;
    if (userAgent.includes("webdriver") || userAgent.includes("selenium")) {
        // the browser was opened by WebDriver
    } else {
        // the browser was opened manually
    }
    // The user-agent check requires setting the USER_AGENT parameter manually; Selenium uses it.
    */
    "--disable-blink-features=AutomationControlled",
};

// Launch the downloaded Chrome build with a visible window and a dedicated profile directory.
var browser = await Puppeteer.LaunchAsync(new LaunchOptions
{
    ExecutablePath = @"./Chrome/Win64-127.0.6533.99/chrome-win64/chrome.exe",
    Headless = false,
    IgnoreHTTPSErrors = true,
    UserDataDir = "./chromeData",
    Args = chromeArgs,
});
打开新的选项卡
// Open a fresh tab and navigate it to Baidu.
IPage page = await browser.NewPageAsync();
await page.GoToAsync("https://www.baidu.com/");
上面的写法会打开两个选项卡;如果想直接复用浏览器启动时已有的选项卡,可以这样写
// Reuse the first tab the browser already has; create a new tab only when there is none.
var pages = await browser.PagesAsync();
IPage currentPage = (pages != null && pages.Length > 0)
    ? pages[0]
    : await browser.NewPageAsync();
await currentPage.GoToAsync("https://www.baidu.com/");
设置窗口的显示大小
// Set the page viewport to 1920x1080 before navigating.
// The call must be awaited: without it the navigation below can start before the viewport
// has been applied, and any exception thrown by SetViewportAsync would go unobserved.
await currentPage.SetViewportAsync(new ViewPortOptions()
{
    Width = 1920,
    Height = 1080
});
await currentPage.GoToAsync("https://www.baidu.com/");
等待网页加载完毕
/*
Values accepted for the "wait until" navigation option:
  'load'             - wait for the load event, i.e. all resources of the page
                       (style sheets, images, fonts) have finished loading.
  'domcontentloaded' - wait for the DOMContentLoaded event: the HTML document has been
                       loaded and parsed, but dependent resources such as images may
                       still be loading.
  'networkidle0'     - wait until there have been no network connections for at least 500 ms.
  'networkidle2'     - like 'networkidle0', but tolerates up to 2 open network connections
                       during the 500 ms window.
*/
// Navigate and only continue once the network has gone idle.
await currentPage.GoToAsync("https://www.baidu.com/", WaitUntilNavigation.Networkidle0);
Console.WriteLine("加载完毕");
input输入框
// Fill the Baidu search box (input#kw) on the current page.
var searchInput = await currentPage.QuerySelectorAsync("input#kw");
if (searchInput != null)
{
    // TypeAsync appends to whatever the field already contains, so clear the old value first.
    // The clear runs on currentPage (the page used everywhere else in these snippets) and only
    // when the input exists, so the script never dereferences a missing element.
    await currentPage.EvaluateExpressionAsync("document.querySelector('input#kw').value=''");
    await searchInput.TypeAsync("蜡笔小新");
}
搜索按钮点击
// Look up the search form's submit button and click it when it is present.
var searchButton = await currentPage.QuerySelectorAsync(@"input[type=""submit""]");
if (searchButton != null)
{
    await searchButton.ClickAsync();
}
打开新连接监听请求和响应
// Log the URL and HTTP method of every request the page issues, plus the body when there is one.
currentPage.Request += (sender, e) =>
{
    var request = e.Request;
    Console.WriteLine(request.Url);
    Console.WriteLine(request.Method);

    if (!request.HasPostData)
    {
        return;
    }

    Console.WriteLine(JsonConvert.SerializeObject(request.PostData));
};
// Log the URL of every response and print the body of the two endpoints of interest.
currentPage.Response += async (object sender, ResponseCreatedEventArgs args) =>
{
    // An async lambda used as an event handler is async void: an exception escaping it cannot
    // be observed by the subscriber and would terminate the process, so failures while reading
    // a body are caught and logged here.
    try
    {
        var responseUrl = args.Response.Url;
        Console.WriteLine(responseUrl);

        // URLs are machine identifiers, so compare them ordinally rather than by culture.
        if (responseUrl.StartsWith("https://ug.baidu.com/mcp/pc/pcsearch", StringComparison.Ordinal))
        {
            var jsonData = await args.Response.JsonAsync();
            Console.WriteLine(jsonData);
        }
        if (responseUrl.StartsWith("https://www.baidu.com/s", StringComparison.Ordinal))
        {
            var textData = await args.Response.TextAsync();
            Console.WriteLine(textData);
        }
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Failed to read response body: {ex.Message}");
    }
};
如果是在页面里面打开新的链接,则监听 TargetChanged 事件
// Watch browser targets: when a target's URL changes to a Baidu page, attach a response
// logger to that page.
// NOTE(review): TargetChanged may fire more than once for the same target, which would attach
// the logger repeatedly — confirm and de-duplicate if that matters.
browser.TargetChanged += async (object sender, TargetChangedArgs args) =>
{
    // async void handler: catch everything so a failure cannot terminate the process.
    try
    {
        var targetUrl = args.Target.Url;
        Console.WriteLine($"url:{targetUrl}");

        // Only the list (search result) pages are of interest.
        if (!targetUrl.StartsWith("https://www.baidu.com/", StringComparison.Ordinal))
        {
            return;
        }

        var searchResultPage = await args.Target.PageAsync();
        if (searchResultPage == null)
        {
            // No page object is available for this target, so there is nothing to subscribe to.
            return;
        }

        // Parameter names differ from the outer lambda's so nothing is shadowed.
        searchResultPage.Response += async (object responseSender, ResponseCreatedEventArgs responseArgs) =>
        {
            try
            {
                var responseUrl = responseArgs.Response.Url;
                Console.WriteLine(responseUrl);
                if (responseUrl.StartsWith("https://ug.baidu.com/mcp/pc/pcsearch", StringComparison.Ordinal))
                {
                    var jsonData = await responseArgs.Response.JsonAsync();
                    Console.WriteLine(jsonData);
                }
                if (responseUrl.StartsWith("https://www.baidu.com/s", StringComparison.Ordinal))
                {
                    var textData = await responseArgs.Response.TextAsync();
                    Console.WriteLine(textData);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"Failed to read response body: {ex.Message}");
            }
        };
    }
    catch (Exception ex)
    {
        Console.WriteLine($"Failed to handle target change: {ex.Message}");
    }
};
// 等待新标签页加载完成
// var newPage = await browser.WaitForTargetAsync((target) => target.Url == "https://www.nmpa.gov.cn/datasearch/search-result.html");
// currentPage = await newPage.PageAsync();
// Console.WriteLine("新的tab页面加载完成");
获取网页内容
// The results are not in the DOM immediately, so wait a little before querying them.
// Alternative: wait for the result container itself.
//await currentPage.WaitForSelectorAsync("div.c-container");
var response = await currentPage.WaitForResponseAsync(anyResponse => true);
Console.WriteLine(response?.Ok);

// Task.Delay yields the thread while waiting; Thread.Sleep would block it inside async code.
await Task.Delay(TimeSpan.FromSeconds(3));

var searchResults = await currentPage.QuerySelectorAllAsync(".c-container");
if (searchResults != null)
{
    foreach (var searchResult in searchResults)
    {
        // Read this element's own markup. Asking the element's frame for its content would
        // print the HTML of the entire page once per result.
        var searchContent = await searchResult.EvaluateFunctionAsync<string>("el => el.outerHTML");
        Console.WriteLine(searchContent);
    }
}
关闭浏览器
// Close the browser once all the work above is done.
await browser.CloseAsync();
参考
https://blog.csdn.net/fenglingguitar/article/details/137477378
https://www.nmpa.gov.cn/datasearch/search-result.html
https://blog.csdn.net/charon_0928/article/details/134051787
https://blog.csdn.net/joy1793/article/details/116207436