metagpt源码 (PlaywrightWrapper类)

news2024/12/16 12:28:17

前提条件,安装Playwright, 教程见 Getting started - Library
主要命令:

pip install playwright
playwright install

1. PlaywrightWrapper 源码

#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import annotations

import asyncio
import os
import sys
from pathlib import Path
from typing import Literal, Optional

from playwright.async_api import async_playwright
from pydantic import BaseModel, Field, PrivateAttr

from metagpt.logs import logger
from metagpt.utils.parse_html import WebPage


class PlaywrightWrapper(BaseModel):
    """Wrapper around Playwright.

    To use this module, you should have the `playwright` Python package installed and ensure that
    the required browsers are also installed. You can install playwright by running the command
    `pip install metagpt[playwright]` and download the necessary browser binaries by running the
    command `playwright install` for the first time.
    """

    # Name of the Playwright browser engine attribute to launch.
    browser_type: Literal["chromium", "firefox", "webkit"] = "chromium"
    # Keyword arguments forwarded verbatim to ``browser_type.launch``.
    launch_kwargs: dict = Field(default_factory=dict)
    # Optional proxy server; injected into ``launch_kwargs`` by ``__init__``
    # and exported as ALL_PROXY when browsers have to be installed.
    proxy: Optional[str] = None
    # Keyword arguments forwarded verbatim to ``browser.new_context``.
    context_kwargs: dict = Field(default_factory=dict)
    # True once ``_run_precheck`` has completed for this instance.
    _has_run_precheck: bool = PrivateAttr(False)

    def __init__(self, **kwargs):
        """Build the wrapper and derive launch/context options from ``kwargs``.

        Args:
            **kwargs: Model fields, plus an optional ``ignore_https_errors``
                value that is copied into ``context_kwargs``.
        """
        super().__init__(**kwargs)

        launch_kwargs = self.launch_kwargs
        if self.proxy and "proxy" not in launch_kwargs:
            args = launch_kwargs.get("args", [])
            # Do not override a proxy already supplied as a raw browser flag.
            if not any(arg.startswith("--proxy-server=") for arg in args):
                launch_kwargs["proxy"] = {"server": self.proxy}

        if "ignore_https_errors" in kwargs:
            self.context_kwargs["ignore_https_errors"] = kwargs["ignore_https_errors"]

    async def run(self, url: str, *urls: str) -> WebPage | list[WebPage]:
        """Scrape one or more URLs with a freshly launched browser.

        Args:
            url: The first (or only) URL to load.
            *urls: Additional URLs, scraped concurrently with ``url``.

        Returns:
            A single ``WebPage`` when only ``url`` is given, otherwise a list
            of ``WebPage`` objects in the same order as the arguments.
        """
        async with async_playwright() as ap:
            browser_type = getattr(ap, self.browser_type)
            await self._run_precheck(browser_type)
            browser = await browser_type.launch(**self.launch_kwargs)
            try:
                if urls:
                    return await asyncio.gather(
                        self._scrape(browser, url),
                        *(self._scrape(browser, extra) for extra in urls),
                    )
                return await self._scrape(browser, url)
            finally:
                # Release the browser explicitly instead of relying on
                # Playwright shutdown to reap it.
                await browser.close()

    async def _scrape(self, browser, url):
        """Load ``url`` in its own browser context and capture its content.

        Any exception raised while loading or reading the page is turned into
        a ``WebPage`` whose ``inner_text`` carries the error message and whose
        ``html`` is empty, so one failing URL does not abort the others.
        """
        context = await browser.new_context(**self.context_kwargs)
        try:
            page = await context.new_page()
            async with page:
                try:
                    await page.goto(url)
                    # Scroll to the bottom before reading the page content.
                    await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                    html = await page.content()
                    inner_text = await page.evaluate("() => document.body.innerText")
                except Exception as e:
                    inner_text = f"Fail to load page content for {e}"
                    html = ""
                return WebPage(inner_text=inner_text, html=html, url=url)
        finally:
            # The context is created per URL; close it so contexts do not
            # accumulate on the shared browser.
            await context.close()

    async def _run_precheck(self, browser_type):
        """Make sure a browser executable is available before launching.

        Runs at most once per instance. When the default executable is missing
        and the caller did not supply ``executable_path``, the browser is
        installed via ``_install_browsers``; if the expected path still does
        not exist afterwards, a sibling ``<browser_type>-*`` build directory is
        used as a fallback and written into ``launch_kwargs``.
        """
        if self._has_run_precheck:
            return

        executable_path = Path(browser_type.executable_path)
        if not executable_path.exists() and "executable_path" not in self.launch_kwargs:
            kwargs = {}
            if self.proxy:
                # ``env`` replaces the child's whole environment, so extend the
                # inherited one rather than passing ALL_PROXY on its own.
                kwargs["env"] = {**os.environ, "ALL_PROXY": self.proxy}
            await _install_browsers(self.browser_type, **kwargs)

            # Another coroutine may have finished the precheck while this one
            # was awaiting the installation.
            if self._has_run_precheck:
                return

            if not executable_path.exists():
                parts = executable_path.parts
                # NOTE(review): assumes the executable sits two levels below a
                # "<browser>-<revision>" directory -- confirm against the
                # Playwright cache layout.
                available_paths = list(Path(*parts[:-3]).glob(f"{self.browser_type}-*"))
                if available_paths:
                    logger.warning(
                        "It seems that your OS is not officially supported by Playwright. "
                        "Try to set executable_path to the fallback build version."
                    )
                    executable_path = available_paths[0].joinpath(*parts[-2:])
                    self.launch_kwargs["executable_path"] = str(executable_path)
        self._has_run_precheck = True


def _get_install_lock():
    """Return the module-wide install lock, creating it on the first call."""
    global _install_lock
    lock = _install_lock
    if lock is None:
        lock = _install_lock = asyncio.Lock()
    return lock


async def _install_browsers(*browsers, **kwargs) -> None:
    """Run ``python -m playwright install`` for browsers not yet attempted.

    Args:
        *browsers: Browser names to install; names already present in
            ``_install_cache`` are skipped.
        **kwargs: Extra keyword arguments passed on to
            ``asyncio.create_subprocess_exec``.

    The installer's stdout is logged at info level and its stderr at warning
    level. The requested names are added to ``_install_cache`` whether or not
    the installer exits successfully.
    """
    async with _get_install_lock():
        pending = [name for name in browsers if name not in _install_cache]
        if not pending:
            return

        command = [sys.executable, "-m", "playwright", "install", *pending]
        process = await asyncio.create_subprocess_exec(
            *command,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
            **kwargs,
        )

        # Read both pipes concurrently until the installer closes them.
        stdout_task = _log_stream(process.stdout, logger.info)
        stderr_task = _log_stream(process.stderr, logger.warning)
        await asyncio.gather(stdout_task, stderr_task)

        exit_code = await process.wait()
        if exit_code != 0:
            logger.warning("Fail to install browser for playwright.")
        else:
            logger.info("Install browser for playwright successfully.")
        _install_cache.update(pending)


async def _log_stream(sr, log_func):
    """Forward every line read from ``sr`` to ``log_func`` until EOF.

    Args:
        sr: Object exposing an awaitable ``readline()`` that returns bytes and
            an empty value at end of stream.
        log_func: Callable invoked once per line with the prefixed, decoded and
            whitespace-stripped text.
    """
    chunk = await sr.readline()
    while chunk:
        text = chunk.decode().strip()
        log_func(f"[playwright install browser]: {text}")
        chunk = await sr.readline()


# Shared lock serialising browser installation; created lazily by
# _get_install_lock(), so it is None until the first call.
_install_lock: Optional[asyncio.Lock] = None
# Browser names for which an installation has already been attempted.
_install_cache: set = set()

2. 测试

async def webTest():
    """Scrape the Playwright home page with default settings and print it."""
    scraper = PlaywrightWrapper()
    page = await scraper.run(url="https://playwright.dev/")
    print(page)

# Run the demo coroutine only when this file is executed as a script.
if __name__ == "__main__":
    asyncio.run(webTest())

输出:

inner_text='Skip to main content\nPlaywright\nDocs\nAPI\nNode.js\nCommunity\nSearch\nK\nPlaywright enables reliable end-to-end testing for modern web apps.\nGET STARTED\nStar\n67k+\n\n\n\n\n\nAny browser • Any platform • One API\n\nCross-browser. Playwright supports all modern rendering engines including Chromium, WebKit, and Firefox.\n\nCross-platform. Test on Windows, Linux, and macOS, locally or on CI, headless or headed.\n\nCross-language. Use the Playwright API in TypeScript, JavaScript, Python, .NET, Java.\n\nTest Mobile Web. Native mobile emulation of Google Chrome for Android and Mobile Safari. The same rendering engine works on your Desktop and in the Cloud.\n\nResilient • No flaky tests\n\nAuto-wait. Playwright waits for elements to be actionable prior to performing actions. It also has a rich set of introspection events. The combination of the two eliminates the need for artificial timeouts - the primary cause of flaky tests.\n\nWeb-first assertions. Playwright assertions are created specifically for the dynamic web. Checks are automatically retried until the necessary conditions are met.\n\nTracing. Configure test retry strategy, capture execution trace, videos, screenshots to eliminate flakes.\n\nNo trade-offs • No limits\n\nBrowsers run web content belonging to different origins in different processes. Playwright is aligned with the modern browsers architecture and runs tests out-of-process. This makes Playwright free of the typical in-process test runner limitations.\n\nMultiple everything. Test scenarios that span multiple tabs, multiple origins and multiple users. Create scenarios with different contexts for different users and run them against your server, all in one test.\n\nTrusted events. Hover elements, interact with dynamic controls, produce trusted events. Playwright uses real browser input pipeline indistinguishable from the real user.\n\nTest frames, pierce Shadow DOM. 
Playwright selectors pierce shadow DOM and allow entering frames seamlessly.\n\nFull isolation • Fast execution\n\nBrowser contexts. Playwright creates a browser context for each test. Browser context is equivalent to a brand new browser profile. This delivers full test isolation with zero overhead. Creating a new browser context only takes a handful of milliseconds.\n\nLog in once. Save the authentication state of the context and reuse it in all the tests. This bypasses repetitive log-in operations in each test, yet delivers full isolation of independent tests.\n\nPowerful Tooling\n\nCodegen. Generate tests by recording your actions. Save them into any language.\n\nPlaywright inspector. Inspect page, generate selectors, step through the test execution, see click points, explore execution logs.\n\nTrace Viewer. Capture all the information to investigate the test failure. Playwright trace contains test execution screencast, live DOM snapshots, action explorer, test source, and many more.\n\nChosen by companies and open source projects\nLearn\nGetting started\nPlaywright Training\nLearn Videos\nFeature Videos\nCommunity\nStack Overflow\nDiscord\nTwitter\nLinkedIn\nMore\nGitHub\nYouTube\nBlog\nAmbassadors\nCopyright © 2024 Microsoft' html='<!DOCTYPE html><html lang="en" dir="ltr" class="plugin-pages plugin-id-default" data-has-hydrated="true" data-theme="light" data-rh="lang,dir,class,data-has-hydrated"><head><meta charset="UTF-8"><meta name="generator" content="Docusaurus v3.6.3"><title>Fast and reliable end-to-end testing for modern web apps | Playwright</title><meta data-rh="true" name="twitter:card" content="summary_large_image"><meta data-rh="true" property="og:image" content="https://repository-images.githubusercontent.com/221981891/8c5c6942-c91f-4df1-825f-4cf474056bd7"><meta data-rh="true" name="twitter:image" content="https://repository-images.githubusercontent.com/221981891/8c5c6942-c91f-4df1-825f-4cf474056bd7"><meta data-rh="true" property="og:url" 
content="https://playwright.dev/"><meta data-rh="true" property="og:locale" content="en"><meta data-rh="true" name="docusaurus_locale" content="en"><meta data-rh="true" name="docusaurus_tag" content="default"><meta data-rh="true" name="docsearch:language" content="en"><meta data-rh="true" name="docsearch:docusaurus_tag" content="default"><meta data-rh="true" property="og:title" content="Fast and reliable end-to-end testing for modern web apps | Playwright"><meta data-rh="true" name="description" content="Cross-browser end-to-end testing for modern web apps"><meta data-rh="true" property="og:description" content="Cross-browser end-to-end testing for modern web apps"><link data-rh="true" rel="icon" href="/img/playwright-logo.svg"><link data-rh="true" rel="canonical" href="https://playwright.dev/"><link data-rh="true" rel="alternate" href="https://playwright.dev/" hreflang="en"><link data-rh="true" rel="alternate" href="https://playwright.dev/" hreflang="x-default"><link rel="search" type="application/opensearchdescription+xml" title="Playwright" href="/opensearch.xml"><script src="/js/redirection.js"></script><link rel="stylesheet" href="/assets/css/styles.f6afdb5c.css"><script src="/assets/js/runtime~main.eab3411e.js" defer=""></script><script src="/assets/js/main.89c99183.js" defer=""></script><meta name="viewport" content="width=device-width, initial-scale=1.0" data-rh="true"><link rel="preconnect" href="https://K09ICMCV6X-dsn.algolia.net" crossorigin="anonymous" data-rh="true"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link 
rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/17896441.df3d9d27.js"><link rel="prefetch" href="/assets/js/4cf51b27.1271ec49.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/aba21aa0.c688fdbe.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/0058b4c6.14b9012b.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/17896441.df3d9d27.js"><link rel="prefetch" href="/assets/js/90f396e5.be5740f0.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/e0719818.d9721f6c.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/d2436a2b.e2223d7d.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/e0719818.d9721f6c.js"><link rel="prefetch" 
href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/d2436a2b.e2223d7d.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/5e95c892.d02a82d0.js"><link rel="prefetch" href="/assets/js/e0719818.d9721f6c.js"><link rel="prefetch" href="/assets/js/a7bd4aaa.beab1732.js"><link rel="prefetch" href="/assets/js/d2436a2b.e2223d7d.js"><link rel="prefetch" href="/assets/js/a94703ab.16a84712.js"><link rel="prefetch" href="/assets/js/17896441.df3d9d27.js"><link rel="prefetch" href="/assets/js/083f60f3.579058f4.js"><link rel="prefetch" href="/assets/js/1df93b7f.f328e6f7.js"><link rel="prefetch" href="/assets/js/a7456010.b01acea0.js"></head><body class="navigation-with-keyboard" data-rh="class" style="overflow: visible;"><script>!function(){var t,e=function(){try{return new URLSearchParams(window.location.search).get("docusaurus-theme")}catch(t){}}()||function(){try{return window.localStorage.getItem("theme")}catch(t){}}();t=null!==e?e:window.matchMedia("(prefers-color-scheme: dark)").matches?"dark":window.matchMedia("(prefers-color-scheme: light)").matches?"light":"dark",document.documentElement.setAttribute("data-theme",t)}(),function(){try{for(var[t,e]of new URLSearchParams(window.location.search).entries())if(t.startsWith("docusaurus-data-")){var a=t.replace("docusaurus-data-","data-");document.documentElement.setAttribute(a,e)}}catch(t){}}()</script><div id="__docusaurus"><div role="region" aria-label="Skip to main content"><a class="skipToContent_fXgn" href="#__docusaurus_skipToContent_fallback">Skip to main content</a></div><nav aria-label="Main" class="navbar navbar--fixed-top"><div class="navbar__inner"><div class="navbar__items"><button aria-label="Toggle navigation bar" aria-expanded="false" class="navbar__toggle clean-btn" type="button"><svg width="30" height="30" viewBox="0 0 30 30" aria-hidden="true"><path stroke="currentColor" stroke-linecap="round" stroke-miterlimit="10" stroke-width="2" d="M4 
7h22M4 15h22M4 23h22"></path></svg></button><a class="navbar__brand" href="/"><div class="navbar__logo"><img src="/img/playwright-logo.svg" alt="Playwright logo" class="themedComponent_mlkZ themedComponent--light_NVdE"></div><b class="navbar__title text--truncate">Playwright</b></a><a class="navbar__item navbar__link" href="/docs/intro">Docs</a><a class="navbar__item navbar__link" href="/docs/api/class-playwright">API</a><div class="navbar__item dropdown dropdown--hoverable"><a href="#" aria-haspopup="true" aria-expanded="false" role="button" class="navbar__link">Node.js</a><ul class="dropdown__menu"><li><a href="/" rel="noopener noreferrer" class="dropdown__link undefined dropdown__link--active" data-language-prefix="/">Node.js</a></li><li><a href="/python/" rel="noopener noreferrer" class="dropdown__link" data-language-prefix="/python/">Python</a></li><li><a href="/java/" rel="noopener noreferrer" class="dropdown__link" data-language-prefix="/java/">Java</a></li><li><a href="/dotnet/" rel="noopener noreferrer" class="dropdown__link" data-language-prefix="/dotnet/">.NET</a></li></ul></div><a class="navbar__item navbar__link" href="/community/welcome">Community</a></div><div class="navbar__items navbar__items--right"><a href="https://github.com/microsoft/playwright" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-github-link" aria-label="GitHub repository"></a><a href="https://aka.ms/playwright/discord" target="_blank" rel="noopener noreferrer" class="navbar__item navbar__link header-discord-link" aria-label="Discord server"></a><div class="toggle_vylO colorModeToggle_DEke"><button class="clean-btn toggleButton_gllP" type="button" title="Switch between dark and light mode (currently dark mode)" aria-label="Switch between dark and light mode (currently dark mode)" aria-live="polite" aria-pressed="true"><svg viewBox="0 0 24 24" width="24" height="24" class="lightToggleIcon_pyhR"><path fill="currentColor" 
d="M12,9c1.65,0,3,1.35,3,3s-1.35,3-3,3s-3-1.35-3-3S10.35,9,12,9 M12,7c-2.76,0-5,2.24-5,5s2.24,5,5,5s5-2.24,5-5 S14.76,7,12,7L12,7z M2,13l2,0c0.55,0,1-0.45,1-1s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S1.45,13,2,13z M20,13l2,0c0.55,0,1-0.45,1-1 s-0.45-1-1-1l-2,0c-0.55,0-1,0.45-1,1S19.45,13,20,13z M11,2v2c0,0.55,0.45,1,1,1s1-0.45,1-1V2c0-0.55-0.45-1-1-1S11,1.45,11,2z M11,20v2c0,0.55,0.45,1,1,1s1-0.45,1-1v-2c0-0.55-0.45-1-1-1C11.45,19,11,19.45,11,20z M5.99,4.58c-0.39-0.39-1.03-0.39-1.41,0 c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0s0.39-1.03,0-1.41L5.99,4.58z M18.36,16.95 c-0.39-0.39-1.03-0.39-1.41,0c-0.39,0.39-0.39,1.03,0,1.41l1.06,1.06c0.39,0.39,1.03,0.39,1.41,0c0.39-0.39,0.39-1.03,0-1.41 L18.36,16.95z M19.42,5.99c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06c-0.39,0.39-0.39,1.03,0,1.41 s1.03,0.39,1.41,0L19.42,5.99z M7.05,18.36c0.39-0.39,0.39-1.03,0-1.41c-0.39-0.39-1.03-0.39-1.41,0l-1.06,1.06 c-0.39,0.39-0.39,1.03,0,1.41s1.03,0.39,1.41,0L7.05,18.36z"></path></svg><svg viewBox="0 0 24 24" width="24" height="24" class="darkToggleIcon_wfgR"><path fill="currentColor" d="M9.37,5.51C9.19,6.15,9.1,6.82,9.1,7.5c0,4.08,3.32,7.4,7.4,7.4c0.68,0,1.35-0.09,1.99-0.27C17.45,17.19,14.93,19,12,19 c-3.86,0-7-3.14-7-7C5,9.07,6.81,6.55,9.37,5.51z M12,3c-4.97,0-9,4.03-9,9s4.03,9,9,9s9-4.03,9-9c0-0.46-0.04-0.92-0.1-1.36 c-0.98,1.37-2.58,2.26-4.4,2.26c-2.98,0-5.4-2.42-5.4-5.4c0-1.81,0.89-3.42,2.26-4.4C12.92,3.04,12.46,3,12,3L12,3z"></path></svg></button></div><div class="navbarSearchContainer_Bca1"><button type="button" class="DocSearch DocSearch-Button" aria-label="Search (Ctrl+K)"><span class="DocSearch-Button-Container"><svg width="20" height="20" class="DocSearch-Search-Icon" viewBox="0 0 20 20" aria-hidden="true"><path d="M14.386 14.386l4.0877 4.0877-4.0877-4.0877c-2.9418 2.9419-7.7115 2.9419-10.6533 0-2.9419-2.9418-2.9419-7.7115 0-10.6533 2.9418-2.9419 7.7115-2.9419 10.6533 0 2.9419 2.9418 2.9419 7.7115 0 10.6533z" stroke="currentColor" 
fill="none" fill-rule="evenodd" stroke-linecap="round" stroke-linejoin="round"></path></svg><span class="DocSearch-Button-Placeholder">Search</span></span><span class="DocSearch-Button-Keys"><kbd class="DocSearch-Button-Key"><svg width="15" height="15" class="DocSearch-Control-Key-Icon"><path d="M4.505 4.496h2M5.505 5.496v5M8.216 4.496l.055 5.993M10 7.5c.333.333.5.667.5 1v2M12.326 4.5v5.996M8.384 4.496c1.674 0 2.116 0 2.116 1.5s-.442 1.5-2.116 1.5M3.205 9.303c-.09.448-.277 1.21-1.241 1.203C1 10.5.5 9.513.5 8V7c0-1.57.5-2.5 1.464-2.494.964.006 1.134.598 1.24 1.342M12.553 10.5h1.953" stroke-width="1.2" stroke="currentColor" fill="none" stroke-linecap="square"></path></svg></kbd><kbd class="DocSearch-Button-Key">K</kbd></span></button></div></div></div><div role="presentation" class="navbar-sidebar__backdrop"></div></nav><div id="__docusaurus_skipToContent_fallback" class="main-wrapper mainWrapper_z2l0"><header class="hero hero--primary heroBanner_UJJx"><div class="container"><h1 class="hero__title heroTitle_ohkl"><span class="highlight_gXVj">Playwright</span> enables reliable end-to-end testing for modern web apps.</h1><div class="buttons_pzbO"><a class="getStarted_Sjon" href="/docs/intro">Get started</a><span class="github-btn github-stargazers github-btn-large"><a class="gh-btn" href="https://github.com/microsoft/playwright" rel="noopener noreferrer" target="_blank" aria-label="Star microsoft/playwright on GitHub"><span class="gh-ico" aria-hidden="true"></span><span class="gh-text">Star</span></a><a class="gh-count" href="https://github.com/microsoft/playwright/stargazers" rel="noopener noreferrer" target="_blank" aria-label="67k+ stargazers on GitHub" style="display:block">67k+</a></span></div></div></header><br><main><br><br><div style="text-align:center"><img src="img/logos/Browsers.png" width="40%" alt="Browsers (Chromium, Firefox, WebKit)"></div><section class="features_keug"><div class="container"><div class="row"><div class="col col--6" 
style="margin-top:40px"><h3>Any browser • Any platform • One API</h3><div><p><b>Cross-browser.</b> Playwright supports all modern rendering engines including Chromium, WebKit, and Firefox.</p><p><b>Cross-platform.</b> Test on Windows, Linux, and macOS, locally or on CI, headless or headed.</p><p><b>Cross-language.</b> Use the Playwright API in <a href="https://playwright.dev/docs/intro">TypeScript</a>, <a href="https://playwright.dev/docs/intro">JavaScript</a>, <a href="https://playwright.dev/python/docs/intro">Python</a>, <a href="https://playwright.dev/dotnet/docs/intro">.NET</a>, <a href="https://playwright.dev/java/docs/intro">Java</a>.</p><p><b>Test Mobile Web.</b> Native mobile emulation of Google Chrome for Android and Mobile Safari. The same rendering engine works on your Desktop and in the Cloud.</p></div></div><div class="col col--6" style="margin-top:40px"><h3></h3><div></div></div><div class="col col--6" style="margin-top:40px"><h3></h3><div></div></div><div class="col col--6" style="margin-top:40px"><h3>Resilient • No flaky tests</h3><div><p><b>Auto-wait.</b> Playwright waits for elements to be actionable prior to performing actions. It also has a rich set of introspection events. The combination of the two eliminates the need for artificial timeouts - the primary cause of flaky tests.</p><p><b>Web-first assertions.</b> Playwright assertions are created specifically for the dynamic web. Checks are automatically retried until the necessary conditions are met.</p><p><b>Tracing.</b> Configure test retry strategy, capture execution trace, videos, screenshots to eliminate flakes.</p></div></div><div class="col col--6" style="margin-top:40px"><h3>No trade-offs • No limits</h3><div><p>Browsers run web content belonging to different origins in different processes. Playwright is aligned with the modern browsers architecture and runs tests out-of-process. 
This makes Playwright free of the typical in-process test runner limitations.</p><p><b>Multiple everything.</b> Test scenarios that span multiple <b>tabs</b>, multiple <b>origins</b> and multiple <b>users</b>. Create scenarios with different contexts for different users and run them against your server, all in one test.</p><p><b>Trusted events.</b> Hover elements, interact with dynamic controls, produce trusted events. Playwright uses real browser input pipeline indistinguishable from the real user.</p><p><b>Test frames, pierce Shadow DOM.</b> Playwright selectors pierce shadow DOM and allow entering frames seamlessly.</p></div></div><div class="col col--6" style="margin-top:40px"><h3></h3><div></div></div><div class="col col--6" style="margin-top:40px"><h3></h3><div></div></div><div class="col col--6" style="margin-top:40px"><h3>Full isolation • Fast execution</h3><div><p><b>Browser contexts.</b> Playwright creates a browser context for each test. Browser context is equivalent to a brand new browser profile. This delivers full test isolation with zero overhead. Creating a new browser context only takes a handful of milliseconds.</p><p><b>Log in once.</b> Save the authentication state of the context and reuse it in all the tests. This bypasses repetitive log-in operations in each test, yet delivers full isolation of independent tests.</p></div></div><div class="col col--6" style="margin-top:40px"><h3>Powerful Tooling</h3><div><p><b><a href="docs/codegen">Codegen.</a></b> Generate tests by recording your actions. Save them into any language.</p><p><b><a href="docs/debug#playwright-inspector">Playwright inspector.</a></b> Inspect page, generate selectors, step through the test execution, see click points, explore execution logs.</p><p><b><a href="docs/trace-viewer-intro">Trace Viewer.</a></b> Capture all the information to investigate the test failure. 
Playwright trace contains test execution screencast, live DOM snapshots, action explorer, test source, and many more.</p></div></div></div></div></section><section class="logosSection_gMWS"><div class="container"><div class="row"><div class="col col--12 logosColumn_GJVT"><h2>Chosen by companies and open source projects</h2><ul class="logosList_zAAF"><li><a href="https://code.visualstudio.com" target="_blank" rel="noreferrer noopener"><img src="img/logos/VSCode.png" alt="VS Code"></a></li><li><a href="https://bing.com" target="_blank" rel="noreferrer noopener"><img src="img/logos/Bing.png" alt="Bing"></a></li><li><a href="https://outlook.com" target="_blank" rel="noreferrer noopener"><img src="img/logos/Outlook.png" alt="Outlook"></a></li><li><a href="https://www.hotstar.com/" target="_blank" rel="noreferrer noopener"><img src="img/logos/DHotstar.jpg" alt="Disney+ Hotstar"></a></li><li><a href="https://github.com/mui-org/material-ui" target="_blank" rel="noreferrer noopener"><img src="img/logos/MUI.png" alt="Material UI"></a></li><li><a href="https://github.com/ing-bank/lion" target="_blank" rel="noreferrer noopener"><img src="img/logos/ING.png" alt="ING"></a></li><li><a href="https://github.com/adobe/spectrum-web-components" target="_blank" rel="noreferrer noopener"><img src="img/logos/Adobe2.png" alt="Adobe"></a></li><li><a href="https://github.com/react-navigation/react-navigation" target="_blank" rel="noreferrer noopener"><img src="img/logos/ReactNavigation.png" alt="React Navigation"></a></li><li><a href="https://accessibilityinsights.io/" target="_blank" rel="noreferrer noopener"><img src="img/logos/accessibilityinsights.png" alt="Accessibility Insights"></a></li></ul></div></div></div></section></main></div><footer class="footer footer--dark"><div class="container container-fluid"><div class="row footer__links"><div class="col footer__col"><div class="footer__title">Learn</div><ul class="footer__items clean-list"><li class="footer__item"><a 
class="footer__link-item" href="/docs/intro">Getting started</a></li><li class="footer__item"><a href="https://learn.microsoft.com/en-us/training/modules/build-with-playwright/" target="_blank" rel="noopener noreferrer" class="footer__link-item">Playwright Training<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" href="/community/learn-videos">Learn Videos</a></li><li class="footer__item"><a class="footer__link-item" href="/community/feature-videos">Feature Videos</a></li></ul></div><div class="col footer__col"><div class="footer__title">Community</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://stackoverflow.com/questions/tagged/playwright" target="_blank" rel="noopener noreferrer" class="footer__link-item">Stack Overflow<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://aka.ms/playwright/discord" target="_blank" rel="noopener noreferrer" class="footer__link-item">Discord<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://twitter.com/playwrightweb" target="_blank" rel="noopener noreferrer" class="footer__link-item">Twitter<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 
13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.linkedin.com/company/playwrightweb" target="_blank" rel="noopener noreferrer" class="footer__link-item">LinkedIn<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li></ul></div><div class="col footer__col"><div class="footer__title">More</div><ul class="footer__items clean-list"><li class="footer__item"><a href="https://github.com/microsoft/playwright" target="_blank" rel="noopener noreferrer" class="footer__link-item">GitHub<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://www.youtube.com/channel/UC46Zj8pDH5tDosqm1gd7WTg" target="_blank" rel="noopener noreferrer" class="footer__link-item">YouTube<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a href="https://dev.to/playwright" target="_blank" rel="noopener noreferrer" class="footer__link-item">Blog<svg width="13.5" height="13.5" aria-hidden="true" viewBox="0 0 24 24" class="iconExternalLink_nPIU"><path fill="currentColor" d="M21 13v10h-21v-19h12v2h-10v15h17v-8h2zm3-12h-10.988l4.035 4-6.977 7.07 2.828 2.828 6.977-7.07 4.125 4.172v-11z"></path></svg></a></li><li class="footer__item"><a class="footer__link-item" 
href="/community/ambassadors">Ambassadors</a></li></ul></div></div><div class="footer__bottom text--center"><div class="footer__copyright">Copyright © 2024 Microsoft</div></div></div></footer></div></body></html>' url='https://playwright.dev/'

3. 代码解释

这段代码是一个基于 Playwright 的 Python 异步工具,用于通过网页浏览器操作和抓取网页内容。它使用了多个 Python 包,包括 asyncio, pydantic 和 playwright.async_api,以实现灵活的浏览器控制功能。以下是对主要部分的逐段解释:

  1. 模块导入
from __future__ import annotations

启用注解的延迟求值(PEP 563):类型注解不会在定义时立即求值,因此可以使用前向引用,并且在较低版本的 Python 中也能在注解里用 | 表示联合类型(例如 WebPage | list[WebPage])。

import asyncio
import sys
from pathlib import Path
from typing import Literal, Optional

asyncio: 用于实现异步编程。
sys: 提供与解释器交互的工具。
Path: 用于文件路径操作。
Literal, Optional: 类型注解工具。

from playwright.async_api import async_playwright
from pydantic import BaseModel, Field, PrivateAttr

async_playwright: Playwright 的异步 API,用于浏览器操作。
pydantic: 提供强大的数据验证和模型支持。

  2. 核心类 PlaywrightWrapper
    概述
    封装了 Playwright 的功能,用于创建浏览器会话、抓取网页内容和动态处理。

属性

browser_type: Literal["chromium", "firefox", "webkit"] = "chromium"

指定使用的浏览器类型(默认 chromium)。

launch_kwargs: dict = Field(default_factory=dict)

浏览器启动的参数(例如无头模式等)。

proxy: Optional[str] = None

可选代理服务器地址。

context_kwargs: dict = Field(default_factory=dict)

浏览器上下文参数(例如忽略 HTTPS 错误)。

_has_run_precheck: bool = PrivateAttr(False)

内部属性,用于标记是否完成预检查。

方法

def __init__(self, **kwargs):
    ...

初始化 PlaywrightWrapper 对象。
处理代理配置和上下文参数。

  3. run 方法
async def run(self, url: str, *urls: str) -> WebPage | list[WebPage]:

接受单个或多个 URL,返回网页内容(WebPage 对象)。
使用 asyncio.gather 并发抓取多个 URL。

  4. _scrape 方法
async def _scrape(self, browser, url):

核心抓取逻辑,使用 Playwright 打开页面并提取内容。
包括 HTML 和页面文本。

关键逻辑:

await page.goto(url): 访问 URL。
await page.evaluate("window.scrollTo(0, document.body.scrollHeight)"): 模拟滚动操作。
await page.content(): 获取 HTML。
await page.evaluate("() => document.body.innerText"): 提取文本内容。
  5. _run_precheck 方法
async def _run_precheck(self, browser_type):

检查 Playwright 是否正确安装所需的浏览器。
如果未安装,调用 _install_browsers 下载。

  6. 浏览器安装工具
_install_browsers

异步安装指定的浏览器。
使用 asyncio.create_subprocess_exec 调用 playwright install 安装。

_log_stream

实时处理安装过程中产生的日志输出。

  7. 全局变量
_install_lock: asyncio.Lock = None

防止并发安装操作。

_install_cache = set()

缓存已安装的浏览器,避免重复安装。

用途

网页抓取:从动态网页中提取内容。
代理支持:通过代理服务器访问网页。
浏览器自动安装:根据需求自动下载并配置浏览器。
高效并发:通过 asyncio 实现对多个 URL 的并发抓取。

参考链接: https://github.com/geekan/MetaGPT
https://playwright.dev/python/docs/library

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.coloradmin.cn/o/2260493.html

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈,一经查实,立即删除!

相关文章

前端成长之路:CSS盒子模型

盒子模型是页面布局的核心,通过盒子模型才能更好的进行页面布局。 网页布局的本质 网页布局的核心本质其实是:HTML网页元素就是一个个的盒子box,通过CSS可以设置好盒子的样式,和盒子需要摆放的位置;简单说来就是通过…

LeetCode刷题 -- 字符串

目录 最长公共前缀题目解析算法原理代码 最长回文子串题目解析算法原理代码 二进制求和题目解析算法原理代码 字符串相乘题目解析算法原理代码 最长公共前缀 题目链接 题目解析 只需找出字符串中的公共的最长字符串即可 算法原理 1.法一:两两字符串比较,…

4G模块详解

在之前的教程中,无线通信技术我们学习了蓝牙和 WiFi,今天我们要来学习 4G。 4G 模块在距离上有个突破,它不像蓝牙短距离,也不像 WiFi 只能在局域网,4G 模块可使用户无论在哪,只要有 4G 网络信号覆盖…

uni-app实现小程序、H5图片轮播预览、双指缩放、双击放大、单击还原、滑动切换功能

前言 这次的标题有点长,主要是想要表述的功能点有点多; 简单做一下需求描述 产品要求在商品详情页的头部轮播图部分,可以单击预览大图,同时在预览界面可以双指放大缩小图片并且可以移动查看图片,双击放大…

数据地图怎么做?推荐这款数据可视化地图生成器

在数字化与信息化高速发展的今天,企业迎来了前所未有的发展机遇,规模迅速扩张,市场版图不断延伸。然而,伴随着这种快速的发展,一个不容忽视的问题逐渐浮出水面——如何精准高效地掌握分布在各地的分公司、业务点乃至整…

hive—常用的日期函数

目录 1、current_date 当前日期 2、now() 或 current_timestamp() 当前时间 3、datediff(endDate, startDate) 计算日期相差天数 4、months_between(endDate, startDate) 日期相差月数 5、date_add(startDate, numDays) 日期加N天 6、date_sub(startDate, numDays) 日期减…

【原生js案例】如何让你的网页实现图片的按需加载

按需加载,这个词应该都不陌生了。我用到你的时候,你才出现就可以了。对于一个很多图片的网站,按需加载图片是优化网站性能的一个关键点。减少无效的http请求,提升网站加载速度。 感兴趣的可以关注下我的系列课程【webApp之h5端实…

大模型运用-Prompt Engineering(提示工程)

什么是提示工程 提示工程 提示工程也叫指令工程,涉及到如何设计、优化和管理这些Prompt,以确保AI模型能够准确、高效地执行用户的指令,如:讲个笑话、java写个排序算法等 使用目的 1.获得具体问题的具体结果。(如…

相位小数偏差(UPD)估计基本原理

PPP中的一个关键性难题在于非差模糊度固定,成功固定非差模糊度可以使 PPP 的收敛速度和定位精度得到显著提升 。 相位小数偏差 (UPD) 是致使相位模糊度失去整数特性的主要因素,精确估计并校正 UPD 是实现非差模糊度固定的重要前提,也是实现…

ES倒排索引实现? ES 索引文档过程?ES并发下读写一致?

ES倒排索引实现, ES 索引文档过程,ES并发下读写一致 一、Elasticsearch的基本概念二、Elasticsearch的工作原理简述三、Elasticsearch的应用场景四、面试问题问题1:Elasticsearch中的分片和副本是如何工作的?分片和副本的大小和数…

【h5py】 提取mat文件中的HDF5格式的数据

h5py 提取mat文件中的HDF5格式的数据 使用纯Python查看数据配合Matlab后,使用Python查看数据 一、使用纯Python查看文件数据内容 原理:当HDF5存储的是struct类型数据,解析时要像一棵树,我们需要逐层次的去解析,直到…

【数据分享】2013-2023年我国省市县三级的逐年CO数据(免费获取\excel\shp格式)

空气质量数据是在我们日常研究中经常使用的数据!之前我们给大家分享了2000-2023年的省市县三级的逐年PM2.5数据、2000-2023年的省市县三级的逐年PM10数据、2013-2023年的省市县三级的逐年SO2数据、2000-2023年省市县三级的逐年O3数据和2008-2023年我国省市县三级的逐…

《九重紫》逐集分析鉴赏第一集(下)

主标题:《九重紫》一起追剧吧 副标题:《九重紫》逐集分析鉴赏第一集(下)/《九重紫》逐集分析鉴赏1 接上回分解,窦昭和宋墨都安置城外万佛寺 交谈没一会儿,天还未亮,兵临寺下 记住这个人…

Summarizing and Understanding Large Graphs

Summarizing and Understanding Large Graphs【总结和理解大规模图】 ☆ 研究背景 大规模图的理解和可视化是一个重要的开放性问题现有的社区发现和聚类方法无法很好地总结图的特征需要一种能够发现和描述图中重要结构的方法 ★ 成果简介 提出了VoG(Vocabulary-based summar…

人工智能系统

介绍人工智能 的基础书 点击这里 1.1 深度学习的历史,现状与发展 本章将介绍深度学习的由来,现状和趋势,让读者能够了解人工智能系统之上的深度学习负载的由来与趋势,为后面理解深度学习系统的设计和权衡形成初步的基础。我们在后…

康佳Android面试题及参考答案(多张原理图)

JVM 内存分布和分代回收机制是什么? JVM 内存主要分为以下几个区域。 堆(Heap)是 JVM 管理的最大的一块内存区域,主要用于存放对象实例。所有线程共享堆内存,在堆中又分为年轻代(Young Generation)和老年代(Old Generation)。年轻代又分为 Eden 区和两个 Survivor 区(…

(css)element中el-select下拉框整体样式修改

(css)element中el-select下拉框整体样式修改 重点代码(颜色可行修改) // 修改input默认值颜色 兼容其它主流浏览器 /deep/ input::-webkit-input-placeholder {color: rgba(255, 255, 255, 0.50); } /deep/ input::-moz-input-placeholder {color: rgba…

论文概览 |《Sustainable Cities and Society》2024.12 Vol.116

本次给大家整理的是《Sustainable Cities and Society》杂志2024年12月第116期的论文的题目和摘要,一共包括52篇SCI论文! 论文1 Enhancing road traffic flow in sustainable cities through transformer models: Advancements and challenges 通过变压…

【电源专题】开关转换器的三种过流保护方案

开关转换器内部集成功率开关,使限流保护成为基本功能。常用限流方案有三种:恒流限流、折返限流和打嗝模式限流。 恒流限流 对于恒流限流方案,当发生过载情况时,输出电流保持恒定值(ILIMIT)。因此,输出电压会下降。这种方案通过逐周期限流实现,利用流经功率开关的峰值电感…

网络与安全

文章目录 网络协议OSI七层模型TCP/IP协议族TCP协议UDP协议HTTP协议HTTPS协议 SocketSocket编程粘包与拆包 网络安全常见网络攻击及防护防火墙网络加密技术 跨域问题 网络协议 网络协议是计算机网络中设备和系统之间进行数据交换的规则和约定。它定义了数据的格式、传输方式、处…