shixiaolong0
shixiaolong0

变化!如果今天的自己和昨天的自己没有差异,那么就是白活! 简书:https://www.jianshu.com/u/dd76e4af1f33 twitter:https://twitter.com/dragon72463399 rust学习笔记:https://dev.to/dragon72463399

playwright防止机器人检测的实现方式(容器内可用)

核心代码片段

```

# 绕过无头模式检测,开启有头模式也能正常工作
with Display():
    async with async_playwright() as p:
        browser = await p.chromium.launch(**launch_data)
        for url in urls:
            page = await browser.new_page()
            # 绕过navigator.webdriver检测
            await page.add_init_script("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});")
            page.on('response', self.on_response)
            await page.goto(url,timeout=0)
            time.sleep(10)
            # timeout=0表示永远不超时,以此来确保标签页和浏览器都不会关闭
            await page.wait_for_function("() => window.x > 0", timeout=0)


```

完整代码

```

import asyncio
import json
import time
# from fastapi import FastAPI
from playwright.async_api import async_playwright
# from pydantic import BaseModel, Field
from pyvirtualdisplay import Display
from base64 import b64decode
from urllib.parse import parse_qsl
# from cf_clearance import async_cf_retry, async_stealth
import logging
from playwright.async_api import Page as AsyncPage


class Play:
    """Drive a headed Chromium through Playwright and print intercepted dapp API responses.

    ``open_site`` launches the browser inside a virtual display, hides
    ``navigator.webdriver`` and registers ``on_response`` as the handler for
    every network response of each opened page.
    """

    @staticmethod
    def _decode_params(url):
        """Return the query dict carried by the doubly base64-encoded ``params`` value of *url*.

        Only the text between ``?params=`` and the next ``&`` is decoded, and
        percent-escapes (for example ``%3D`` padding) are undone first, so extra
        query arguments or URL-encoding do not corrupt the base64 payload.
        """
        # Local import: ``unquote`` is not among the file-level imports.
        from urllib.parse import unquote

        raw = url.split('?params=')[-1]
        token = unquote(raw.split('&', 1)[0])
        # The value is base64 applied twice; the inner text is a query string.
        decoded = b64decode(b64decode(token)).decode('utf-8')
        return dict(parse_qsl(decoded))

    async def on_response(self, response):
        """Print the JSON body and the (chain, category) filter of a dapp-list response.

        Responses whose URL does not contain ``/v2/api/dapps?params`` are ignored.
        ``chain`` comes from the decoded ``protocol`` key and ``category`` from the
        ``category`` key; each falls back to ``'all'`` when the key is absent.
        """
        if '/v2/api/dapps?params' not in response.url:
            return
        data = json.loads(await response.body())
        print(data)
        params_dic = self._decode_params(response.url)
        chain = params_dic.get('protocol', 'all')
        category = params_dic.get('category', 'all')
        print(chain, category)

    async def open_site(self):
        """Open every target URL in a headed Chromium and keep the pages alive.

        The browser runs inside a ``pyvirtualdisplay`` display, so the headed
        mode also works where no real screen exists (e.g. in a container).
        """
        # Launch options chosen so Chromium also starts inside a container.
        launch_data = {
            "headless": False,
            "args": [
                "--disable-gpu",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                "--no-first-run",
                "--no-service-autorun",
                "--no-default-browser-check",
                "--password-store=basic",
            ],
        }
        urls = [
            "http://javabin.cn/bot/bot.html",
        ]
        # Bypass headless-mode detection: run headed, rendered into the virtual display.
        with Display():
            async with async_playwright() as p:
                browser = await p.chromium.launch(**launch_data)
                for url in urls:
                    page = await browser.new_page()
                    # Bypass the navigator.webdriver check by making it read as undefined.
                    await page.add_init_script("Object.defineProperties(navigator, {webdriver:{get:()=>undefined}});")
                    page.on('response', self.on_response)
                    await page.goto(url, timeout=0)
                    # Non-blocking pause: time.sleep() would freeze the event loop and
                    # prevent on_response from running while we wait.
                    await asyncio.sleep(10)
                    # timeout=0 means "never time out", which keeps the tab and the
                    # browser open for as long as the condition stays false.
                    await page.wait_for_function("() => window.x > 0", timeout=0)

    def test(self):
        """Run ``open_site`` to completion on a fresh asyncio event loop."""
        asyncio.run(self.open_site())


if __name__ == '__main__':
    # Script entry point: build the scraper and start the browser session.
    player = Play()
    player.test()


```

CC BY-NC-ND 2.0 版权声明

喜欢我的文章吗?
别忘了给点支持与赞赏,让我知道创作的路上有你陪伴。

加载中…

发布评论