简介
在介绍Puphpeteer之前,必须得先说说Puppeteer。Puppeteer是Google Chrome官方团队维护的一个Node库,它提供了一组用来操纵Chrome的API(默认为headless,也就是无UI的Chrome,也可以配置为有UI),在浏览器中手动完成的大多数操作都可以使用Puppeteer完成。Puphpeteer则是通过PHP来操作Puppeteer,支持Puppeteer全部的API。如果你对Node.js比较熟,完全可以直接使用Puppeteer。
Puppeteer官网:https://pptr.dev/
Puphpeteer官网:https://github.com/rialto-php/puphpeteer
可以做什么
生成页面的屏幕截图和 PDF
爬取网页动态渲染(vue、react)后的内容
自动提交表单、UI 测试、键盘输入等
使用最新的 JavaScript 和浏览器功能创建自动化测试环境
捕获站点的时间线跟踪,以帮助诊断性能问题
安装
环境要求
PHP >= 7.3
Node >= 8
安装
composer require nesk/puphpeteer
npm install @nesk/puphpeteer
使用示例
获取网页HTML内容
// Example: fetch the HTML of a page through a browser driven by Puphpeteer.

// Load Composer's autoloader so the Puphpeteer classes can be resolved.
require_once __DIR__ . '/vendor/autoload.php';

// The namespace separators are required; without them the class cannot be found.
use Nesk\Puphpeteer\Puppeteer;

$puppeteer = new Puppeteer;

// 'headless' defaults to true; it is set to false here so the browser window
// is visible and the run can be observed.
$browser = $puppeteer->launch(['headless'=>false]);

$page = $browser->newPage();
$page->goto('https://example.com/');
$page->setViewport(['width'=>1920, 'height'=>1080]);

// Read the page's HTML, then shut the browser down before printing it.
$html = $page->content();
$browser->close();

echo $html;
网页截图
// Example: take a full-page screenshot of a web page.
$launchOptions = ['headless'=>false];
$viewport = ['width'=>1920, 'height'=>1080];

// 'path' is relative, so the image is written to the current working
// directory as example.png; 'fullPage' is enabled for the capture.
$screenshotOptions = ['path' => 'example.png','fullPage' => true];

$puppeteer = new Puppeteer;
$browser = $puppeteer->launch($launchOptions);
$page = $browser->newPage();

$page->goto('https://example.com/');
$page->setViewport($viewport);
$page->screenshot($screenshotOptions);

$browser->close();
网页截图(适用于有图片懒加载的页面)
// Example: screenshot a page whose images are lazy-loaded.
use Nesk\Puphpeteer\Puppeteer;
use Nesk\Rialto\Data\JsFunction;

$viewport = ['width'=>1920, 'height'=>1080];

// Relative path: the image is saved as dedemao.png in the current directory.
$screenshotOptions = ['path' => 'dedemao.png','fullPage' => true];

$puppeteer = new Puppeteer;
$browser = $puppeteer->launch(['headless'=>false]);
$page = $browser->newPage();

$page->goto('https://www.dedemao.com/');
$page->setViewport($viewport);

// JavaScript evaluated inside the page: scroll to the bottom of the document
// so that lazy-loaded images start loading.
$scrollToBottom = JsFunction::createWithParameters([])
    ->body("window.scrollTo(0, window.document.body.scrollHeight);");
$page->evaluate($scrollToBottom);

// Wait 5 seconds to give the images time to finish loading.
$page->waitFor(5000);

$page->screenshot($screenshotOptions);
$browser->close();
打印pdf
// Example: print a web page to a PDF file using a headless browser.
$puppeteer = new Puppeteer;
$browser = $puppeteer->launch(['headless'=>true]);
$page = $browser->newPage();

// Navigation options. Values that may be listed under 'waitUntil':
//   'load'             - wait for the "load" event to fire
//   'domcontentloaded' - wait for the "domcontentloaded" event to fire
//   'networkidle0'     - no network connections for 500 ms (used here)
//   'networkidle2'     - no more than 2 network connections for 500 ms
$navigationOptions = [
    'timeout' => 300 * 1000,
    'waitUntil' => [
        'networkidle0',
    ],
];
$page->goto('https://example.com/', $navigationOptions);

$page->setViewport(['width'=>1920, 'height'=>1080]);

// Render with the 'screen' media type instead of the print stylesheet.
$page->emulateMediaType('screen');

// Relative path: the PDF is written to example.pdf in the current directory.
$pdfOptions = [
    'path' => 'example.pdf',
    'format' => 'A4',
    'printBackground' => true,
    'margin' => ['top'=>20, 'bottom'=>20],
];
$page->pdf($pdfOptions);

$browser->close();
登录github
// Example: fill in and submit the GitHub login form in a visible browser.
$username = '账号'; // placeholder - replace with the real account name
$password = '密码'; // placeholder - replace with the real password

$puppeteer = new Puppeteer;
$browser = $puppeteer->launch(['headless'=>false]);
$page = $browser->newPage();

$page->goto('https://github.com/login');
$page->setViewport(['width'=>1920, 'height'=>1080]);

// Type the credentials into the form fields, then click the sign-in button.
$page->type('#login_field', $username);
$page->type('#password', $password);
$page->click('.js-sign-in-button');

// NOTE(review): the browser is closed right after the click, without waiting
// for the post-login page - confirm whether a wait is wanted here.
$browser->close();
其他
设置UserAgent
// Override the User-Agent string used by this page ($page is an already opened page).
$userAgent = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36';
$page->setUserAgent($userAgent);
浏览器窗口最大化
// Example: open the browser window maximized.
// 'defaultViewport' is set to null and Chrome is started with --start-maximized.
$launchOptions = [
    'headless' => false,
    'defaultViewport' => null,
    'args' => ['--start-maximized'],
];

$puppeteer = new Puppeteer;
$browser = $puppeteer->launch($launchOptions);
$page = $browser->newPage();

$page->goto('https://example.com/');
$page->waitFor(5000); // keep the window open for 5 seconds before closing
$browser->close();
// Example: set cookies on a page ($page is an already opened page).
// Each element of the list describes one cookie.
$cookies = [
    [
        'name' => 'PHPSESSID',
        'value' => 'xxxxxxx',
        'domain' => 'www.baidu.com',
        'path' => '/',
        'httpOnly' => true,
        'secure' => true,
        // Expiry timestamp: one year (365 days) from now.
        'expires' => time() + 3600 * 24 * 365,
    ],
];

// The spread operator passes every cookie in the list as its own argument.
$page->setCookie(...$cookies);