/* ******************************************************************************************* * PUPPETEER * https://pptr.dev/ * ******************************************************************************************* */ // When you install Puppeteer, it downloads a recent version of Chromium (~170MB Mac, ~282MB Linux, ~280MB Win) // that is guaranteed to work with the API. npm install puppeteer // A lightweight version of Puppeteer for launching an existing browser installation or for connecting to a remote one. // Be sure that the version of puppeteer-core you install is compatible with the browser you intend to connect to. // More about the difference between puppeteer vs puppeteer-core // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-puppeteer-vs-puppeteer-core npm install puppeteer-core /* ******************************************************************************************* * ENVIRONMENT VARIABLES * ******************************************************************************************* */ HTTP_PROXY // Defines HTTP proxy settings that are used to download and run Chromium. HTTPS_PROXY // Defines HTTP proxy settings that are used to download and run Chromium. NO_PROXY // Defines HTTP proxy settings that are used to download and run Chromium. PUPPETEER_SKIP_CHROMIUM_DOWNLOAD // Do not download bundled Chromium during installation step. PUPPETEER_DOWNLOAD_HOST // Overwrite URL prefix that is used to download Chromium. Note: this includes protocol and might even include path prefix. Defaults to https://storage.googleapis.com. PUPPETEER_CHROMIUM_REVISION // Specify a certain version of Chromium you'd like Puppeteer to use. See puppeteer.launch([options]) on how executable path is inferred. BEWARE: Puppeteer is only guaranteed to work with the bundled Chromium, use at your own risk. PUPPETEER_EXECUTABLE_PATH // Specify an executable path to be used in puppeteer.launch. 
See puppeteer.launch([options]) on how the executable path is inferred. BEWARE: Puppeteer is only guaranteed to work with the bundled Chromium, use at your own risk. PUPPETEER_PRODUCT // Specify which browser you'd like Puppeteer to use. Must be one of chrome or firefox. Setting product programmatically in puppeteer.launch([options]) supersedes this environment variable. The product is exposed in puppeteer.product /* ******************************************************************************************* * API * ******************************************************************************************* */ // PUPPETEER // ----- // Puppeteer module provides a method to launch a Chromium instance. // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-class-puppeteer const puppeteer = require('puppeteer'); puppeteer.devices; // Returns a list of devices to be used with page.emulate(options) puppeteer.errors; // Returns the specific error classes puppeteer.product; // Returns the name of the browser that is under automation ("chrome" or "firefox") puppeteer.connect(options); // Attaches Puppeteer to an existing Chromium instance. puppeteer.createBrowserFetcher([options]); // Create a browser fetcher instance puppeteer.defaultArgs([options]); // The default flags that Chromium will be launched with. puppeteer.executablePath(); // Returns a path where Puppeteer expects to find bundled Chromium. puppeteer.launch([options]); // Launch a browser instance // BROWSER FETCHER // ----- // BrowserFetcher can download and manage different versions of Chromium. // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-class-browserfetcher const browserFetcher = puppeteer.createBrowserFetcher(); browserFetcher.canDownload(revision) // Initiates a HEAD request to check if the revision is available. browserFetcher.download(revision[, progressCallback]) // Initiates a GET request to download the revision from the host. 
browserFetcher.localRevisions() // Returns a list of all revisions available locally on disk. browserFetcher.platform() // Returns one of mac, linux, win32 or win64. browserFetcher.remove(revision) // Resolves when the revision has been removed. browserFetcher.revisionInfo(revision) // Returns all the information on the revision. // BROWSER // ----- // A Browser is created when Puppeteer connects to a Chromium instance, either through puppeteer.launch or puppeteer.connect. // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-class-browser const browser = await puppeteer.launch(); browser.on('disconnected') // Emitted when Puppeteer gets disconnected from the Chromium instance. browser.on('targetchanged') // Emitted when the url of a target changes. browser.on('targetcreated') // Emitted when a target is created, for example when a new page is opened by window.open or browser.newPage. browser.on('targetdestroyed') // Emitted when a target is destroyed, for example when a page is closed. browser.browserContexts() // Returns an array of all open browser contexts. browser.close() // Closes Chromium and all of its pages (if any were opened). browser.createIncognitoBrowserContext() // Creates a new incognito browser context. browser.defaultBrowserContext() // Returns the default browser context. browser.disconnect() // Disconnects Puppeteer from the browser, but leaves the Chromium process running. browser.isConnected() // Indicates that the browser is connected. browser.newPage() // Promise which resolves to a new Page object. browser.pages() // An array of all pages inside the Browser. browser.process() // Returns the spawned browser process. browser.target() // A target associated with the browser. browser.targets() // An array of all active targets inside the Browser. browser.userAgent() // Promise which resolves to the browser's original user agent. browser.version() // Returns the browser version (e.g. 
Chrome/61.0.3153.0) browser.waitForTarget(predicate[, options]) // Promise which resolves to the first target found that matches the predicate function. browser.wsEndpoint() // Returns the browser websocket url. // BROWSER CONTEXT // ----- // BrowserContexts provide a way to operate multiple independent browser sessions // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-class-browsercontext const browserContext = await browser.createIncognitoBrowserContext(); browserContext.on('targetchanged') // Emitted when the url of a target inside the browser context changes. browserContext.on('targetcreated') // Emitted when a new target is created inside the browser context. browserContext.on('targetdestroyed') // Emitted when a target inside the browser context is destroyed, for example when a page is closed. browserContext.browser() // The browser this browser context belongs to. browserContext.clearPermissionOverrides() // Clears all permission overrides for the browser context. browserContext.close() // Closes the browser context. All the targets that belong to the browser context will be closed. browserContext.isIncognito() // Returns whether BrowserContext is incognito. browserContext.newPage() // Creates a new page in the browser context. browserContext.overridePermissions(origin, permissions) // Overrides permissions to grant. browserContext.pages() // An array of all pages inside the browser context. browserContext.targets() // An array of all active targets inside the browser context. browserContext.waitForTarget(predicate[, options]) // Promise which resolves to the first target found that matches the predicate function. // PAGE // ----- // Page provides methods to interact with a single tab or extension background page in Chromium. // One Browser instance might have multiple Page instances. // https://pptr.dev/#?product=Puppeteer&version=v2.1.1&show=api-class-page const page = await browser.newPage(); page.on('close') // Emitted when the page closes. 
page.on('console') // Emitted when JavaScript within the page calls one of console API methods. page.on('dialog') // Emitted when a JavaScript dialog appears, such as alert, prompt, confirm or beforeunload. page.on('domcontentloaded') // Emitted when the JavaScript DOMContentLoaded event is dispatched. page.on('error') // Emitted when the page crashes. page.on('frameattached') // Emitted when a frame is attached. page.on('framedetached') // Emitted when a frame is detached. page.on('framenavigated') // Emitted when a frame is navigated to a new url. page.on('load') // Emitted when the JavaScript load event is dispatched. page.on('metrics') // Emitted when the JavaScript code makes a call to console.timeStamp. page.on('pageerror') // Emitted when an uncaught exception happens within the page. page.on('popup') // Emitted when the page opens a new tab or window. page.on('request') // Emitted when a page issues a request. page.on('requestfailed') // Emitted when a request fails, for example by timing out. page.on('requestfinished') // Emitted when a request finishes successfully. page.on('response') // Emitted when a response is received. page.on('workercreated') // Emitted when a dedicated WebWorker is spawned by the page. page.on('workerdestroyed') // Emitted when a dedicated WebWorker is terminated. page.accessibility // returns Accessibility page.coverage // returns Coverage page.keyboard // returns Keyboard page.mouse // returns Mouse page.touchscreen // returns Touchscreen page.tracing // returns Tracing page.$(selector) // The method runs document.querySelector within the page. If no element matches the selector, the return value resolves to null. page.$$(selector) // The method runs document.querySelectorAll within the page. If no elements match the selector, the return value resolves to []. page.$eval(selector, pageFunction[, ...args]) // This method runs document.querySelector within the page and passes it as the first argument to pageFunction. 
If there's no element matching selector, the method throws an error. page.$$eval(selector, pageFunction[, ...args]) // This method runs Array.from(document.querySelectorAll(selector)) within the page and passes it as the first argument to pageFunction. page.$x(expression) // The method evaluates the XPath expression. page.addScriptTag(options) // Adds a