lib/crawler.js - Issue 29338242: Issue 3792 - Fix to support multiprocess firefox

Unified Diff: lib/crawler.js

Issue 29338242: Issue 3792 - Fix to support multiprocess firefox (Closed)

Patch Set: address comments Created Sept. 29, 2016, 3:33 p.m.

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View side-by-side diff with in-line comments

Index: lib/crawler.js

diff --git a/lib/crawler.js b/lib/crawler.js

index 933e4fe84c6c0f5b39fe7595894ad93a4fa48d08..fa3077a9af6c24841a66a97dfa8433554b56c0cb 100644

--- a/lib/crawler.js

+++ b/lib/crawler.js

@@ -13,6 +13,7 @@

const {Services} = Cu.import("resource://gre/modules/Services.jsm", {});

const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {});

const {Task} = Cu.import("resource://gre/modules/Task.jsm", {});

+const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {});

function abprequire(module)

{

@@ -109,8 +110,8 @@ TabAllocator.prototype = {

this._browser.removeTab(tab);

else

{

- // navigate away from early opened URL

- tab.linkedBrowser.loadURI('about:blank', null, null);

+ // navigate away from previously opened URL

+ tab.linkedBrowser.loadURI("about:blank", null, null);

this._tabKeepingWindowAlive = tab;

}

@@ -123,79 +124,6 @@ TabAllocator.prototype = {

};

/**

- * Observes page loads in a particular tabbed browser.

- *

- * @param {tabbrowser} browser

- * The tabbed browser to be observed

- * @param {int} timeout

- * Load timeout in milliseconds

- * @constructor

- */

-function LoadListener(browser, timeout)

- this._browser = browser;

- this._deferred = new Map();

- this._timeout = timeout;

- browser.addTabsProgressListener(this);

-LoadListener.prototype = {

- /**

- * Returns a promise that will be resolved when the page in the specified tab

- * finishes loading. Loading will be stopped if the timeout is reached.

- *

- * @param {tab} tab

- * @result {Promise}

- */

- waitForLoad: function(tab)

- {

- let deferred = Promise.defer();

- this._deferred.set(tab.linkedBrowser, deferred);

- tab.ownerDocument.defaultView.setTimeout(function()

- {

- tab.linkedBrowser.stop();

- }, this._timeout);

- return deferred.promise;

- },

- /**

- * Deactivates this object.

- */

- stop: function()

- {

- this._browser.removeTabsProgressListener(this);

- },

- onStateChange: function(browser, progress, request, flags, status)

- {

- if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW))

- {

- let deferred = this._deferred.get(browser);

- if (deferred)

- {

- this._deferred.delete(browser);

- let headers = [];

- if (request instanceof Ci.nsIHttpChannel)

- {

- try

- {

- headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText);

- request.visitResponseHeaders((header, value) => headers.push(header + ": " + value));

- }

- catch (e)

- {

- // Exceptions are expected here

- }

- deferred.resolve([status, headers]);

- }

-};

-/**

* Once created, this object will make sure all new windows are dismissed

* immediately.

@@ -230,6 +158,17 @@ WindowCloser.prototype = {

QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference])

};

+function configureFrameScript()

+ const frameScriptPath = info.addonRoot + "/lib/child/frameScript.js";

+ Services.mm.loadFrameScript(frameScriptPath, true);

+ onShutdown.add(() =>

+ {

+ Services.mm.removeDelayedFrameScript(frameScriptPath);

+ });

/**

* Starts the crawling session. The crawler opens each URL in a tab and stores

* the results.

@@ -249,6 +188,7 @@ WindowCloser.prototype = {

function run(window, urls, timeout, maxtabs, targetURL, onDone)

{

+ configureFrameScript();

new Promise((resolve, reject) =>

{

if (FilterStorage.subscriptions.length > 0)

@@ -289,7 +229,7 @@ exports.run = run;

function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)

{

let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);

- let loadListener = new LoadListener(window.getBrowser(), timeout);

let running = 0;

let windowCloser = new WindowCloser();

let taskDone = function()

@@ -297,7 +237,6 @@ function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)

running--;

if (running <= 0)

{

- loadListener.stop();

windowCloser.stop();

onDone();

}

@@ -306,7 +245,7 @@ function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)

for (let url of urls)

{

running++;

- Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result)

+ Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result)

{

let request = new XMLHttpRequest();

request.open("POST", targetURL);

@@ -331,15 +270,43 @@ function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)

}

/**

+ * Expects to receive page info gathered in a content process for the specified

+ * `tab`. If there is no relevant message within the specified `timeout`, then

+ * the result promise is resolved with an error object.

+ * @param tab

+ * Tab in which we are interested

+ * @param {int} timeout

+ * Timeout in milliseconds

+ * @return {Promise} promise which will be resolved with the received page info

+ */

+function getPageInfo(tab, timeout)

+ return new Promise((resolve, result) =>

+ {

+ const mm = tab.linkedBrowser.messageManager;

+ let timerID;

+ let onDone = (msg) =>

+ {

+ mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone);

+ clearTimeout(timerID);

+ resolve(msg.data);

+ }

+ mm.addMessageListener("abpcrawler:pageInfoGathered", onDone);

+ timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout);

+ });

+/**

* Crawls a URL. This is a generator meant to be used via a Task object.

* @param {String} url

* @param {TabAllocator} tabAllocator

- * @param {loadListener} loadListener

+ * @param {int} timeout

+ * Load timeout in milliseconds

* @result {Object}

* Crawling result

-function* crawl_url(url, tabAllocator, loadListener)

+function* crawl_url(url, tabAllocator, timeout)

{

let tab = yield tabAllocator.getTab();

let result = {url, requests: []};

@@ -357,33 +324,10 @@ function* crawl_url(url, tabAllocator, loadListener)

});

tab.linkedBrowser.loadURI(url, null, null);

- [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab);

- result.endTime = Date.now();

- result.finalUrl = tab.linkedBrowser.currentURI.spec;

- let document = tab.linkedBrowser.contentDocument;

- if (document.documentElement)

- {

- try

- {

- let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas");

- canvas.width = document.documentElement.scrollWidth;

- canvas.height = document.documentElement.scrollHeight;

- let context = canvas.getContext("2d");

- context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)");

- result.screenshot = canvas.toDataURL("image/jpeg", 0.8);

- }

- catch (e)

- {

- reportException(e);

- result.error = "Capturing screenshot failed: " + e;

- }

- // TODO: Capture frames as well?

- let serializer = new tab.ownerDocument.defaultView.XMLSerializer();

- result.source = serializer.serializeToString(document.documentElement);

- }

+ Object.assign(result, yield getPageInfo(tab, timeout));

+ result.finalUrl = tab.linkedBrowser.currentURI.spec;

+ result.endTime = Date.now();

}

finally

{

« no previous file with comments | « lib/child/frameScript.js ('k') | no next file » | no next file with comments »