Index: lib/crawler.js |
diff --git a/lib/crawler.js b/lib/crawler.js |
index 83c5f40e4fe873373b70dcc465bc22958610edf3..86b5e3bd70d68316dcfbbc54e98c03bad3893aaf 100644 |
--- a/lib/crawler.js |
+++ b/lib/crawler.js |
@@ -11,6 +11,7 @@ |
Cu.import("resource://gre/modules/Services.jsm"); |
Cu.import("resource://gre/modules/Task.jsm"); |
Cu.import("resource://gre/modules/Promise.jsm"); |
+Cu.import("resource://gre/modules/Timer.jsm"); |
function abprequire(module) |
{ |
@@ -21,7 +22,6 @@ function abprequire(module) |
} |
let {RequestNotifier} = abprequire("requestNotifier"); |
- |
let {FilterNotifier} = abprequire("filterNotifier"); |
let {FilterStorage} = abprequire("filterStorage"); |
@@ -84,79 +84,6 @@ TabAllocator.prototype = { |
}; |
/** |
- * Observes page loads in a particular tabbed browser. |
- * |
- * @param {tabbrowser} browser |
- * The tabbed browser to be observed |
- * @param {int} timeout |
- * Load timeout in milliseconds |
- * @constructor |
- */ |
-function LoadListener(browser, timeout) |
-{ |
- this._browser = browser; |
- this._deferred = new Map(); |
- this._timeout = timeout; |
- browser.addTabsProgressListener(this); |
-} |
-LoadListener.prototype = { |
- /** |
- * Returns a promise that will be resolved when the page in the specified tab |
- * finishes loading. Loading will be stopped if the timeout is reached. |
- * |
- * @param {tab} tab |
- * @result {Promise} |
- */ |
- waitForLoad: function(tab) |
- { |
- let deferred = Promise.defer(); |
- this._deferred.set(tab.linkedBrowser, deferred); |
- |
- tab.ownerDocument.defaultView.setTimeout(function() |
- { |
- tab.linkedBrowser.stop(); |
- }, this._timeout); |
- |
- return deferred.promise; |
- }, |
- |
- /** |
- * Deactivates this object. |
- */ |
- stop: function() |
- { |
- this._browser.removeTabsProgressListener(this); |
- }, |
- |
- onStateChange: function(browser, progress, request, flags, status) |
- { |
- if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW)) |
- { |
- let deferred = this._deferred.get(browser); |
- if (deferred) |
- { |
- this._deferred.delete(browser); |
- |
- let headers = []; |
- if (request instanceof Ci.nsIHttpChannel) |
- { |
- try |
- { |
- headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText); |
- request.visitResponseHeaders((header, value) => headers.push(header + ": " + value)); |
- } |
- catch (e) |
- { |
- // Exceptions are expected here |
- } |
- } |
- deferred.resolve([status, headers]); |
- } |
- } |
- } |
-}; |
Wladimir Palant
2016/03/15 10:07:10
Why did you move this functionality into the conte
sergei
2016/03/15 16:40:10
In e10s it does not work in chrome process, we can
|
- |
-/** |
* Once created, this object will make sure all new windows are dismissed |
* immediately. |
* |
@@ -207,7 +134,7 @@ WindowCloser.prototype = { |
function run(window, urls, timeout, maxtabs, targetURL, onDone) |
{ |
let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
- let loadListener = new LoadListener(window.getBrowser(), timeout); |
+ |
let running = 0; |
let windowCloser = new WindowCloser(); |
let taskDone = function() |
@@ -215,7 +142,6 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone) |
running--; |
if (running <= 0) |
{ |
- loadListener.stop(); |
windowCloser.stop(); |
onDone(); |
} |
@@ -240,7 +166,7 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone) |
for (let url of urls) |
{ |
running++; |
- Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) |
+ Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result) |
{ |
let request = new XMLHttpRequest(); |
request.open("POST", targetURL); |
@@ -274,11 +200,10 @@ exports.run = run; |
* |
* @param {String} url |
* @param {TabAllocator} tabAllocator |
- * @param {loadListener} loadListener |
* @result {Object} |
* Crawling result |
*/ |
-function* crawl_url(url, tabAllocator, loadListener) |
+function* crawl_url(url, tabAllocator, timeout) |
{ |
let tab = yield tabAllocator.getTab(); |
let result = {url, requests: []}; |
@@ -292,33 +217,26 @@ function* crawl_url(url, tabAllocator, loadListener) |
}); |
tab.linkedBrowser.loadURI(url, null, null); |
- [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); |
- result.endTime = Date.now(); |
- result.finalUrl = tab.linkedBrowser.currentURI.spec; |
- let document = tab.linkedBrowser.contentDocument; |
- if (document.documentElement) |
+    let mm = tab.linkedBrowser.messageManager; |
+    // Resolves with the data of the first "abpcrawler:pageInfoGathered" |
+    // message received on this tab's message manager (presumably sent by the |
+    // content-side frame script - confirm against lib/child/frameScript.js), |
+    // or with {error: "timeout"} if nothing arrives within `timeout` |
+    // milliseconds. The promise is never rejected. |
+    let pageInfoFuture = new Promise((resolve) => |
     { |
-      try |
+      let timerID; |
+      let onDone = (pageInfo) => |
       { |
-        let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas"); |
-        canvas.width = document.documentElement.scrollWidth; |
-        canvas.height = document.documentElement.scrollHeight; |
- |
-        let context = canvas.getContext("2d"); |
-        context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)"); |
-        result.screenshot = canvas.toDataURL("image/jpeg", 0.8); |
-      } |
-      catch (e) |
-      { |
-        reportException(e); |
-        result.error = "Capturing screenshot failed: " + e; |
+        // Remove exactly the function that was registered below; passing a |
+        // different function object would leave the listener attached. |
+        mm.removeMessageListener("abpcrawler:pageInfoGathered", onMessage); |
Wladimir Palant
2016/03/15 10:07:10
So, which tab did you get the page info for?
The
sergei
2016/03/16 14:44:23
For the `tab`, it's "browser message manager" it a
|
+        clearTimeout(timerID); |
+        resolve(pageInfo); |
       } |
+      // Named wrapper so that onDone can unregister it again. |
+      let onMessage = (msg) => onDone(msg.data); |
+      mm.addMessageListener("abpcrawler:pageInfoGathered", onMessage); |
+      timerID = setTimeout(onDone.bind(this, {error: "timeout"}), timeout); |
+    }); |
Wladimir Palant
2016/03/15 10:07:10
Please move this functionality into a separate fun
sergei
2016/03/16 14:44:23
Done.
|
- // TODO: Capture frames as well? |
- let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); |
- result.source = serializer.serializeToString(document.documentElement); |
- } |
+ let pageInfo = yield pageInfoFuture; |
+ |
+ result.finalUrl = tab.linkedBrowser.currentURI.spec; |
+ Object.assign(result, pageInfo); |
+ result.endTime = Date.now(); |
} |
finally |
{ |
@@ -336,3 +254,20 @@ function reportException(e) |
Cu.reportError(e); |
dump(e + "\n" + stack + "\n"); |
} |
+ |
+let {addonRoot} = require("info"); |
+let frameScriptPath = addonRoot + "/lib/child/frameScript.js"; |
+let globalMessageManager = Services.mm; |
+globalMessageManager.loadFrameScript(frameScriptPath, true); |
Wladimir Palant
2016/03/15 10:07:10
This should be a process script, no point using a
|
+ |
+let onReportException = function(msg) |
+{ |
+ reportException(msg.objects); |
Wladimir Palant
2016/03/15 10:07:10
Please don't use msg.objects - ever. That's a wrap
sergei
2016/03/16 14:44:23
Acknowledged. Actually, I wanted to avoid duplicat
sergei
2016/03/16 14:44:23
I know, I used `msg.objects` because we don't know
Wladimir Palant
2016/09/14 16:11:46
Worst-case scenario: deadlocks because all of that
|
+} |
+globalMessageManager.addMessageListener("abpcrawler:reportException", onReportException); |
+ |
+onShutdown.add(() => |
+{ |
+ globalMessageManager.removeMessageListener("abpcrawler:reportException", onReportException); |
+ globalMessageManager.removeDelayedFrameScript(frameScriptPath); |
+}); |