Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This Source Code is subject to the terms of the Mozilla Public License | 2 * This Source Code is subject to the terms of the Mozilla Public License |
3 * version 2.0 (the "License"). You can obtain a copy of the License at | 3 * version 2.0 (the "License"). You can obtain a copy of the License at |
4 * http://mozilla.org/MPL/2.0/. | 4 * http://mozilla.org/MPL/2.0/. |
5 */ | 5 */ |
6 | 6 |
7 'use strict'; | 7 'use strict'; |
8 | 8 |
9 /** | 9 /** |
10 * @module crawler | 10 * @module crawler |
11 */ | 11 */ |
12 | 12 |
13 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); | 13 const {XPCOMUtils} = Cu.import("resource://gre/modules/XPCOMUtils.jsm", {}); |
14 const {Task} = Cu.import("resource://gre/modules/Task.jsm"); | 14 const {Task} = Cu.import("resource://gre/modules/Task.jsm"); |
15 const {setTimeout, clearTimeout} = Cu.import("resource://gre/modules/Timer.jsm", {}); | |
15 | 16 |
16 function abprequire(module) | 17 function abprequire(module) |
17 { | 18 { |
18 let result = {}; | 19 let result = {}; |
19 result.wrappedJSObject = result; | 20 result.wrappedJSObject = result; |
20 Services.obs.notifyObservers(result, "adblockplus-require", module); | 21 Services.obs.notifyObservers(result, "adblockplus-require", module); |
21 return result.exports; | 22 return result.exports; |
22 } | 23 } |
23 | 24 |
24 let {RequestNotifier} = abprequire("requestNotifier"); | 25 let {RequestNotifier} = abprequire("requestNotifier"); |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
101 * @param {tab} tab | 102 * @param {tab} tab |
102 */ | 103 */ |
103 releaseTab: function(tab) | 104 releaseTab: function(tab) |
104 { | 105 { |
105 // If we are about to close last tab don't close it immediately to keep | 106 // If we are about to close last tab don't close it immediately to keep |
106 // the window alive. It will be closed when a new tab is created. | 107 // the window alive. It will be closed when a new tab is created. |
107 if (this._tabs > 1) | 108 if (this._tabs > 1) |
108 this._browser.removeTab(tab); | 109 this._browser.removeTab(tab); |
109 else | 110 else |
110 { | 111 { |
111 // navigate away from early opened URL | 112 // navigate away from early opened URL |
Wladimir Palant
2016/09/29 11:44:59
early => previously
sergei
2016/09/29 15:36:22
Done.
| |
112 tab.linkedBrowser.loadURI('about:blank', null, null); | 113 tab.linkedBrowser.loadURI('about:blank', null, null); |
Wladimir Palant
2016/09/29 11:44:59
Double quotation marks please.
sergei
2016/09/29 15:36:22
Done.
| |
113 this._tabKeepingWindowAlive = tab; | 114 this._tabKeepingWindowAlive = tab; |
114 } | 115 } |
115 | 116 |
116 this._tabs--; | 117 this._tabs--; |
117 if (this._resolvers.length && this._tabs < this._maxtabs) | 118 if (this._resolvers.length && this._tabs < this._maxtabs) |
118 { | 119 { |
119 this._resolvers.shift()(this._createTab()); | 120 this._resolvers.shift()(this._createTab()); |
120 } | 121 } |
121 }, | 122 }, |
122 }; | 123 }; |
123 | 124 |
124 /** | 125 /** |
125 * Observes page loads in a particular tabbed browser. | |
126 * | |
127 * @param {tabbrowser} browser | |
128 * The tabbed browser to be observed | |
129 * @param {int} timeout | |
130 * Load timeout in milliseconds | |
131 * @constructor | |
132 */ | |
133 function LoadListener(browser, timeout) | |
134 { | |
135 this._browser = browser; | |
136 this._deferred = new Map(); | |
137 this._timeout = timeout; | |
138 browser.addTabsProgressListener(this); | |
139 } | |
140 LoadListener.prototype = { | |
141 /** | |
142 * Returns a promise that will be resolved when the page in the specified tab | |
143 * finishes loading. Loading will be stopped if the timeout is reached. | |
144 * | |
145 * @param {tab} tab | |
146 * @result {Promise} | |
147 */ | |
148 waitForLoad: function(tab) | |
149 { | |
150 let deferred = Promise.defer(); | |
151 this._deferred.set(tab.linkedBrowser, deferred); | |
152 | |
153 tab.ownerDocument.defaultView.setTimeout(function() | |
154 { | |
155 tab.linkedBrowser.stop(); | |
156 }, this._timeout); | |
157 | |
158 return deferred.promise; | |
159 }, | |
160 | |
161 /** | |
162 * Deactivates this object. | |
163 */ | |
164 stop: function() | |
165 { | |
166 this._browser.removeTabsProgressListener(this); | |
167 }, | |
168 | |
169 onStateChange: function(browser, progress, request, flags, status) | |
170 { | |
171 if ((flags & Ci.nsIWebProgressListener.STATE_STOP) && (flags & Ci.nsIWebProgressListener.STATE_IS_WINDOW)) | |
172 { | |
173 let deferred = this._deferred.get(browser); | |
174 if (deferred) | |
175 { | |
176 this._deferred.delete(browser); | |
177 | |
178 let headers = []; | |
179 if (request instanceof Ci.nsIHttpChannel) | |
180 { | |
181 try | |
182 { | |
183 headers.push("HTTP/x.x " + request.responseStatus + " " + request.responseStatusText); | |
184 request.visitResponseHeaders((header, value) => headers.push(header + ": " + value)); | |
185 } | |
186 catch (e) | |
187 { | |
188 // Exceptions are expected here | |
189 } | |
190 } | |
191 deferred.resolve([status, headers]); | |
192 } | |
193 } | |
194 } | |
195 }; | |
196 | |
197 /** | |
198 * Once created, this object will make sure all new windows are dismissed | 126 * Once created, this object will make sure all new windows are dismissed |
199 * immediately. | 127 * immediately. |
200 * | 128 * |
201 * @constructor | 129 * @constructor |
202 */ | 130 */ |
203 function WindowCloser() | 131 function WindowCloser() |
204 { | 132 { |
205 Services.obs.addObserver(this, "xul-window-registered", true) | 133 Services.obs.addObserver(this, "xul-window-registered", true) |
206 } | 134 } |
207 WindowCloser.prototype = { | 135 WindowCloser.prototype = { |
(...skipping 14 matching lines...) Expand all Loading... | |
222 if (window.document.documentElement.localName == 'dialog') | 150 if (window.document.documentElement.localName == 'dialog') |
223 window.document.documentElement.acceptDialog(); | 151 window.document.documentElement.acceptDialog(); |
224 else | 152 else |
225 window.close(); | 153 window.close(); |
226 }, false); | 154 }, false); |
227 }, | 155 }, |
228 | 156 |
229 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) | 157 QueryInterface: XPCOMUtils.generateQI([Ci.nsIObserver, Ci.nsISupportsWeakReference]) |
230 }; | 158 }; |
231 | 159 |
160 function configureFrameScript() | |
161 { | |
162 const info = require("info"); | |
163 const frameScriptPath = info.addonRoot + '/lib/child/frameScript.js?' + Math.random() + | |
164 '&info=' + encodeURIComponent(JSON.stringify(info)); | |
Wladimir Palant
2016/09/29 11:44:59
Double quotation marks please.
What's the point o
sergei
2016/09/29 15:36:22
Done.
| |
165 Services.mm.loadFrameScript(frameScriptPath, true); | |
166 | |
167 onShutdown.add(() => | |
168 { | |
169 Services.mm.removeDelayedFrameScript(frameScriptPath); | |
170 }); | |
171 } | |
172 | |
232 /** | 173 /** |
233 * Starts the crawling session. The crawler opens each URL in a tab and stores | 174 * Starts the crawling session. The crawler opens each URL in a tab and stores |
234 * the results. | 175 * the results. |
235 * | 176 * |
236 * @param {Window} window | 177 * @param {Window} window |
237 * The browser window we're operating in | 178 * The browser window we're operating in |
238 * @param {String[]} urls | 179 * @param {String[]} urls |
239 * URLs to be crawled | 180 * URLs to be crawled |
240 * @param {int} timeout | 181 * @param {int} timeout |
241 * Load timeout in milliseconds | 182 * Load timeout in milliseconds |
242 * @param {int} maxtabs | 183 * @param {int} maxtabs |
243 * Maximum number of tabs to be opened | 184 * Maximum number of tabs to be opened |
244 * @param {String} targetURL | 185 * @param {String} targetURL |
245 * URL that should receive the results | 186 * URL that should receive the results |
246 * @param {Function} onDone | 187 * @param {Function} onDone |
247 * The callback which is called after finishing of crawling of all URLs. | 188 * The callback which is called after finishing of crawling of all URLs. |
248 */ | 189 */ |
249 function run(window, urls, timeout, maxtabs, targetURL, onDone) | 190 function run(window, urls, timeout, maxtabs, targetURL, onDone) |
250 { | 191 { |
192 configureFrameScript(); | |
251 new Promise((resolve, reject) => | 193 new Promise((resolve, reject) => |
252 { | 194 { |
253 if (FilterStorage.subscriptions.length > 0) | 195 if (FilterStorage.subscriptions.length > 0) |
254 { | 196 { |
255 resolve(); | 197 resolve(); |
256 return; | 198 return; |
257 } | 199 } |
258 let onFiltersLoaded = (action, item, newValue, oldValue) => | 200 let onFiltersLoaded = (action, item, newValue, oldValue) => |
259 { | 201 { |
260 if (action == "load") | 202 if (action == "load") |
(...skipping 20 matching lines...) Expand all Loading... | |
281 * @param {int} maxtabs | 223 * @param {int} maxtabs |
282 * Maximum number of tabs to be opened | 224 * Maximum number of tabs to be opened |
283 * @param {String} targetURL | 225 * @param {String} targetURL |
284 * URL that should receive the results | 226 * URL that should receive the results |
285 * @param {Function} onDone | 227 * @param {Function} onDone |
286 * The callback which is called after finishing of all tasks. | 228 * The callback which is called after finishing of all tasks. |
287 */ | 229 */ |
288 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) | 230 function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone) |
289 { | 231 { |
290 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); | 232 let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs); |
291 let loadListener = new LoadListener(window.getBrowser(), timeout); | 233 |
292 let running = 0; | 234 let running = 0; |
293 let windowCloser = new WindowCloser(); | 235 let windowCloser = new WindowCloser(); |
294 let taskDone = function() | 236 let taskDone = function() |
295 { | 237 { |
296 running--; | 238 running--; |
297 if (running <= 0) | 239 if (running <= 0) |
298 { | 240 { |
299 loadListener.stop(); | |
300 windowCloser.stop(); | 241 windowCloser.stop(); |
301 onDone(); | 242 onDone(); |
302 } | 243 } |
303 }; | 244 }; |
304 | 245 |
305 for (let url of urls) | 246 for (let url of urls) |
306 { | 247 { |
307 running++; | 248 running++; |
308 Task.spawn(crawl_url.bind(null, url, tabAllocator, loadListener)).then(function(result) | 249 Task.spawn(crawl_url.bind(null, url, tabAllocator, timeout)).then(function(result) |
309 { | 250 { |
310 let request = new XMLHttpRequest(); | 251 let request = new XMLHttpRequest(); |
311 request.open("POST", targetURL); | 252 request.open("POST", targetURL); |
312 request.addEventListener("load", taskDone, false); | 253 request.addEventListener("load", taskDone, false); |
313 request.addEventListener("error", taskDone, false); | 254 request.addEventListener("error", taskDone, false); |
314 request.send(JSON.stringify(result)); | 255 request.send(JSON.stringify(result)); |
315 }, function(url, exception) | 256 }, function(url, exception) |
316 { | 257 { |
317 reportException(exception); | 258 reportException(exception); |
318 | 259 |
319 let request = new XMLHttpRequest(); | 260 let request = new XMLHttpRequest(); |
320 request.open("POST", targetURL); | 261 request.open("POST", targetURL); |
321 request.addEventListener("load", taskDone, false); | 262 request.addEventListener("load", taskDone, false); |
322 request.addEventListener("error", taskDone, false); | 263 request.addEventListener("error", taskDone, false); |
323 request.send(JSON.stringify({ | 264 request.send(JSON.stringify({ |
324 url: url, | 265 url: url, |
325 startTime: Date.now(), | 266 startTime: Date.now(), |
326 error: String(exception) | 267 error: String(exception) |
327 })); | 268 })); |
328 }.bind(null, url)); | 269 }.bind(null, url)); |
329 } | 270 } |
330 } | 271 } |
331 | 272 |
332 /** | 273 /** |
274 * Expects to receive page info gathered in a content process for the specified | |
275 * `tab`. If there is no relevant message within specified `timeout` then | |
276 * the result promise is resolved with error object. | |
277 * @param tab | |
278 * Tab in which we are interested in | |
279 * @param {int} timeout | |
280 * Timeout in milliseconds | |
281 * @return {Promise} promise which will be resolved with the received page info | |
282 */ | |
283 function getPageInfo(tab, timeout) | |
284 { | |
285 return new Promise((resolve, result) => | |
286 { | |
287 const mm = tab.linkedBrowser.messageManager; | |
288 let timerID; | |
289 let onDone = (msg) => | |
290 { | |
291 mm.removeMessageListener("abpcrawler:pageInfoGathered", onDone); | |
292 clearTimeout(timerID); | |
293 resolve(msg.data); | |
294 } | |
295 mm.addMessageListener("abpcrawler:pageInfoGathered", onDone); | |
296 timerID = setTimeout(() => onDone({data: {error: "timeout"}}), timeout); | |
297 }); | |
298 } | |
299 | |
300 /** | |
333 * Crawls a URL. This is a generator meant to be used via a Task object. | 301 * Crawls a URL. This is a generator meant to be used via a Task object. |
334 * | 302 * |
335 * @param {String} url | 303 * @param {String} url |
336 * @param {TabAllocator} tabAllocator | 304 * @param {TabAllocator} tabAllocator |
337 * @param {loadListener} loadListener | 305 * @param {int} timeout |
306 * Load timeout in milliseconds | |
338 * @result {Object} | 307 * @result {Object} |
339 * Crawling result | 308 * Crawling result |
340 */ | 309 */ |
341 function* crawl_url(url, tabAllocator, loadListener) | 310 function* crawl_url(url, tabAllocator, timeout) |
342 { | 311 { |
343 let tab = yield tabAllocator.getTab(); | 312 let tab = yield tabAllocator.getTab(); |
344 let result = {url, requests: []}; | 313 let result = {url, requests: []}; |
345 let requestNotifier; | 314 let requestNotifier; |
346 try | 315 try |
347 { | 316 { |
348 result.startTime = Date.now(); | 317 result.startTime = Date.now(); |
349 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, | 318 requestNotifier = new RequestNotifier(tab.linkedBrowser.outerWindowID, |
350 function(entry, scanComplete) | 319 function(entry, scanComplete) |
351 { | 320 { |
352 if (!entry) | 321 if (!entry) |
353 return; | 322 return; |
354 let {type: contentType, location, filter} = entry; | 323 let {type: contentType, location, filter} = entry; |
355 result.requests.push({location, contentType, filter}); | 324 result.requests.push({location, contentType, filter}); |
356 }); | 325 }); |
357 | 326 |
358 tab.linkedBrowser.loadURI(url, null, null); | 327 tab.linkedBrowser.loadURI(url, null, null); |
359 [result.channelStatus, result.headers] = yield loadListener.waitForLoad(tab); | 328 |
329 Object.assign(result, yield getPageInfo(tab, timeout)); | |
330 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
360 result.endTime = Date.now(); | 331 result.endTime = Date.now(); |
361 result.finalUrl = tab.linkedBrowser.currentURI.spec; | |
362 | |
363 let document = tab.linkedBrowser.contentDocument; | |
364 if (document.documentElement) | |
365 { | |
366 try | |
367 { | |
368 let canvas = document.createElementNS("http://www.w3.org/1999/xhtml", "canvas"); | |
369 canvas.width = document.documentElement.scrollWidth; | |
370 canvas.height = document.documentElement.scrollHeight; | |
371 | |
372 let context = canvas.getContext("2d"); | |
373 context.drawWindow(document.defaultView, 0, 0, canvas.width, canvas.height, "rgb(255, 255, 255)"); | |
374 result.screenshot = canvas.toDataURL("image/jpeg", 0.8); | |
375 } | |
376 catch (e) | |
377 { | |
378 reportException(e); | |
379 result.error = "Capturing screenshot failed: " + e; | |
380 } | |
381 | |
382 // TODO: Capture frames as well? | |
383 let serializer = new tab.ownerDocument.defaultView.XMLSerializer(); | |
384 result.source = serializer.serializeToString(document.documentElement); | |
385 } | |
386 } | 332 } |
387 finally | 333 finally |
388 { | 334 { |
389 if (requestNotifier) | 335 if (requestNotifier) |
390 requestNotifier.shutdown(); | 336 requestNotifier.shutdown(); |
391 tabAllocator.releaseTab(tab); | 337 tabAllocator.releaseTab(tab); |
392 } | 338 } |
393 return result; | 339 return result; |
394 } | 340 } |
395 | 341 |
396 function reportException(e) | 342 function reportException(e) |
397 { | 343 { |
398 let stack = ""; | 344 let stack = ""; |
399 if (e && typeof e == "object" && "stack" in e) | 345 if (e && typeof e == "object" && "stack" in e) |
400 stack = e.stack + "\n"; | 346 stack = e.stack + "\n"; |
401 | 347 |
402 Cu.reportError(e); | 348 Cu.reportError(e); |
403 dump(e + "\n" + stack + "\n"); | 349 dump(e + "\n" + stack + "\n"); |
404 } | 350 } |
OLD | NEW |