Index: lib/crawler.js |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/lib/crawler.js |
@@ -0,0 +1,126 @@ |
+Cu.import("resource://gre/modules/Services.jsm"); |
+ |
/**
 * Asks for one of the crawler's modules through the observer service.
 *
 * A fresh object is sent as the subject of an "abpcrawler-require"
 * notification (reachable for observers via its wrappedJSObject property),
 * with the module name as the notification data. Whatever ends up in the
 * object's "exports" property afterwards is returned. Presumably an observer
 * registered elsewhere fills it in - that part is not visible in this file.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18), cut off in the source:
 * "This is unnecessary - module in lib/ directory can"
 *
 * @param {String} module  name of the module to load
 * @return the "exports" property of the notification subject, undefined if
 *         nothing set it
 */
function require(module)
{
  let request = {};
  // Lets observers reach the plain JS object behind the notification subject.
  request.wrappedJSObject = request;
  Services.obs.notifyObservers(request, "abpcrawler-require", module);
  return request.exports;
}
+ |
/**
 * Asks for an Adblock Plus module.
 *
 * First an "adblockplus-require" observer notification is sent, with a fresh
 * object as subject and the module name as data. If that leaves an "exports"
 * property on the object, its value is returned. Otherwise the module is
 * imported with Cu.import() from chrome://adblockplus-modules/content/, using
 * the module name with its first character upper-cased plus ".jsm" as the
 * file name.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18), cut off in the source:
 * "This is unnecessary - Diagnostics is currently com"
 *
 * @param {String} module  name of the module, e.g. "contentPolicy"
 * @return the module's exports, or the result of Cu.import() for the fallback
 */
function abprequire(module)
{
  let request = {};
  request.wrappedJSObject = request;
  Services.obs.notifyObservers(request, "adblockplus-require", module);

  if (!("exports" in request))
  {
    // Nobody answered the notification: fall back to the JSM file,
    // e.g. "contentPolicy" -> "ContentPolicy.jsm".
    let fileName = module[0].toUpperCase() + module.substr(1) + ".jsm";
    return Cu.import("chrome://adblockplus-modules/content/" + fileName, null);
  }

  return request.exports;
}
+ |
// Modules obtained through the "abpcrawler-require" notification.
let Storage = require("storage").Storage;
let Client = require("client").Client;

// Modules obtained from Adblock Plus.
let Policy = abprequire("contentPolicy").Policy;
let Filter = abprequire("filterClasses").Filter;

// Policy.processNode as it was when this module loaded; Crawler.crawl()
// swaps in its own wrapper and puts this one back when the crawl is done.
let origProcessNode = Policy.processNode;

// Maps a location URL seen in a crawl tab to the site that tab was opened
// for. Reset to an empty object at the start of every crawl.
let siteTabs;

// Number of crawl tabs that are currently open. Reset to 0 at the start of
// every crawl.
let currentTabs;
+ |
/**
 * Wrapper that Crawler.crawl() installs as Policy.processNode for the
 * duration of a crawl.
 *
 * The call is forwarded unchanged (same this, same arguments) to the original
 * processNode and its result is returned as is. In addition, when
 * location.spec is non-empty, a record [url, site, filtered] is handed to
 * Storage.write(): url is location.spec, site is looked up in siteTabs under
 * the URL of the top-level window, and filtered is the negated result of the
 * original call.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18), cut off in the source:
 * "I think that you can remove that check - an nsIURI"
 *
 * @return the result of the original Policy.processNode
 */
function processNode(wnd, node, contentType, location, collapse)
{
  let verdict = origProcessNode.apply(this, arguments);

  let address = location.spec;
  if (!address)
    return verdict;

  // siteTabs is keyed by the URLs recorded in loadSite()'s onLocationChange.
  Storage.write([address, siteTabs[wnd.top.location.href], !verdict]);
  return verdict;
}
+ |
/**
 * Opens a site in a new tab of the given browser window and closes the tab
 * again once a STATE_STOP state change is reported for it.
 *
 * While the tab is open, every location change reported for its browser is
 * recorded in siteTabs (location URL -> site), which is what processNode()
 * uses to attribute requests to a site.
 *
 * Nothing happens - and callback is NOT invoked - when site is falsy.
 *
 * @param {String} site        address to open
 * @param {Window} window      browser window, its gBrowser property is used
 * @param {Function} callback  called without arguments after the tab has
 *                             been removed
 */
function loadSite(site, window, callback)
{
  if (!site)
    return;

  let gBrowser = window.gBrowser;
  let crawlTab = gBrowser.addTab(site);
  let crawlBrowser = gBrowser.getBrowserForTab(crawlTab);

  let listener = {
    onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus)
    {
      // Tabs progress listeners hear about all tabs; only a stop
      // notification for our own browser is of interest.
      let stoppedOurTab = crawlBrowser === aBrowser &&
                          (aStateFlags & Ci.nsIWebProgressListener.STATE_STOP);
      if (!stoppedOurTab)
        return;

      gBrowser.removeTabsProgressListener(listener);
      gBrowser.removeTab(crawlTab);
      callback();
    },
    onLocationChange: function(aBrowser, aWebProgress, aRequest, aLocation, aFlags)
    {
      if (crawlBrowser !== aBrowser)
        return;

      // Review note (Wladimir Palant, 2012/09/21 15:36:18), cut off in the
      // source: "Ok, that's a hack. What if two webpages in differe"
      siteTabs[aLocation.spec] = site;
    }
  };
  gBrowser.addTabsProgressListener(listener);
}
+ |
/**
 * Loads all given sites, keeping up to parallelTabs crawl tabs open at any
 * time (tracked in the module-level currentTabs counter). Once every site
 * has been processed and the last tab is closed, the collected data is
 * finalized with Storage.finish(), the data file is passed to
 * Client.sendCrawlerDataFile(), and after that has called back the storage
 * is destroyed and callback is invoked.
 *
 * All tab callbacks pull from one shared queue, so each site is loaded
 * exactly once regardless of the order in which tabs finish. Falsy entries
 * are skipped because loadSite() ignores them without calling back, which
 * would otherwise leave currentTabs incremented forever. An empty (or
 * all-falsy) list completes immediately instead of never calling back.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18):
 * "let site = sites.shift()?"
 *
 * @param {String} backendUrl    passed through to Client.sendCrawlerDataFile()
 * @param {Number} parallelTabs  maximum number of tabs open at the same time
 * @param {Window} window        browser window the tabs are opened in
 * @param {Array} sites          sites to load; the array is not modified
 * @param {Function} callback    called without arguments when everything is
 *                               done
 */
function loadSites(backendUrl, parallelTabs, window, sites, callback)
{
  // Private copy: the caller's array stays untouched while all closures
  // below consume the same queue.
  let queue = sites.slice();

  // Finalizes the storage, uploads the data file and reports completion.
  function finishCrawl()
  {
    Storage.finish();
    let dataFilePath = Storage.dataFile.path;
    Client.sendCrawlerDataFile(backendUrl, window, dataFilePath, function()
    {
      Storage.destroy();
      callback();
    });
  }

  // Opens tabs until the limit is reached or the queue is empty, and
  // finishes the crawl when there is nothing left to load or wait for.
  function fillTabs()
  {
    while (currentTabs < parallelTabs && queue.length)
    {
      let site = queue.shift();
      if (!site)
        continue;

      currentTabs++;
      loadSite(site, window, function()
      {
        currentTabs--;
        fillTabs();
      });
    }

    if (!queue.length && !currentTabs)
      finishCrawl();
  }

  fillTabs();
}
+ |
/**
 * Public interface of the crawler module.
 */
let Crawler = exports.Crawler = {};

/**
 * Starts a crawl: fetches the list of sites via Client.fetchCrawlableSites()
 * and hands them to loadSites(). For the duration of the crawl,
 * Policy.processNode is replaced by the recording wrapper processNode(); the
 * original is restored right before callback is invoked.
 *
 * Returns immediately, without invoking callback, if Policy.processNode is
 * not the original function - which is the case while another crawl started
 * by this function is still running.
 *
 * @param {String} backendUrl    passed to Client.fetchCrawlableSites() and on
 *                               to loadSites()
 * @param {Number} parallelTabs  maximum number of tabs open at the same time
 * @param {Window} window        browser window the tabs are opened in
 * @param {Function} callback    called without arguments when the crawl is
 *                               complete
 */
Crawler.crawl = function(backendUrl, parallelTabs, window, callback)
{
  let hookAlreadyReplaced = Policy.processNode != origProcessNode;
  if (hookAlreadyReplaced)
    return;

  // Undoes the hook and reports completion to the caller.
  function onCrawlDone()
  {
    Policy.processNode = origProcessNode;
    callback();
  }

  // Receives the site list from the backend and starts loading.
  function onSitesFetched(sites)
  {
    loadSites(backendUrl, parallelTabs, window, sites, onCrawlDone);
  }

  Policy.processNode = processNode;

  // Fresh bookkeeping for this crawl.
  siteTabs = {};
  currentTabs = 0;

  Storage.init();

  Client.fetchCrawlableSites(backendUrl, onSitesFetched);
};