Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: lib/crawler.js

Issue 8402021: Crawler frontend (Closed)
Patch Set: Created Sept. 21, 2012, 1:16 p.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 Cu.import("resource://gre/modules/Services.jsm");
2
/**
 * Requests a crawler module through the observer service.
 *
 * An empty object (exposed to observers via wrappedJSObject) is sent as the
 * subject of an "abpcrawler-require" notification, with the module name as
 * data. Presumably an observer stores the module's exports on that object -
 * this function simply returns whatever "exports" holds afterwards.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18): "This is unnecessary -
 * module in lib/ directory can" [comment truncated on the review page]
 *
 * @param {String} module  name of the module to load
 * @return {Object} the "exports" property of the notification subject
 *                  (undefined if nothing set it)
 */
function require(module)
{
  let carrier = {};
  carrier.wrappedJSObject = carrier;
  Services.obs.notifyObservers(carrier, "abpcrawler-require", module);
  return carrier.exports;
}
10
/**
 * Requests an Adblock Plus module.
 *
 * First sends an "adblockplus-require" notification with an empty subject
 * object (exposed via wrappedJSObject) and the module name as data. If the
 * subject has an "exports" property afterwards, that value is returned.
 * Otherwise the module is imported from
 * chrome://adblockplus-modules/content/<Module>.jsm, where <Module> is the
 * module name with its first character upper-cased.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18): "This is unnecessary -
 * Diagnostics is currently com" [comment truncated on the review page]
 *
 * @param {String} module  name of the Adblock Plus module, e.g. "filterClasses"
 * @return {Object} the module's exports, or the result of Cu.import()
 */
function abprequire(module)
{
  let carrier = {};
  carrier.wrappedJSObject = carrier;
  Services.obs.notifyObservers(carrier, "adblockplus-require", module);
  if ("exports" in carrier)
    return carrier.exports;

  // Nobody answered the notification - fall back to the JavaScript module.
  let moduleFile = module[0].toUpperCase() + module.substr(1) + ".jsm";
  return Cu.import("chrome://adblockplus-modules/content/" + moduleFile, null);
}
22
// Crawler's own modules.
let {Storage} = require("storage");
let {Client} = require("client");

// Adblock Plus modules.
let {Policy} = abprequire("contentPolicy");
let {Filter} = abprequire("filterClasses");

// Original Policy.processNode implementation, kept so that the crawler's
// replacement can delegate to it and so that it can be restored afterwards.
let origProcessNode = Policy.processNode;

// Maps the URL of a page loaded in a crawler tab to the site that tab was
// opened for. Reset by Crawler.crawl().
let siteTabs;
// Number of site loads currently in progress. Reset by Crawler.crawl().
let currentTabs;
33
/**
 * Replacement for Policy.processNode that records every processed request.
 *
 * Delegates to the original implementation with the same "this" and
 * arguments, then writes [request URL, crawled site, filtered] to Storage.
 * The site is looked up in siteTabs by the URL of the top-level window, and
 * "filtered" is the negation of the original implementation's result. Nothing
 * is written when the location has no spec.
 *
 * Review note (Wladimir Palant, 2012/09/21 15:36:18): "I think that you can
 * remove that check - an nsIURI" [comment truncated on the review page]
 *
 * @return whatever the original Policy.processNode returned
 */
function processNode(wnd, node, contentType, location, collapse)
{
  let allowed = origProcessNode.apply(this, arguments);
  let requestUrl = location.spec;
  if (!requestUrl)
    return allowed;

  Storage.write([requestUrl, siteTabs[wnd.top.location.href], !allowed]);
  return allowed;
}
46
/**
 * Opens a site in a new tab of the given browser window and closes the tab
 * again once a STATE_STOP state change is reported for it.
 *
 * While the tab is open, every location it navigates to is recorded in
 * siteTabs (location URL -> site).
 *
 * @param {String} site  URL to open; if falsy, no tab is opened
 * @param {Window} window  browser window, its gBrowser is used to open the tab
 * @param {Function} callback  called without arguments when the site is done
 *                             (also for a falsy site, so that the caller is
 *                             never left waiting)
 */
function loadSite(site, window, callback)
{
  if (!site)
  {
    // Nothing to load, but the caller still waits for its callback. Report
    // completion asynchronously, the same way a real page load would.
    window.setTimeout(callback, 0);
    return;
  }

  let tabbrowser = window.gBrowser;
  let tab = tabbrowser.addTab(site);
  let browser = tabbrowser.getBrowserForTab(tab);

  let progressListener = {
    onStateChange: function(aBrowser, aWebProgress, aRequest, aStateFlags, aStatus)
    {
      // The listener is notified for all tabs, ignore the other ones.
      if (browser !== aBrowser)
        return;

      if (!(aStateFlags & Ci.nsIWebProgressListener.STATE_STOP))
        return;

      tabbrowser.removeTabsProgressListener(progressListener);
      tabbrowser.removeTab(tab);
      callback();
    },
    onLocationChange: function(aBrowser, aWebProgress, aRequest, aLocation, aFlags)
    {
      // Review note (Wladimir Palant, 2012/09/21 15:36:18): "Ok, that's a
      // hack. What if two webpages in differe" [comment truncated on the
      // review page]
      if (browser === aBrowser)
        siteTabs[aLocation.spec] = site;
    }
  };
  tabbrowser.addTabsProgressListener(progressListener);
}
77
/**
 * Loads all given sites via loadSite(), keeping at most parallelTabs loads in
 * progress at any time (counted in the module-level currentTabs).
 *
 * Once no sites are left and no load is in progress, Storage.finish() is
 * called and the data file is sent with Client.sendCrawlerDataFile(). When
 * that completes, Storage.destroy() is called, followed by the callback.
 *
 * All loads share a single queue. Earlier each recursive call worked on its
 * own re-sliced list, so completion callbacks of older calls saw stale lists
 * and sites were loaded more than once when parallelTabs was larger than 1.
 *
 * @param {String} backendUrl  passed through to Client.sendCrawlerDataFile()
 * @param {Number} parallelTabs  maximum number of simultaneous site loads
 * @param {Window} window  browser window passed to loadSite() and
 *                         Client.sendCrawlerDataFile()
 * @param {Array} sites  sites to load; the array itself is not modified
 * @param {Function} callback  called without arguments after the data file
 *                             was sent and the storage destroyed
 */
function loadSites(backendUrl, parallelTabs, window, sites, callback)
{
  // Review note (Wladimir Palant, 2012/09/21 15:36:18): "let site =
  // sites.shift()?" - done below, on a private copy so that the caller's
  // array stays untouched.
  let queue = sites.slice();

  let finish = function()
  {
    Storage.finish();
    let dataFilePath = Storage.dataFile.path;
    Client.sendCrawlerDataFile(backendUrl, window, dataFilePath, function()
    {
      Storage.destroy();
      callback();
    });
  };

  let fillTabs = function()
  {
    while (currentTabs < parallelTabs && queue.length)
    {
      currentTabs++;
      loadSite(queue.shift(), window, onSiteDone);
    }
  };

  let onSiteDone = function()
  {
    currentTabs--;
    if (!queue.length && !currentTabs)
      finish();
    else
      fillTabs();
  };

  // With nothing to load no completion callback would ever fire, so finish
  // right away instead of leaving the caller waiting forever.
  if (!queue.length && !currentTabs)
  {
    finish();
    return;
  }

  fillTabs();
}
103
/**
 * Public crawler interface, exported as "Crawler".
 */
let Crawler = {};
exports.Crawler = Crawler;

/**
 * Runs a crawl: replaces Policy.processNode with the recording processNode,
 * resets the crawler state, initializes Storage, fetches the list of sites
 * with Client.fetchCrawlableSites() and hands it to loadSites(). When
 * loadSites() reports completion, the original Policy.processNode is restored
 * and the callback is called.
 *
 * Does nothing (and does not call the callback) if Policy.processNode is
 * currently not the original implementation.
 *
 * @param {String} backendUrl  passed to Client.fetchCrawlableSites() and
 *                             loadSites()
 * @param {Number} parallelTabs  passed to loadSites()
 * @param {Window} window  passed to loadSites()
 * @param {Function} callback  called without arguments when the crawl is done
 */
Crawler.crawl = function(backendUrl, parallelTabs, window, callback)
{
  let hookReplaced = Policy.processNode != origProcessNode;
  if (hookReplaced)
    return;

  Policy.processNode = processNode;
  siteTabs = {};
  currentTabs = 0;
  Storage.init();

  let restoreAndNotify = function()
  {
    Policy.processNode = origProcessNode;
    callback();
  };

  Client.fetchCrawlableSites(backendUrl, function(sites)
  {
    loadSites(backendUrl, parallelTabs, window, sites, restoreAndNotify);
  });
};
OLDNEW

Powered by Google App Engine
This is Rietveld