Rietveld Code Review Tool

Unified Diff: lib/crawler.js

Issue 29338153: Issue 3780 - wait for the loading of filters and only afterwards start to fetch pages (Closed)
Patch Set: don't use internal FilterStorage._loading (created March 15, 2016, 2:42 p.m.)
Index: lib/crawler.js
diff --git a/lib/crawler.js b/lib/crawler.js
index 3445b2f5424fd846e71c3ffe3cb6fe964fc78329..a0deabcc9fd415ab4350dff55e0d8826eafb1707 100644
--- a/lib/crawler.js
+++ b/lib/crawler.js
@@ -21,7 +21,8 @@ function abprequire(module)
}
let {RequestNotifier} = abprequire("requestNotifier");
-
+let {FilterNotifier} = abprequire("filterNotifier");
+let {FilterStorage} = abprequire("filterStorage");
/**
* Creates a pool of tabs and allocates them to tasks on request.
@@ -197,13 +198,53 @@ WindowCloser.prototype = {
* The browser window we're operating in
* @param {String[]} urls
* URLs to be crawled
- * @param {int} number_of_tabs
+ * @param {int} timeout
+ * Load timeout in milliseconds
+ * @param {int} maxtabs
* Maximum number of tabs to be opened
* @param {String} targetURL
* URL that should receive the results
+ * @param {Function} onDone
+ * The callback which is called after crawling of all URLs has finished.
*/
function run(window, urls, timeout, maxtabs, targetURL, onDone)
{
+ new Promise((resolve, reject) =>
+ {
+ if (FilterStorage.subscriptions.length > 0)
+ {
+ resolve();
+ return;
+ }
+ FilterNotifier.addListener((action, item, newValue, oldValue) =>
Wladimir Palant 2016/03/15 14:50:58 Just realized: you have to remove that listener afterwards.
sergei 2016/03/15 14:58:45 Acknowledged.
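A minimal sketch of the fix requested above, assuming FilterNotifier.removeListener exists as the counterpart to addListener; the fixed version is not shown in this patch set:

    // Sketch only: keep a reference to the listener so it can be
    // detached once the "load" event has fired, instead of leaving
    // a dangling FilterNotifier listener behind after resolve().
    let listener = (action, item, newValue, oldValue) =>
    {
      if (action == "load")
      {
        FilterNotifier.removeListener(listener);
        resolve();
      }
    };
    FilterNotifier.addListener(listener);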
+ {
+ if (action == "load")
+ {
+ resolve();
+ }
+ });
+ }).then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)).catch(reportException);
Wladimir Palant 2016/03/15 14:50:58 Sorry but now having catch() on the same line no longer fits, please move it onto a separate line.
sergei 2016/03/15 14:58:45 Done.
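A sketch of the reflowed chain the follow-up presumably contains, with catch() moved onto its own line:

    new Promise((resolve, reject) =>
    {
      // ... wait for the filters to load, as above ...
    })
      .then(() => crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone))
      .catch(reportException);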
+}
+exports.run = run;
+
+/**
+ * Spawns a {Task} task to crawl each URL from the `urls` argument and calls
+ * `onDone` when all tasks are finished.
+ * @param {Window} window
+ * The browser window we're operating in
+ * @param {String[]} urls
+ * URLs to be crawled
+ * @param {int} timeout
+ * Load timeout in milliseconds
+ * @param {int} maxtabs
+ * Maximum number of tabs to be opened
+ * @param {String} targetURL
+ * URL that should receive the results
+ * @param {Function} onDone
+ * The callback which is called after all tasks have finished.
+ */
+function crawl_urls(window, urls, timeout, maxtabs, targetURL, onDone)
+{
let tabAllocator = new TabAllocator(window.getBrowser(), maxtabs);
let loadListener = new LoadListener(window.getBrowser(), timeout);
let running = 0;
@@ -245,7 +286,6 @@ function run(window, urls, timeout, maxtabs, targetURL, onDone)
}.bind(null, url));
}
}
-exports.run = run;
/**
* Crawls a URL. This is a generator meant to be used via a Task object.
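
For orientation, a hypothetical caller of the exported run() might look like this; the module path, URLs, timeout, tab count, and result endpoint are all illustrative assumptions, not taken from the patch:

    // Hypothetical usage sketch; `window` is a browser chrome window.
    let {run} = require("crawler");
    run(window,
        ["http://example.com/", "http://example.org/"], // URLs to crawl
        30000,                                          // load timeout in milliseconds
        5,                                              // open at most 5 tabs
        "http://localhost:8080/save",                   // URL receiving the results
        () => dump("crawling finished\n"));             // onDone callback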