Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/matcher.js

Issue 30000586: Issue 7265 - Organize request blocking filters by domain (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Rebase Created Feb. 7, 2019, 3:45 a.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterClasses.js » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: lib/matcher.js
===================================================================
--- a/lib/matcher.js
+++ b/lib/matcher.js
@@ -18,16 +18,17 @@
"use strict";
/**
* @fileOverview Matcher class implementing matching addresses against
* a list of filters.
*/
const {RegExpFilter, WhitelistFilter} = require("./filterClasses");
+const {suffixes} = require("./domain");
/**
* Regular expression for matching a keyword in a filter.
* @type {RegExp}
*/
const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/;
/**
@@ -58,16 +59,23 @@
* @type {number}
*/
const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT |
RegExpFilter.typeMap.ELEMHIDE |
RegExpFilter.typeMap.GENERICHIDE |
RegExpFilter.typeMap.GENERICBLOCK;
/**
+ * Map to be used instead when a filter has a blank <code>domains</code>
+ * property.
+ * @type {Map.<string, boolean>}
+ */
+let defaultDomains = new Map([["", true]]); // single entry: the blank domain, included. NOTE(review): never reassigned in the visible hunks; could likely be const - confirm
+
+/**
* Yields individual non-default types from a filter's type mask.
* @param {number} contentType A filter's type mask.
* @yields {number}
*/
function* nonDefaultTypes(contentType)
{
for (let mask = contentType & NON_DEFAULT_TYPES, bitIndex = 0;
mask != 0; mask >>>= 1, bitIndex++)
@@ -167,32 +175,42 @@
/**
* Lookup table for complex filters by their associated keyword
* @type {Map.<string,(RegExpFilter|Set.<RegExpFilter>)>}
* @private
*/
this._complexFiltersByKeyword = new Map();
/**
+ * Lookup table of domain maps for complex filters by their associated
+ * keyword
+ * @type {Map.<string,Map.<string,(RegExpFilter|
+ * Map.<RegExpFilter,boolean>)>>}
+ * @private
+ */
+ this._filterDomainMapsByKeyword = new Map();
+
+ /**
* Lookup table of type-specific lookup tables for complex filters by their
* associated keyword
* @type {Map.<string,Map.<string,(RegExpFilter|Set.<RegExpFilter>)>>}
* @private
*/
this._filterMapsByType = new Map();
}
/**
* Removes all known filters
*/
clear()
{
this._keywordByFilter.clear();
this._simpleFiltersByKeyword.clear();
this._complexFiltersByKeyword.clear();
+ this._filterDomainMapsByKeyword.clear(); // also drop the per-keyword domain maps introduced by this patch
this._filterMapsByType.clear();
}
/**
* Adds a filter to the matcher
* @param {RegExpFilter} filter
*/
add(filter)
@@ -216,16 +234,45 @@
for (let type of nonDefaultTypes(filter.contentType))
{
let map = this._filterMapsByType.get(type);
if (!map)
this._filterMapsByType.set(type, map = new Map());
addFilterByKeyword(filter, keyword, map);
}
+
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword);
+ if (!filtersByDomain)
+ this._filterDomainMapsByKeyword.set(keyword, filtersByDomain = new Map());
+
+ for (let [domain, include] of filter.domains || defaultDomains)
+ {
+ if (!include && domain == "")
+ continue;
+
+ let map = filtersByDomain.get(domain);
+ if (!map)
+ {
+ filtersByDomain.set(domain, include ? filter :
+ map = new Map([[filter, false]]));
+ }
+ else if (map.size == 1 && !(map instanceof Map))
+ {
+ if (filter != map)
+ {
+ filtersByDomain.set(domain, new Map([[map, true],
+ [filter, include]]));
+ }
+ }
+ else
+ {
+ map.set(filter, include);
+ }
+ }
}
/**
* Removes a filter from the matcher
* @param {RegExpFilter} filter
*/
remove(filter)
{
@@ -245,16 +292,40 @@
return;
for (let type of nonDefaultTypes(filter.contentType))
{
let map = this._filterMapsByType.get(type);
if (map)
removeFilterByKeyword(filter, keyword, map);
}
+
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword);
+ if (filtersByDomain)
+ {
+ let domains = filter.domains || defaultDomains;
+ for (let domain of domains.keys())
+ {
+ let map = filtersByDomain.get(domain);
+ if (map)
+ {
+ if (map.size > 1 || map instanceof Map)
+ {
+ map.delete(filter);
+
+ if (map.size == 0)
+ filtersByDomain.delete(domain);
+ }
+ else if (filter == map)
+ {
+ filtersByDomain.delete(domain);
+ }
+ }
+ }
+ }
}
/**
* Chooses a keyword to be associated with the filter
* @param {Filter} filter
* @returns {string} keyword or an empty string if no keyword could be found
* @protected
*/
@@ -286,16 +357,119 @@
result = candidate;
resultCount = count;
resultLength = candidate.length;
}
}
return result;
}
+ _checkEntryMatchSimple(keyword, location, typeMask, docDomain, thirdParty,
+ sitekey, specificOnly, collection)
+ { // Tests the simple (location-only) filters stored under keyword against location.
+ let filters = this._simpleFiltersByKeyword.get(keyword); // typeMask, docDomain, thirdParty and sitekey are accepted but never read in this method
+ if (filters)
+ {
+ let lowerCaseLocation = location.toLowerCase(); // computed once and passed to every matchesLocation() call
+
+ for (let filter of filters)
+ {
+ if (specificOnly && !(filter instanceof WhitelistFilter)) // with specificOnly set, only WhitelistFilter instances are tested
+ continue;
+
+ if (filter.matchesLocation(location, lowerCaseLocation))
+ {
+ if (!collection)
+ return filter; // no collection supplied: stop at the first match
+
+ collection.push(filter); // collection supplied: record the match and keep scanning
+ }
+ }
+ }
+
+ return null; // reached when nothing matched, and always when a collection was supplied
+ }
+
+ _checkEntryMatchForType(keyword, location, typeMask, docDomain, thirdParty,
+ sitekey, specificOnly, collection)
+ { // Tests the filters registered for one specific content type and keyword.
+ let filtersForType = this._filterMapsByType.get(typeMask); // typeMask itself is the map key; add() keys this map by individual non-default type
+ if (filtersForType)
+ {
+ let filters = filtersForType.get(keyword);
+ if (filters)
+ {
+ for (let filter of filters)
+ {
+ if (specificOnly && filter.isGeneric() && // with specificOnly set, generic filters are skipped...
+ !(filter instanceof WhitelistFilter)) // ...unless they are WhitelistFilter instances
+ continue;
+
+ if (filter.matches(location, typeMask, docDomain, thirdParty,
+ sitekey))
+ {
+ if (!collection)
+ return filter; // no collection supplied: stop at the first match
+
+ collection.push(filter); // collection supplied: record the match and keep scanning
+ }
+ }
+ }
+ }
+
+ return null; // reached when nothing matched, and always when a collection was supplied
+ }
+
+ _checkEntryMatchByDomain(keyword, location, typeMask, docDomain, thirdParty,
+ sitekey, specificOnly, collection)
+ { // Tests the filters stored per domain under keyword, walking the suffixes of docDomain.
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword); // domain -> filter(s) table that add() fills for this keyword
+ if (filtersByDomain)
+ {
+ // The code in this block is similar to the generateStyleSheetForDomain
+ // function in lib/elemHide.js.
+ // Normalization first: trailing dots are stripped and the domain is lower-cased before the lookup.
+ if (docDomain)
+ {
+ if (docDomain[docDomain.length - 1] == ".")
+ docDomain = docDomain.replace(/\.+$/, "");
+
+ docDomain = docDomain.toLowerCase(); // presumably the stored domain keys are lower-case - confirm against lib/filterClasses.js
+ }
+
+ let excluded = new Set(); // filters for which an exclusion (include == false) entry has been seen so far
+ // NOTE(review): suffixes() comes from ./domain; its second argument presumably controls whether the blank ("") suffix is yielded - confirm
+ for (let suffix of suffixes(docDomain || "", !specificOnly))
+ {
+ let filters = filtersByDomain.get(suffix);
+ if (filters)
+ {
+ for (let [filter, include] of filters.entries()) // add() may store a bare filter here instead of a Map; presumably the filter class provides entries() - confirm
+ {
+ if (!include)
+ {
+ excluded.add(filter); // remember the exclusion so later suffixes cannot match this filter
+ }
+ else if ((excluded.size == 0 || !excluded.has(filter)) &&
+ filter.matchesWithoutDomain(location, typeMask, // presumably checks everything except the domain restriction - confirm
+ thirdParty, sitekey))
+ {
+ if (!collection)
+ return filter; // no collection supplied: stop at the first match
+
+ collection.push(filter); // NOTE(review): a filter included under several suffixes would be pushed once per suffix - confirm whether duplicates matter
+ }
+ }
+ }
+ }
+ }
+
+ return null; // reached when nothing matched, and always when a collection was supplied
+ }
+
/**
* Checks whether the entries for a particular keyword match a URL
* @param {string} keyword
* @param {string} location
* @param {number} typeMask
* @param {string} [docDomain]
* @param {boolean} [thirdParty]
* @param {string} [sitekey]
@@ -309,70 +483,38 @@
*/
checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
specificOnly, collection)
{
// We need to skip the simple (location-only) filters if the type mask does
// not contain any default content types.
if (!specificOnly && (typeMask & DEFAULT_TYPES) != 0)
{
- let simpleSet = this._simpleFiltersByKeyword.get(keyword);
- if (simpleSet)
- {
- let lowerCaseLocation = location.toLowerCase();
-
- for (let filter of simpleSet)
- {
- if (filter.matchesLocation(location, lowerCaseLocation))
- {
- if (!collection)
- return filter;
-
- collection.push(filter);
- }
- }
- }
+ let filter = this._checkEntryMatchSimple(keyword, location, typeMask, // the simple-filter scan now lives in a helper; all arguments are forwarded unchanged
+ docDomain, thirdParty, sitekey,
+ specificOnly, collection);
+ if (filter) // the helper returns a filter only when no collection was supplied
+ return filter; // so collecting callers always fall through to the complex filters
}
- let complexSet = null;
-
// If the type mask contains a non-default type (first condition) and it is
// the only type in the mask (second condition), we can use the
// type-specific map, which typically contains a lot fewer filters. This
// enables faster lookups for whitelisting types like $document, $elemhide,
// and so on, as well as other special types like $csp.
if ((typeMask & NON_DEFAULT_TYPES) != 0 && (typeMask & typeMask - 1) == 0)
{
- let map = this._filterMapsByType.get(typeMask);
- if (map)
- complexSet = map.get(keyword);
- }
- else
- {
- complexSet = this._complexFiltersByKeyword.get(keyword);
+ return this._checkEntryMatchForType(keyword, location, typeMask, // single non-default type: only the type-specific map is consulted
+ docDomain, thirdParty, sitekey,
+ specificOnly, collection);
}
- if (complexSet)
- {
- for (let filter of complexSet)
- {
- if (specificOnly && filter.isGeneric())
- continue;
-
- if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
- {
- if (!collection)
- return filter;
-
- collection.push(filter);
- }
- }
- }
-
- return null;
+ return this._checkEntryMatchByDomain(keyword, location, typeMask, // every other mask goes through the per-domain lookup
+ docDomain, thirdParty, sitekey,
+ specificOnly, collection);
}
/**
* Tests whether the URL matches any of the known filters
* @param {string} location
* URL to be tested
* @param {number} typeMask
* bitmask of content / request types to match
« no previous file with comments | « lib/filterClasses.js ('k') | test/filterClasses.js » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld