Index: lib/matcher.js |
=================================================================== |
--- a/lib/matcher.js |
+++ b/lib/matcher.js |
@@ -18,16 +18,17 @@ |
"use strict"; |
/** |
* @fileOverview Matcher class implementing matching addresses against |
* a list of filters. |
*/ |
const {RegExpFilter, WhitelistFilter} = require("./filterClasses"); |
+const {suffixes} = require("./domain"); |
/** |
* Regular expression for matching a keyword in a filter. |
* @type {RegExp} |
*/ |
const keywordRegExp = /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/; |
/** |
@@ -58,16 +59,23 @@ |
* @type {number} |
*/ |
const WHITELIST_ONLY_TYPES = RegExpFilter.typeMap.DOCUMENT | |
RegExpFilter.typeMap.ELEMHIDE | |
RegExpFilter.typeMap.GENERICHIDE | |
RegExpFilter.typeMap.GENERICBLOCK; |
/** |
+ * Fallback domain map used by <code>add()</code> and <code>remove()</code> |
+ * in place of a filter's <code>domains</code> property when that property is |
+ * blank. Its only entry maps the empty string to <code>true</code>. |
+ * @type {Map.<string, boolean>} |
+ */ |
+let defaultDomains = new Map([["", true]]); |
+ |
+/** |
* Yields individual non-default types from a filter's type mask. |
* @param {number} contentType A filter's type mask. |
* @yields {number} |
*/ |
function* nonDefaultTypes(contentType) |
{ |
for (let mask = contentType & NON_DEFAULT_TYPES, bitIndex = 0; |
mask != 0; mask >>>= 1, bitIndex++) |
@@ -167,32 +175,42 @@ |
/** |
* Lookup table for complex filters by their associated keyword |
* @type {Map.<string,(RegExpFilter|Set.<RegExpFilter>)>} |
* @private |
*/ |
this._complexFiltersByKeyword = new Map(); |
/** |
+ * Lookup table of domain maps for complex filters by their associated |
+ * keyword |
+ * @type {Map.<string,Map.<string,(RegExpFilter| |
+ * Map.<RegExpFilter,boolean>)>>} |
+ * @private |
+ */ |
+ this._filterDomainMapsByKeyword = new Map(); |
+ |
+ /** |
* Lookup table of type-specific lookup tables for complex filters by their |
* associated keyword |
* @type {Map.<string,Map.<string,(RegExpFilter|Set.<RegExpFilter>)>>} |
* @private |
*/ |
this._filterMapsByType = new Map(); |
} |
/** |
* Removes all known filters by emptying the matcher's lookup tables |
*/ |
clear() |
{ |
this._keywordByFilter.clear(); |
this._simpleFiltersByKeyword.clear(); |
this._complexFiltersByKeyword.clear(); |
+ this._filterDomainMapsByKeyword.clear(); |
this._filterMapsByType.clear(); |
} |
/** |
* Adds a filter to the matcher |
* @param {RegExpFilter} filter |
*/ |
add(filter) |
@@ -216,16 +234,45 @@ |
for (let type of nonDefaultTypes(filter.contentType)) |
{ |
let map = this._filterMapsByType.get(type); |
if (!map) |
this._filterMapsByType.set(type, map = new Map()); |
addFilterByKeyword(filter, keyword, map); |
} |
+ |
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword); |
+ if (!filtersByDomain) |
+ this._filterDomainMapsByKeyword.set(keyword, filtersByDomain = new Map()); |
+ |
+ for (let [domain, include] of filter.domains || defaultDomains) |
+ { |
+ if (!include && domain == "") |
+ continue; |
+ |
+ let map = filtersByDomain.get(domain); |
+ if (!map) |
+ { |
+ filtersByDomain.set(domain, include ? filter : |
+ map = new Map([[filter, false]])); |
+ } |
+ else if (map.size == 1 && !(map instanceof Map)) |
+ { |
+ if (filter != map) |
+ { |
+ filtersByDomain.set(domain, new Map([[map, true], |
+ [filter, include]])); |
+ } |
+ } |
+ else |
+ { |
+ map.set(filter, include); |
+ } |
+ } |
} |
/** |
* Removes a filter from the matcher |
* @param {RegExpFilter} filter |
*/ |
remove(filter) |
{ |
@@ -245,16 +292,40 @@ |
return; |
for (let type of nonDefaultTypes(filter.contentType)) |
{ |
let map = this._filterMapsByType.get(type); |
if (map) |
removeFilterByKeyword(filter, keyword, map); |
} |
+ |
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword); |
+ if (filtersByDomain) |
+ { |
+ let domains = filter.domains || defaultDomains; |
+ for (let domain of domains.keys()) |
+ { |
+ let map = filtersByDomain.get(domain); |
+ if (map) |
+ { |
+ if (map.size > 1 || map instanceof Map) |
[Review comment: Sebastian Noack, 2019/02/06 16:30:58]
In which scenario would "map" not be a Map object, […]
[Reply: Manish Jethani, 2019/02/06 19:25:05]
It's a hack.
The filter object doubles up as a fa[…]
|
+ { |
+ map.delete(filter); |
+ |
+ if (map.size == 0) |
+ filtersByDomain.delete(domain); |
+ } |
+ else if (filter == map) |
+ { |
+ filtersByDomain.delete(domain); |
+ } |
+ } |
+ } |
+ } |
} |
/** |
* Chooses a keyword to be associated with the filter |
* @param {Filter} filter |
* @returns {string} keyword or an empty string if no keyword could be found |
* @protected |
*/ |
@@ -286,16 +357,119 @@ |
result = candidate; |
resultCount = count; |
resultLength = candidate.length; |
} |
} |
return result; |
} |
+ /** |
+ * Checks the simple (location-only) filters registered for a keyword |
+ * against a URL. |
+ * @param {string} keyword |
+ * @param {string} location |
+ * @param {number} typeMask Not used by this method. |
+ * @param {string} [docDomain] Not used by this method. |
+ * @param {boolean} [thirdParty] Not used by this method. |
+ * @param {string} [sitekey] Not used by this method. |
+ * @param {boolean} [specificOnly] If true, only filters that are instances |
+ * of <code>WhitelistFilter</code> are considered. |
+ * @param {?Array.<RegExpFilter>} [collection] If given, every matching |
+ * filter is pushed onto it and the method returns <code>null</code>. |
+ * @returns {?RegExpFilter} The first matching filter if no |
+ * <code>collection</code> is given, otherwise <code>null</code>. |
+ */ |
+ _checkEntryMatchSimple(keyword, location, typeMask, docDomain, thirdParty, |
+ sitekey, specificOnly, collection) |
+ { |
+ let filters = this._simpleFiltersByKeyword.get(keyword); |
+ if (filters) |
+ { |
+ // Lowercased once and shared by every matchesLocation() call below. |
+ let lowerCaseLocation = location.toLowerCase(); |
+ |
+ for (let filter of filters) |
+ { |
+ if (specificOnly && !(filter instanceof WhitelistFilter)) |
+ continue; |
+ |
+ if (filter.matchesLocation(location, lowerCaseLocation)) |
+ { |
+ // Without a collection the first match wins; with one, keep scanning. |
+ if (!collection) |
+ return filter; |
+ |
+ collection.push(filter); |
+ } |
+ } |
+ } |
+ |
+ return null; |
+ } |
+ |
+ /** |
+ * Checks the filters registered for a keyword in the type-specific lookup |
+ * table against a URL. The table is selected by using |
+ * <code>typeMask</code> itself as the key into |
+ * <code>_filterMapsByType</code>. |
+ * @param {string} keyword |
+ * @param {string} location |
+ * @param {number} typeMask |
+ * @param {string} [docDomain] |
+ * @param {boolean} [thirdParty] |
+ * @param {string} [sitekey] |
+ * @param {boolean} [specificOnly] If true, filters for which |
+ * <code>isGeneric()</code> is true are skipped unless they are instances |
+ * of <code>WhitelistFilter</code>. |
+ * @param {?Array.<RegExpFilter>} [collection] If given, every matching |
+ * filter is pushed onto it and the method returns <code>null</code>. |
+ * @returns {?RegExpFilter} The first matching filter if no |
+ * <code>collection</code> is given, otherwise <code>null</code>. |
+ */ |
+ _checkEntryMatchForType(keyword, location, typeMask, docDomain, thirdParty, |
+ sitekey, specificOnly, collection) |
+ { |
+ let filtersForType = this._filterMapsByType.get(typeMask); |
+ if (filtersForType) |
+ { |
+ let filters = filtersForType.get(keyword); |
+ if (filters) |
+ { |
+ for (let filter of filters) |
+ { |
+ if (specificOnly && filter.isGeneric() && |
+ !(filter instanceof WhitelistFilter)) |
+ continue; |
+ |
+ if (filter.matches(location, typeMask, docDomain, thirdParty, |
+ sitekey)) |
+ { |
+ // Without a collection the first match wins; with one, keep scanning. |
+ if (!collection) |
+ return filter; |
+ |
+ collection.push(filter); |
+ } |
+ } |
+ } |
+ } |
+ |
+ return null; |
+ } |
+ |
+ /** |
+ * Checks the filters registered for a keyword in the per-domain lookup |
+ * table against a URL. The document domain is normalized, then each value |
+ * yielded by <code>suffixes()</code> for it is looked up in the keyword's |
+ * domain map. |
+ * @param {string} keyword |
+ * @param {string} location |
+ * @param {number} typeMask |
+ * @param {string} [docDomain] |
+ * @param {boolean} [thirdParty] |
+ * @param {string} [sitekey] |
+ * @param {boolean} [specificOnly] Passed negated as the second argument to |
+ * <code>suffixes()</code>. |
+ * @param {?Array.<RegExpFilter>} [collection] If given, every matching |
+ * filter is pushed onto it and the method returns <code>null</code>. |
+ * @returns {?RegExpFilter} The first matching filter if no |
+ * <code>collection</code> is given, otherwise <code>null</code>. |
+ */ |
+ _checkEntryMatchByDomain(keyword, location, typeMask, docDomain, thirdParty, |
+ sitekey, specificOnly, collection) |
+ { |
+ let filtersByDomain = this._filterDomainMapsByKeyword.get(keyword); |
+ if (filtersByDomain) |
+ { |
+ // The code in this block is similar to the generateStyleSheetForDomain |
+ // function in lib/elemHide.js. |
+ |
+ if (docDomain) |
+ { |
+ // Normalize the document domain: drop trailing dots, then lowercase. |
+ if (docDomain[docDomain.length - 1] == ".") |
+ docDomain = docDomain.replace(/\.+$/, ""); |
+ |
+ docDomain = docDomain.toLowerCase(); |
+ } |
+ |
+ // Filters for which an exclusion entry (include == false) has been seen |
+ // so far; they are not allowed to match on any later entry. |
+ let excluded = new Set(); |
+ |
+ // NOTE(review): presumably the second argument controls whether the |
+ // blank suffix is yielded as well; confirm in lib/domain. |
+ for (let suffix of suffixes(docDomain || "", !specificOnly)) |
+ { |
+ let filters = filtersByDomain.get(suffix); |
+ if (filters) |
+ { |
+ // NOTE(review): add() can store a bare filter instead of a Map for a |
+ // domain, so entries() presumably also exists on filter objects; |
+ // confirm in lib/filterClasses. |
+ for (let [filter, include] of filters.entries()) |
+ { |
+ if (!include) |
+ { |
+ excluded.add(filter); |
+ } |
+ else if ((excluded.size == 0 || !excluded.has(filter)) && |
+ filter.matchesWithoutDomain(location, typeMask, |
+ thirdParty, sitekey)) |
+ { |
+ // Without a collection the first match wins; with one, keep scanning. |
+ if (!collection) |
+ return filter; |
+ |
+ collection.push(filter); |
+ } |
+ } |
+ } |
+ } |
+ } |
+ |
+ return null; |
+ } |
+ |
/** |
* Checks whether the entries for a particular keyword match a URL |
* @param {string} keyword |
* @param {string} location |
* @param {number} typeMask |
* @param {string} [docDomain] |
* @param {boolean} [thirdParty] |
* @param {string} [sitekey] |
@@ -309,74 +483,38 @@ |
*/ |
checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey, |
specificOnly, collection) |
{ |
// We need to skip the simple (location-only) filters if the type mask does |
// not contain any default content types. |
if ((typeMask & DEFAULT_TYPES) != 0) |
{ |
- let simpleSet = this._simpleFiltersByKeyword.get(keyword); |
- if (simpleSet) |
- { |
- let lowerCaseLocation = location.toLowerCase(); |
- |
- for (let filter of simpleSet) |
- { |
- if (specificOnly && !(filter instanceof WhitelistFilter)) |
- continue; |
- |
- if (filter.matchesLocation(location, lowerCaseLocation)) |
- { |
- if (!collection) |
- return filter; |
- |
- collection.push(filter); |
- } |
- } |
- } |
+ let filter = this._checkEntryMatchSimple(keyword, location, typeMask, |
+ docDomain, thirdParty, sitekey, |
+ specificOnly, collection); |
+ // Non-null only when no collection was passed and a simple filter matched. |
+ if (filter) |
+ return filter; |
} |
- let complexSet = null; |
- |
// If the type mask contains a non-default type (first condition) and it is |
// the only type in the mask (second condition), we can use the |
// type-specific map, which typically contains a lot fewer filters. This |
// enables faster lookups for whitelisting types like $document, $elemhide, |
// and so on, as well as other special types like $csp. |
if ((typeMask & NON_DEFAULT_TYPES) != 0 && (typeMask & typeMask - 1) == 0) |
{ |
- let map = this._filterMapsByType.get(typeMask); |
- if (map) |
- complexSet = map.get(keyword); |
- } |
- else |
- { |
- complexSet = this._complexFiltersByKeyword.get(keyword); |
+ return this._checkEntryMatchForType(keyword, location, typeMask, |
+ docDomain, thirdParty, sitekey, |
+ specificOnly, collection); |
} |
- if (complexSet) |
- { |
- for (let filter of complexSet) |
- { |
- if (specificOnly && filter.isGeneric() && |
- !(filter instanceof WhitelistFilter)) |
- continue; |
- |
- if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey)) |
- { |
- if (!collection) |
- return filter; |
- |
- collection.push(filter); |
- } |
- } |
- } |
- |
- return null; |
+ // For every other type mask, use the per-domain lookup table for the |
+ // keyword. |
+ return this._checkEntryMatchByDomain(keyword, location, typeMask, |
+ docDomain, thirdParty, sitekey, |
+ specificOnly, collection); |
} |
/** |
* Tests whether the URL matches any of the known filters |
* @param {string} location |
* URL to be tested |
* @param {number} typeMask |
* bitmask of content / request types to match |