Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Unified Diff: lib/matcher.js

Issue 29556737: Issue 5141 - Convert filter match to C++ (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore/
Patch Set: Cleanup. Fixed the bindings to export what we actually need. Created Sept. 27, 2017, 3:27 p.m.
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « compiled/library.js ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments / Hide Comments ('s')
Index: lib/matcher.js
===================================================================
--- a/lib/matcher.js
+++ b/lib/matcher.js
@@ -12,444 +12,13 @@
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
*/
"use strict";
-/**
- * @fileOverview Matcher class implementing matching addresses against
- * a list of filters.
- */
-
-const {Filter, WhitelistFilter} = require("filterClasses");
-
-/**
- * Blacklist/whitelist filter matching
- * @constructor
- */
-function Matcher()
+const compiled = require("compiled");
+for (let cls of ["Matcher", "defaultMatcher"])
{
- this.clear();
+ exports[cls] = compiled[cls];
}
-exports.Matcher = Matcher;
-
-Matcher.prototype = {
- /**
- * Lookup table for filters by their associated keyword
- * @type {Object}
- */
- filterByKeyword: null,
-
- /**
- * Lookup table for keywords by the filter text
- * @type {Object}
- */
- keywordByFilter: null,
-
- /**
- * Removes all known filters
- */
- clear()
- {
- this.filterByKeyword = Object.create(null);
- this.keywordByFilter = Object.create(null);
- },
-
- /**
- * Adds a filter to the matcher
- * @param {RegExpFilter} filter
- */
- add(filter)
- {
- if (filter.text in this.keywordByFilter)
- return;
-
- // Look for a suitable keyword
- let keyword = this.findKeyword(filter);
- let oldEntry = this.filterByKeyword[keyword];
- if (typeof oldEntry == "undefined")
- this.filterByKeyword[keyword] = filter;
- else if (oldEntry.length == 1)
- this.filterByKeyword[keyword] = [oldEntry, filter];
- else
- oldEntry.push(filter);
- this.keywordByFilter[filter.text] = keyword;
- },
-
- /**
- * Removes a filter from the matcher
- * @param {RegExpFilter} filter
- */
- remove(filter)
- {
- if (!(filter.text in this.keywordByFilter))
- return;
-
- let keyword = this.keywordByFilter[filter.text];
- let list = this.filterByKeyword[keyword];
- if (list.length <= 1)
- delete this.filterByKeyword[keyword];
- else
- {
- let index = list.indexOf(filter);
- if (index >= 0)
- {
- list.splice(index, 1);
- if (list.length == 1)
- this.filterByKeyword[keyword] = list[0];
- }
- }
-
- delete this.keywordByFilter[filter.text];
- },
-
- /**
- * Chooses a keyword to be associated with the filter
- * @param {Filter} filter
- * @return {string} keyword or an empty string if no keyword could be found
- */
- findKeyword(filter)
- {
- let result = "";
- let {text} = filter;
- if (Filter.regexpRegExp.test(text))
- return result;
-
- // Remove options
- let match = Filter.optionsRegExp.exec(text);
- if (match)
- text = match.input.substr(0, match.index);
-
- // Remove whitelist marker
- if (text.substr(0, 2) == "@@")
- text = text.substr(2);
-
- let candidates = text.toLowerCase().match(
- /[^a-z0-9%*][a-z0-9%]{3,}(?=[^a-z0-9%*])/g
- );
- if (!candidates)
- return result;
-
- let hash = this.filterByKeyword;
- let resultCount = 0xFFFFFF;
- let resultLength = 0;
- for (let i = 0, l = candidates.length; i < l; i++)
- {
- let candidate = candidates[i].substr(1);
- let count = (candidate in hash ? hash[candidate].length : 0);
- if (count < resultCount ||
- (count == resultCount && candidate.length > resultLength))
- {
- result = candidate;
- resultCount = count;
- resultLength = candidate.length;
- }
- }
- return result;
- },
-
- /**
- * Checks whether a particular filter is being matched against.
- * @param {RegExpFilter} filter
- * @return {boolean}
- */
- hasFilter(filter)
- {
- return (filter.text in this.keywordByFilter);
- },
-
- /**
- * Returns the keyword used for a filter, null for unknown filters.
- * @param {RegExpFilter} filter
- * @return {string}
- */
- getKeywordForFilter(filter)
- {
- if (filter.text in this.keywordByFilter)
- return this.keywordByFilter[filter.text];
- return null;
- },
-
- /**
- * Checks whether the entries for a particular keyword match a URL
- * @param {string} keyword
- * @param {string} location
- * @param {number} typeMask
- * @param {string} docDomain
- * @param {boolean} thirdParty
- * @param {string} sitekey
- * @param {boolean} specificOnly
- * @return {?Filter}
- */
- _checkEntryMatch(keyword, location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly)
- {
- let list = this.filterByKeyword[keyword];
- for (let i = 0; i < list.length; i++)
- {
- let filter = list[i];
-
- if (specificOnly && filter.isGeneric() &&
- !(filter instanceof WhitelistFilter))
- continue;
-
- if (filter.matches(location, typeMask, docDomain, thirdParty, sitekey))
- return filter;
- }
- return null;
- },
-
- /**
- * Tests whether the URL matches any of the known filters
- * @param {string} location
- * URL to be tested
- * @param {number} typeMask
- * bitmask of content / request types to match
- * @param {string} docDomain
- * domain name of the document that loads the URL
- * @param {boolean} thirdParty
- * should be true if the URL is a third-party request
- * @param {string} sitekey
- * public key provided by the document
- * @param {boolean} specificOnly
- * should be true if generic matches should be ignored
- * @return {?RegExpFilter}
- * matching filter or null
- */
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
- {
- let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
- if (candidates === null)
- candidates = [];
- candidates.push("");
- for (let i = 0, l = candidates.length; i < l; i++)
- {
- let substr = candidates[i];
- if (substr in this.filterByKeyword)
- {
- let result = this._checkEntryMatch(substr, location, typeMask,
- docDomain, thirdParty, sitekey,
- specificOnly);
- if (result)
- return result;
- }
- }
-
- return null;
- }
-};
-
-/**
- * Combines a matcher for blocking and exception rules, automatically sorts
- * rules into two Matcher instances.
- * @constructor
- * @augments Matcher
- */
-function CombinedMatcher()
-{
- this.blacklist = new Matcher();
- this.whitelist = new Matcher();
- this.resultCache = Object.create(null);
-}
-exports.CombinedMatcher = CombinedMatcher;
-
-/**
- * Maximal number of matching cache entries to be kept
- * @type {number}
- */
-CombinedMatcher.maxCacheEntries = 1000;
-
-CombinedMatcher.prototype =
-{
- /**
- * Matcher for blocking rules.
- * @type {Matcher}
- */
- blacklist: null,
-
- /**
- * Matcher for exception rules.
- * @type {Matcher}
- */
- whitelist: null,
-
- /**
- * Lookup table of previous matchesAny results
- * @type {Object}
- */
- resultCache: null,
-
- /**
- * Number of entries in resultCache
- * @type {number}
- */
- cacheEntries: 0,
-
- /**
- * @see Matcher#clear
- */
- clear()
- {
- this.blacklist.clear();
- this.whitelist.clear();
- this.resultCache = Object.create(null);
- this.cacheEntries = 0;
- },
-
- /**
- * @see Matcher#add
- * @param {Filter} filter
- */
- add(filter)
- {
- if (filter instanceof WhitelistFilter)
- this.whitelist.add(filter);
- else
- this.blacklist.add(filter);
-
- if (this.cacheEntries > 0)
- {
- this.resultCache = Object.create(null);
- this.cacheEntries = 0;
- }
- },
-
- /**
- * @see Matcher#remove
- * @param {Filter} filter
- */
- remove(filter)
- {
- if (filter instanceof WhitelistFilter)
- this.whitelist.remove(filter);
- else
- this.blacklist.remove(filter);
-
- if (this.cacheEntries > 0)
- {
- this.resultCache = Object.create(null);
- this.cacheEntries = 0;
- }
- },
-
- /**
- * @see Matcher#findKeyword
- * @param {Filter} filter
- * @return {string} keyword
- */
- findKeyword(filter)
- {
- if (filter instanceof WhitelistFilter)
- return this.whitelist.findKeyword(filter);
- return this.blacklist.findKeyword(filter);
- },
-
- /**
- * @see Matcher#hasFilter
- * @param {Filter} filter
- * @return {boolean}
- */
- hasFilter(filter)
- {
- if (filter instanceof WhitelistFilter)
- return this.whitelist.hasFilter(filter);
- return this.blacklist.hasFilter(filter);
- },
-
- /**
- * @see Matcher#getKeywordForFilter
- * @param {Filter} filter
- * @return {string} keyword
- */
- getKeywordForFilter(filter)
- {
- if (filter instanceof WhitelistFilter)
- return this.whitelist.getKeywordForFilter(filter);
- return this.blacklist.getKeywordForFilter(filter);
- },
-
- /**
- * Checks whether a particular filter is slow
- * @param {RegExpFilter} filter
- * @return {boolean}
- */
- isSlowFilter(filter)
- {
- let matcher = (
- filter instanceof WhitelistFilter ? this.whitelist : this.blacklist
- );
- if (matcher.hasFilter(filter))
- return !matcher.getKeywordForFilter(filter);
- return !matcher.findKeyword(filter);
- },
-
- /**
- * Optimized filter matching testing both whitelist and blacklist matchers
- * simultaneously. For parameters see Matcher.matchesAny().
- * @see Matcher#matchesAny
- * @inheritdoc
- */
- matchesAnyInternal(location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly)
- {
- let candidates = location.toLowerCase().match(/[a-z0-9%]{3,}/g);
- if (candidates === null)
- candidates = [];
- candidates.push("");
-
- let blacklistHit = null;
- for (let i = 0, l = candidates.length; i < l; i++)
- {
- let substr = candidates[i];
- if (substr in this.whitelist.filterByKeyword)
- {
- let result = this.whitelist._checkEntryMatch(
- substr, location, typeMask, docDomain, thirdParty, sitekey
- );
- if (result)
- return result;
- }
- if (substr in this.blacklist.filterByKeyword && blacklistHit === null)
- {
- blacklistHit = this.blacklist._checkEntryMatch(
- substr, location, typeMask, docDomain, thirdParty, sitekey,
- specificOnly
- );
- }
- }
- return blacklistHit;
- },
-
- /**
- * @see Matcher#matchesAny
- * @inheritdoc
- */
- matchesAny(location, typeMask, docDomain, thirdParty, sitekey, specificOnly)
- {
- let key = location + " " + typeMask + " " + docDomain + " " + thirdParty +
- " " + sitekey + " " + specificOnly;
- if (key in this.resultCache)
- return this.resultCache[key];
-
- let result = this.matchesAnyInternal(location, typeMask, docDomain,
- thirdParty, sitekey, specificOnly);
-
- if (this.cacheEntries >= CombinedMatcher.maxCacheEntries)
- {
- this.resultCache = Object.create(null);
- this.cacheEntries = 0;
- }
-
- this.resultCache[key] = result;
- this.cacheEntries++;
-
- return result;
- }
-};
-
-/**
- * Shared CombinedMatcher instance that should usually be used.
- * @type {CombinedMatcher}
- */
-exports.defaultMatcher = new CombinedMatcher();
« no previous file with comments | « compiled/library.js ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld