Index: lib/abp2blocklist.js |
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js |
index 1bece259e455539c7aebdbf220479425f7eab0e3..0a1ec26e3e7907c5cd76357ba7887c5770c974a5 100644 |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -66,28 +66,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions) |
} |
/** |
- * Convert the given filter "regexpSource" string into a regular expression, |
- * handling the conversion of unicode inside hostnames to punycode. |
- * (Also deciding if the regular expression can be safely converted to and |
- * matched as lower case or not.) |
+ * Parse the given filter "regexpSource" string, producing a regular expression, |
+ * extracting the hostname (if any), deciding if the regular expression is safe |
+ * to be converted to and matched as lower case, and noting whether the source |
+ * contains anything after the hostname. |
* |
* @param {string} text regexpSource property of a filter |
- * @returns {object} An object containing a regular expression string and a bool |
+ * @returns {object} An object containing a regular expression string, a bool |
* indicating if the filter can be safely matched as lower |
- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false} |
+ * case, a hostname string (or undefined) and a bool |
+ * indicating if the source only contains a hostname or not: |
+ * {regexp: "...", |
+ * canSafelyMatchAsLowercase: true/false, |
+ * hostname: "...", |
+ * justHostname: true/false} |
*/ |
-function toRegExp(text) |
+function parseFilterRegexpSource(text) |
{ |
- let result = []; |
+ let regexp = []; |
let lastIndex = text.length - 1; |
+ let hostname; |
let hostnameStart = null; |
let hostnameFinished = false; |
+ let justHostname = false; |
let canSafelyMatchAsLowercase = false; |
for (let i = 0; i < text.length; i++) |
{ |
let c = text[i]; |
+ if (hostnameFinished) |
+ justHostname = false; |
+ |
// If we're currently inside the hostname we have to be careful not to |
// escape any characters until after we have converted it to punycode. |
if (hostnameStart != null && !hostnameFinished) |
@@ -97,9 +107,11 @@ function toRegExp(text) |
if (!endingChar && i != lastIndex) |
continue; |
- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); |
- hostnameFinished = true; |
- result.push(escapeRegExp(punycode.toASCII(hostname))); |
+ hostname = punycode.toASCII( |
+ text.substring(hostnameStart, endingChar ? i : i + 1) |
+ ); |
+ hostnameFinished = justHostname = true; |
+ regexp.push(escapeRegExp(hostname)); |
if (!endingChar) |
break; |
} |
@@ -107,32 +119,32 @@ function toRegExp(text) |
switch (c) |
{ |
case "*": |
- if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
- result.push(".*"); |
+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
+ regexp.push(".*"); |
break; |
case "^": |
if (i < lastIndex) |
- result.push("."); |
+ regexp.push("."); |
break; |
case "|": |
if (i == 0) |
{ |
- result.push("^"); |
+ regexp.push("^"); |
break; |
} |
if (i == lastIndex) |
{ |
- result.push("$"); |
+ regexp.push("$"); |
break; |
} |
if (i == 1 && text[0] == "|") |
{ |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
- result.push("https?://"); |
+ regexp.push("https?://"); |
break; |
} |
- result.push("\\|"); |
+ regexp.push("\\|"); |
break; |
case "/": |
if (!hostnameFinished && |
@@ -141,44 +153,27 @@ function toRegExp(text) |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
} |
- result.push("/"); |
+ regexp.push("/"); |
break; |
case ".": case "+": case "$": case "?": |
case "{": case "}": case "(": case ")": |
case "[": case "]": case "\\": |
- result.push("\\", c); |
+ regexp.push("\\", c); |
break; |
default: |
if (hostnameFinished && (c >= "a" && c <= "z" || |
c >= "A" && c <= "Z")) |
canSafelyMatchAsLowercase = false; |
- result.push(c); |
+ regexp.push(c); |
} |
} |
- return {regexp: result.join(""), |
- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; |
-} |
- |
-function getRegExpTrigger(filter) |
-{ |
- let result = toRegExp(filter.regexpSource); |
- |
- let trigger = {"url-filter": result.regexp}; |
- |
- // Limit rules to to HTTP(S) URLs |
- if (!/^(\^|http)/i.test(trigger["url-filter"])) |
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
- |
- // For rules containing only a hostname we know that we're matching against |
- // a lowercase string unless the matchCase option was passed. |
- if (result.canSafelyMatchAsLowercase && !filter.matchCase) |
- trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
- |
- if (result.canSafelyMatchAsLowercase || filter.matchCase) |
- trigger["url-filter-is-case-sensitive"] = true; |
- |
- return trigger; |
+ return { |
+ regexp: regexp.join(""), |
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
+ hostname: hostname, |
+ justHostname: justHostname |
+ }; |
} |
function getResourceTypes(filter) |
@@ -225,7 +220,29 @@ function addDomainPrefix(domains) |
function convertFilter(filter, action, withResourceTypes) |
{ |
- let trigger = getRegExpTrigger(filter); |
+ let parsed = parseFilterRegexpSource(filter.regexpSource); |
+ |
+ // For the special case of $document whitelisting filters with just a domain |
+ // we can generate an equivalent blocking rule exception using if-domain. |
+ if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname) |
Sebastian Noack
2016/05/12 12:12:25
For filters like example.com$document,image we wou
kzar
2016/05/16 16:22:36
Done.
|
+ return {trigger: {"url-filter": ".*", |
Sebastian Noack
2016/05/12 12:12:26
Nit: Mind wrapping the nested object for better re
Sebastian Noack
2016/05/12 12:12:26
Wouldn't an empty string be sufficient as url-filt
kzar
2016/05/16 16:22:36
Done.
kzar
2016/05/16 16:22:36
Unfortunately this causes a "Extension compilation
|
+ "if-domain": addDomainPrefix([parsed.hostname])}, |
+ action: {type: "ignore-previous-rules"}}; |
+ |
+ let trigger = {"url-filter": parsed.regexp}; |
+ |
+ // Limit rules to HTTP(S) URLs |
Sebastian Noack
2016/05/12 12:12:26
Typo: to to
kzar
2016/05/16 16:22:36
Done.
|
+ if (!/^(\^|http)/i.test(trigger["url-filter"])) |
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
+ |
+ // For rules containing only a hostname we know that we're matching against |
+ // a lowercase string unless the matchCase option was passed. |
+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
+ |
+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
+ trigger["url-filter-is-case-sensitive"] = true; |
+ |
let included = []; |
let excluded = []; |
@@ -352,7 +369,8 @@ ContentBlockerList.prototype.addFilter = function(filter) |
if (filter instanceof filterClasses.WhitelistFilter) |
{ |
- if (filter.contentType & (typeMap.IMAGE |
+ if (filter.contentType & (typeMap.DOCUMENT |
+ | typeMap.IMAGE |
| typeMap.STYLESHEET |
| typeMap.SCRIPT |
| typeMap.FONT |