Index: lib/abp2blocklist.js |
diff --git a/lib/abp2blocklist.js b/lib/abp2blocklist.js |
index 1bece259e455539c7aebdbf220479425f7eab0e3..fa5a525b1c3a1539f4e08bec323ccdec33026b3a 100644 |
--- a/lib/abp2blocklist.js |
+++ b/lib/abp2blocklist.js |
@@ -66,28 +66,38 @@ function convertElemHideFilter(filter, elemhideSelectorExceptions) |
} |
/** |
- * Convert the given filter "regexpSource" string into a regular expression, |
- * handling the conversion of unicode inside hostnames to punycode. |
- * (Also deciding if the regular expression can be safely converted to and |
- * matched as lower case or not.) |
+ * Parse the given filter "regexpSource" string. Producing a regular expression, |
+ * extracting the hostname (if any), deciding if the regular expression is safe |
+ * to be converted + matched as lower case and noting if the source contains |
+ * anything after the hostname.) |
* |
* @param {string} text regexpSource property of a filter |
- * @returns {object} An object containing a regular expression string and a bool |
+ * @returns {object} An object containing a regular expression string, a bool |
* indicating if the filter can be safely matched as lower |
- * case: {regexp: "...", canSafelyMatchAsLowercase: true/false} |
+ * case, a hostname string (or undefined) and a bool |
+ * indicating if the source only contains a hostname or not: |
+ * {regexp: "...", |
+ * canSafelyMatchAsLowercase: true/false, |
+ * hostname: "...", |
+ * justHostname: true/false} |
*/ |
-function toRegExp(text) |
+function parseFilterRegexpSource(text) |
{ |
- let result = []; |
+ let regexp = []; |
let lastIndex = text.length - 1; |
+ let hostname; |
let hostnameStart = null; |
let hostnameFinished = false; |
+ let justHostname = false; |
let canSafelyMatchAsLowercase = false; |
for (let i = 0; i < text.length; i++) |
{ |
let c = text[i]; |
+ if (hostnameFinished) |
+ justHostname = false; |
+ |
// If we're currently inside the hostname we have to be careful not to |
// escape any characters until after we have converted it to punycode. |
if (hostnameStart != null && !hostnameFinished) |
@@ -97,9 +107,11 @@ function toRegExp(text) |
if (!endingChar && i != lastIndex) |
continue; |
- let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); |
- hostnameFinished = true; |
- result.push(escapeRegExp(punycode.toASCII(hostname))); |
+ hostname = punycode.toASCII( |
+ text.substring(hostnameStart, endingChar ? i : i + 1) |
+ ); |
+ hostnameFinished = justHostname = true; |
+ regexp.push(escapeRegExp(hostname)); |
if (!endingChar) |
break; |
} |
@@ -107,32 +119,32 @@ function toRegExp(text) |
switch (c) |
{ |
case "*": |
- if (result.length > 0 && i < lastIndex && text[i + 1] != "*") |
- result.push(".*"); |
+ if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
+ regexp.push(".*"); |
break; |
case "^": |
if (i < lastIndex) |
- result.push("."); |
+ regexp.push("."); |
break; |
case "|": |
if (i == 0) |
{ |
- result.push("^"); |
+ regexp.push("^"); |
break; |
} |
if (i == lastIndex) |
{ |
- result.push("$"); |
+ regexp.push("$"); |
break; |
} |
if (i == 1 && text[0] == "|") |
{ |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
- result.push("https?://"); |
+ regexp.push("https?://"); |
break; |
} |
- result.push("\\|"); |
+ regexp.push("\\|"); |
break; |
case "/": |
if (!hostnameFinished && |
@@ -141,44 +153,27 @@ function toRegExp(text) |
hostnameStart = i + 1; |
canSafelyMatchAsLowercase = true; |
} |
- result.push("/"); |
+ regexp.push("/"); |
break; |
case ".": case "+": case "$": case "?": |
case "{": case "}": case "(": case ")": |
case "[": case "]": case "\\": |
- result.push("\\", c); |
+ regexp.push("\\", c); |
break; |
default: |
if (hostnameFinished && (c >= "a" && c <= "z" || |
c >= "A" && c <= "Z")) |
canSafelyMatchAsLowercase = false; |
- result.push(c); |
+ regexp.push(c); |
} |
} |
- return {regexp: result.join(""), |
- canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; |
-} |
- |
-function getRegExpTrigger(filter) |
-{ |
- let result = toRegExp(filter.regexpSource); |
- |
- let trigger = {"url-filter": result.regexp}; |
- |
- // Limit rules to to HTTP(S) URLs |
- if (!/^(\^|http)/i.test(trigger["url-filter"])) |
- trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
- |
- // For rules containing only a hostname we know that we're matching against |
- // a lowercase string unless the matchCase option was passed. |
- if (result.canSafelyMatchAsLowercase && !filter.matchCase) |
- trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
- |
- if (result.canSafelyMatchAsLowercase || filter.matchCase) |
- trigger["url-filter-is-case-sensitive"] = true; |
- |
- return trigger; |
+ return { |
+ regexp: regexp.join(""), |
+ canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
+ hostname: hostname, |
+ justHostname: justHostname |
+ }; |
} |
function getResourceTypes(filter) |
@@ -223,9 +218,43 @@ function addDomainPrefix(domains) |
return result; |
} |
-function convertFilter(filter, action, withResourceTypes) |
+function convertFilterAddRules(rules, filter, action, withResourceTypes) |
{ |
- let trigger = getRegExpTrigger(filter); |
+ let parsed = parseFilterRegexpSource(filter.regexpSource); |
+ |
+ // For the special case of $document whitelisting filters with just a domain |
+ // we can generate an equivalent blocking rule exception using if-domain. |
+ if (filter instanceof filterClasses.WhitelistFilter && |
+ filter.contentType & typeMap.DOCUMENT && |
+ parsed.justHostname) |
+ { |
+ rules.push({ |
+ trigger: { |
+ "url-filter": ".*", |
+ "if-domain": addDomainPrefix([parsed.hostname]) |
+ }, |
+ action: {type: "ignore-previous-rules"} |
+ }); |
+ // If the filter contains multiple options we'll need to generate further |
+ // rules for it, but if not we can simply return now. |
+ if (filter.contentType == typeMap.DOCUMENT) |
Sebastian Noack
2016/05/17 10:55:24
What if the filter is @@||example.com$document,ele
kzar
2016/05/17 11:23:34
Done.
|
+ return rules; |
+ } |
+ |
+ let trigger = {"url-filter": parsed.regexp}; |
+ |
+ // Limit rules to HTTP(S) URLs |
+ if (!/^(\^|http)/i.test(trigger["url-filter"])) |
+ trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; |
+ |
+ // For rules containing only a hostname we know that we're matching against |
+ // a lowercase string unless the matchCase option was passed. |
+ if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) |
+ trigger["url-filter"] = trigger["url-filter"].toLowerCase(); |
+ |
+ if (parsed.canSafelyMatchAsLowercase || filter.matchCase) |
+ trigger["url-filter-is-case-sensitive"] = true; |
+ |
let included = []; |
let excluded = []; |
@@ -241,7 +270,9 @@ function convertFilter(filter, action, withResourceTypes) |
else if (excluded.length > 0) |
trigger["unless-domain"] = addDomainPrefix(excluded); |
- return {trigger: trigger, action: {type: action}}; |
+ rules.push({trigger: trigger, action: {type: action}}); |
+ |
+ return rules; |
Sebastian Noack
2016/05/17 10:55:24
You don't have to return the rules here anymore.
kzar
2016/05/17 11:23:34
Oops, Done.
|
} |
function hasNonASCI(obj) |
@@ -352,7 +383,8 @@ ContentBlockerList.prototype.addFilter = function(filter) |
if (filter instanceof filterClasses.WhitelistFilter) |
{ |
- if (filter.contentType & (typeMap.IMAGE |
+ if (filter.contentType & (typeMap.DOCUMENT |
+ | typeMap.IMAGE |
| typeMap.STYLESHEET |
| typeMap.SCRIPT |
| typeMap.FONT |
@@ -392,12 +424,6 @@ ContentBlockerList.prototype.generateRules = function(filter) |
{ |
let rules = []; |
- function addRule(rule) |
- { |
- if (!hasNonASCI(rule)) |
- rules.push(rule); |
- } |
- |
let groupedElemhideFilters = new Map(); |
for (let filter of this.elemhideFilters) |
{ |
@@ -426,7 +452,7 @@ ContentBlockerList.prototype.generateRules = function(filter) |
// this by converting to the attribute format [id="elementID"] |
selector = convertIDSelectorsToAttributeSelectors(selector); |
- addRule({ |
+ rules.push({ |
trigger: {"url-filter": matchDomain, |
"url-filter-is-case-sensitive": true}, |
action: {type: "css-display-none", |
@@ -436,11 +462,11 @@ ContentBlockerList.prototype.generateRules = function(filter) |
}); |
for (let filter of this.elemhideExceptions) |
- addRule(convertFilter(filter, "ignore-previous-rules", false)); |
+ convertFilterAddRules(rules, filter, "ignore-previous-rules", false); |
for (let filter of this.requestFilters) |
- addRule(convertFilter(filter, "block", true)); |
+ convertFilterAddRules(rules, filter, "block", true); |
for (let filter of this.requestExceptions) |
- addRule(convertFilter(filter, "ignore-previous-rules", true)); |
+ convertFilterAddRules(rules, filter, "ignore-previous-rules", true); |
- return rules; |
+ return rules.filter(rule => !hasNonASCI(rule)); |
}; |