Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-2016 Eyeo GmbH | 3 * Copyright (C) 2006-2016 Eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
(...skipping 48 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
59 let excluded = []; | 59 let excluded = []; |
60 let rules = []; | 60 let rules = []; |
61 | 61 |
62 parseDomains(filter.domains, included, excluded); | 62 parseDomains(filter.domains, included, excluded); |
63 | 63 |
64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) | 64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions)) |
65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; | 65 return {matchDomains: included.map(matchDomain), selector: filter.selector}; |
66 } | 66 } |
67 | 67 |
68 /** | 68 /** |
69 * Convert the given filter "regexpSource" string into a regular expression, | 69 * Parse the given filter "regexpSource" string, producing a regular expression, |
70 * handling the conversion of unicode inside hostnames to punycode. | 70 * extracting the hostname (if any), deciding if the regular expression is safe |
71 * (Also deciding if the regular expression can be safely converted to and | 71 * to be converted to and matched as lower case, and noting if the source contains |
72 * matched as lower case or not.) | 72 * anything after the hostname. |
73 * | 73 * |
74 * @param {string} text regexpSource property of a filter | 74 * @param {string} text regexpSource property of a filter |
75 * @returns {object} An object containing a regular expression string and a bool | 75 * @returns {object} An object containing a regular expression string, a bool |
76 * indicating if the filter can be safely matched as lower | 76 * indicating if the filter can be safely matched as lower |
77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false } | 77 * case, a hostname string (or undefined) and a bool |
78 * indicating if the source only contains a hostname or not: | |
79 * {regexp: "...", | |
80 * canSafelyMatchAsLowercase: true/false, | |
81 * hostname: "...", | |
82 * justHostname: true/false} | |
78 */ | 83 */ |
79 function toRegExp(text) | 84 function parseFilterRegexpSource(text) |
80 { | 85 { |
81 let result = []; | 86 let regexp = []; |
82 let lastIndex = text.length - 1; | 87 let lastIndex = text.length - 1; |
88 let hostname; | |
83 let hostnameStart = null; | 89 let hostnameStart = null; |
84 let hostnameFinished = false; | 90 let hostnameFinished = false; |
91 let justHostname = false; | |
85 let canSafelyMatchAsLowercase = false; | 92 let canSafelyMatchAsLowercase = false; |
86 | 93 |
87 for (let i = 0; i < text.length; i++) | 94 for (let i = 0; i < text.length; i++) |
88 { | 95 { |
89 let c = text[i]; | 96 let c = text[i]; |
90 | 97 |
98 if (hostnameFinished) | |
99 justHostname = false; | |
100 | |
91 // If we're currently inside the hostname we have to be careful not to | 101 // If we're currently inside the hostname we have to be careful not to |
92 // escape any characters until after we have converted it to punycode. | 102 // escape any characters until after we have converted it to punycode. |
93 if (hostnameStart != null && !hostnameFinished) | 103 if (hostnameStart != null && !hostnameFinished) |
94 { | 104 { |
95 let endingChar = (c == "*" || c == "^" || | 105 let endingChar = (c == "*" || c == "^" || |
96 c == "?" || c == "/" || c == "|"); | 106 c == "?" || c == "/" || c == "|"); |
97 if (!endingChar && i != lastIndex) | 107 if (!endingChar && i != lastIndex) |
98 continue; | 108 continue; |
99 | 109 |
100 let hostname = text.substring(hostnameStart, endingChar ? i : i + 1); | 110 hostname = punycode.toASCII( |
101 hostnameFinished = true; | 111 text.substring(hostnameStart, endingChar ? i : i + 1) |
102 result.push(escapeRegExp(punycode.toASCII(hostname))); | 112 ); |
113 hostnameFinished = justHostname = true; | |
114 regexp.push(escapeRegExp(hostname)); | |
103 if (!endingChar) | 115 if (!endingChar) |
104 break; | 116 break; |
105 } | 117 } |
106 | 118 |
107 switch (c) | 119 switch (c) |
108 { | 120 { |
109 case "*": | 121 case "*": |
110 if (result.length > 0 && i < lastIndex && text[i + 1] != "*") | 122 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*") |
111 result.push(".*"); | 123 regexp.push(".*"); |
112 break; | 124 break; |
113 case "^": | 125 case "^": |
114 if (i < lastIndex) | 126 if (i < lastIndex) |
115 result.push("."); | 127 regexp.push("."); |
116 break; | 128 break; |
117 case "|": | 129 case "|": |
118 if (i == 0) | 130 if (i == 0) |
119 { | 131 { |
120 result.push("^"); | 132 regexp.push("^"); |
121 break; | 133 break; |
122 } | 134 } |
123 if (i == lastIndex) | 135 if (i == lastIndex) |
124 { | 136 { |
125 result.push("$"); | 137 regexp.push("$"); |
126 break; | 138 break; |
127 } | 139 } |
128 if (i == 1 && text[0] == "|") | 140 if (i == 1 && text[0] == "|") |
129 { | 141 { |
130 hostnameStart = i + 1; | 142 hostnameStart = i + 1; |
131 canSafelyMatchAsLowercase = true; | 143 canSafelyMatchAsLowercase = true; |
132 result.push("https?://"); | 144 regexp.push("https?://"); |
133 break; | 145 break; |
134 } | 146 } |
135 result.push("\\|"); | 147 regexp.push("\\|"); |
136 break; | 148 break; |
137 case "/": | 149 case "/": |
138 if (!hostnameFinished && | 150 if (!hostnameFinished && |
139 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") | 151 text.charAt(i-2) == ":" && text.charAt(i-1) == "/") |
140 { | 152 { |
141 hostnameStart = i + 1; | 153 hostnameStart = i + 1; |
142 canSafelyMatchAsLowercase = true; | 154 canSafelyMatchAsLowercase = true; |
143 } | 155 } |
144 result.push("/"); | 156 regexp.push("/"); |
145 break; | 157 break; |
146 case ".": case "+": case "$": case "?": | 158 case ".": case "+": case "$": case "?": |
147 case "{": case "}": case "(": case ")": | 159 case "{": case "}": case "(": case ")": |
148 case "[": case "]": case "\\": | 160 case "[": case "]": case "\\": |
149 result.push("\\", c); | 161 regexp.push("\\", c); |
150 break; | 162 break; |
151 default: | 163 default: |
152 if (hostnameFinished && (c >= "a" && c <= "z" || | 164 if (hostnameFinished && (c >= "a" && c <= "z" || |
153 c >= "A" && c <= "Z")) | 165 c >= "A" && c <= "Z")) |
154 canSafelyMatchAsLowercase = false; | 166 canSafelyMatchAsLowercase = false; |
155 result.push(c); | 167 regexp.push(c); |
156 } | 168 } |
157 } | 169 } |
158 | 170 |
159 return {regexp: result.join(""), | 171 return { |
160 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase}; | 172 regexp: regexp.join(""), |
161 } | 173 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase, |
162 | 174 hostname: hostname, |
163 function getRegExpTrigger(filter) | 175 justHostname: justHostname |
164 { | 176 }; |
165 let result = toRegExp(filter.regexpSource); | |
166 | |
167 let trigger = {"url-filter": result.regexp}; | |
168 | |
169 // Limit rules to to HTTP(S) URLs | |
170 if (!/^(\^|http)/i.test(trigger["url-filter"])) | |
171 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | |
172 | |
173 // For rules containing only a hostname we know that we're matching against | |
174 // a lowercase string unless the matchCase option was passed. | |
175 if (result.canSafelyMatchAsLowercase && !filter.matchCase) | |
176 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | |
177 | |
178 if (result.canSafelyMatchAsLowercase || filter.matchCase) | |
179 trigger["url-filter-is-case-sensitive"] = true; | |
180 | |
181 return trigger; | |
182 } | 177 } |
183 | 178 |
184 function getResourceTypes(filter) | 179 function getResourceTypes(filter) |
185 { | 180 { |
186 let types = []; | 181 let types = []; |
187 | 182 |
188 if (filter.contentType & typeMap.IMAGE) | 183 if (filter.contentType & typeMap.IMAGE) |
189 types.push("image"); | 184 types.push("image"); |
190 if (filter.contentType & typeMap.STYLESHEET) | 185 if (filter.contentType & typeMap.STYLESHEET) |
191 types.push("style-sheet"); | 186 types.push("style-sheet"); |
(...skipping 26 matching lines...) Expand all Loading... | |
218 | 213 |
219 if (tldjs.getDomain(domain) == domain) | 214 if (tldjs.getDomain(domain) == domain) |
220 result.push("www." + domain); | 215 result.push("www." + domain); |
221 } | 216 } |
222 | 217 |
223 return result; | 218 return result; |
224 } | 219 } |
225 | 220 |
226 function convertFilter(filter, action, withResourceTypes) | 221 function convertFilter(filter, action, withResourceTypes) |
227 { | 222 { |
228 let trigger = getRegExpTrigger(filter); | 223 let rules = []; |
224 let parsed = parseFilterRegexpSource(filter.regexpSource); | |
225 | |
226 // For the special case of $document whitelisting filters with just a domain | |
227 // we can generate an equivalent blocking rule exception using if-domain. | |
228 if (filter instanceof filterClasses.WhitelistFilter && | |
229 filter.contentType & typeMap.DOCUMENT && | |
230 parsed.justHostname) | |
231 { | |
232 rules.push({ | |
233 trigger: { | |
234 "url-filter": ".*", | |
235 "if-domain": addDomainPrefix([parsed.hostname]) | |
236 }, | |
237 action: {type: "ignore-previous-rules"} | |
238 }); | |
239 // If the filter contains multiple options we'll need to generate further | |
240 // rules for it, but if not we can simply return now. | |
241 if (filter.contentType == typeMap.DOCUMENT) | |
242 return rules; | |
243 } | |
244 | |
245 let trigger = {"url-filter": parsed.regexp}; | |
246 | |
247 // Limit rules to HTTP(S) URLs | |
248 if (!/^(\^|http)/i.test(trigger["url-filter"])) | |
249 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"]; | |
250 | |
251 // For rules containing only a hostname we know that we're matching against | |
252 // a lowercase string unless the matchCase option was passed. | |
253 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase) | |
254 trigger["url-filter"] = trigger["url-filter"].toLowerCase(); | |
255 | |
256 if (parsed.canSafelyMatchAsLowercase || filter.matchCase) | |
257 trigger["url-filter-is-case-sensitive"] = true; | |
258 | |
229 let included = []; | 259 let included = []; |
230 let excluded = []; | 260 let excluded = []; |
231 | 261 |
232 parseDomains(filter.domains, included, excluded); | 262 parseDomains(filter.domains, included, excluded); |
233 | 263 |
234 if (withResourceTypes) | 264 if (withResourceTypes) |
235 trigger["resource-type"] = getResourceTypes(filter); | 265 trigger["resource-type"] = getResourceTypes(filter); |
236 if (filter.thirdParty != null) | 266 if (filter.thirdParty != null) |
237 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; | 267 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"]; |
238 | 268 |
239 if (included.length > 0) | 269 if (included.length > 0) |
240 trigger["if-domain"] = addDomainPrefix(included); | 270 trigger["if-domain"] = addDomainPrefix(included); |
241 else if (excluded.length > 0) | 271 else if (excluded.length > 0) |
242 trigger["unless-domain"] = addDomainPrefix(excluded); | 272 trigger["unless-domain"] = addDomainPrefix(excluded); |
243 | 273 |
244 return {trigger: trigger, action: {type: action}}; | 274 rules.push({trigger: trigger, action: {type: action}}); |
275 | |
276 return rules; | |
245 } | 277 } |
246 | 278 |
247 function hasNonASCI(obj) | 279 function hasNonASCI(obj) |
248 { | 280 { |
249 if (typeof obj == "string") | 281 if (typeof obj == "string") |
250 { | 282 { |
251 if (/[^\x00-\x7F]/.test(obj)) | 283 if (/[^\x00-\x7F]/.test(obj)) |
252 return true; | 284 return true; |
253 } | 285 } |
254 | 286 |
(...skipping 90 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
345 return; | 377 return; |
346 if (filter instanceof filterClasses.RegExpFilter && | 378 if (filter instanceof filterClasses.RegExpFilter && |
347 filter.regexpSource == null) | 379 filter.regexpSource == null) |
348 return; | 380 return; |
349 | 381 |
350 if (filter instanceof filterClasses.BlockingFilter) | 382 if (filter instanceof filterClasses.BlockingFilter) |
351 this.requestFilters.push(filter); | 383 this.requestFilters.push(filter); |
352 | 384 |
353 if (filter instanceof filterClasses.WhitelistFilter) | 385 if (filter instanceof filterClasses.WhitelistFilter) |
354 { | 386 { |
355 if (filter.contentType & (typeMap.IMAGE | 387 if (filter.contentType & (typeMap.DOCUMENT |
388 | typeMap.IMAGE | |
356 | typeMap.STYLESHEET | 389 | typeMap.STYLESHEET |
357 | typeMap.SCRIPT | 390 | typeMap.SCRIPT |
358 | typeMap.FONT | 391 | typeMap.FONT |
359 | typeMap.MEDIA | 392 | typeMap.MEDIA |
360 | typeMap.POPUP | 393 | typeMap.POPUP |
361 | typeMap.OBJECT | 394 | typeMap.OBJECT |
362 | typeMap.OBJECT_SUBREQUEST | 395 | typeMap.OBJECT_SUBREQUEST |
363 | typeMap.XMLHTTPREQUEST | 396 | typeMap.XMLHTTPREQUEST |
364 | typeMap.PING | 397 | typeMap.PING |
365 | typeMap.SUBDOCUMENT | 398 | typeMap.SUBDOCUMENT |
(...skipping 19 matching lines...) Expand all Loading... | |
385 | 418 |
386 /** | 419 /** |
387 * Generate content blocker list for all filters that were added | 420 * Generate content blocker list for all filters that were added |
388 * | 421 * |
389 * @returns {Object[]} Content blocker rules generated from the added filters | 422 * @returns {Object[]} Content blocker rules generated from the added filters |
390 */ | 423 */ |
391 ContentBlockerList.prototype.generateRules = function(filter) | 424 ContentBlockerList.prototype.generateRules = function(filter) |
392 { | 425 { |
393 let rules = []; | 426 let rules = []; |
394 | 427 |
395 function addRule(rule) | 428 function addRules(newRules) |
396 { | 429 { |
430 for (let rule of newRules) | |
397 if (!hasNonASCI(rule)) | 431 if (!hasNonASCI(rule)) |
398 rules.push(rule); | 432 rules.push(rule); |
399 } | 433 } |
400 | 434 |
401 let groupedElemhideFilters = new Map(); | 435 let groupedElemhideFilters = new Map(); |
402 for (let filter of this.elemhideFilters) | 436 for (let filter of this.elemhideFilters) |
403 { | 437 { |
404 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); | 438 let result = convertElemHideFilter(filter, this.elemhideSelectorExceptions); |
405 if (!result) | 439 if (!result) |
406 continue; | 440 continue; |
(...skipping 12 matching lines...) Expand all Loading... | |
419 groupedElemhideFilters.forEach((selectors, matchDomain) => | 453 groupedElemhideFilters.forEach((selectors, matchDomain) => |
420 { | 454 { |
421 while (selectors.length) | 455 while (selectors.length) |
422 { | 456 { |
423 let selector = selectors.splice(0, selectorLimit).join(", "); | 457 let selector = selectors.splice(0, selectorLimit).join(", "); |
424 | 458 |
425 // As of Safari 9.0 element IDs are matched as lowercase. We work around | 459 // As of Safari 9.0 element IDs are matched as lowercase. We work around |
426 // this by converting to the attribute format [id="elementID"] | 460 // this by converting to the attribute format [id="elementID"] |
427 selector = convertIDSelectorsToAttributeSelectors(selector); | 461 selector = convertIDSelectorsToAttributeSelectors(selector); |
428 | 462 |
429 addRule({ | 463 addRules([{ |
Sebastian Noack
2016/05/17 10:17:54
We are creating quite a few temporary arrays now.
kzar
2016/05/17 10:38:02
Done.
| |
430 trigger: {"url-filter": matchDomain, | 464 trigger: {"url-filter": matchDomain, |
431 "url-filter-is-case-sensitive": true}, | 465 "url-filter-is-case-sensitive": true}, |
432 action: {type: "css-display-none", | 466 action: {type: "css-display-none", |
433 selector: selector} | 467 selector: selector} |
434 }); | 468 }]); |
435 } | 469 } |
436 }); | 470 }); |
437 | 471 |
438 for (let filter of this.elemhideExceptions) | 472 for (let filter of this.elemhideExceptions) |
439 addRule(convertFilter(filter, "ignore-previous-rules", false)); | 473 addRules(convertFilter(filter, "ignore-previous-rules", false)); |
440 for (let filter of this.requestFilters) | 474 for (let filter of this.requestFilters) |
441 addRule(convertFilter(filter, "block", true)); | 475 addRules(convertFilter(filter, "block", true)); |
442 for (let filter of this.requestExceptions) | 476 for (let filter of this.requestExceptions) |
443 addRule(convertFilter(filter, "ignore-previous-rules", true)); | 477 addRules(convertFilter(filter, "ignore-previous-rules", true)); |
444 | 478 |
445 return rules; | 479 return rules; |
446 }; | 480 }; |
OLD | NEW |