lib/abp2blocklist.js - Issue 29340694: Issue 3956 - Convert domain whitelisting filters

Side by Side Diff: lib/abp2blocklist.js

Issue 29340694: Issue 3956 - Convert domain whitelisting filters (Closed)

Patch Set: Created April 20, 2016, 5:09 p.m.

Left:
Right:

Use n/p to move between diff chunks; N/P to move between comments.

Jump to:

View unified diff | Download patch

OLD	NEW
1 /*	1 /*

2 * This file is part of Adblock Plus <https://adblockplus.org/>,	2 * This file is part of Adblock Plus <https://adblockplus.org/>,

3 * Copyright (C) 2006-2016 Eyeo GmbH	3 * Copyright (C) 2006-2016 Eyeo GmbH

4 *	4 *

5 * Adblock Plus is free software: you can redistribute it and/or modify	5 * Adblock Plus is free software: you can redistribute it and/or modify

6 * it under the terms of the GNU General Public License version 3 as	6 * it under the terms of the GNU General Public License version 3 as

7 * published by the Free Software Foundation.	7 * published by the Free Software Foundation.

8 *	8 *

9 * Adblock Plus is distributed in the hope that it will be useful,	9 * Adblock Plus is distributed in the hope that it will be useful,

10 * but WITHOUT ANY WARRANTY; without even the implied warranty of	10 * but WITHOUT ANY WARRANTY; without even the implied warranty of

(...skipping 48 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
59 let excluded = [];	59 let excluded = [];

60 let rules = [];	60 let rules = [];

61	61

62 parseDomains(filter.domains, included, excluded);	62 parseDomains(filter.domains, included, excluded);

63	63

64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))	64 if (excluded.length == 0 && !(filter.selector in elemhideSelectorExceptions))

65 return {matchDomains: included.map(matchDomain), selector: filter.selector};	65 return {matchDomains: included.map(matchDomain), selector: filter.selector};

66 }	66 }

67	67

68 /**	68 /**

69 * Convert the given filter "regexpSource" string into a regular expression,	69 * Parse the given filter "regexpSource" string. Producing a regular expression,

70 * handling the conversion of unicode inside hostnames to punycode.	70 * extracting the hostname (if any), deciding if the regular expression is safe

71 * (Also deciding if the regular expression can be safely converted to and	71 * to be converted + matched as lower case and noting if the source contains

72 * matched as lower case or not.)	72 * anything after the hostname.)

73 *	73 *

74 * @param {string} text regexpSource property of a filter	74 * @param {string} text regexpSource property of a filter

75 * @returns {object} An object containing a regular expression string and a bool	75 * @returns {object} An object containing a regular expression string, a bool

76 * indicating if the filter can be safely matched as lower	76 * indicating if the filter can be safely matched as lower

77 * case: {regexp: "...", canSafelyMatchAsLowercase: true/false }	77 * case, a hostname string (or undefined) and a bool

	78 * indicating if the source only contains a hostname or not:

	79 * {regexp: "...",

	80 * canSafelyMatchAsLowercase: true/false,

	81 * hostname: "...",

	82 * justHostname: true/false}

78 */	83 */

79 function toRegExp(text)	84 function parseFilterRegexpSource(text)

80 {	85 {

81 let result = [];	86 let regexp = [];

82 let lastIndex = text.length - 1;	87 let lastIndex = text.length - 1;

	88 let hostname;

83 let hostnameStart = null;	89 let hostnameStart = null;

84 let hostnameFinished = false;	90 let hostnameFinished = false;

	91 let justHostname = false;

85 let canSafelyMatchAsLowercase = false;	92 let canSafelyMatchAsLowercase = false;

86	93

87 for (let i = 0; i < text.length; i++)	94 for (let i = 0; i < text.length; i++)

88 {	95 {

89 let c = text[i];	96 let c = text[i];

90	97

	98 if (hostnameFinished)

	99 justHostname = false;

	100

91 // If we're currently inside the hostname we have to be careful not to	101 // If we're currently inside the hostname we have to be careful not to

92 // escape any characters until after we have converted it to punycode.	102 // escape any characters until after we have converted it to punycode.

93 if (hostnameStart != null && !hostnameFinished)	103 if (hostnameStart != null && !hostnameFinished)

94 {	104 {

95 let endingChar = (c == "*" \|\| c == "^" \|\|	105 let endingChar = (c == "*" \|\| c == "^" \|\|

96 c == "?" \|\| c == "/" \|\| c == "\|");	106 c == "?" \|\| c == "/" \|\| c == "\|");

97 if (!endingChar && i != lastIndex)	107 if (!endingChar && i != lastIndex)

98 continue;	108 continue;

99	109

100 let hostname = text.substring(hostnameStart, endingChar ? i : i + 1);	110 hostname = punycode.toASCII(

101 hostnameFinished = true;	111 text.substring(hostnameStart, endingChar ? i : i + 1)

102 result.push(escapeRegExp(punycode.toASCII(hostname)));	112 );

	113 hostnameFinished = justHostname = true;

	114 regexp.push(escapeRegExp(hostname));

103 if (!endingChar)	115 if (!endingChar)

104 break;	116 break;

105 }	117 }

106	118

107 switch (c)	119 switch (c)

108 {	120 {

109 case "*":	121 case "*":

110 if (result.length > 0 && i < lastIndex && text[i + 1] != "*")	122 if (regexp.length > 0 && i < lastIndex && text[i + 1] != "*")

111 result.push(".*");	123 regexp.push(".*");

112 break;	124 break;

113 case "^":	125 case "^":

114 if (i < lastIndex)	126 if (i < lastIndex)

115 result.push(".");	127 regexp.push(".");

116 break;	128 break;

117 case "\|":	129 case "\|":

118 if (i == 0)	130 if (i == 0)

119 {	131 {

120 result.push("^");	132 regexp.push("^");

121 break;	133 break;

122 }	134 }

123 if (i == lastIndex)	135 if (i == lastIndex)

124 {	136 {

125 result.push("$");	137 regexp.push("$");

126 break;	138 break;

127 }	139 }

128 if (i == 1 && text[0] == "\|")	140 if (i == 1 && text[0] == "\|")

129 {	141 {

130 hostnameStart = i + 1;	142 hostnameStart = i + 1;

131 canSafelyMatchAsLowercase = true;	143 canSafelyMatchAsLowercase = true;

132 result.push("https?://");	144 regexp.push("https?://");

133 break;	145 break;

134 }	146 }

135 result.push("\\\|");	147 regexp.push("\\\|");

136 break;	148 break;

137 case "/":	149 case "/":

138 if (!hostnameFinished &&	150 if (!hostnameFinished &&

139 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")	151 text.charAt(i-2) == ":" && text.charAt(i-1) == "/")

140 {	152 {

141 hostnameStart = i + 1;	153 hostnameStart = i + 1;

142 canSafelyMatchAsLowercase = true;	154 canSafelyMatchAsLowercase = true;

143 }	155 }

144 result.push("/");	156 regexp.push("/");

145 break;	157 break;

146 case ".": case "+": case "$": case "?":	158 case ".": case "+": case "$": case "?":

147 case "{": case "}": case "(": case ")":	159 case "{": case "}": case "(": case ")":

148 case "[": case "]": case "\\":	160 case "[": case "]": case "\\":

149 result.push("\\", c);	161 regexp.push("\\", c);

150 break;	162 break;

151 default:	163 default:

152 if (hostnameFinished && (c >= "a" && c <= "z" \|\|	164 if (hostnameFinished && (c >= "a" && c <= "z" \|\|

153 c >= "A" && c <= "Z"))	165 c >= "A" && c <= "Z"))

154 canSafelyMatchAsLowercase = false;	166 canSafelyMatchAsLowercase = false;

155 result.push(c);	167 regexp.push(c);

156 }	168 }

157 }	169 }

158	170

159 return {regexp: result.join(""),	171 return {

160 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase};	172 regexp: regexp.join(""),

161 }	173 canSafelyMatchAsLowercase: canSafelyMatchAsLowercase,

162	174 hostname: hostname,

163 function getRegExpTrigger(filter)	175 justHostname: justHostname

164 {	176 };

165 let result = toRegExp(filter.regexpSource);

166

167 let trigger = {"url-filter": result.regexp};

168

169 // Limit rules to to HTTP(S) URLs

170 if (!/^(\^\|http)/i.test(trigger["url-filter"]))

171 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

172

173 // For rules containing only a hostname we know that we're matching against

174 // a lowercase string unless the matchCase option was passed.

175 if (result.canSafelyMatchAsLowercase && !filter.matchCase)

176 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

177

178 if (result.canSafelyMatchAsLowercase \|\| filter.matchCase)

179 trigger["url-filter-is-case-sensitive"] = true;

180

181 return trigger;

182 }	177 }

183	178

184 function getResourceTypes(filter)	179 function getResourceTypes(filter)

185 {	180 {

186 let types = [];	181 let types = [];

187	182

188 if (filter.contentType & typeMap.IMAGE)	183 if (filter.contentType & typeMap.IMAGE)

189 types.push("image");	184 types.push("image");

190 if (filter.contentType & typeMap.STYLESHEET)	185 if (filter.contentType & typeMap.STYLESHEET)

191 types.push("style-sheet");	186 types.push("style-sheet");

(...skipping 26 matching lines...) Expand all Loading...
218	213

219 if (tldjs.getDomain(domain) == domain)	214 if (tldjs.getDomain(domain) == domain)

220 result.push("www." + domain);	215 result.push("www." + domain);

221 }	216 }

222	217

223 return result;	218 return result;

224 }	219 }

225	220

226 function convertFilter(filter, action, withResourceTypes)	221 function convertFilter(filter, action, withResourceTypes)

227 {	222 {

228 let trigger = getRegExpTrigger(filter);	223 let parsed = parseFilterRegexpSource(filter.regexpSource);

	224

	225 // For the special case of $document whitelisting filters with just a domain

	226 // we can generate an equivalent blocking rule exception using if-domain.

	227 if (filter.contentType == typeMap.DOCUMENT && parsed.justHostname)
	Sebastian Noack (2016/05/12 12:12:25): For filters like example.com$document,image we would have to generate both, one rule with if-domain and one with url-filter.
	kzar (2016/05/16 16:22:36), replying to Sebastian Noack's comment of 2016/05/12 12:12:25 ("For filters like example.com$document,image we would have to generate both, one rule with if-domain and one with url-filter."): Done.
	228 return {trigger: {"url-filter": ".*",
	Sebastian Noack (2016/05/12 12:12:26): Nit: Mind wrapping the nested object for better readability?
	kzar (2016/05/16 16:22:36), replying to Sebastian Noack's comment of 2016/05/12 12:12:26 ("Nit: Mind wrapping the nested object for better readability?"): Done.
	Sebastian Noack (2016/05/12 12:12:26): Wouldn't an empty string be sufficient as url-filter to match all URLs?
	kzar (2016/05/16 16:22:36), replying to Sebastian Noack's comment of 2016/05/12 12:12:26 ("Wouldn't an empty string be sufficient as url-filter to match all URLs?"): Unfortunately this causes a "Extension compilation failed: Invalid url-filter object." exception.
	229 "if-domain": addDomainPrefix([parsed.hostname])},

	230 action: {type: "ignore-previous-rules"}};

	231

	232 let trigger = {"url-filter": parsed.regexp};

	233

	234 // Limit rules to to HTTP(S) URLs
	Sebastian Noack (2016/05/12 12:12:26): Typo: to to
	kzar (2016/05/16 16:22:36), replying to Sebastian Noack's comment of 2016/05/12 12:12:26 ("Typo: to to"): Done.
	235 if (!/^(\^\|http)/i.test(trigger["url-filter"]))

	236 trigger["url-filter"] = "^https?://.*" + trigger["url-filter"];

	237

	238 // For rules containing only a hostname we know that we're matching against

	239 // a lowercase string unless the matchCase option was passed.

	240 if (parsed.canSafelyMatchAsLowercase && !filter.matchCase)

	241 trigger["url-filter"] = trigger["url-filter"].toLowerCase();

	242

	243 if (parsed.canSafelyMatchAsLowercase \|\| filter.matchCase)

	244 trigger["url-filter-is-case-sensitive"] = true;

	245

229 let included = [];	246 let included = [];

230 let excluded = [];	247 let excluded = [];

231	248

232 parseDomains(filter.domains, included, excluded);	249 parseDomains(filter.domains, included, excluded);

233	250

234 if (withResourceTypes)	251 if (withResourceTypes)

235 trigger["resource-type"] = getResourceTypes(filter);	252 trigger["resource-type"] = getResourceTypes(filter);

236 if (filter.thirdParty != null)	253 if (filter.thirdParty != null)

237 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];	254 trigger["load-type"] = [filter.thirdParty ? "third-party" : "first-party"];

238	255

(...skipping 106 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
345 return;	362 return;

346 if (filter instanceof filterClasses.RegExpFilter &&	363 if (filter instanceof filterClasses.RegExpFilter &&

347 filter.regexpSource == null)	364 filter.regexpSource == null)

348 return;	365 return;

349	366

350 if (filter instanceof filterClasses.BlockingFilter)	367 if (filter instanceof filterClasses.BlockingFilter)

351 this.requestFilters.push(filter);	368 this.requestFilters.push(filter);

352	369

353 if (filter instanceof filterClasses.WhitelistFilter)	370 if (filter instanceof filterClasses.WhitelistFilter)

354 {	371 {

355 if (filter.contentType & (typeMap.IMAGE	372 if (filter.contentType & (typeMap.DOCUMENT

	373 \| typeMap.IMAGE

356 \| typeMap.STYLESHEET	374 \| typeMap.STYLESHEET

357 \| typeMap.SCRIPT	375 \| typeMap.SCRIPT

358 \| typeMap.FONT	376 \| typeMap.FONT

359 \| typeMap.MEDIA	377 \| typeMap.MEDIA

360 \| typeMap.POPUP	378 \| typeMap.POPUP

361 \| typeMap.OBJECT	379 \| typeMap.OBJECT

362 \| typeMap.OBJECT_SUBREQUEST	380 \| typeMap.OBJECT_SUBREQUEST

363 \| typeMap.XMLHTTPREQUEST	381 \| typeMap.XMLHTTPREQUEST

364 \| typeMap.PING	382 \| typeMap.PING

365 \| typeMap.SUBDOCUMENT	383 \| typeMap.SUBDOCUMENT

(...skipping 71 matching lines...) Expand 10 before \| Expand all \| Expand 10 after Loading...
437	455

438 for (let filter of this.elemhideExceptions)	456 for (let filter of this.elemhideExceptions)

439 addRule(convertFilter(filter, "ignore-previous-rules", false));	457 addRule(convertFilter(filter, "ignore-previous-rules", false));

440 for (let filter of this.requestFilters)	458 for (let filter of this.requestFilters)

441 addRule(convertFilter(filter, "block", true));	459 addRule(convertFilter(filter, "block", true));

442 for (let filter of this.requestExceptions)	460 for (let filter of this.requestExceptions)

443 addRule(convertFilter(filter, "ignore-previous-rules", true));	461 addRule(convertFilter(filter, "ignore-previous-rules", true));

444	462

445 return rules;	463 return rules;

446 };	464 };

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »