Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Delta Between Two Patch Sets: compiled/RegExpFilter.cpp

Issue 29333474: Issue 4125 - [emscripten] Convert filter classes to C++ (Closed)
Left Patch Set: Rebased, addressed comments, changed StringMap::find() return value Created Feb. 18, 2016, 4:02 p.m.
Right Patch Set: Addressed comments from Patch Set 28 Created March 21, 2017, 10:04 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
Left: Side by side diff | Download
Right: Side by side diff | Download
« no previous file with change/comment | « compiled/RegExpFilter.h ('k') | compiled/String.h » ('j') | no next file with change/comment »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
LEFTRIGHT
1 #include <climits> 1 #include <climits>
2 2
3 #include <emscripten.h> 3 #include <emscripten.h>
4 4
5 #include "RegExpFilter.h" 5 #include "RegExpFilter.h"
6 #include "StringScanner.h" 6 #include "StringScanner.h"
7 #include "StringMap.h" 7 #include "StringMap.h"
8 8
9 namespace 9 namespace
10 { 10 {
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after
44 {u"media"_str, TYPE_MEDIA}, 44 {u"media"_str, TYPE_MEDIA},
45 {u"font"_str, TYPE_FONT}, 45 {u"font"_str, TYPE_FONT},
46 {u"background"_str, TYPE_IMAGE}, // Backwards compat 46 {u"background"_str, TYPE_IMAGE}, // Backwards compat
47 47
48 {u"popup"_str, TYPE_POPUP}, 48 {u"popup"_str, TYPE_POPUP},
49 {u"genericblock"_str, TYPE_GENERICBLOCK}, 49 {u"genericblock"_str, TYPE_GENERICBLOCK},
50 {u"generichide"_str, TYPE_GENERICHIDE}, 50 {u"generichide"_str, TYPE_GENERICHIDE},
51 {u"elemhide"_str, TYPE_ELEMHIDE}, 51 {u"elemhide"_str, TYPE_ELEMHIDE},
52 }; 52 };
53 53
54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE | TYPE_POPUP | 54 const int defaultTypeMask = INT_MAX & ~(TYPE_DOCUMENT | TYPE_ELEMHIDE |
55 TYPE_GENERICBLOCK | TYPE_GENERICHIDE); 55 TYPE_POPUP | TYPE_GENERICBLOCK | TYPE_GENERICHIDE);
56 56
57 int GenerateRegExp(const String& regexp, bool matchCase) 57 int GenerateRegExp(const String& regexp, bool matchCase)
58 { 58 {
59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase); 59 return EM_ASM_INT(return regexps.create($0, $1), &regexp, matchCase);
60 } 60 }
61 61
62 void NormalizeWhitespace(DependentString& text) 62 void NormalizeWhitespace(DependentString& text)
63 { 63 {
64 // We want to remove all spaces but bail out early in the common scenario 64 // We want to remove all spaces but bail out early in the common scenario
65 // that the string contains no spaces. 65 // that the string contains no spaces.
(...skipping 52 matching lines...) Expand 10 before | Expand all | Expand 10 after
118 data.mContentType &= ~it->second; 118 data.mContentType &= ~it->second;
119 else 119 else
120 data.mContentType |= it->second; 120 data.mContentType |= it->second;
121 } 121 }
122 else if (name.equals(u"domain"_str)) 122 else if (name.equals(u"domain"_str))
123 { 123 {
124 if (valueStart >= 0 && valueEnd > valueStart) 124 if (valueStart >= 0 && valueEnd > valueStart)
125 { 125 {
126 data.mDomainsStart = valueStart; 126 data.mDomainsStart = valueStart;
127 data.mDomainsEnd = valueEnd; 127 data.mDomainsEnd = valueEnd;
128 ActiveFilter::ToLower(text, data.mDomainsStart, data.mDomainsEnd); 128 DependentString(text, valueStart, valueEnd - valueStart).toLower();
129 } 129 }
130 } 130 }
131 else if (name.equals(u"sitekey"_str)) 131 else if (name.equals(u"sitekey"_str))
132 { 132 {
133 if (valueStart >= 0 && valueEnd > valueStart) 133 if (valueStart >= 0 && valueEnd > valueStart)
134 { 134 {
135 data.mSitekeysStart = valueStart; 135 data.mSitekeysStart = valueStart;
136 data.mSitekeysEnd = valueEnd; 136 data.mSitekeysEnd = valueEnd;
137 } 137 }
138 } 138 }
(...skipping 54 matching lines...) Expand 10 before | Expand all | Expand 10 after
193 valueStart = -1; 193 valueStart = -1;
194 break; 194 break;
195 } 195 }
196 } 196 }
197 197
198 if (data.mContentType < 0) 198 if (data.mContentType < 0)
199 data.mContentType = defaultTypeMask; 199 data.mContentType = defaultTypeMask;
200 } 200 }
201 } 201 }
202 202
203 RegExpFilter::RegExpFilter(const String& text, const RegExpFilterData& data) 203 RegExpFilter::RegExpFilter(Type type, const String& text, const RegExpFilterData& data)
204 : ActiveFilter(text, true), RegExpFilterData(data) 204 : ActiveFilter(type, text, true), mData(data)
205 { 205 {
206 } 206 }
207 207
208 RegExpFilter::~RegExpFilter() 208 RegExpFilter::~RegExpFilter()
209 { 209 {
210 if (HasRegExp()) 210 if (mData.HasRegExp())
211 EM_ASM_ARGS(regexps.delete($0), mRegexpId); 211 EM_ASM_ARGS(regexps.delete($0), mData.mRegexpId);
212 } 212 }
213 213
214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error, 214 Filter::Type RegExpFilter::Parse(DependentString& text, DependentString& error,
215 RegExpFilterData& data) 215 RegExpFilterData& data)
216 { 216 {
217 NormalizeWhitespace(text); 217 NormalizeWhitespace(text);
218 218
219 bool blocking = true; 219 Filter::Type type = Type::BLOCKING;
220 220
221 data.mPatternStart = 0; 221 data.mPatternStart = 0;
222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@') 222 if (text.length() >= 2 && text[0] == u'@' && text[1] == u'@')
223 { 223 {
224 blocking = false; 224 type = Type::WHITELIST;
225 data.mPatternStart = 2; 225 data.mPatternStart = 2;
226 } 226 }
227 227
228 data.mPatternEnd = text.find(u'$', data.mPatternStart); 228 data.mPatternEnd = text.find(u'$', data.mPatternStart);
229 if (data.mPatternEnd == text.npos) 229 if (data.mPatternEnd == text.npos)
230 data.mPatternEnd = text.length(); 230 data.mPatternEnd = text.length();
231 231
232 ParseOptions(text, error, data, data.mPatternEnd + 1); 232 ParseOptions(text, error, data, data.mPatternEnd + 1);
233 if (!error.empty()) 233 if (!error.empty())
234 return Type::INVALID; 234 return Type::INVALID;
235 235
236 if (data.mPatternEnd - data.mPatternStart >= 2 && 236 if (data.mPatternEnd - data.mPatternStart >= 2 &&
237 text[data.mPatternStart] == u'/' && 237 text[data.mPatternStart] == u'/' &&
238 text[data.mPatternEnd - 1] == u'/') 238 text[data.mPatternEnd - 1] == u'/')
239 { 239 {
240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1, 240 data.SetRegExp(GenerateRegExp(DependentString(text, data.mPatternStart + 1,
241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase)); 241 data.mPatternEnd - data.mPatternStart - 2), data.mMatchCase));
242 if (data.mRegexpId == -1) 242 if (data.mRegexpId == -1)
243 { 243 {
244 error.reset(u"filter_invalid_regexp"_str); 244 error.reset(u"filter_invalid_regexp"_str);
245 return Type::INVALID; 245 return Type::INVALID;
246 } 246 }
247 } 247 }
248 248
249 if (blocking) 249 return type;
250 return Type::BLOCKING;
251 else
252 return Type::WHITELIST;
253 } 250 }
254 251
255 void RegExpFilter::ParseSitekeys(const String& sitekeys) const 252 void RegExpFilter::ParseSitekeys(const String& sitekeys) const
256 { 253 {
257 StringScanner scanner(sitekeys, 0, u'|'); 254 StringScanner scanner(sitekeys, 0, u'|');
258 size_t start = 0; 255 size_t start = 0;
259 bool done = false; 256 bool done = false;
260 while (!done) 257 while (!done)
261 { 258 {
262 done = scanner.done(); 259 done = scanner.done();
263 if (scanner.next() == u'|') 260 if (scanner.next() == u'|')
264 { 261 {
265 if (scanner.position() > start) 262 if (scanner.position() > start)
266 AddSitekey(DependentString(sitekeys, start, scanner.position() - start)); 263 AddSitekey(DependentString(sitekeys, start, scanner.position() - start));
267 start = scanner.position() + 1; 264 start = scanner.position() + 1;
268 } 265 }
269 } 266 }
270 } 267 }
271 268
272 void RegExpFilter::InitJSTypes() 269 void RegExpFilter::InitJSTypes()
273 { 270 {
274 EM_ASM(exports.RegExpFilter.typeMap = {};); 271 EM_ASM(exports.RegExpFilter.typeMap = {};);
275 for (auto it = typeMap.begin(); it != typeMap.end(); ++it) 272 for (auto it = typeMap.begin(); it != typeMap.end(); ++it)
276 EM_ASM_ARGS(exports.RegExpFilter.typeMap[getStringData($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second); 273 EM_ASM_ARGS(exports.RegExpFilter.typeMap[readString($0).replace("-", "_").toUpperCase()] = $1, &(it->first), it->second);
277 } 274 }
278 275
279 OwnedString RegExpFilter::RegExpFromSource(const String& source) 276 OwnedString RegExpFilter::RegExpFromSource(const String& source)
280 { 277 {
281 /* TODO: this is very inefficient */ 278 /* TODO: this is very inefficient */
282 279
283 // Note: This doesn't remove trailing wildcards, otherwise the result should 280 // Note: This doesn't remove trailing wildcards, otherwise the result should
284 // be identical to Filter.toRegExp(). 281 // be identical to Filter.toRegExp().
285 OwnedString result; 282 OwnedString result;
286 String::value_type prevChar = u'*'; 283 String::value_type prevChar = u'*';
(...skipping 43 matching lines...) Expand 10 before | Expand all | Expand 10 after
330 } 327 }
331 result.append(currChar); 328 result.append(currChar);
332 } 329 }
333 prevChar = currChar; 330 prevChar = currChar;
334 } 331 }
335 return result; 332 return result;
336 } 333 }
337 334
338 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const 335 RegExpFilter::DomainMap* RegExpFilter::GetDomains() const
339 { 336 {
340 if (!DomainsParsingDone()) 337 if (!mData.DomainsParsingDone())
341 { 338 {
342 ParseDomains(GetDomainsSource(mText), u'|'); 339 ParseDomains(mData.GetDomainsSource(mText), u'|');
343 SetDomainsParsingDone(); 340 mData.SetDomainsParsingDone();
344 } 341 }
345 return ActiveFilter::GetDomains(); 342 return ActiveFilter::GetDomains();
346 } 343 }
347 344
348 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const 345 RegExpFilter::SitekeySet* RegExpFilter::GetSitekeys() const
349 { 346 {
350 if (!SitekeyParsingDone()) 347 if (!mData.SitekeyParsingDone())
351 { 348 {
352 ParseSitekeys(GetSitekeysSource(mText)); 349 ParseSitekeys(mData.GetSitekeysSource(mText));
353 SetSitekeysParsingDone(); 350 mData.SetSitekeysParsingDone();
354 } 351 }
355 return ActiveFilter::GetSitekeys(); 352 return ActiveFilter::GetSitekeys();
356 } 353 }
357 354
358 bool RegExpFilter::Matches(const String& location, int typeMask, 355 bool RegExpFilter::Matches(const String& location, int typeMask,
359 DependentString& docDomain, bool thirdParty, const String& sitekey) const 356 DependentString& docDomain, bool thirdParty, const String& sitekey) const
360 { 357 {
361 if (!(mContentType & typeMask) || 358 if (!(mData.mContentType & typeMask) ||
362 (mThirdParty == TrippleState::YES && !thirdParty) || 359 (mData.mThirdParty == TrippleState::YES && !thirdParty) ||
363 (mThirdParty == TrippleState::NO && thirdParty) || 360 (mData.mThirdParty == TrippleState::NO && thirdParty) ||
364 !IsActiveOnDomain(docDomain, sitekey)) 361 !IsActiveOnDomain(docDomain, sitekey))
365 { 362 {
366 return false; 363 return false;
367 } 364 }
368 365
369 if (!RegExpParsingDone()) 366 if (!mData.RegExpParsingDone())
370 { 367 {
371 const OwnedString pattern(GetRegExpSource(mText)); 368 const OwnedString pattern(mData.GetRegExpSource(mText));
372 SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mMatchCase)); 369 mData.SetRegExp(GenerateRegExp(RegExpFromSource(pattern), mData.mMatchCase));
373 } 370 }
374 return EM_ASM_INT(return regexps.test($0, $1), mRegexpId, &location); 371 return EM_ASM_INT(return regexps.test($0, $1), mData.mRegexpId, &location);
375 } 372 }
LEFTRIGHT

Powered by Google App Engine
This is Rietveld