LEFT | RIGHT |
1 #include "Filter.h" | 1 #include "Filter.h" |
2 #include "CommentFilter.h" | 2 #include "CommentFilter.h" |
| 3 #include "InvalidFilter.h" |
3 #include "RegExpFilter.h" | 4 #include "RegExpFilter.h" |
| 5 #include "BlockingFilter.h" |
| 6 #include "WhitelistFilter.h" |
| 7 #include "ElemHideBase.h" |
4 #include "ElemHideFilter.h" | 8 #include "ElemHideFilter.h" |
5 #include "ElemHideException.h" | 9 #include "ElemHideException.h" |
| 10 #include "CSSPropertyFilter.h" |
6 #include "StringMap.h" | 11 #include "StringMap.h" |
7 | 12 |
8 namespace | 13 namespace |
9 { | 14 { |
10 StringMap<Filter*> knownFilters(8192); | 15 StringMap<Filter*> knownFilters(8192); |
11 | 16 |
12 void trim_spaces(String& str) | 17 void NormalizeWhitespace(DependentString& text) |
13 { | 18 { |
| 19 String::size_type start = 0; |
| 20 String::size_type end = text.length(); |
| 21 |
| 22 // Remove leading spaces and special characters like line breaks |
| 23 for (; start < end; start++) |
| 24 if (text[start] > ' ') |
| 25 break; |
| 26 |
| 27 // Now look for invalid characters inside the string |
14 String::size_type pos; | 28 String::size_type pos; |
| 29 for (pos = start; pos < end; pos++) |
| 30 if (text[pos] < ' ') |
| 31 break; |
15 | 32 |
16 // Remove leading whitespace | 33 if (pos < end) |
17 for (pos = 0; pos < str.length(); ++pos) | 34 { |
18 if (str[pos] != u' ') | 35 // Found invalid characters, copy all the valid characters while skipping |
| 36 // the invalid ones. |
| 37 String::size_type delta = 1; |
| 38 for (pos = pos + 1; pos < end; pos++) |
| 39 { |
| 40 if (text[pos] < ' ') |
| 41 delta++; |
| 42 else |
| 43 text[pos - delta] = text[pos]; |
| 44 } |
| 45 end -= delta; |
| 46 } |
| 47 |
| 48 // Remove trailing spaces |
| 49 for (; end > 0; end--) |
| 50 if (text[end - 1] != ' ') |
19 break; | 51 break; |
20 str.reset(str, pos); | |
21 | 52 |
22 // Remove trailing whitespace | 53 // Set new string boundaries |
23 for (pos = str.length(); pos > 0; --pos) | 54 text.reset(text, start, end - start); |
24 if (str[pos - 1] != u' ') | |
25 break; | |
26 str.reset(str, 0, pos); | |
27 } | |
28 | |
29 void remove_spaces(String& str) | |
30 { | |
31 String::size_type pos; | |
32 | |
33 for (String::size_type i = 0; i < str.length(); ++i) | |
34 if (str[i] != u' ') | |
35 str[pos++] = str[i]; | |
36 | |
37 str.reset(str, 0, pos); | |
38 } | 55 } |
39 } | 56 } |
40 | 57 |
41 Filter::Filter(const String& text) | 58 Filter::Filter(Type type, const String& text) |
42 : mText(text) | 59 : mType(type), mText(text) |
43 { | 60 { |
44 annotate_address(this, "Filter"); | 61 annotate_address(this, "Filter"); |
45 mText.ensure_own_buffer(); | |
46 } | 62 } |
47 | 63 |
48 Filter::~Filter() | 64 Filter::~Filter() |
49 { | 65 { |
50 // TODO: This should be removing from knownFilters | 66 knownFilters.erase(mText); |
51 } | 67 } |
52 | 68 |
53 String Filter::Serialize() const | 69 OwnedString Filter::Serialize() const |
54 { | 70 { |
55 String result(u"[Filter]\ntext="_str); | 71 OwnedString result(u"[Filter]\ntext="_str); |
56 result.append(mText); | 72 result.append(mText); |
57 result.append(u'\n'); | 73 result.append(u'\n'); |
58 return std::move(result); | 74 return result; |
59 } | 75 } |
60 | 76 |
61 Filter* Filter::FromText(const String& text) | 77 Filter* Filter::FromText(DependentString& text) |
62 { | 78 { |
63 auto it = knownFilters.find(text); | 79 NormalizeWhitespace(text); |
64 if (it != knownFilters.end()) | 80 if (text.empty()) |
65 return it->second; | 81 return nullptr; |
66 | 82 |
67 FilterPtr filter(CommentFilter::Create(text)); | 83 // Parsing also normalizes the filter text, so it has to be done before the |
68 if (!filter) | 84 // lookup in knownFilters. |
69 filter.reset(ElemHideBase::Create(text)); | 85 union |
70 if (!filter) | 86 { |
71 filter.reset(RegExpFilter::Create(text)); | 87 RegExpFilterData regexp; |
| 88 ElemHideData elemhide; |
| 89 } data; |
| 90 DependentString error; |
72 | 91 |
| 92 Filter::Type type = CommentFilter::Parse(text); |
| 93 if (type == Filter::Type::UNKNOWN) |
| 94 type = ElemHideBase::Parse(text, data.elemhide); |
| 95 if (type == Filter::Type::UNKNOWN) |
| 96 type = RegExpFilter::Parse(text, error, data.regexp); |
| 97 |
| 98 auto knownFilter = knownFilters.find(text); |
| 99 if (knownFilter) |
| 100 { |
| 101 knownFilter->second->AddRef(); |
| 102 return knownFilter->second; |
| 103 } |
| 104 |
| 105 FilterPtr filter; |
| 106 switch (type) |
| 107 { |
| 108 case Filter::Type::COMMENT: |
| 109 filter = new CommentFilter(text); |
| 110 break; |
| 111 case Filter::Type::INVALID: |
| 112 filter = new InvalidFilter(text, error); |
| 113 break; |
| 114 case Filter::Type::BLOCKING: |
| 115 filter = new BlockingFilter(text, data.regexp); |
| 116 break; |
| 117 case Filter::Type::WHITELIST: |
| 118 filter = new WhitelistFilter(text, data.regexp); |
| 119 break; |
| 120 case Filter::Type::ELEMHIDE: |
| 121 filter = new ElemHideFilter(text, data.elemhide); |
| 122 break; |
| 123 case Filter::Type::ELEMHIDEEXCEPTION: |
| 124 filter = new ElemHideException(text, data.elemhide); |
| 125 break; |
| 126 case Filter::Type::CSSPROPERTY: |
| 127 filter = new CSSPropertyFilter(text, data.elemhide); |
| 128 if (static_cast<CSSPropertyFilter*>(filter.get())->IsGeneric()) |
| 129 filter = new InvalidFilter(text, u"filter_cssproperty_nodomain"_str); |
| 130 break; |
| 131 default: |
| 132 // This should never happen but just in case |
| 133 return nullptr; |
| 134 } |
| 135 |
| 136 // This is a hack: we looked up the entry using text but create it using |
| 137 // filter->mText. This works because both are equal at this point. However, |
| 138 // text refers to a temporary buffer which will go away. |
73 enter_context("Adding to known filters"); | 139 enter_context("Adding to known filters"); |
74 knownFilters[filter->mText] = filter.get(); | 140 knownFilter.assign(filter->mText, filter.get()); |
75 exit_context(); | 141 exit_context(); |
76 | 142 |
77 // TODO: We intentionally leak the filter here - currently it won't be used | 143 return filter.release(); |
78 // for anything and would be deleted immediately. | |
79 filter->AddRef(); | |
80 | |
81 return filter; | |
82 } | 144 } |
83 | |
84 String Filter::Normalize(String& text) | |
85 { | |
86 // Removing special characters like line breaks | |
87 String::size_type delta = 0; | |
88 for (String::size_type i = 0; i < text.length(); ++i) | |
89 { | |
90 if (text[i] >= u' ') | |
91 text[i - delta] = text[i]; | |
92 else | |
93 ++delta; | |
94 } | |
95 text.reset(text, 0, text.length() - delta); | |
96 | |
97 trim_spaces(text); | |
98 | |
99 { | |
100 String::size_type domainsEnd; | |
101 String::size_type selectorStart; | |
102 Filter::Type type = ElemHideBase::Parse(text, &domainsEnd, &selectorStart); | |
103 if (type != Filter::Type::UNKNOWN) | |
104 { | |
105 String domains(text, 0, domainsEnd); | |
106 String selector(text, selectorStart); | |
107 remove_spaces(domains); | |
108 trim_spaces(selector); | |
109 | |
110 String::size_type domainsDelta = domainsEnd - domains.length(); | |
111 String::size_type selectorDelta = text.length() - selectorStart - | |
112 selector.length(); | |
113 | |
114 if (domainsDelta) | |
115 for (String::size_type i = domainsEnd; i < selectorStart; ++i) | |
116 text[i - domainsDelta] = text[i]; | |
117 | |
118 if (domainsDelta + selectorDelta) | |
119 for (String::size_type i = 0; i < selector.length(); ++i) | |
120 text[selectorStart - domainsDelta + i] = selector[i]; | |
121 | |
122 text.reset(text, 0, text.length() - domainsDelta - selectorDelta); | |
123 return text; | |
124 } | |
125 } | |
126 | |
127 if (CommentFilter::Parse(text) == Filter::Type::UNKNOWN) | |
128 remove_spaces(text); | |
129 return text; | |
130 } | |
LEFT | RIGHT |