Index: compiled/matcher/Matcher.cpp |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/compiled/matcher/Matcher.cpp |
@@ -0,0 +1,175 @@ |
+/* |
+ * This file is part of Adblock Plus <https://adblockplus.org/>, |
+ * Copyright (C) 2006-present eyeo GmbH |
+ * |
+ * Adblock Plus is free software: you can redistribute it and/or modify |
+ * it under the terms of the GNU General Public License version 3 as |
+ * published by the Free Software Foundation. |
+ * |
+ * Adblock Plus is distributed in the hope that it will be useful, |
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of |
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
+ * GNU General Public License for more details. |
+ * |
+ * You should have received a copy of the GNU General Public License |
+ * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
+ */ |
+ |
+#include "Matcher.h" |
+#include "../filter/WhitelistFilter.h" |
+ |
+class RollingHash |
+{ |
+private: |
+ static const String::size_type SUBSTRING_LENGTH = 8; |
+ static const uint32_t VALUE_SIZE = 32; |
+ typedef uint32_t result_type; |
+ String::size_type mCount; |
+ result_type mResult; |
+ String::value_type mCurrentChars[SUBSTRING_LENGTH]; |
+ |
+private: |
+ result_type Rotate(result_type value, uint32_t bits) |
sergei
2017/10/18 10:58:43
should it be static?
|
+ { |
+ return value << bits | value >> (VALUE_SIZE - bits); |
sergei
2017/10/18 10:58:42
Mind adding static_assert(std::is_unsigned<result_
sergei
2017/10/18 10:58:42
I wonder whether our compilers are smart enough to
|
+ } |
+ |
+public: |
+ RollingHash() |
+ : mCount(0), mResult(0) |
+ { |
+ } |
+ |
+ void Advance(String::value_type newChar) |
+ { |
+ auto pos = mCount++ % SUBSTRING_LENGTH; |
+ mResult = Rotate(mResult, 1) ^ newChar; |
+ if (mCount > SUBSTRING_LENGTH) |
sergei
2017/10/18 10:58:43
Do we really need this condition?
Can we rather se
|
+ mResult ^= Rotate(mCurrentChars[pos], SUBSTRING_LENGTH); |
Wladimir Palant
2017/10/17 12:51:22
Normally, one wouldn't use the char value directly
|
+ mCurrentChars[pos] = newChar; |
+ } |
+ |
+ int32_t PrefixLength() |
hub
2017/11/20 17:38:33
IMHO this should be const method.
|
+ { |
+ return static_cast<int32_t>(mCount) - SUBSTRING_LENGTH; |
+ } |
Wladimir Palant
2017/10/17 12:51:22
This goal of this method is to avoid exposing SUBS
sergei
2017/10/18 10:58:43
IMO not only the caller should do the calculation
|
+ |
+ result_type GetResult() |
hub
2017/11/20 17:38:33
and const method here too.
|
+ { |
+ return mResult & 0x7FFFFFFF; |
Wladimir Palant
2017/10/17 12:51:22
We cut off the top bit because key values 0xFFFFFF
sergei
2017/10/18 10:58:42
Maybe put a special const value masking the range
|
+ } |
+}; |
+ |
+Matcher* Matcher::Create() |
+{ |
+ return new Matcher(); |
+} |
+ |
+void Matcher::Add(Filter& filter) |
+{ |
+ RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); |
+ if (!regExpFilter) |
+ return; |
+ |
+ RollingHash hash; |
+ DependentString str(regExpFilter->GetRegExpSource()); |
+ int32_t minPrefixLength = 0; |
+ for (auto i = 0; i < str.length(); i++) |
sergei
2017/10/18 10:58:43
Here auto resolves into `int`, but should be Strin
|
+ { |
+ auto currentChar = str[i]; |
+ if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') |
+ minPrefixLength = i + 1; |
+ hash.Advance(currentChar); |
+ if (hash.PrefixLength() >= minPrefixLength) |
+ { |
+ auto candidate = hash.GetResult(); |
+ auto entry = mFilters.find(candidate); |
+ if (!entry) |
+ { |
+ entry.assign(candidate, RegExpFilterPtr(regExpFilter)); |
+ return; |
+ } |
+ } |
+ } |
+ |
+ mUnoptimizedFilters.push_back(RegExpFilterPtr(regExpFilter)); |
+} |
+ |
+void Matcher::Remove(const Filter& filter) |
+{ |
+ const RegExpFilter* regExpFilter = filter.As<RegExpFilter>(); |
+ if (!regExpFilter) |
+ return; |
+ |
+ RollingHash hash; |
+ DependentString str(regExpFilter->GetRegExpSource()); |
+ String::size_type minPrefixLength = 0; |
+ for (auto i = 0; i < str.length(); i++) |
+ { |
+ auto currentChar = str[i]; |
+ if (currentChar == u'*' || currentChar == u'^' || currentChar == u'|') |
sergei
2017/10/18 10:58:43
What about putting this condition into an inline f
|
+ minPrefixLength = i + 1; |
+ hash.Advance(currentChar); |
+ if (hash.PrefixLength() >= minPrefixLength) |
+ { |
+ auto candidate = hash.GetResult(); |
+ auto entry = mFilters.find(candidate); |
+ if (entry && entry->second == &filter) |
sergei
2017/10/18 10:58:43
The type of second is RegExpFilterPtr, so finally
|
+ { |
+ mFilters.erase(candidate); |
+ return; |
+ } |
+ } |
+ } |
+ |
+ for (auto ptr : mUnoptimizedFilters) |
+ if (ptr == &filter) |
+ ptr.reset(); |
+} |
+ |
+RegExpFilter* Matcher::MatchesAny(const String& location, int typeMask, |
+ DependentString& docDomain, bool thirdParty, const String& siteKey, |
+ bool specificOnly) |
+{ |
+ RegExpFilterPtr result; |
+ |
+ RollingHash hash; |
+ for (auto i = 0; i < location.length(); i++) |
+ { |
+ hash.Advance(location[i]); |
+ if (hash.PrefixLength() >= 0) |
+ { |
+ auto entry = mFilters.find(hash.GetResult()); |
+ if (!entry) |
+ continue; |
+ |
+ if (specificOnly && !entry->second->As<WhitelistFilter>() && entry->second->IsGeneric()) |
+ continue; |
+ |
+ if (!entry->second->Matches(location, typeMask, docDomain, thirdParty, siteKey)) |
+ continue; |
+ |
+ result.reset(entry->second.get()); |
+ if (result->As<WhitelistFilter>()) |
+ return result.release(); |
+ } |
+ } |
+ |
+ for (auto ptr : mUnoptimizedFilters) |
+ { |
+ if (!ptr) |
+ continue; |
+ |
+ if (specificOnly && !ptr->As<WhitelistFilter>() && ptr->IsGeneric()) |
+ continue; |
+ |
+ if (!ptr->Matches(location, typeMask, docDomain, thirdParty, siteKey)) |
+ continue; |
+ |
+ result.reset(ptr.get()); |
+ if (result->As<WhitelistFilter>()) |
+ return result.release(); |
+ } |
+ |
+ return result.release(); |
+} |