Left: | ||
Right: |
OLD | NEW |
---|---|
1 /* | 1 /* |
2 * This file is part of Adblock Plus <https://adblockplus.org/>, | 2 * This file is part of Adblock Plus <https://adblockplus.org/>, |
3 * Copyright (C) 2006-present eyeo GmbH | 3 * Copyright (C) 2006-present eyeo GmbH |
4 * | 4 * |
5 * Adblock Plus is free software: you can redistribute it and/or modify | 5 * Adblock Plus is free software: you can redistribute it and/or modify |
6 * it under the terms of the GNU General Public License version 3 as | 6 * it under the terms of the GNU General Public License version 3 as |
7 * published by the Free Software Foundation. | 7 * published by the Free Software Foundation. |
8 * | 8 * |
9 * Adblock Plus is distributed in the hope that it will be useful, | 9 * Adblock Plus is distributed in the hope that it will be useful, |
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
12 * GNU General Public License for more details. | 12 * GNU General Public License for more details. |
13 * | 13 * |
14 * You should have received a copy of the GNU General Public License | 14 * You should have received a copy of the GNU General Public License |
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. | 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. |
16 */ | 16 */ |
17 | 17 |
18 #pragma once | 18 #pragma once |
19 | 19 |
20 #include <cstddef> | 20 #include <cstddef> |
21 #include <cmath> | |
22 #include <initializer_list> | |
23 #include <memory> | |
24 | 21 |
22 #include "Map.h" | |
25 #include "String.h" | 23 #include "String.h" |
26 #include "debug.h" | |
27 | |
28 template<typename T> | |
29 class StringMap; | |
30 | 24 |
31 namespace StringMap_internal | 25 namespace StringMap_internal |
32 { | 26 { |
33 template<typename Entry> | 27 struct StringSetEntry |
34 struct HashContainerIterator | |
35 { | 28 { |
36 typedef Entry entry_type; | 29 typedef String key_type; |
37 typedef HashContainerIterator<Entry> iterator; | 30 typedef size_t size_type; |
38 | 31 |
39 const entry_type* mPos; | 32 DependentString first; |
sergei
2017/10/17 12:58:06
Actually, the type of `first` should be key_type,
Wladimir Palant
2017/12/04 13:44:15
Done.
| |
40 const entry_type* mEnd; | |
41 | 33 |
42 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) | 34 StringSetEntry(const key_type& key = DependentString()) |
43 : mPos(start), mEnd(end) | |
44 { | 35 { |
45 if (mPos != mEnd && mPos->first.is_invalid()) | 36 if (!key.is_invalid()) |
46 ++(*this); | 37 first.reset(key); |
sergei
2017/10/17 12:58:06
Why is this trick required and `:first(key)` canno
Wladimir Palant
2017/12/04 13:44:15
That's because DepedentString() constructor will c
sergei
2017/12/04 14:26:09
I have added a ticket for that because it seems a
| |
47 } | 38 } |
48 | 39 |
49 const entry_type& operator*() const | 40 bool equals(const key_type& other) const |
50 { | 41 { |
51 return *mPos; | 42 return first.equals(other); |
52 } | 43 } |
53 | 44 |
54 const entry_type* operator->() const | 45 bool is_invalid() const |
55 { | 46 { |
56 return mPos; | 47 return first.is_invalid(); |
57 } | 48 } |
58 | 49 |
59 iterator& operator++() | 50 bool is_deleted() const |
60 { | 51 { |
61 do { | 52 return first.is_deleted(); |
62 ++mPos; | |
63 } while(mPos != mEnd && mPos->first.is_invalid()); | |
64 return *this; | |
65 } | 53 } |
66 | 54 |
67 bool operator==(const iterator& it) const | 55 void erase() |
68 { | 56 { |
69 return mPos == it.mPos; | 57 first.erase(); |
70 } | 58 } |
71 | 59 |
72 bool operator!=(const iterator& it) const | 60 static size_type hash(const key_type& key) |
73 { | 61 { |
74 return mPos != it.mPos; | 62 // FNV-1a hash function |
63 size_type result = 2166136261; | |
64 for (String::size_type i = 0; i < key.length(); i++) | |
65 result = (result ^ key[i]) * 16777619; | |
66 return result; | |
75 } | 67 } |
76 }; | 68 }; |
77 | 69 |
78 template<typename Entry> | 70 template<typename Value> |
79 struct HashContainerReference | 71 struct StringMapEntry : StringSetEntry |
80 { | 72 { |
81 typedef Entry entry_type; | 73 typedef StringSetEntry super; |
74 typedef Value value_type; | |
82 | 75 |
83 entry_type* mEntry; | 76 Value second; |
84 | 77 |
85 explicit HashContainerReference(entry_type* entry) | 78 StringMapEntry(const key_type& key = DependentString(), |
86 : mEntry(entry) | 79 value_type value = value_type()) |
80 : super(key), second(value) | |
87 { | 81 { |
88 } | 82 } |
89 | 83 |
90 const entry_type* operator->() const | 84 void erase() |
91 { | 85 { |
92 return mEntry; | 86 super::erase(); |
93 } | 87 second = value_type(); |
94 | |
95 operator bool() const | |
96 { | |
97 return !mEntry->first.is_invalid(); | |
98 } | |
99 }; | |
100 | |
101 template<typename Entry> | |
102 class HashContainer | |
103 { | |
104 public: | |
105 typedef Entry entry_type; | |
106 typedef size_t size_type; | |
107 typedef HashContainerIterator<Entry> const_iterator; | |
108 typedef HashContainerReference<const Entry> const_reference; | |
109 | |
110 private: | |
111 explicit HashContainer(const HashContainer& other); | |
112 void operator=(const HashContainer& other); | |
113 | |
114 protected: | |
115 static constexpr size_type MIN_BUCKETS = 1; | |
116 static constexpr double LOAD_FACTOR = 0.8; | |
117 std::unique_ptr<entry_type[]> mBuckets; | |
118 size_type mBucketCount; | |
119 size_type mEntryCount; | |
120 | |
121 #if defined(DEBUG) | |
122 size_type mInsertCounter; | |
123 #endif | |
124 | |
125 explicit HashContainer(size_type expectedEntries = 0) | |
126 : mEntryCount(0) | |
127 { | |
128 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
129 mBucketCount = MIN_BUCKETS; | |
130 while (mBucketCount < expectedEntries) | |
131 mBucketCount <<= 1; | |
132 | |
133 mBuckets.reset(new entry_type[mBucketCount]); | |
134 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
135 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
136 } | |
137 | |
138 static size_type hash(const String& str) | |
139 { | |
140 // FNV-1a hash function | |
141 size_type result = 2166136261; | |
142 for (String::size_type i = 0; i < str.length(); i++) | |
143 result = (result ^ str[i]) * 16777619; | |
144 return result; | |
145 } | |
146 | |
147 entry_type* find_bucket(const String& key) const | |
148 { | |
149 size_type h = hash(key); | |
150 | |
151 // This does quadratic probing, effectively the following formula is used: | |
152 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | |
153 for (size_type i = 0; ; ++i) | |
154 { | |
155 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | |
156 // h % mBucketCount but significantly faster. | |
157 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | |
158 if (entry->first.is_invalid() || entry->first.equals(key)) | |
159 return entry; | |
160 h += i; | |
161 } | |
162 } | |
163 | |
164 void resize(size_type bucketCount) | |
165 { | |
166 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | |
167 size_type oldCount = mBucketCount; | |
168 | |
169 mEntryCount = 0; | |
170 mBucketCount = bucketCount; | |
171 mBuckets.reset(new entry_type[mBucketCount]); | |
172 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
173 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
174 | |
175 // Copy old entries into the new buffer | |
176 for (size_type i = 0; i < oldCount; i++) | |
177 { | |
178 entry_type& entry = oldBuckets[i]; | |
179 if (!entry.first.is_invalid() && !entry.first.is_deleted()) | |
180 { | |
181 *find_bucket(entry.first) = entry; | |
182 mEntryCount++; | |
183 } | |
184 } | |
185 } | |
186 | |
187 entry_type* assign(entry_type* existing, const entry_type& entry) | |
188 { | |
189 if (existing->first.is_invalid()) | |
190 { | |
191 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | |
192 { | |
193 resize(mBucketCount << 1); | |
194 existing = find_bucket(entry.first); | |
195 } | |
196 mEntryCount++; | |
197 #if defined(DEBUG) | |
198 mInsertCounter++; | |
199 #endif | |
200 } | |
201 *existing = entry; | |
202 return existing; | |
203 } | |
204 | |
205 public: | |
206 void insert(const entry_type& entry) | |
207 { | |
208 assign(find_bucket(entry.first), entry); | |
209 } | |
210 | |
211 bool erase(const String& key) | |
212 { | |
213 entry_type* entry = find_bucket(key); | |
214 if (entry->first.is_invalid()) | |
215 return false; | |
216 | |
217 entry->first.erase(); | |
218 return true; | |
219 } | |
220 | |
221 const_reference find(const String& key) const | |
222 { | |
223 return const_reference(find_bucket(key)); | |
224 } | |
225 | |
226 const_iterator begin() const | |
227 { | |
228 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | |
229 } | |
230 | |
231 const_iterator end() const | |
232 { | |
233 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | |
234 } | |
235 | |
236 size_type size() const | |
237 { | |
238 return mEntryCount; | |
239 } | |
240 }; | |
241 | |
242 struct StringSetEntry | |
243 { | |
244 StringSetEntry() {} | |
245 StringSetEntry(const String& key) | |
246 : first(key) | |
247 { | |
248 } | |
249 | |
250 DependentString first; | |
251 }; | |
252 | |
253 template<typename T> | |
254 struct StringMapEntry | |
255 { | |
256 StringMapEntry() {} | |
257 StringMapEntry(const String& key) | |
258 : first(key), second() | |
259 { | |
260 } | |
261 StringMapEntry(const String& key, T value) | |
262 : first(key), second(value) | |
263 { | |
264 } | |
265 | |
266 DependentString first; | |
267 T second; | |
268 }; | |
269 | |
270 template<typename T> | |
271 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>> | |
272 { | |
273 typedef HashContainerReference<StringMapEntry<T>> super; | |
274 typedef typename super::entry_type entry_type; | |
275 typedef StringMap<T> map_type; | |
276 | |
277 map_type* mMap; | |
278 | |
279 #if defined(DEBUG) | |
280 typename map_type::size_type mInsertCounter; | |
281 typename map_type::size_type mHash; | |
282 #endif | |
283 | |
284 StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | |
285 : super(entry), mMap(map) | |
286 { | |
287 #if defined(DEBUG) | |
288 mInsertCounter = mMap->mInsertCounter; | |
289 mHash = mMap->hash(key); | |
290 #endif | |
291 } | |
292 | |
293 void assign(const String& key, const T& value) | |
294 { | |
295 #if defined(DEBUG) | |
296 assert2(mInsertCounter == mMap->mInsertCounter, | |
297 u"There should be no insert operations performed between map.find() an d assign()"_str); | |
298 assert2(mHash == mMap->hash(key), | |
299 u"The keys used in map.find() and assign() should be identical"_str); | |
300 #endif | |
301 | |
302 mMap->assign(this->mEntry, entry_type(key, value)); | |
303 } | 88 } |
304 }; | 89 }; |
305 } | 90 } |
306 | 91 |
307 class StringSet | 92 using StringSet = Set<StringMap_internal::StringSetEntry>; |
308 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | |
309 { | |
310 }; | |
311 | 93 |
312 template<typename T> | 94 template<typename Value> |
313 class StringMap | 95 using StringMap = Map<StringMap_internal::StringMapEntry<Value>>; |
314 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>> | |
315 { | |
316 public: | |
317 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super; | |
318 typedef typename super::size_type size_type; | |
319 typedef typename super::entry_type entry_type; | |
320 typedef typename super::const_reference const_reference; | |
321 typedef StringMap_internal::StringMapEntryReference<T> reference; | |
322 friend struct StringMap_internal::StringMapEntryReference<T>; | |
323 | |
324 explicit StringMap(size_type expectedEntries = 0) | |
325 : super(expectedEntries) | |
326 { | |
327 } | |
328 | |
329 StringMap(std::initializer_list<entry_type> list) | |
330 : super(list.size()) | |
331 { | |
332 for (const auto& item : list) | |
333 super::insert(item); | |
334 } | |
335 | |
336 ~StringMap() | |
337 { | |
338 } | |
339 | |
340 T& operator[](const String& key) | |
341 { | |
342 entry_type* entry = super::find_bucket(key); | |
343 if (entry->first.is_invalid()) | |
344 entry = super::assign(entry, key); | |
345 return entry->second; | |
346 } | |
347 | |
348 const_reference find(const String& key) const | |
349 { | |
350 return super::find(key); | |
351 } | |
352 | |
353 reference find(const String& key) | |
354 { | |
355 return reference(this, key, super::find_bucket(key)); | |
356 } | |
357 }; | |
OLD | NEW |