Rietveld Code Review Tool
Help | Bug tracker | Discussion group | Source code

Side by Side Diff: compiled/StringMap.h

Issue 29572731: Issue 5141 - Generalize Map class to allow non-strings as keys (Closed) Base URL: https://hg.adblockplus.org/adblockpluscore
Patch Set: Fixed key initialization issue Created Oct. 17, 2017, 11:45 a.m.
Left:
Right:
Use n/p to move between diff chunks; N/P to move between comments.
Jump to:
View unified diff | Download patch
« compiled/Map.h ('K') | « compiled/Map.h ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 * This file is part of Adblock Plus <https://adblockplus.org/>, 2 * This file is part of Adblock Plus <https://adblockplus.org/>,
3 * Copyright (C) 2006-present eyeo GmbH 3 * Copyright (C) 2006-present eyeo GmbH
4 * 4 *
5 * Adblock Plus is free software: you can redistribute it and/or modify 5 * Adblock Plus is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 3 as 6 * it under the terms of the GNU General Public License version 3 as
7 * published by the Free Software Foundation. 7 * published by the Free Software Foundation.
8 * 8 *
9 * Adblock Plus is distributed in the hope that it will be useful, 9 * Adblock Plus is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details. 12 * GNU General Public License for more details.
13 * 13 *
14 * You should have received a copy of the GNU General Public License 14 * You should have received a copy of the GNU General Public License
15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>. 15 * along with Adblock Plus. If not, see <http://www.gnu.org/licenses/>.
16 */ 16 */
17 17
18 #pragma once 18 #pragma once
19 19
20 #include <cstddef> 20 #include <cstddef>
21 #include <cmath>
22 #include <initializer_list>
23 #include <memory>
24 21
22 #include "Map.h"
25 #include "String.h" 23 #include "String.h"
26 #include "debug.h"
27
28 template<typename T>
29 class StringMap;
30 24
31 namespace StringMap_internal 25 namespace StringMap_internal
32 { 26 {
33 template<typename Entry> 27 struct StringSetEntry
34 struct HashContainerIterator
35 { 28 {
36 typedef Entry entry_type; 29 typedef String key_type;
37 typedef HashContainerIterator<Entry> iterator; 30 typedef size_t size_type;
38 31
39 const entry_type* mPos; 32 DependentString first;
sergei 2017/10/17 12:58:06 Actually, the type of `first` should be key_type,
Wladimir Palant 2017/12/04 13:44:15 Done.
40 const entry_type* mEnd;
41 33
42 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) 34 StringSetEntry(const key_type& key = DependentString())
43 : mPos(start), mEnd(end)
44 { 35 {
45 if (mPos != mEnd && mPos->first.is_invalid()) 36 if (!key.is_invalid())
46 ++(*this); 37 first.reset(key);
sergei 2017/10/17 12:58:06 Why is this trick required and `:first(key)` canno
Wladimir Palant 2017/12/04 13:44:15 That's because DepedentString() constructor will c
sergei 2017/12/04 14:26:09 I have added a ticket for that because it seems a
47 } 38 }
48 39
49 const entry_type& operator*() const 40 bool equals(const key_type& other) const
50 { 41 {
51 return *mPos; 42 return first.equals(other);
52 } 43 }
53 44
54 const entry_type* operator->() const 45 bool is_invalid() const
55 { 46 {
56 return mPos; 47 return first.is_invalid();
57 } 48 }
58 49
59 iterator& operator++() 50 bool is_deleted() const
60 { 51 {
61 do { 52 return first.is_deleted();
62 ++mPos;
63 } while(mPos != mEnd && mPos->first.is_invalid());
64 return *this;
65 } 53 }
66 54
67 bool operator==(const iterator& it) const 55 void erase()
68 { 56 {
69 return mPos == it.mPos; 57 first.erase();
70 } 58 }
71 59
72 bool operator!=(const iterator& it) const 60 static size_type hash(const key_type& key)
73 { 61 {
74 return mPos != it.mPos; 62 // FNV-1a hash function
63 size_type result = 2166136261;
64 for (String::size_type i = 0; i < key.length(); i++)
65 result = (result ^ key[i]) * 16777619;
66 return result;
75 } 67 }
76 }; 68 };
77 69
78 template<typename Entry> 70 template<typename Value>
79 struct HashContainerReference 71 struct StringMapEntry : StringSetEntry
80 { 72 {
81 typedef Entry entry_type; 73 typedef StringSetEntry super;
74 typedef Value value_type;
82 75
83 entry_type* mEntry; 76 Value second;
84 77
85 explicit HashContainerReference(entry_type* entry) 78 StringMapEntry(const key_type& key = DependentString(),
86 : mEntry(entry) 79 value_type value = value_type())
80 : super(key), second(value)
87 { 81 {
88 } 82 }
89 83
90 const entry_type* operator->() const 84 void erase()
91 { 85 {
92 return mEntry; 86 super::erase();
93 } 87 second = value_type();
94
95 operator bool() const
96 {
97 return !mEntry->first.is_invalid();
98 }
99 };
100
101 template<typename Entry>
102 class HashContainer
103 {
104 public:
105 typedef Entry entry_type;
106 typedef size_t size_type;
107 typedef HashContainerIterator<Entry> const_iterator;
108 typedef HashContainerReference<const Entry> const_reference;
109
110 private:
111 explicit HashContainer(const HashContainer& other);
112 void operator=(const HashContainer& other);
113
114 protected:
115 static constexpr size_type MIN_BUCKETS = 1;
116 static constexpr double LOAD_FACTOR = 0.8;
117 std::unique_ptr<entry_type[]> mBuckets;
118 size_type mBucketCount;
119 size_type mEntryCount;
120
121 #if defined(DEBUG)
122 size_type mInsertCounter;
123 #endif
124
125 explicit HashContainer(size_type expectedEntries = 0)
126 : mEntryCount(0)
127 {
128 expectedEntries = ceil(expectedEntries / LOAD_FACTOR);
129 mBucketCount = MIN_BUCKETS;
130 while (mBucketCount < expectedEntries)
131 mBucketCount <<= 1;
132
133 mBuckets.reset(new entry_type[mBucketCount]);
134 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
135 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
136 }
137
138 static size_type hash(const String& str)
139 {
140 // FNV-1a hash function
141 size_type result = 2166136261;
142 for (String::size_type i = 0; i < str.length(); i++)
143 result = (result ^ str[i]) * 16777619;
144 return result;
145 }
146
147 entry_type* find_bucket(const String& key) const
148 {
149 size_type h = hash(key);
150
151 // This does quadratic probing, effectively the following formula is used:
152 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount
153 for (size_type i = 0; ; ++i)
154 {
155 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to
156 // h % mBucketCount but significantly faster.
157 entry_type* entry = &mBuckets[h & (mBucketCount - 1)];
158 if (entry->first.is_invalid() || entry->first.equals(key))
159 return entry;
160 h += i;
161 }
162 }
163
164 void resize(size_type bucketCount)
165 {
166 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets));
167 size_type oldCount = mBucketCount;
168
169 mEntryCount = 0;
170 mBucketCount = bucketCount;
171 mBuckets.reset(new entry_type[mBucketCount]);
172 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here
173 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer");
174
175 // Copy old entries into the new buffer
176 for (size_type i = 0; i < oldCount; i++)
177 {
178 entry_type& entry = oldBuckets[i];
179 if (!entry.first.is_invalid() && !entry.first.is_deleted())
180 {
181 *find_bucket(entry.first) = entry;
182 mEntryCount++;
183 }
184 }
185 }
186
187 entry_type* assign(entry_type* existing, const entry_type& entry)
188 {
189 if (existing->first.is_invalid())
190 {
191 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR)
192 {
193 resize(mBucketCount << 1);
194 existing = find_bucket(entry.first);
195 }
196 mEntryCount++;
197 #if defined(DEBUG)
198 mInsertCounter++;
199 #endif
200 }
201 *existing = entry;
202 return existing;
203 }
204
205 public:
206 void insert(const entry_type& entry)
207 {
208 assign(find_bucket(entry.first), entry);
209 }
210
211 bool erase(const String& key)
212 {
213 entry_type* entry = find_bucket(key);
214 if (entry->first.is_invalid())
215 return false;
216
217 entry->first.erase();
218 return true;
219 }
220
221 const_reference find(const String& key) const
222 {
223 return const_reference(find_bucket(key));
224 }
225
226 const_iterator begin() const
227 {
228 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]);
229 }
230
231 const_iterator end() const
232 {
233 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]);
234 }
235
236 size_type size() const
237 {
238 return mEntryCount;
239 }
240 };
241
242 struct StringSetEntry
243 {
244 StringSetEntry() {}
245 StringSetEntry(const String& key)
246 : first(key)
247 {
248 }
249
250 DependentString first;
251 };
252
253 template<typename T>
254 struct StringMapEntry
255 {
256 StringMapEntry() {}
257 StringMapEntry(const String& key)
258 : first(key), second()
259 {
260 }
261 StringMapEntry(const String& key, T value)
262 : first(key), second(value)
263 {
264 }
265
266 DependentString first;
267 T second;
268 };
269
270 template<typename T>
271 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>>
272 {
273 typedef HashContainerReference<StringMapEntry<T>> super;
274 typedef typename super::entry_type entry_type;
275 typedef StringMap<T> map_type;
276
277 map_type* mMap;
278
279 #if defined(DEBUG)
280 typename map_type::size_type mInsertCounter;
281 typename map_type::size_type mHash;
282 #endif
283
284 StringMapEntryReference(map_type* map, const String& key, entry_type* entry)
285 : super(entry), mMap(map)
286 {
287 #if defined(DEBUG)
288 mInsertCounter = mMap->mInsertCounter;
289 mHash = mMap->hash(key);
290 #endif
291 }
292
293 void assign(const String& key, const T& value)
294 {
295 #if defined(DEBUG)
296 assert2(mInsertCounter == mMap->mInsertCounter,
297 u"There should be no insert operations performed between map.find() an d assign()"_str);
298 assert2(mHash == mMap->hash(key),
299 u"The keys used in map.find() and assign() should be identical"_str);
300 #endif
301
302 mMap->assign(this->mEntry, entry_type(key, value));
303 } 88 }
304 }; 89 };
305 } 90 }
306 91
307 class StringSet 92 using StringSet = Set<StringMap_internal::StringSetEntry>;
308 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry>
309 {
310 };
311 93
312 template<typename T> 94 template<typename Value>
313 class StringMap 95 using StringMap = Map<StringMap_internal::StringMapEntry<Value>>;
314 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>>
315 {
316 public:
317 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super;
318 typedef typename super::size_type size_type;
319 typedef typename super::entry_type entry_type;
320 typedef typename super::const_reference const_reference;
321 typedef StringMap_internal::StringMapEntryReference<T> reference;
322 friend struct StringMap_internal::StringMapEntryReference<T>;
323
324 explicit StringMap(size_type expectedEntries = 0)
325 : super(expectedEntries)
326 {
327 }
328
329 StringMap(std::initializer_list<entry_type> list)
330 : super(list.size())
331 {
332 for (const auto& item : list)
333 super::insert(item);
334 }
335
336 ~StringMap()
337 {
338 }
339
340 T& operator[](const String& key)
341 {
342 entry_type* entry = super::find_bucket(key);
343 if (entry->first.is_invalid())
344 entry = super::assign(entry, key);
345 return entry->second;
346 }
347
348 const_reference find(const String& key) const
349 {
350 return super::find(key);
351 }
352
353 reference find(const String& key)
354 {
355 return reference(this, key, super::find_bucket(key));
356 }
357 };
OLDNEW
« compiled/Map.h ('K') | « compiled/Map.h ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld