Left: | ||
Right: |
LEFT | RIGHT |
---|---|
1 #pragma once | 1 #pragma once |
2 | 2 |
3 #include <cstddef> | 3 #include <cstddef> |
4 #include <cmath> | 4 #include <cmath> |
5 #include <initializer_list> | 5 #include <initializer_list> |
6 #include <memory> | |
6 | 7 |
7 #include "String.h" | 8 #include "String.h" |
8 #include "debug.h" | 9 #include "debug.h" |
9 | 10 |
10 template<typename Entry> | 11 template<typename T> |
11 struct _HashContainerIterator | 12 class StringMap; |
sergei
2016/02/17 12:54:57
What do you think about moving all internal struct
Wladimir Palant
2016/02/18 16:07:07
Done.
| |
13 | |
14 namespace StringMap_internal | |
12 { | 15 { |
13 typedef Entry entry_type; | 16 template<typename Entry> |
14 typedef _HashContainerIterator<Entry> iterator; | 17 struct HashContainerIterator |
15 | 18 { |
16 const entry_type* mPos; | 19 typedef Entry entry_type; |
17 const entry_type* mEnd; | 20 typedef HashContainerIterator<Entry> iterator; |
18 | 21 |
19 explicit _HashContainerIterator(const entry_type* start, const entry_type* end ) | 22 const entry_type* mPos; |
20 : mPos(start), mEnd(end) | 23 const entry_type* mEnd; |
21 { | 24 |
22 if (mPos != mEnd && mPos->first.is_invalid()) | 25 explicit HashContainerIterator(const entry_type* start, const entry_type* en d) |
23 ++(*this); | 26 : mPos(start), mEnd(end) |
24 } | 27 { |
25 | 28 if (mPos != mEnd && mPos->first.is_invalid()) |
26 _HashContainerIterator() | 29 ++(*this); |
27 { | 30 } |
sergei
2016/02/17 12:54:55
It seems this destructor code is not needed.
Wladimir Palant
2016/02/18 16:07:03
Done.
| |
28 } | 31 |
29 | 32 const entry_type& operator*() const |
30 const entry_type& operator*() const | 33 { |
31 { | 34 return *mPos; |
32 return *mPos; | 35 } |
33 } | 36 |
34 | 37 const entry_type* operator->() const |
35 const entry_type* operator->() const | 38 { |
36 { | 39 return mPos; |
37 return mPos; | 40 } |
38 } | 41 |
39 | 42 iterator& operator++() |
40 iterator& operator++() | 43 { |
41 { | 44 do { |
42 do { | 45 ++mPos; |
43 ++mPos; | 46 } while(mPos != mEnd && mPos->first.is_invalid()); |
44 } while(mPos != mEnd && mPos->first.is_invalid()); | 47 return *this; |
45 return *this; | 48 } |
46 } | 49 |
47 | 50 bool operator==(const iterator& it) const |
48 bool operator==(const iterator& it) const | 51 { |
49 { | 52 return mPos == it.mPos; |
50 return mPos == it.mPos; | 53 } |
51 } | 54 |
52 | 55 bool operator!=(const iterator& it) const |
53 bool operator!=(const iterator& it) const | 56 { |
54 { | 57 return mPos != it.mPos; |
55 return mPos != it.mPos; | 58 } |
56 } | 59 }; |
60 | |
61 template<typename Entry> | |
62 struct HashContainerReference | |
63 { | |
64 typedef Entry entry_type; | |
65 | |
66 entry_type* mEntry; | |
67 | |
68 explicit HashContainerReference(entry_type* entry) | |
69 : mEntry(entry) | |
70 { | |
71 } | |
72 | |
73 const entry_type* operator->() const | |
74 { | |
75 return mEntry; | |
76 } | |
77 | |
78 operator bool() const | |
79 { | |
80 return !mEntry->first.is_invalid(); | |
81 } | |
82 }; | |
83 | |
84 template<typename Entry> | |
85 class HashContainer | |
86 { | |
87 public: | |
88 typedef Entry entry_type; | |
89 typedef size_t size_type; | |
90 typedef HashContainerIterator<Entry> const_iterator; | |
91 typedef HashContainerReference<const Entry> const_reference; | |
92 | |
93 private: | |
94 explicit HashContainer(const HashContainer& other); | |
95 void operator=(const HashContainer& other); | |
96 | |
97 protected: | |
98 static constexpr size_type MIN_BUCKETS = 1; | |
99 static constexpr double LOAD_FACTOR = 0.8; | |
100 std::unique_ptr<entry_type[]> mBuckets; | |
101 size_type mBucketCount; | |
102 size_type mEntryCount; | |
103 | |
104 #if defined(DEBUG) | |
105 size_type mInsertCounter; | |
106 #endif | |
107 | |
108 explicit HashContainer(size_type expectedEntries = 0) | |
109 : mEntryCount(0) | |
110 { | |
111 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
112 mBucketCount = MIN_BUCKETS; | |
113 while (mBucketCount < expectedEntries) | |
114 mBucketCount <<= 1; | |
115 | |
116 mBuckets.reset(new entry_type[mBucketCount]); | |
117 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
118 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
119 } | |
120 | |
121 static size_type hash(const String& str) | |
122 { | |
123 // FNV-1a hash function | |
124 size_type result = 2166136261; | |
125 for (String::size_type i = 0; i < str.length(); i++) | |
126 result = (result ^ str[i]) * 16777619; | |
127 return result; | |
128 } | |
129 | |
130 entry_type* find_bucket(const String& key) const | |
131 { | |
132 size_type h = hash(key); | |
133 | |
134 // This does quadratic probing, effectively the following formula is used: | |
135 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | |
136 for (size_type i = 0; ; ++i) | |
137 { | |
138 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | |
139 // h % mBucketCount but significantly faster. | |
140 entry_type* entry = &mBuckets[h & (mBucketCount - 1)]; | |
141 if (entry->first.is_invalid() || entry->first.equals(key)) | |
142 return entry; | |
143 h += i; | |
144 } | |
145 } | |
146 | |
147 void resize(size_type bucketCount) | |
148 { | |
149 std::unique_ptr<entry_type[]> oldBuckets(std::move(mBuckets)); | |
150 size_type oldCount = mBucketCount; | |
151 | |
152 mEntryCount = 0; | |
153 mBucketCount = bucketCount; | |
154 mBuckets.reset(new entry_type[mBucketCount]); | |
155 // Working around https://github.com/waywardmonkeys/emscripten-trace-colle ctor/issues/2 here | |
156 annotate_address(reinterpret_cast<size_type*>(mBuckets.get()) - 1, "Hash t able buffer"); | |
157 | |
158 // Copy old entries into the new buffer | |
159 for (size_type i = 0; i < oldCount; i++) | |
160 { | |
161 entry_type& entry = oldBuckets[i]; | |
162 if (!entry.first.is_invalid() && !entry.first.is_deleted()) | |
163 { | |
164 *find_bucket(entry.first) = entry; | |
165 mEntryCount++; | |
166 } | |
167 } | |
168 } | |
169 | |
170 entry_type* assign(entry_type* existing, const entry_type& entry) | |
171 { | |
172 if (existing->first.is_invalid()) | |
173 { | |
174 if (mEntryCount + 1 >= mBucketCount * LOAD_FACTOR) | |
175 { | |
176 resize(mBucketCount << 1); | |
177 existing = find_bucket(entry.first); | |
178 } | |
179 mEntryCount++; | |
180 #if defined(DEBUG) | |
181 mInsertCounter++; | |
182 #endif | |
183 } | |
184 *existing = entry; | |
185 return existing; | |
186 } | |
187 | |
188 public: | |
189 void insert(const entry_type& entry) | |
190 { | |
191 assign(find_bucket(entry.first), entry); | |
192 } | |
193 | |
194 bool erase(const String& key) | |
195 { | |
196 entry_type* entry = find_bucket(key); | |
197 if (entry->first.is_invalid()) | |
198 return false; | |
199 | |
200 entry->first.erase(); | |
201 return true; | |
202 } | |
203 | |
204 const_reference find(const String& key) const | |
205 { | |
206 return const_reference(find_bucket(key)); | |
207 } | |
208 | |
209 const_iterator begin() const | |
210 { | |
211 return const_iterator(&mBuckets[0], &mBuckets[mBucketCount]); | |
212 } | |
213 | |
214 const_iterator end() const | |
215 { | |
216 return const_iterator(&mBuckets[mBucketCount], &mBuckets[mBucketCount]); | |
217 } | |
218 }; | |
219 | |
220 struct StringSetEntry | |
221 { | |
222 StringSetEntry() {} | |
223 StringSetEntry(const String& key) | |
224 : first(key) | |
225 { | |
226 } | |
227 | |
228 DependentString first; | |
229 }; | |
230 | |
231 template<typename T> | |
232 struct StringMapEntry | |
233 { | |
234 StringMapEntry() {} | |
235 StringMapEntry(const String& key) | |
236 : first(key), second() | |
237 { | |
238 } | |
239 StringMapEntry(const String& key, T value) | |
240 : first(key), second(value) | |
241 { | |
242 } | |
243 | |
244 DependentString first; | |
245 T second; | |
246 }; | |
247 | |
248 template<typename T> | |
249 struct StringMapEntryReference : public HashContainerReference<StringMapEntry< T>> | |
250 { | |
251 typedef HashContainerReference<StringMapEntry<T>> super; | |
252 typedef typename super::entry_type entry_type; | |
253 typedef StringMap<T> map_type; | |
254 | |
255 map_type* mMap; | |
256 | |
257 #if defined(DEBUG) | |
258 typename map_type::size_type mInsertCounter; | |
259 typename map_type::size_type mHash; | |
260 #endif | |
261 | |
262 StringMapEntryReference(map_type* map, const String& key, entry_type* entry) | |
263 : super(entry), mMap(map) | |
264 { | |
265 #if defined(DEBUG) | |
266 mInsertCounter = mMap->mInsertCounter; | |
267 mHash = mMap->hash(key); | |
268 #endif | |
269 } | |
270 | |
271 void assign(const String& key, const T& value) | |
272 { | |
273 #if defined(DEBUG) | |
274 assert(mInsertCounter == mMap->mInsertCounter, | |
275 u"There should be no insert operations performed between map.find() an d assign()"_str); | |
276 assert(mHash == mMap->hash(key), | |
277 u"The keys used in map.find() and assign() should be identical"_str); | |
278 #endif | |
279 | |
280 mMap->assign(this->mEntry, entry_type(key, value)); | |
281 } | |
282 }; | |
283 } | |
284 | |
285 class StringSet | |
286 : public StringMap_internal::HashContainer<StringMap_internal::StringSetEntry> | |
287 { | |
57 }; | 288 }; |
58 | 289 |
59 template<typename Entry> | 290 template<typename T> |
60 class _HashContainer | 291 class StringMap |
292 : public StringMap_internal::HashContainer<StringMap_internal::StringMapEntry< T>> | |
61 { | 293 { |
62 public: | 294 public: |
63 typedef Entry entry_type; | 295 typedef StringMap_internal::HashContainer<StringMap_internal::StringMapEntry<T >> super; |
64 typedef size_t size_type; | |
65 typedef _HashContainerIterator<Entry> iterator; | |
sergei
2016/02/17 12:55:01
What do you think about renaming it to `const_iter
Wladimir Palant
2016/02/18 16:07:09
Done.
| |
66 | |
67 private: | |
68 _HashContainer(const _HashContainer& other) {} | |
69 void operator=(const _HashContainer& other) {} | |
sergei
2016/02/17 12:54:59
I guess, it's "noncopyable", it would be better t
Wladimir Palant
2016/02/18 16:07:04
Done.
| |
70 | |
71 protected: | |
72 static constexpr size_type MIN_BUCKETS = 1; | |
73 static constexpr double LOAD_FACTOR = 0.8; | |
74 entry_type* mBuckets; | |
sergei
2016/02/17 12:55:03
It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant
2016/02/18 16:07:05
Done.
| |
75 size_type mBucketCount; | |
76 size_type mEntryCount; | |
77 | |
78 explicit _HashContainer(size_type expectedEntries = 0) | |
79 : mEntryCount(0) | |
80 { | |
81 expectedEntries = ceil(expectedEntries / LOAD_FACTOR); | |
82 mBucketCount = MIN_BUCKETS; | |
83 while (mBucketCount < expectedEntries) | |
84 mBucketCount <<= 1; | |
85 | |
86 mBuckets = new entry_type[mBucketCount]; | |
87 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here | |
88 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer"); | |
89 } | |
90 | |
91 ~_HashContainer() | |
92 { | |
93 delete[] mBuckets; | |
94 } | |
95 | |
96 static size_type hash(const String& str) | |
97 { | |
98 // FNV-1a hash function | |
99 size_type result = 2166136261; | |
100 for (String::size_type i = 0; i < str.length(); i++) | |
101 result = (result ^ str[i]) * 16777619; | |
102 return result; | |
103 } | |
104 | |
105 entry_type* find_entry(const String& key) const | |
106 { | |
107 size_type h = hash(key); | |
108 | |
109 // This does quadratic probing, effectively the following formula is used: | |
110 // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount | |
111 for (size_type i = 0; ; ++i) | |
112 { | |
113 // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to | |
114 // h % mBucketCount but significantly faster. | |
115 entry_type* entry = mBuckets + (h & mBucketCount - 1); | |
116 if (entry->first.is_invalid() || entry->first.equals(key)) | |
117 return entry; | |
118 h += i; | |
119 } | |
120 } | |
121 | |
122 void resize(size_type bucketCount) | |
123 { | |
124 entry_type* oldBuckets = mBuckets; | |
sergei
2016/02/17 12:55:02
here we can also use std::unique_ptr<[]>
Wladimir Palant
2016/02/18 16:07:12
Done.
| |
125 size_type oldCount = mBucketCount; | |
126 | |
127 mEntryCount = 0; | |
sergei
2016/02/17 12:54:58
after executing of this method the member `mEntryC
Wladimir Palant
2016/02/18 16:07:08
It comes out identical right now, this is going to
sergei
2016/02/22 12:45:54
Acknowledged.
| |
128 mBucketCount = bucketCount; | |
129 mBuckets = new entry_type[mBucketCount]; | |
130 // Working around https://github.com/waywardmonkeys/emscripten-trace-collect or/issues/2 here | |
131 annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buf fer"); | |
132 | |
133 for (size_type i = 0; i < oldCount; i++) | |
134 { | |
135 entry_type& entry = oldBuckets[i]; | |
136 if (!entry.first.is_invalid()) | |
137 { | |
138 *find_entry(entry.first) = entry; | |
sergei
2016/02/17 12:54:56
It would be nice to explain it somewhere in commen
Wladimir Palant
2016/02/18 16:07:10
Not sure what these comments should explain but I
| |
139 mEntryCount++; | |
140 } | |
141 } | |
142 | |
143 delete[] oldBuckets; | |
144 } | |
145 | |
146 entry_type* assign(entry_type* existing, const entry_type& entry) | |
147 { | |
148 bool added = existing->first.is_invalid(); | |
149 *existing = entry; | |
sergei
2016/02/17 12:55:00
Wouldn't it be better to firstly resize and only t
Wladimir Palant
2016/02/18 16:07:06
I've restructured the code to assign after resizin
sergei
2016/02/22 12:45:51
Acknowledged.
| |
150 if (added) | |
151 { | |
152 mEntryCount++; | |
153 if (mEntryCount >= mBucketCount * LOAD_FACTOR) | |
154 { | |
155 resize(mBucketCount << 1); | |
156 existing = find_entry(entry.first); | |
157 } | |
158 } | |
159 return existing; | |
160 } | |
161 | |
162 public: | |
163 void insert(const entry_type& entry) | |
164 { | |
165 assign(find_entry(entry.first), entry); | |
166 } | |
167 | |
168 iterator find(const String& key) const | |
169 { | |
170 entry_type* entry = find_entry(key); | |
171 if (entry->first.is_invalid()) | |
172 return end(); | |
173 else | |
174 return iterator(entry, mBuckets + mBucketCount); | |
175 } | |
176 | |
177 iterator begin() const | |
178 { | |
179 return iterator(mBuckets, mBuckets + mBucketCount); | |
180 } | |
181 | |
182 iterator end() const | |
183 { | |
184 return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); | |
185 } | |
186 }; | |
187 | |
188 struct _StringSetEntry | |
189 { | |
190 _StringSetEntry() {} | |
191 _StringSetEntry(const String& key) | |
192 : first(key) | |
193 { | |
194 } | |
195 | |
196 DependentString first; | |
197 }; | |
198 | |
199 class StringSet : public _HashContainer<_StringSetEntry> | |
200 { | |
201 }; | |
202 | |
203 template<typename T> | |
204 struct _StringMapEntry | |
205 { | |
206 _StringMapEntry() {} | |
207 _StringMapEntry(const String& key) | |
208 : first(key) | |
209 { | |
210 } | |
211 _StringMapEntry(const String& key, T value) | |
212 : first(key), second(value) | |
213 { | |
214 } | |
215 | |
216 DependentString first; | |
217 T second; | |
218 }; | |
219 | |
220 template<typename T> | |
221 class StringMap : public _HashContainer<_StringMapEntry<T>> | |
222 { | |
223 public: | |
224 typedef _HashContainer<_StringMapEntry<T>> super; | |
225 typedef typename super::size_type size_type; | 296 typedef typename super::size_type size_type; |
226 typedef typename super::entry_type entry_type; | 297 typedef typename super::entry_type entry_type; |
298 typedef typename super::const_reference const_reference; | |
299 typedef StringMap_internal::StringMapEntryReference<T> reference; | |
300 friend class StringMap_internal::StringMapEntryReference<T>; | |
227 | 301 |
228 explicit StringMap(size_type expectedEntries = 0) | 302 explicit StringMap(size_type expectedEntries = 0) |
229 : super(expectedEntries) | 303 : super(expectedEntries) |
230 { | 304 { |
231 } | 305 } |
232 | 306 |
233 StringMap(std::initializer_list<entry_type> list) | 307 StringMap(std::initializer_list<entry_type> list) |
234 : super(list.size()) | 308 : super(list.size()) |
235 { | 309 { |
236 for (auto it = list.begin(); it != list.end(); ++it) | 310 for (auto it = list.begin(); it != list.end(); ++it) |
237 super::insert(*it); | 311 super::insert(*it); |
238 } | 312 } |
239 | 313 |
240 ~StringMap() | 314 ~StringMap() |
241 { | 315 { |
242 } | 316 } |
243 | 317 |
244 T& operator[](const String& key) | 318 T& operator[](const String& key) |
sergei
2016/02/17 12:55:04
What do you think about having in addition a const
Wladimir Palant
2016/02/18 16:07:11
This is how STL does it, if you don't want to crea
sergei
2016/02/22 12:45:53
Acknowledged.
| |
245 { | 319 { |
246 entry_type* entry = super::find_entry(key); | 320 entry_type* entry = super::find_bucket(key); |
247 if (entry->first.is_invalid()) | 321 if (entry->first.is_invalid()) |
248 entry = super::assign(entry, key); | 322 entry = super::assign(entry, key); |
249 return entry->second; | 323 return entry->second; |
250 } | 324 } |
325 | |
326 const_reference find(const String& key) const | |
327 { | |
328 return super::find(key); | |
329 } | |
330 | |
331 reference find(const String& key) | |
332 { | |
333 return reference(this, key, super::find_bucket(key)); | |
334 } | |
251 }; | 335 }; |
LEFT | RIGHT |