Index: compiled/StringMap.h |
=================================================================== |
new file mode 100644 |
--- /dev/null |
+++ b/compiled/StringMap.h |
@@ -0,0 +1,251 @@ |
+#pragma once |
+ |
+#include <cstddef> |
+#include <cmath> |
+#include <initializer_list> |
+ |
+#include "String.h" |
+#include "debug.h" |
+ |
+template<typename Entry> |
+struct _HashContainerIterator |
sergei
2016/02/17 12:54:57
What do you think about moving all internal struct
Wladimir Palant
2016/02/18 16:07:07
Done.
|
+{ |
+ typedef Entry entry_type; |
+ typedef _HashContainerIterator<Entry> iterator; |
+ |
+ const entry_type* mPos; |
+ const entry_type* mEnd; |
+ |
+ explicit _HashContainerIterator(const entry_type* start, const entry_type* end) |
+ : mPos(start), mEnd(end) |
+ { |
+ if (mPos != mEnd && mPos->first.is_invalid()) |
+ ++(*this); |
+ } |
+ |
+ _HashContainerIterator() |
+ { |
sergei
2016/02/17 12:54:55
It seems this destructor code is not needed.
Wladimir Palant
2016/02/18 16:07:03
Done.
|
+ } |
+ |
+ const entry_type& operator*() const |
+ { |
+ return *mPos; |
+ } |
+ |
+ const entry_type* operator->() const |
+ { |
+ return mPos; |
+ } |
+ |
+ iterator& operator++() |
+ { |
+ do { |
+ ++mPos; |
+ } while(mPos != mEnd && mPos->first.is_invalid()); |
+ return *this; |
+ } |
+ |
+ bool operator==(const iterator& it) const |
+ { |
+ return mPos == it.mPos; |
+ } |
+ |
+ bool operator!=(const iterator& it) const |
+ { |
+ return mPos != it.mPos; |
+ } |
+}; |
+ |
+template<typename Entry> |
+class _HashContainer |
+{ |
+public: |
+ typedef Entry entry_type; |
+ typedef size_t size_type; |
+ typedef _HashContainerIterator<Entry> iterator; |
sergei
2016/02/17 12:55:01
What do you think about renaming it to `const_iter
Wladimir Palant
2016/02/18 16:07:09
Done.
|
+ |
+private: |
+ _HashContainer(const _HashContainer& other) {} |
+ void operator=(const _HashContainer& other) {} |
sergei
2016/02/17 12:54:59
I guess, it's "noncopyable", it would be better t
Wladimir Palant
2016/02/18 16:07:04
Done.
|
+ |
+protected: |
+ static constexpr size_type MIN_BUCKETS = 1; |
+ static constexpr double LOAD_FACTOR = 0.8; |
+ entry_type* mBuckets; |
sergei
2016/02/17 12:55:03
It can be std::unique_ptr<entry_type[]>, however I
Wladimir Palant
2016/02/18 16:07:05
Done.
|
+ size_type mBucketCount; |
+ size_type mEntryCount; |
+ |
+ explicit _HashContainer(size_type expectedEntries = 0) |
+ : mEntryCount(0) |
+ { |
+ expectedEntries = ceil(expectedEntries / LOAD_FACTOR); |
+ mBucketCount = MIN_BUCKETS; |
+ while (mBucketCount < expectedEntries) |
+ mBucketCount <<= 1; |
+ |
+ mBuckets = new entry_type[mBucketCount]; |
+ // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here |
+ annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer"); |
+ } |
+ |
+ ~_HashContainer() |
+ { |
+ delete[] mBuckets; |
+ } |
+ |
+ static size_type hash(const String& str) |
+ { |
+ // FNV-1a hash function |
+ size_type result = 2166136261; |
+ for (String::size_type i = 0; i < str.length(); i++) |
+ result = (result ^ str[i]) * 16777619; |
+ return result; |
+ } |
+ |
+ entry_type* find_entry(const String& key) const |
+ { |
+ size_type h = hash(key); |
+ |
+ // This does quadratic probing, effectively the following formula is used: |
+ // pos = (hash + 1/2 i + 1/2 i ^ 2) mod bucketCount |
+ for (size_type i = 0; ; ++i) |
+ { |
+ // mBucketCount is 2^n so (h & mBucketCount - 1) is equivalent to |
+ // h % mBucketCount but significantly faster. |
+ entry_type* entry = mBuckets + (h & mBucketCount - 1); |
+ if (entry->first.is_invalid() || entry->first.equals(key)) |
+ return entry; |
+ h += i; |
+ } |
+ } |
+ |
+ void resize(size_type bucketCount) |
+ { |
+ entry_type* oldBuckets = mBuckets; |
sergei
2016/02/17 12:55:02
here we can also use std::unique_ptr<[]>
Wladimir Palant
2016/02/18 16:07:12
Done.
|
+ size_type oldCount = mBucketCount; |
+ |
+ mEntryCount = 0; |
sergei
2016/02/17 12:54:58
after executing of this method the member `mEntryC
Wladimir Palant
2016/02/18 16:07:08
It comes out identical right now, this is going to
sergei
2016/02/22 12:45:54
Acknowledged.
|
+ mBucketCount = bucketCount; |
+ mBuckets = new entry_type[mBucketCount]; |
+ // Working around https://github.com/waywardmonkeys/emscripten-trace-collector/issues/2 here |
+ annotate_address(reinterpret_cast<size_type*>(mBuckets) - 1, "Hash table buffer"); |
+ |
+ for (size_type i = 0; i < oldCount; i++) |
+ { |
+ entry_type& entry = oldBuckets[i]; |
+ if (!entry.first.is_invalid()) |
+ { |
+ *find_entry(entry.first) = entry; |
sergei
2016/02/17 12:54:56
It would be nice to explain it somewhere in commen
Wladimir Palant
2016/02/18 16:07:10
Not sure what these comments should explain but I
|
+ mEntryCount++; |
+ } |
+ } |
+ |
+ delete[] oldBuckets; |
+ } |
+ |
+ entry_type* assign(entry_type* existing, const entry_type& entry) |
+ { |
+ bool added = existing->first.is_invalid(); |
+ *existing = entry; |
sergei
2016/02/17 12:55:00
Wouldn't it be better to firstly resize and only t
Wladimir Palant
2016/02/18 16:07:06
I've restructured the code to assign after resizin
sergei
2016/02/22 12:45:51
Acknowledged.
|
+ if (added) |
+ { |
+ mEntryCount++; |
+ if (mEntryCount >= mBucketCount * LOAD_FACTOR) |
+ { |
+ resize(mBucketCount << 1); |
+ existing = find_entry(entry.first); |
+ } |
+ } |
+ return existing; |
+ } |
+ |
+public: |
+ void insert(const entry_type& entry) |
+ { |
+ assign(find_entry(entry.first), entry); |
+ } |
+ |
+ iterator find(const String& key) const |
+ { |
+ entry_type* entry = find_entry(key); |
+ if (entry->first.is_invalid()) |
+ return end(); |
+ else |
+ return iterator(entry, mBuckets + mBucketCount); |
+ } |
+ |
+ iterator begin() const |
+ { |
+ return iterator(mBuckets, mBuckets + mBucketCount); |
+ } |
+ |
+ iterator end() const |
+ { |
+ return iterator(mBuckets + mBucketCount, mBuckets + mBucketCount); |
+ } |
+}; |
+ |
+struct _StringSetEntry |
+{ |
+ _StringSetEntry() {} |
+ _StringSetEntry(const String& key) |
+ : first(key) |
+ { |
+ } |
+ |
+ DependentString first; |
+}; |
+ |
+class StringSet : public _HashContainer<_StringSetEntry> |
+{ |
+}; |
+ |
+template<typename T> |
+struct _StringMapEntry |
+{ |
+ _StringMapEntry() {} |
+ _StringMapEntry(const String& key) |
+ : first(key) |
+ { |
+ } |
+ _StringMapEntry(const String& key, T value) |
+ : first(key), second(value) |
+ { |
+ } |
+ |
+ DependentString first; |
+ T second; |
+}; |
+ |
+template<typename T> |
+class StringMap : public _HashContainer<_StringMapEntry<T>> |
+{ |
+public: |
+ typedef _HashContainer<_StringMapEntry<T>> super; |
+ typedef typename super::size_type size_type; |
+ typedef typename super::entry_type entry_type; |
+ |
+ explicit StringMap(size_type expectedEntries = 0) |
+ : super(expectedEntries) |
+ { |
+ } |
+ |
+ StringMap(std::initializer_list<entry_type> list) |
+ : super(list.size()) |
+ { |
+ for (auto it = list.begin(); it != list.end(); ++it) |
+ super::insert(*it); |
+ } |
+ |
+ ~StringMap() |
+ { |
+ } |
+ |
+ T& operator[](const String& key) |
sergei
2016/02/17 12:55:04
What do you think about having in addition a const
Wladimir Palant
2016/02/18 16:07:11
This is how STL does it, if you don't want to crea
sergei
2016/02/22 12:45:53
Acknowledged.
|
+ { |
+ entry_type* entry = super::find_entry(key); |
+ if (entry->first.is_invalid()) |
+ entry = super::assign(entry, key); |
+ return entry->second; |
+ } |
+}; |