diff --git a/dbms/include/DB/Common/HashTable/HashTable.h b/dbms/include/DB/Common/HashTable/HashTable.h
index 4b7be434a0b..c4b797c7439 100644
--- a/dbms/include/DB/Common/HashTable/HashTable.h
+++ b/dbms/include/DB/Common/HashTable/HashTable.h
@@ -189,7 +189,7 @@ struct ZeroValueStorage<true, Cell>
 {
 private:
 	bool has_zero = false;
-	char zero_value_storage[sizeof(Cell)];	/// Кусок памяти для элемента с ключём 0.
+	char zero_value_storage[sizeof(Cell)] __attribute__((__aligned__(__alignof__(Cell))));	/// Raw storage, aligned as Cell requires, for the element whose key is 0.
 
 public:
 	bool hasZero() const { return has_zero; }
diff --git a/dbms/include/DB/Common/HashTable/TwoLevelHashTable.h b/dbms/include/DB/Common/HashTable/TwoLevelHashTable.h
index 51a54590d04..d80d8cae97f 100644
--- a/dbms/include/DB/Common/HashTable/TwoLevelHashTable.h
+++ b/dbms/include/DB/Common/HashTable/TwoLevelHashTable.h
@@ -40,34 +40,28 @@ protected:
 	size_t m_size = 0;		/// Количество элементов
 
 	size_t hash(const Key & x) const { return Hash::operator()(x); }
-	size_t bucket(size_t hash_value) const { return hash_value >> 24; }	/// TODO: брать не настолько младший байт.
+	size_t bucket(size_t hash_value) const { return hash_value >> 56; }	/// Bucket = the top byte of the hash; the shift by 56 assumes a 64-bit size_t.
 
 	typename Impl::iterator beginOfNextNonEmptyBucket(size_t & bucket)
 	{
-		do
-		{
+		while (bucket != NUM_BUCKETS && impls[bucket].empty())	/// Scans from `bucket` inclusive; on exit `bucket` is the first non-empty index or NUM_BUCKETS.
 			++bucket;
-		}
-		while (bucket != NUM_BUCKETS && !impls[bucket].empty());
 
 		if (bucket != NUM_BUCKETS)
 			return impls[bucket].begin();
 
-		return impls[NUM_BUCKETS - 1].end();
+		return impls[MAX_BUCKET].end();	/// Nothing left: the end of the last bucket is returned.
 	}
 
 	typename Impl::const_iterator beginOfNextNonEmptyBucket(size_t & bucket) const
 	{
-		do
-		{
+		while (bucket != NUM_BUCKETS && impls[bucket].empty())	/// Scans from `bucket` inclusive; on exit `bucket` is the first non-empty index or NUM_BUCKETS.
 			++bucket;
-		}
-		while (bucket != NUM_BUCKETS && !impls[bucket].empty());
 
 		if (bucket != NUM_BUCKETS)
 			return impls[bucket].begin();
 
-		return impls[NUM_BUCKETS - 1].end();
+		return impls[MAX_BUCKET].end();	/// Nothing left: the end of the last bucket is returned.
 	}
 
 public:
@@ -75,19 +69,20 @@ public:
 	typedef typename Impl::value_type value_type;
 
 	static constexpr size_t NUM_BUCKETS = 256;
+	static constexpr size_t MAX_BUCKET = NUM_BUCKETS - 1;
 	Impl impls[NUM_BUCKETS];
 
 
 	class iterator
 	{
-		Impl * impls;
+		Self * container;	/// The table being iterated; operator++ reaches the following buckets through it.
 		size_t bucket;
 		typename Impl::iterator current_it;
 
 		friend class TwoLevelHashTable;
 
-		iterator(Impl * impls_, size_t bucket_, typename Impl::iterator & current_it_)
-			: impls(impls_), bucket(bucket_), current_it(current_it_) {}
+		iterator(Self * container_, size_t bucket_, typename Impl::iterator current_it_)	/// Inner iterator is taken by value, so a temporary such as impls[i].end() can be passed.
+			: container(container_), bucket(bucket_), current_it(current_it_) {}
 
 	public:
 		iterator() {}
@@ -98,8 +93,11 @@ public:
 		iterator & operator++()
 		{
 			++current_it;
-			if (current_it == impls[bucket].end())
-				current_it = beginOfNextNonEmptyBucket(bucket);
+			if (current_it == container->impls[bucket].end())	/// The current bucket is exhausted.
+			{
+				++bucket;	/// Step past it first: the search below examines `bucket` itself.
+				current_it = container->beginOfNextNonEmptyBucket(bucket);
+			}
 
 			return *this;
 		}
@@ -111,18 +109,18 @@ public:
 
 	class const_iterator
 	{
-		Impl * impls;
+		const Self * container;	/// Pointer-to-const: begin() const, end() const and find() const construct this from their const `this`.
 		size_t bucket;
 		typename Impl::const_iterator current_it;
 
 		friend class TwoLevelHashTable;
 
-		const_iterator(Impl * impls_, size_t bucket_, typename Impl::const_iterator & current_it_)
-			: impls(impls_), bucket(bucket_), current_it(current_it_) {}
+		const_iterator(const Self * container_, size_t bucket_, typename Impl::const_iterator current_it_)	/// Inner iterator is taken by value, so a temporary such as impls[i].end() can be passed.
+			: container(container_), bucket(bucket_), current_it(current_it_) {}
 
 	public:
 		const_iterator() {}
-		const_iterator(const iterator & rhs) : impls(rhs.impls), current_it(rhs.current_it), bucket(rhs.bucket) {}
+		const_iterator(const iterator & rhs) : container(rhs.container), bucket(rhs.bucket), current_it(rhs.current_it) {}	/// Initializers follow the member declaration order.
 
 		bool operator== (const const_iterator & rhs) const { return current_it == rhs.current_it; }
 		bool operator!= (const const_iterator & rhs) const { return current_it != rhs.current_it; }
@@ -130,8 +128,11 @@ public:
 		const_iterator & operator++()
 		{
 			++current_it;
-			if (current_it == impls[bucket].end())
-				current_it = beginOfNextNonEmptyBucket(bucket);
+			if (current_it == container->impls[bucket].end())	/// The current bucket is exhausted.
+			{
+				++bucket;	/// Step past it first: the search below examines `bucket` itself.
+				current_it = container->beginOfNextNonEmptyBucket(bucket);
+			}
 
 			return *this;
 		}
@@ -144,17 +145,19 @@ public:
 	const_iterator begin() const
 	{
 		size_t buck = 0;
-		return beginOfNextNonEmptyBucket(buck);
+		typename Impl::const_iterator impl_it = beginOfNextNonEmptyBucket(buck);	/// Also sets buck to the first non-empty bucket (NUM_BUCKETS if every bucket is empty).
+		return { this, buck, impl_it };
 	}
 
 	iterator begin()
 	{
 		size_t buck = 0;
-		return beginOfNextNonEmptyBucket(buck);
+		typename Impl::iterator impl_it = beginOfNextNonEmptyBucket(buck);	/// Also sets buck to the first non-empty bucket (NUM_BUCKETS if every bucket is empty).
+		return { this, buck, impl_it };
 	}
 
-	const_iterator end() const 		{ return impls[NUM_BUCKETS - 1].end(); }
-	iterator end() 					{ return impls[NUM_BUCKETS - 1].end(); }
+	const_iterator end() const 		{ return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }	/// End of the table is the end of the last bucket.
+	iterator end() 					{ return { this, MAX_BUCKET, impls[MAX_BUCKET].end() }; }	/// Same position as the const overload.
 
 
 	/// Вставить значение. В случае хоть сколько-нибудь сложных значений, лучше используйте функцию emplace.
@@ -196,7 +199,7 @@ public:
 		size_t buck = bucket(hash_value);
 		typename Impl::iterator impl_it;
 		impls[buck].emplace(x, impl_it, inserted);
-		it = iterator(impls, buck, impl_it);
+		it = iterator(this, buck, impl_it);
 
 		if (inserted)
 			++m_size;
@@ -210,7 +213,7 @@ public:
 
 		typename Impl::iterator found = impls[buck].find(x);
 		return found != impls[buck].end()
-			? iterator(impls, buck, found)
+			? iterator(this, buck, found)
 			: end();
 	}
 
@@ -222,7 +225,7 @@ public:
 
 		typename Impl::const_iterator found = impls[buck].find(x);
 		return found != impls[buck].end()
-			? const_iterator(impls, buck, found)
+			? const_iterator(this, buck, found)
 			: end();
 	}
 
diff --git a/dbms/src/Interpreters/tests/hash_map_string.cpp b/dbms/src/Interpreters/tests/hash_map_string.cpp
index ed8a655bf43..1c3b5d33bf7 100644
--- a/dbms/src/Interpreters/tests/hash_map_string.cpp
+++ b/dbms/src/Interpreters/tests/hash_map_string.cpp
@@ -81,35 +81,160 @@ struct DefaultHash<CompactStringRef>
 };
 
 
+#define mix(h) ({                   \
+	(h) ^= (h) >> 23;               \
+	(h) *= 0x2127599bf4325c37ULL;   \
+	(h) ^= (h) >> 47; })	/// Modifies its argument in place and yields the mixed value (uses a GNU statement expression).
+/// Hash functor for CompactStringRef keys: consumes the key in 8-byte words, then the 1..7-byte tail, and finishes with mix().
+struct FastHash64
+{
+	size_t operator() (CompactStringRef x) const
+	{
+		const char * buf = x.data();
+		size_t len = x.size;
+
+		const uint64_t    m = 0x880355f21e6d1965ULL;
+		const uint64_t *pos = (const uint64_t *)buf;
+		const uint64_t *end = pos + (len / 8);	/// One past the last whole 8-byte word.
+		const unsigned char *pos2;
+		uint64_t h = len * m;
+		uint64_t v;
+
+		while (pos != end) {
+			v  = *pos++;	/// NOTE(review): loads buf as uint64_t directly - presumably relies on unaligned loads being acceptable on the target; confirm.
+			h ^= mix(v);
+			h *= m;
+		}
+
+		pos2 = (const unsigned char*)pos;
+		v = 0;
+
+		switch (len & 7) {	/// Cases fall through: each one adds one more tail byte to v.
+		case 7: v ^= (uint64_t)pos2[6] << 48;
+		case 6: v ^= (uint64_t)pos2[5] << 40;
+		case 5: v ^= (uint64_t)pos2[4] << 32;
+		case 4: v ^= (uint64_t)pos2[3] << 24;
+		case 3: v ^= (uint64_t)pos2[2] << 16;
+		case 2: v ^= (uint64_t)pos2[1] << 8;
+		case 1: v ^= (uint64_t)pos2[0];
+			h ^= mix(v);	/// Reached only when a tail exists; len % 8 == 0 skips the whole switch.
+			h *= m;
+		}
+
+		return mix(h);
+	}
+};
+
+/// Hash functor for CompactStringRef keys written as x86-64 GCC extended inline assembly; the seed is fixed at 0.
+struct CrapWow
+{
+	size_t operator() (CompactStringRef x) const
+	{
+		const char * key = x.data();
+		size_t len = x.size;
+		size_t seed = 0;
+
+		const UInt64 m = 0x95b47aa3355ba1a1, n = 0x8a970be7488fda55;
+	    UInt64 hash;
+	    // Operands: %3 = m, %4 = n. On entry rax = seed, rcx = len, rdx = key.
+	    // r12 = h, r13 = k, r14 = current data pointer, r15 = saved len.
+	    asm(
+	        "leaq (%%rcx,%4), %%r13\n"
+	        "movq %%rdx, %%r14\n"
+	        "movq %%rcx, %%r15\n"
+	        "movq %%rcx, %%r12\n"
+	        "addq %%rax, %%r13\n"
+	        "andq $0xfffffffffffffff0, %%rcx\n"	// rcx = len rounded down to a multiple of 16.
+	        "jz QW%=\n"
+	        "addq %%rcx, %%r14\n\n"
+	        "negq %%rcx\n"
+	    "XW%=:\n"	// Main loop: two 8-byte words per iteration.
+	        "movq %4, %%rax\n"
+	        "mulq (%%r14,%%rcx)\n"
+	        "xorq %%rax, %%r12\n"
+	        "xorq %%rdx, %%r13\n"
+	        "movq %3, %%rax\n"
+	        "mulq 8(%%r14,%%rcx)\n"
+	        "xorq %%rdx, %%r12\n"
+	        "xorq %%rax, %%r13\n"
+	        "addq $16, %%rcx\n"
+	        "jnz XW%=\n"
+	    "QW%=:\n"	// One more whole 8-byte word if bit 3 of len is set.
+	        "movq %%r15, %%rcx\n"
+	        "andq $8, %%r15\n"
+	        "jz B%=\n"
+	        "movq %4, %%rax\n"
+	        "mulq (%%r14)\n"
+	        "addq $8, %%r14\n"
+	        "xorq %%rax, %%r12\n"
+	        "xorq %%rdx, %%r13\n"
+	    "B%=:\n"	// Remaining 1..7 bytes, selected with a byte mask.
+	        "andq $7, %%rcx\n"
+	        "jz F%=\n"
+	        "movq $1, %%rdx\n"
+	        "shlq $3, %%rcx\n"
+	        "movq %3, %%rax\n"
+	        "shlq %%cl, %%rdx\n"
+	        "addq $-1, %%rdx\n"
+	        "andq (%%r14), %%rdx\n"	// NOTE(review): this is an 8-byte load, so it touches up to 7 bytes past the end of the key.
+	        "mulq %%rdx\n"
+	        "xorq %%rdx, %%r12\n"
+	        "xorq %%rax, %%r13\n"
+	    "F%=:\n"	// Final combination of h and k into rax.
+	        "leaq (%%r13,%4), %%rax\n"
+	        "xorq %%r12, %%rax\n"
+	        "mulq %4\n"
+	        "xorq %%rdx, %%rax\n"
+	        "xorq %%r12, %%rax\n"
+	        "xorq %%r13, %%rax\n"
+	        : "=a"(hash), "=c"(key), "=d"(key)	// rcx and rdx are listed as outputs because the code overwrites them.
+	        : "r"(m), "r"(n), "a"(seed), "c"(len), "d"(key)
+	        : "%r12", "%r13", "%r14", "%r15", "cc", "memory"	// "memory": the key bytes are read through a pointer that no operand describes.
+	    );
+	    return hash;
+	}
+};
+
+/// Hash functor for CompactStringRef keys: folds the key through intHash64 one 8-byte word at a time.
+struct SimpleHash
+{
+	size_t operator() (CompactStringRef x) const
+	{
+		const char * pos = x.data();
+		size_t size = x.size;
+
+		const char * end = pos + size;
+
+		size_t res = 0;
+
+		if (size == 0)
+			return 0;
+
+		if (size < 8)	/// Short key: copy its bytes into the zeroed word and hash that once.
+		{
+			memcpy(reinterpret_cast<char *>(&res), pos, size);
+			return intHash64(res);
+		}
+
+		while (pos + 8 < end)	/// Stops while 1..8 bytes remain; those are covered by the final read below.
+		{
+			UInt64 word = *reinterpret_cast<const UInt64 *>(pos);
+			res = intHash64(word ^ res);
+
+			pos += 8;
+		}
+
+		UInt64 word = *reinterpret_cast<const UInt64 *>(end - 8);	/// Last 8 bytes of the key; may overlap the word read just before.
+		res = intHash64(word ^ res);
+
+		return res;
+	}
+};
+
+
 typedef CompactStringRef Key;
 typedef UInt64 Value;
 
-struct CellWithSavedHash : public HashMapCell<Key, Value, DefaultHash<Key> >
-{
-	size_t saved_hash;
-
-	CellWithSavedHash() : HashMapCell() {}
-	CellWithSavedHash(const Key & key_, const State & state) : HashMapCell(key_, state) {}
-	CellWithSavedHash(const value_type & value_, const State & state) : HashMapCell(value_, state) {}
-
-/*	static bool equals(const StringRef & lhs, const StringRef & rhs)
-	{
-		if (lhs.size != rhs.size)
-			return false;
-
-		for (size_t pos = lhs.size - 1; pos < lhs.size; --pos)
-			if (lhs.data[pos] != rhs.data[pos])
-				return false;
-
-		return true;
-	}*/
-
-	bool keyEquals(const Key & key_) const { return value.first == key_; }
-	bool keyEquals(const CellWithSavedHash & other) const { return saved_hash == other.saved_hash && value.first == other.value.first; }
-
-	void setHash(size_t hash_value) { saved_hash = hash_value; }
-	size_t getHash(const DefaultHash<Key> & hash) const { return saved_hash; }
-};
 
 struct Grower : public HashTableGrower<>
 {
@@ -188,7 +313,7 @@ int main(int argc, char ** argv)
 		//typedef HashMap<Key, Value> Map;
 
 		/// Сохранение хэша ускоряет ресайзы примерно в 2 раза, и общую производительность - на 6-8%.
-		typedef HashMapTable<Key, CellWithSavedHash, DefaultHash<Key>, Grower> Map;
+		typedef HashMapWithSavedHash<Key, Value, DefaultHash<Key>, Grower> Map;
 
 		Map map;
 		Map::iterator it;
@@ -204,7 +329,7 @@ int main(int argc, char ** argv)
 
 		watch.stop();
 		std::cerr << std::fixed << std::setprecision(2)
-			<< "HashMap. Size: " << map.size()
+			<< "HashMap (CityHash64). Size: " << map.size()
 			<< ", elapsed: " << watch.elapsedSeconds()
 			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
 #ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
@@ -217,6 +342,93 @@ int main(int argc, char ** argv)
 	{
 		Stopwatch watch;
 
+		typedef HashMapWithSavedHash<Key, Value, FastHash64, Grower> Map;
+
+		Map map;
+		Map::iterator it;
+		bool inserted;
+
+		for (size_t i = 0; i < n; ++i)
+		{
+			map.emplace(data[i], it, inserted);
+			if (inserted)
+				it->second = 0;
+			++it->second;
+		}
+
+		watch.stop();
+		std::cerr << std::fixed << std::setprecision(2)
+			<< "HashMap (FastHash64). Size: " << map.size()
+			<< ", elapsed: " << watch.elapsedSeconds()
+			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			<< ", collisions: " << map.getCollisions()
+#endif
+			<< std::endl;
+	}
+
+	if (!m || m == 3)
+	{
+		Stopwatch watch;
+
+		typedef HashMapWithSavedHash<Key, Value, CrapWow, Grower> Map;
+
+		Map map;
+		Map::iterator it;
+		bool inserted;
+
+		for (size_t i = 0; i < n; ++i)
+		{
+			map.emplace(data[i], it, inserted);
+			if (inserted)
+				it->second = 0;
+			++it->second;
+		}
+
+		watch.stop();
+		std::cerr << std::fixed << std::setprecision(2)
+			<< "HashMap (CrapWow). Size: " << map.size()
+			<< ", elapsed: " << watch.elapsedSeconds()
+			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			<< ", collisions: " << map.getCollisions()
+#endif
+			<< std::endl;
+	}
+
+	if (!m || m == 4)
+	{
+		Stopwatch watch;
+
+		typedef HashMapWithSavedHash<Key, Value, SimpleHash, Grower> Map;
+
+		Map map;
+		Map::iterator it;
+		bool inserted;
+
+		for (size_t i = 0; i < n; ++i)
+		{
+			map.emplace(data[i], it, inserted);
+			if (inserted)
+				it->second = 0;
+			++it->second;
+		}
+
+		watch.stop();
+		std::cerr << std::fixed << std::setprecision(2)
+			<< "HashMap (SimpleHash). Size: " << map.size()
+			<< ", elapsed: " << watch.elapsedSeconds()
+			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+			<< ", collisions: " << map.getCollisions()
+#endif
+			<< std::endl;
+	}
+
+	if (!m || m == 5)
+	{
+		Stopwatch watch;
+
 		std::unordered_map<Key, Value, DefaultHash<Key> > map;
 		for (size_t i = 0; i < n; ++i)
 			++map[data[i]];
@@ -229,7 +441,7 @@ int main(int argc, char ** argv)
 			<< std::endl;
 	}
 
-	if (!m || m == 3)
+	if (!m || m == 6)
 	{
 		Stopwatch watch;
 
@@ -246,7 +458,7 @@ int main(int argc, char ** argv)
 			<< std::endl;
 	}
 
-	if (!m || m == 4)
+	if (!m || m == 7)
 	{
 		Stopwatch watch;
 
diff --git a/dbms/src/Interpreters/tests/two_level_hash_map.cpp b/dbms/src/Interpreters/tests/two_level_hash_map.cpp
index 41b921c7e50..26df5c372d7 100644
--- a/dbms/src/Interpreters/tests/two_level_hash_map.cpp
+++ b/dbms/src/Interpreters/tests/two_level_hash_map.cpp
@@ -7,10 +7,11 @@
 #include <sparsehash/dense_hash_map>
 #include <sparsehash/sparse_hash_map>
 
-#include <statdaemons/Stopwatch.h>
-
 //#define DBMS_HASH_MAP_DEBUG_RESIZES
 
+#include <statdaemons/Stopwatch.h>
+#include <stats/UniquesHashSet.h>
+
 #include <DB/Core/Types.h>
 #include <DB/IO/ReadBufferFromFile.h>
 #include <DB/IO/CompressedReadBuffer.h>
@@ -48,6 +49,8 @@ int main(int argc, char ** argv)
 	{
 		Stopwatch watch;
 
+		std::cerr << sizeof(HashMapCell<Key, Value, DefaultHash<Key> >) << std::endl;
+
 		typedef TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator> Map;
 
 		Map map;
@@ -68,6 +71,58 @@ int main(int argc, char ** argv)
 			<< ", elapsed: " << watch.elapsedSeconds()
 			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
 			<< std::endl;
+
+		size_t sum_counts = 0;
+		size_t elems = 0;
+		for (const auto & kv : map)
+		{
+			sum_counts += kv.second;
+			++elems;
+		}
+
+		std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
+	}
+
+	{	/// Sanity check of insertion and iteration using the sequential keys 0 .. n-1.
+		Stopwatch watch;
+
+		typedef TwoLevelHashTable<Key, HashMapCell<Key, Value, DefaultHash<Key> >, DefaultHash<Key>, HashTableGrower<8>, HashTableAllocator> Map;
+		//typedef HashMap<Key, Value, UniquesHashSetDefaultHash> Map;
+
+		Map map;
+		Map::iterator it;
+		bool inserted;
+
+		for (size_t i = 0; i < n; ++i)	/// Each iteration adds exactly one to the counter stored under key i.
+		{
+			map.emplace(i, it, inserted);
+			if (inserted)
+				it->second = 0;
+			++it->second;
+		}
+
+		watch.stop();
+		std::cerr << std::fixed << std::setprecision(2)
+			<< "HashMap. Size: " << map.size()
+			<< ", elapsed: " << watch.elapsedSeconds()
+			<< " (" << n / watch.elapsedSeconds() << " elem/sec.)"
+			<< std::endl;
+
+		size_t sum_counts = 0;
+		size_t elems = 0;
+		for (const auto & kv : map)
+		{
+			sum_counts += kv.second;
+			++elems;
+
+			if (kv.first >= n)	/// Only keys 0 .. n-1 were inserted, so a key equal to n is bogus as well.
+				std::cerr << kv.first << std::endl;
+		}
+
+		std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
+
+		if (sum_counts != n)	/// The counters were incremented n times in total, so a full traversal must sum to n.
+			std::cerr << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << std::endl;
 	}
 
 	return 0;