Merge branch 'master' of github.com:yandex/ClickHouse

This commit is contained in:
BayoNet 2019-03-04 18:35:18 +03:00
commit 98d999f95f
198 changed files with 2562 additions and 1208 deletions

View File

@ -24,7 +24,7 @@ if (OS_LINUX AND COMPILER_CLANG)
endif ()
if (LIBCXX_PATH)
# include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
link_directories ("${LIBCXX_PATH}/lib")
endif ()
endif ()

View File

@ -51,10 +51,6 @@ set(SRCS
${RDKAFKA_SOURCE_DIR}/snappy.c
${RDKAFKA_SOURCE_DIR}/tinycthread.c
${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
#${RDKAFKA_SOURCE_DIR}/xxhash.c
#${RDKAFKA_SOURCE_DIR}/lz4.c
#${RDKAFKA_SOURCE_DIR}/lz4frame.c
#${RDKAFKA_SOURCE_DIR}/lz4hc.c
${RDKAFKA_SOURCE_DIR}/rdgz.c
)

View File

@ -33,6 +33,7 @@
#include <Common/CurrentThread.h>
#include <Common/escapeForFileName.h>
#include <Common/getNumberOfPhysicalCPUCores.h>
#include <Common/ThreadStatus.h>
#include <Client/Connection.h>
#include <Interpreters/Context.h>
#include <Interpreters/Cluster.h>
@ -2121,6 +2122,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
void ClusterCopierApp::mainImpl()
{
StatusFile status_file(process_path + "/status");
ThreadStatus thread_status;
auto log = &logger();
LOG_INFO(log, "Starting clickhouse-copier ("

View File

@ -577,7 +577,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.second;
Histogram & histogram = elem.getSecond();
if (histogram.buckets.size() < params.num_buckets_cutoff)
{
@ -591,7 +591,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.second;
Histogram & histogram = elem.getSecond();
if (!histogram.total)
continue;
@ -623,7 +623,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.second;
Histogram & histogram = elem.getSecond();
if (!histogram.total)
continue;
@ -639,7 +639,7 @@ public:
{
for (auto & elem : table)
{
Histogram & histogram = elem.second;
Histogram & histogram = elem.getSecond();
if (!histogram.total)
continue;
@ -674,7 +674,7 @@ public:
while (true)
{
it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
if (table.end() != it && it->second.total + it->second.count_end != 0)
if (table.end() != it && it->getSecond().total + it->getSecond().count_end != 0)
break;
if (context_size == 0)
@ -708,7 +708,7 @@ public:
if (num_bytes_after_desired_size > 0)
end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);
CodePoint code = it->second.sample(determinator, end_probability_multiplier);
CodePoint code = it->getSecond().sample(determinator, end_probability_multiplier);
if (code == END)
break;

View File

@ -724,7 +724,7 @@ bool TCPHandler::receiveData()
query_context.addExternalTable(external_table_name, storage);
}
/// The data will be written directly to the table.
state.io.out = storage->write(ASTPtr(), query_context.getSettingsRef());
state.io.out = storage->write(ASTPtr(), query_context);
}
if (block)
state.io.out->write(block);

View File

@ -54,7 +54,7 @@ struct EntropyData
void merge(const EntropyData & rhs)
{
for (const auto & pair : rhs.map)
map[pair.first] += pair.second;
map[pair.getFirst()] += pair.getSecond();
}
void serialize(WriteBuffer & buf) const
@ -68,7 +68,7 @@ struct EntropyData
while (reader.next())
{
const auto & pair = reader.get();
map[pair.first] = pair.second;
map[pair.getFirst()] = pair.getSecond();
}
}
@ -76,12 +76,12 @@ struct EntropyData
{
UInt64 total_value = 0;
for (const auto & pair : map)
total_value += pair.second;
total_value += pair.getSecond();
Float64 shannon_entropy = 0;
for (const auto & pair : map)
{
Float64 frequency = Float64(pair.second) / total_value;
Float64 frequency = Float64(pair.getSecond()) / total_value;
shannon_entropy -= frequency * log2(frequency);
}
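
The merge/serialize/get code above maintains a value-to-count map and computes Shannon entropy as H = -Σ p·log2 p over the observed frequencies. Below is a minimal standalone sketch of that calculation, using std::unordered_map instead of ClickHouse's HashMap; the function and variable names are illustrative only.

#include <cmath>
#include <cstdint>
#include <iostream>
#include <unordered_map>

/// Shannon entropy over a value -> count map, as in EntropyData::get().
double shannonEntropy(const std::unordered_map<uint64_t, uint64_t> & counts)
{
    uint64_t total = 0;
    for (const auto & kv : counts)
        total += kv.second;

    double entropy = 0.0;
    for (const auto & kv : counts)
    {
        double frequency = double(kv.second) / total;
        entropy -= frequency * std::log2(frequency);
    }
    return entropy;
}

int main()
{
    std::unordered_map<uint64_t, uint64_t> counts{{1, 2}, {2, 2}};  /// Two equiprobable values.
    std::cout << shannonEntropy(counts) << "\n";                    /// Prints 1 (bit).
}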

View File

@ -94,7 +94,7 @@ public:
size_t i = 0;
for (auto it = set.begin(); it != set.end(); ++it, ++i)
data_to[old_size + i] = *it;
data_to[old_size + i] = it->getValue();
}
const char * getHeaderFilePath() const override { return __FILE__; }
@ -150,7 +150,7 @@ public:
for (const auto & elem : set)
{
writeStringBinary(elem, buf);
writeStringBinary(elem.getValue(), buf);
}
}
@ -185,7 +185,7 @@ public:
else
{
if (inserted)
it->data = arena->insert(str_serialized.data, str_serialized.size);
it->getValueMutable().data = arena->insert(str_serialized.data, str_serialized.size);
}
}
@ -198,9 +198,9 @@ public:
State::Set::iterator it;
for (auto & rhs_elem : rhs_set)
{
cur_set.emplace(rhs_elem, it, inserted);
if (inserted && it->size)
it->data = arena->insert(it->data, it->size);
cur_set.emplace(rhs_elem.getValue(), it, inserted);
if (inserted && it->getValue().size)
it->getValueMutable().data = arena->insert(it->getValue().data, it->getValue().size);
}
}
@ -215,7 +215,7 @@ public:
for (auto & elem : set)
{
deserializeAndInsert(elem, data_to);
deserializeAndInsert(elem.getValue(), data_to);
}
}

View File

@ -33,6 +33,8 @@ namespace ErrorCodes
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
template <typename> class QuantileTiming;
/** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -82,6 +84,14 @@ public:
{
if (!returns_many && levels.size() > 1)
throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if constexpr (std::is_same_v<Data, QuantileTiming<Value>>)
{
/// QuantileTiming only supports integers (it works only for unsigned integers but signed are also accepted for convenience).
if (!isInteger(argument_type))
throw Exception("Argument for function " + std::string(Name::name) + " must be integer, but it has type "
+ argument_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}
String getName() const override { return Name::name; }

View File

@ -48,7 +48,7 @@ struct QuantileExactWeighted
void merge(const QuantileExactWeighted & rhs)
{
for (const auto & pair : rhs.map)
map[pair.first] += pair.second;
map[pair.getFirst()] += pair.getSecond();
}
void serialize(WriteBuffer & buf) const
@ -62,7 +62,7 @@ struct QuantileExactWeighted
while (reader.next())
{
const auto & pair = reader.get();
map[pair.first] = pair.second;
map[pair.getFirst()] = pair.getSecond();
}
}
@ -83,12 +83,12 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.second;
array[i] = pair;
sum_weight += pair.getSecond();
array[i] = pair.getValue();
++i;
}
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });
UInt64 threshold = std::ceil(sum_weight * level);
UInt64 accumulated = 0;
@ -97,7 +97,7 @@ struct QuantileExactWeighted
const Pair * end = array + size;
while (it < end)
{
accumulated += it->second;
accumulated += it->getSecond();
if (accumulated >= threshold)
break;
@ -108,7 +108,7 @@ struct QuantileExactWeighted
if (it == end)
--it;
return it->first;
return it->getFirst();
}
/// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
@ -133,12 +133,12 @@ struct QuantileExactWeighted
UInt64 sum_weight = 0;
for (const auto & pair : map)
{
sum_weight += pair.second;
array[i] = pair;
sum_weight += pair.getSecond();
array[i] = pair.getValue();
++i;
}
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });
UInt64 accumulated = 0;
@ -150,11 +150,11 @@ struct QuantileExactWeighted
while (it < end)
{
accumulated += it->second;
accumulated += it->getSecond();
while (accumulated >= threshold)
{
result[indices[level_index]] = it->first;
result[indices[level_index]] = it->getFirst();
++level_index;
if (level_index == num_levels)
@ -168,7 +168,7 @@ struct QuantileExactWeighted
while (level_index < num_levels)
{
result[indices[level_index]] = array[size - 1].first;
result[indices[level_index]] = array[size - 1].getFirst();
++level_index;
}
}
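
The weighted-quantile routine above sorts the (value, weight) pairs by value, takes threshold = ceil(sum_weight * level), and returns the first value whose accumulated weight reaches the threshold. Here is a compact standalone sketch of the same idea; it is simplified and is not the ClickHouse implementation (it assumes a non-empty input and uses std::pair directly).

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

/// Weighted quantile: the smallest value whose cumulative weight reaches ceil(sum_weight * level).
uint64_t weightedQuantile(std::vector<std::pair<uint64_t, uint64_t>> pairs, double level)
{
    std::sort(pairs.begin(), pairs.end());  /// Sort by value (first member of the pair).

    uint64_t sum_weight = 0;
    for (const auto & p : pairs)
        sum_weight += p.second;

    uint64_t threshold = static_cast<uint64_t>(std::ceil(sum_weight * level));
    uint64_t accumulated = 0;
    for (const auto & p : pairs)
    {
        accumulated += p.second;
        if (accumulated >= threshold)
            return p.first;
    }
    return pairs.back().first;  /// Fallback, as in the original: return the last value.
}

int main()
{
    std::cout << weightedQuantile({{10, 1}, {20, 3}, {30, 1}}, 0.5) << "\n";  /// Prints 20.
}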

View File

@ -33,7 +33,7 @@ namespace
data.resize(hash_map.size());
for (auto val : hash_map)
data[val.second] = val.first;
data[val.getSecond()] = val.getFirst();
for (auto & ind : index)
ind = hash_map[ind];

View File

@ -414,7 +414,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::insert(const StringRef & data)
column->popBack(1);
}
return *iterator;
return iterator->getValue();
}
template <typename IndexType, typename ColumnType>
@ -429,7 +429,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef &
auto hash = getHash(data);
iterator = index->find(data, hash);
return iterator == index->end() ? size() + base_index : *iterator;
return iterator == index->end() ? size() + base_index : iterator->getValue();
}
}

View File

@ -56,7 +56,7 @@ struct HashMethodOneNumber
/// Get StringRef from value which can be inserted into column.
static StringRef getValueRef(const Value & value)
{
return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
return StringRef(reinterpret_cast<const char *>(&value.getFirst()), sizeof(value.getFirst()));
}
};
@ -85,7 +85,7 @@ struct HashMethodString
return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
}
static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }
protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@ -122,7 +122,7 @@ struct HashMethodFixedString
StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); }
static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }
protected:
friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@ -356,8 +356,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
{
if constexpr (has_mapped)
{
new(&it->second) Mapped();
Base::onNewKey(it->first, pool);
new(&it->getSecond()) Mapped();
Base::onNewKey(it->getFirstMutable(), pool);
}
else
Base::onNewKey(*it, pool);
@ -365,8 +365,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
if constexpr (has_mapped)
{
mapped_cache[row] = it->second;
return EmplaceResult(it->second, mapped_cache[row], inserted);
mapped_cache[row] = it->getSecond();
return EmplaceResult(it->getSecond(), mapped_cache[row], inserted);
}
else
return EmplaceResult(inserted);

View File

@ -39,7 +39,7 @@ struct LastElementCache
bool check(const Value & value_) { return !empty && value == value_; }
template <typename Key>
bool check(const Key & key) { return !empty && value.first == key; }
bool check(const Key & key) { return !empty && value.getFirst() == key; }
};
template <typename Data>
@ -147,9 +147,8 @@ protected:
if constexpr (has_mapped)
{
/// Init PairNoInit elements.
cache.value.second = Mapped();
using Key = decltype(cache.value.first);
cache.value.first = Key();
cache.value.getSecond() = Mapped();
cache.value.getFirstMutable() = {};
}
else
cache.value = Value();
@ -171,7 +170,7 @@ protected:
static_cast<Derived &>(*this).onExistingKey(key, pool);
if constexpr (has_mapped)
return EmplaceResult(cache.value.second, cache.value.second, false);
return EmplaceResult(cache.value.getSecond(), cache.value.getSecond(), false);
else
return EmplaceResult(false);
}
@ -183,33 +182,33 @@ protected:
[[maybe_unused]] Mapped * cached = nullptr;
if constexpr (has_mapped)
cached = &it->second;
cached = &it->getSecond();
if (inserted)
{
if constexpr (has_mapped)
{
new(&it->second) Mapped();
static_cast<Derived &>(*this).onNewKey(it->first, pool);
new(&it->getSecond()) Mapped();
static_cast<Derived &>(*this).onNewKey(it->getFirstMutable(), pool);
}
else
static_cast<Derived &>(*this).onNewKey(*it, pool);
static_cast<Derived &>(*this).onNewKey(it->getValueMutable(), pool);
}
else
static_cast<Derived &>(*this).onExistingKey(key, pool);
if constexpr (consecutive_keys_optimization)
{
cache.value = *it;
cache.value = it->getValue();
cache.found = true;
cache.empty = false;
if constexpr (has_mapped)
cached = &cache.value.second;
cached = &cache.value.getSecond();
}
if constexpr (has_mapped)
return EmplaceResult(it->second, *cached, inserted);
return EmplaceResult(it->getSecond(), *cached, inserted);
else
return EmplaceResult(inserted);
}
@ -222,7 +221,7 @@ protected:
if (cache.check(key))
{
if constexpr (has_mapped)
return FindResult(&cache.value.second, cache.found);
return FindResult(&cache.value.getSecond(), cache.found);
else
return FindResult(cache.found);
}
@ -237,18 +236,18 @@ protected:
cache.empty = false;
if (found)
cache.value = *it;
cache.value = it->getValue();
else
{
if constexpr (has_mapped)
cache.value.first = key;
cache.value.getFirstMutable() = key;
else
cache.value = key;
}
}
if constexpr (has_mapped)
return FindResult(found ? &it->second : nullptr, found);
return FindResult(found ? &it->getSecond() : nullptr, found);
else
return FindResult(found);
}

View File

@ -137,12 +137,12 @@ public:
if (rhs.getContainerType() == details::ContainerType::SMALL)
{
for (const auto & x : rhs.small)
insert(x);
insert(x.getValue());
}
else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
{
for (const auto & x : rhs.getContainer<Medium>())
insert(x);
insert(x.getValue());
}
else if (rhs.getContainerType() == details::ContainerType::LARGE)
getContainer<Large>().merge(rhs.getContainer<Large>());
@ -234,7 +234,7 @@ private:
auto tmp_medium = std::make_unique<Medium>();
for (const auto & x : small)
tmp_medium->insert(x);
tmp_medium->insert(x.getValue());
medium = tmp_medium.release();
setContainerType(details::ContainerType::MEDIUM);
@ -253,12 +253,12 @@ private:
if (container_type == details::ContainerType::SMALL)
{
for (const auto & x : small)
tmp_large->insert(x);
tmp_large->insert(x.getValue());
}
else if (container_type == details::ContainerType::MEDIUM)
{
for (const auto & x : getContainer<Medium>())
tmp_large->insert(x);
tmp_large->insert(x.getValue());
destroy();
}

View File

@ -37,9 +37,9 @@ public:
this->emplace(x, it, inserted);
if (inserted)
new(&it->second) mapped_type();
new(&it->getSecond()) mapped_type();
return it->second;
return it->getSecond();
}
void clear()

View File

@ -0,0 +1,73 @@
#pragma once
#include <Common/HashTable/ClearableHashMap.h>
#include <Common/HashTable/FixedHashMap.h>
template <typename Key, typename TMapped>
struct FixedClearableHashMapCell
{
using Mapped = TMapped;
using State = ClearableHashSetState;
using value_type = PairNoInit<Key, Mapped>;
UInt32 version;
Mapped mapped;
FixedClearableHashMapCell() {}
FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
void setMapped(const value_type & value) { mapped = value.getSecond(); }
struct CellExt
{
CellExt() {}
CellExt(Key && key_, FixedClearableHashMapCell * ptr_) : key(key_), ptr(ptr_) {}
void update(Key && key_, FixedClearableHashMapCell * ptr_)
{
key = key_;
ptr = ptr_;
}
Key key;
FixedClearableHashMapCell * ptr;
Key & getFirstMutable() { return key; }
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return *ptr->mapped; }
const value_type getValue() const { return {key, *ptr->mapped}; }
};
};
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{
public:
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename FixedClearableHashMap::cell_type::value_type;
mapped_type & operator[](Key x)
{
typename FixedClearableHashMap::iterator it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (&it->second) mapped_type();
return it->second;
}
void clear()
{
++this->version;
this->m_size = 0;
}
};

View File

@ -0,0 +1,45 @@
#pragma once
#include <Common/HashTable/ClearableHashSet.h>
#include <Common/HashTable/FixedHashTable.h>
template <typename Key>
struct FixedClearableHashTableCell
{
using State = ClearableHashSetState;
using value_type = Key;
UInt32 version;
FixedClearableHashTableCell() {}
FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}
bool isZero(const State & state) const { return version != state.version; }
void setZero() { version = 0; }
static constexpr bool need_zero_value_storage = false;
void setMapped(const value_type & /*value*/) {}
struct CellExt
{
Key key;
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
};
};
template <typename Key, typename Allocator = HashTableAllocator>
class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>
{
public:
using key_type = Key;
using value_type = typename FixedClearableHashSet::cell_type::value_type;
void clear()
{
++this->version;
this->m_size = 0;
}
};

View File

@ -0,0 +1,72 @@
#pragma once
#include <Common/HashTable/FixedHashTable.h>
#include <Common/HashTable/HashMap.h>
template <typename Key, typename TMapped, typename TState = HashTableNoState>
struct FixedHashMapCell
{
using Mapped = TMapped;
using State = TState;
using value_type = PairNoInit<Key, Mapped>;
bool full;
Mapped mapped;
FixedHashMapCell() {}
FixedHashMapCell(const Key &, const State &) : full(true) {}
FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}
Mapped & getSecond() { return mapped; }
const Mapped & getSecond() const { return mapped; }
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
void setMapped(const value_type & value) { mapped = value.getSecond(); }
/// Similar to FixedHashSetCell except that we need to hold a pointer to the Mapped field.
/// Note that we have to assemble a contiguous layout for the value_type on each call of getValue().
struct CellExt
{
CellExt() {}
CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
void update(Key && key_, const FixedHashMapCell * ptr_)
{
key = key_;
ptr = const_cast<FixedHashMapCell *>(ptr_);
}
Key key;
FixedHashMapCell * ptr;
Key & getFirstMutable() { return key; }
const Key & getFirst() const { return key; }
Mapped & getSecond() { return ptr->mapped; }
const Mapped & getSecond() const { return ptr->mapped; }
const value_type getValue() const { return {key, ptr->mapped}; }
};
};
template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
using key_type = Key;
using mapped_type = Mapped;
using value_type = typename Base::cell_type::value_type;
using Base::Base;
mapped_type & ALWAYS_INLINE operator[](Key x)
{
typename Base::iterator it;
bool inserted;
this->emplace(x, it, inserted);
if (inserted)
new (&it->getSecond()) mapped_type();
return it->getSecond();
}
};

View File

@ -0,0 +1,25 @@
#pragma once
#include <Common/HashTable/FixedHashTable.h>
template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{
public:
using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>;
using Self = FixedHashSet;
void merge(const Self & rhs)
{
for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
Base::buf[i] = rhs.buf[i];
}
/// NOTE: Currently this method isn't used. When it is, the ReadBuffer should
/// contain the Key explicitly.
// void readAndMerge(DB::ReadBuffer & rb)
// {
// }
};

View File

@ -0,0 +1,420 @@
#pragma once
#include <Common/HashTable/HashTable.h>
template <typename Key, typename TState = HashTableNoState>
struct FixedHashTableCell
{
using State = TState;
using value_type = Key;
bool full;
FixedHashTableCell() {}
FixedHashTableCell(const Key &, const State &) : full(true) {}
bool isZero(const State &) const { return !full; }
void setZero() { full = false; }
static constexpr bool need_zero_value_storage = false;
void setMapped(const value_type & /*value*/) {}
/// This Cell is only stored inside an iterator. It's used to accommodate the fact
/// that the iterator-based API always provides a reference to contiguous memory
/// containing the Key. As a result, we have to instantiate a real Key field.
/// All methods that return a mutable reference to the Key field are named with
/// -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
/// tables, it's totally fine to discard the Key mutations.
struct CellExt
{
Key key;
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; }
void update(Key && key_, FixedHashTableCell *) { key = key_; }
};
};
/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
 * from a HashTable in that keys are not stored in the Cell buf, but inferred
 * inside each iterator. There are a bunch of optimizations that make it faster
 * than using a HashTable: a) It doesn't have a conflict chain; b) There is no key
 * comparison; c) The number of cycles for checking whether a cell is empty is halved; d)
 * Memory layout is tighter, especially in the Clearable variants.
 *
 * NOTE: For Set variants this should always be better. For Map variants,
 * however, as we need to assemble the real cell inside each iterator, there
 * might be some cases where we fall short.
 *
 * TODO: Deprecate the cell API so that end users don't rely on the structure
 * of the cell. Instead, the iterator should be used for operations such as cell
 * transfer, key updates (e.g. StringRef) and serialization. This will allow
 * TwoLevelHashSet(Map) to contain different types of sets (maps).
 */
template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{
static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8);
protected:
friend class const_iterator;
friend class iterator;
friend class Reader;
using Self = FixedHashTable;
using cell_type = Cell;
size_t m_size = 0; /// Amount of elements
Cell * buf; /// A piece of memory for all elements except the element with zero key.
void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); }
void free()
{
if (buf)
{
Allocator::free(buf, getBufferSizeInBytes());
buf = nullptr;
}
}
void destroyElements()
{
if (!std::is_trivially_destructible_v<Cell>)
for (iterator it = begin(), it_end = end(); it != it_end; ++it)
it.ptr->~Cell();
}
template <typename Derived, bool is_const>
class iterator_base
{
using Container = std::conditional_t<is_const, const Self, Self>;
using cell_type = std::conditional_t<is_const, const Cell, Cell>;
Container * container;
cell_type * ptr;
friend class FixedHashTable;
public:
iterator_base() {}
iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
{
cell.update(ptr - container->buf, ptr);
}
bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }
Derived & operator++()
{
++ptr;
/// Skip empty cells in the main buffer.
auto buf_end = container->buf + container->BUFFER_SIZE;
while (ptr < buf_end && ptr->isZero(*container))
++ptr;
return static_cast<Derived &>(*this);
}
auto & operator*()
{
if (cell.key != ptr - container->buf)
cell.update(ptr - container->buf, ptr);
return cell;
}
auto * operator-> ()
{
if (cell.key != ptr - container->buf)
cell.update(ptr - container->buf, ptr);
return &cell;
}
auto getPtr() const { return ptr; }
size_t getHash() const { return ptr - container->buf; }
size_t getCollisionChainLength() const { return 0; }
typename cell_type::CellExt cell;
};
public:
using key_type = Key;
using value_type = typename Cell::value_type;
size_t hash(const Key & x) const { return x; }
FixedHashTable() { alloc(); }
FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }
~FixedHashTable()
{
destroyElements();
free();
}
FixedHashTable & operator=(FixedHashTable && rhs)
{
destroyElements();
free();
std::swap(buf, rhs.buf);
std::swap(m_size, rhs.m_size);
Allocator::operator=(std::move(rhs));
Cell::State::operator=(std::move(rhs));
return *this;
}
class Reader final : private Cell::State
{
public:
Reader(DB::ReadBuffer & in_) : in(in_) {}
Reader(const Reader &) = delete;
Reader & operator=(const Reader &) = delete;
bool next()
{
if (!is_initialized)
{
Cell::State::read(in);
DB::readVarUInt(size, in);
is_initialized = true;
}
if (read_count == size)
{
is_eof = true;
return false;
}
cell.read(in);
++read_count;
return true;
}
inline const value_type & get() const
{
if (!is_initialized || is_eof)
throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);
return cell.getValue();
}
private:
DB::ReadBuffer & in;
Cell cell;
size_t read_count = 0;
size_t size;
bool is_eof = false;
bool is_initialized = false;
};
class iterator : public iterator_base<iterator, false>
{
public:
using iterator_base<iterator, false>::iterator_base;
};
class const_iterator : public iterator_base<const_iterator, true>
{
public:
using iterator_base<const_iterator, true>::iterator_base;
};
const_iterator begin() const
{
if (!buf)
return end();
const Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return const_iterator(this, ptr);
}
const_iterator cbegin() const { return begin(); }
iterator begin()
{
if (!buf)
return end();
Cell * ptr = buf;
auto buf_end = buf + BUFFER_SIZE;
while (ptr < buf_end && ptr->isZero(*this))
++ptr;
return iterator(this, ptr);
}
const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); }
const_iterator cend() const { return end(); }
iterator end() { return iterator(this, buf + BUFFER_SIZE); }
protected:
void ALWAYS_INLINE emplaceImpl(Key x, iterator & it, bool & inserted)
{
it = iterator(this, &buf[x]);
if (!buf[x].isZero(*this))
{
inserted = false;
return;
}
new (&buf[x]) Cell(x, *this);
inserted = true;
++m_size;
}
public:
std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
{
std::pair<iterator, bool> res;
emplaceImpl(Cell::getKey(x), res.first, res.second);
if (res.second)
res.first.ptr->setMapped(x);
return res;
}
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) { emplaceImpl(x, it, inserted); }
void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t) { emplaceImpl(x, it, inserted); }
template <typename ObjectToCompareWith>
iterator ALWAYS_INLINE find(ObjectToCompareWith x)
{
return !buf[x].isZero(*this) ? iterator(this, &buf[x]) : end();
}
template <typename ObjectToCompareWith>
const_iterator ALWAYS_INLINE find(ObjectToCompareWith x) const
{
return !buf[x].isZero(*this) ? const_iterator(this, &buf[x]) : end();
}
template <typename ObjectToCompareWith>
iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value)
{
return !buf[hash_value].isZero(*this) ? iterator(this, &buf[hash_value]) : end();
}
template <typename ObjectToCompareWith>
const_iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value) const
{
return !buf[hash_value].isZero(*this) ? const_iterator(this, &buf[hash_value]) : end();
}
bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); }
bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); }
void write(DB::WriteBuffer & wb) const
{
Cell::State::write(wb);
DB::writeVarUInt(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
if (!ptr->isZero(*this))
{
DB::writeVarUInt(ptr - buf, wb);
ptr->write(wb);
}
}
void writeText(DB::WriteBuffer & wb) const
{
Cell::State::writeText(wb);
DB::writeText(m_size, wb);
for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
{
if (!ptr->isZero(*this))
{
DB::writeChar(',', wb);
DB::writeText(ptr - buf, wb);
DB::writeChar(',', wb);
ptr->writeText(wb);
}
}
}
void read(DB::ReadBuffer & rb)
{
Cell::State::read(rb);
destroyElements();
DB::readVarUInt(m_size, rb);
free();
alloc();
for (size_t i = 0; i < m_size; ++i)
{
size_t place_value = 0;
DB::readVarUInt(place_value, rb);
Cell x;
x.read(rb);
new (&buf[place_value]) Cell(x, *this);
}
}
void readText(DB::ReadBuffer & rb)
{
Cell::State::readText(rb);
destroyElements();
DB::readText(m_size, rb);
free();
alloc();
for (size_t i = 0; i < m_size; ++i)
{
size_t place_value = 0;
DB::assertChar(',', rb);
DB::readText(place_value, rb);
Cell x;
DB::assertChar(',', rb);
x.readText(rb);
new (&buf[place_value]) Cell(x, *this);
}
}
size_t size() const { return m_size; }
bool empty() const { return 0 == m_size; }
void clear()
{
destroyElements();
m_size = 0;
memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf));
}
/// After executing this function, the table can only be destroyed,
/// and also you can use the methods `size`, `empty`, `begin`, `end`.
void clearAndShrink()
{
destroyElements();
m_size = 0;
free();
}
size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); }
size_t getBufferSizeInCells() const { return BUFFER_SIZE; }
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
size_t getCollisions() const { return 0; }
#endif
};
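
As the comment above describes, FixedHashTable is plain direct addressing: the buffer has one cell per possible key value, the key itself is the bucket index, and occupancy is tracked by a flag (or a version counter in the Clearable variants) instead of storing the key. Below is a minimal standalone sketch of that idea for uint8_t keys, independent of the ClickHouse headers; the class and member names are made up for illustration.

#include <array>
#include <cstddef>
#include <cstdint>
#include <iostream>

/// Direct-addressed "hash" table for uint8_t keys: the key is the bucket index,
/// so there is no hashing, no collision chain and no key comparison.
template <typename Mapped>
class FixedMapSketch
{
    struct Cell { bool full = false; Mapped mapped{}; };
    std::array<Cell, 256> buf{};   /// One cell per possible uint8_t key: 1 << (sizeof(uint8_t) * 8).
    size_t m_size = 0;

public:
    Mapped & operator[](uint8_t key)
    {
        Cell & cell = buf[key];
        if (!cell.full)
        {
            cell.full = true;
            ++m_size;
        }
        return cell.mapped;
    }

    bool has(uint8_t key) const { return buf[key].full; }
    size_t size() const { return m_size; }
};

int main()
{
    FixedMapSketch<uint64_t> counts;
    ++counts[42];
    ++counts[42];
    std::cout << counts.size() << " " << counts[42] << "\n";  /// Prints "1 2".
}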

View File

@ -15,11 +15,14 @@ struct NoInitTag {};
/// A pair that does not initialize the elements, if not needed.
template <typename First, typename Second>
struct PairNoInit
class PairNoInit
{
First first;
Second second;
template <typename, typename, typename, typename>
friend class HashMapCell;
public:
PairNoInit() {}
template <typename First_>
@ -29,6 +32,11 @@ struct PairNoInit
template <typename First_, typename Second_>
PairNoInit(First_ && first_, Second_ && second_)
: first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) {}
First & getFirstMutable() { return first; }
const First & getFirst() const { return first; }
Second & getSecond() { return second; }
const Second & getSecond() const { return second; }
};
@ -45,10 +53,14 @@ struct HashMapCell
HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
HashMapCell(const value_type & value_, const State &) : value(value_) {}
value_type & getValue() { return value; }
Key & getFirstMutable() { return value.first; }
const Key & getFirst() const { return value.first; }
Mapped & getSecond() { return value.second; }
const Mapped & getSecond() const { return value.second; }
value_type & getValueMutable() { return value; }
const value_type & getValue() const { return value; }
static Key & getKey(value_type & value) { return value.first; }
static const Key & getKey(const value_type & value) { return value.first; }
bool keyEquals(const Key & key_) const { return value.first == key_; }
@ -111,8 +123,8 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>
using Base::Base;
bool keyEquals(const Key & key_) const { return this->value.first == key_; }
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
bool keyEquals(const Key & key_) const { return this->value.getFirst() == key_; }
bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.getFirst() == key_; }
bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }
void setHash(size_t hash_value) { saved_hash = hash_value; }
@ -158,9 +170,9 @@ public:
* the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
*/
if (inserted)
new(&it->second) mapped_type();
new(&it->getSecond()) mapped_type();
return it->second;
return it->getSecond();
}
};
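
The practical effect of these new accessors is that iterators now yield the cell rather than a std::pair-like value, so call sites switch from .first/.second to getFirst()/getSecond(), as the rest of this commit does. A hypothetical before/after of a typical loop (the map and sum names are illustrative only):

/// Before: dereferencing the iterator gave the PairNoInit value.
for (const auto & elem : map)
    sum += elem.second;

/// After: dereferencing the iterator gives the cell, which exposes the key and the mapped value.
for (const auto & elem : map)
    sum += elem.getSecond();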

View File

@ -98,11 +98,10 @@ struct HashTableCell
/// HashTableCell(const value_type & value_, const State & state) : key(value_) {}
/// Get what the value_type of the container will be.
value_type & getValue() { return key; }
value_type & getValueMutable() { return key; }
const value_type & getValue() const { return key; }
/// Get the key.
static Key & getKey(value_type & value) { return value; }
static const Key & getKey(const value_type & value) { return value; }
/// Are the keys at the cells equal?
@ -459,8 +458,8 @@ protected:
return static_cast<Derived &>(*this);
}
auto & operator* () const { return ptr->getValue(); }
auto * operator->() const { return &ptr->getValue(); }
auto & operator* () const { return *ptr; }
auto * operator->() const { return ptr; }
auto getPtr() const { return ptr; }
size_t getHash() const { return ptr->getHash(*container); }

View File

@ -148,8 +148,8 @@ public:
return *this;
}
value_type & operator* () const { return ptr->getValue(); }
value_type * operator->() const { return &ptr->getValue(); }
Cell & operator* () const { return *ptr; }
Cell * operator->() const { return ptr; }
Cell * getPtr() const { return ptr; }
};
@ -176,8 +176,8 @@ public:
return *this;
}
const value_type & operator* () const { return ptr->getValue(); }
const value_type * operator->() const { return &ptr->getValue(); }
const Cell & operator* () const { return *ptr; }
const Cell * operator->() const { return ptr; }
const Cell * getPtr() const { return ptr; }
};
@ -399,8 +399,8 @@ public:
typename SmallMapTable::iterator it;
bool inserted;
this->emplace(x, it, inserted);
new(&it->second) mapped_type();
return it->second;
new(&it->getSecond()) mapped_type();
return it->getSecond();
}
};

View File

@ -29,9 +29,9 @@ public:
this->emplace(x, it, inserted);
if (inserted)
new(&it->second) mapped_type();
new(&it->getSecond()) mapped_type();
return it->second;
return it->getSecond();
}
};

View File

@ -98,7 +98,7 @@ public:
/// It is assumed that the zero key (stored separately) is first in iteration order.
if (it != src.end() && it.getPtr()->isZero(src))
{
insert(*it);
insert(it->getValue());
++it;
}
@ -141,8 +141,8 @@ public:
return *this;
}
value_type & operator* () const { return *current_it; }
value_type * operator->() const { return &*current_it; }
Cell & operator* () const { return *current_it; }
Cell * operator->() const { return current_it.getPtr(); }
Cell * getPtr() const { return current_it.getPtr(); }
size_t getHash() const { return current_it.getHash(); }
@ -179,8 +179,8 @@ public:
return *this;
}
const value_type & operator* () const { return *current_it; }
const value_type * operator->() const { return &*current_it; }
const Cell & operator* () const { return *current_it; }
const Cell * operator->() const { return current_it->getPtr(); }
const Cell * getPtr() const { return current_it.getPtr(); }
size_t getHash() const { return current_it.getHash(); }

View File

@ -45,7 +45,7 @@ private:
Large * tmp_large = new Large;
for (const auto & x : small)
tmp_large->insert(x);
tmp_large->insert(x.getValue());
large = tmp_large;
}
@ -99,7 +99,7 @@ public:
else
{
for (const auto & x : rhs.small)
insert(x);
insert(x.getValue());
}
}

View File

@ -33,27 +33,28 @@ namespace ErrorCodes
}
class RWLockImpl::LockHandlerImpl
class RWLockImpl::LockHolderImpl
{
RWLock parent;
GroupsContainer::iterator it_group;
ClientsContainer::iterator it_client;
ThreadToHandler::iterator it_handler;
ThreadToHolder::iterator it_thread;
QueryIdToHolder::iterator it_query;
CurrentMetrics::Increment active_client_increment;
LockHandlerImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);
LockHolderImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);
public:
LockHandlerImpl(const LockHandlerImpl & other) = delete;
LockHolderImpl(const LockHolderImpl & other) = delete;
~LockHandlerImpl();
~LockHolderImpl();
friend class RWLockImpl;
};
RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
{
Stopwatch watch(CLOCK_MONOTONIC_COARSE);
CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
@ -66,28 +67,33 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
: ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
};
auto this_thread_id = std::this_thread::get_id();
GroupsContainer::iterator it_group;
ClientsContainer::iterator it_client;
std::unique_lock lock(mutex);
/// Check if the same thread is acquiring previously acquired lock
auto it_handler = thread_to_handler.find(this_thread_id);
if (it_handler != thread_to_handler.end())
/// Check if the same query is acquiring previously acquired lock
LockHolder existing_holder_ptr;
auto this_thread_id = std::this_thread::get_id();
auto it_thread = thread_to_holder.find(this_thread_id);
auto it_query = query_id_to_holder.end();
if (query_id != RWLockImpl::NO_QUERY)
it_query = query_id_to_holder.find(query_id);
if (it_thread != thread_to_holder.end())
existing_holder_ptr = it_thread->second.lock();
else if (it_query != query_id_to_holder.end())
existing_holder_ptr = it_query->second.lock();
if (existing_holder_ptr)
{
auto handler_ptr = it_handler->second.lock();
/// XXX: it means we can't upgrade lock from read to write - with proper waiting!
if (type != Read || existing_holder_ptr->it_group->type != Read)
throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
/// Lock may be released in another thread, but not yet deleted inside |~LogHandlerImpl()|
if (handler_ptr)
{
/// XXX: it means we can't upgrade lock from read to write - with proper waiting!
if (type != Read || handler_ptr->it_group->type != Read)
throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
return handler_ptr;
}
return existing_holder_ptr;
}
if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
@ -115,11 +121,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
throw;
}
LockHandler res(new LockHandlerImpl(shared_from_this(), it_group, it_client));
LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));
/// Insert myself (weak_ptr to the handler) to threads set to implement recursive lock
it_handler = thread_to_handler.emplace(this_thread_id, res).first;
res->it_handler = it_handler;
/// Insert myself (weak_ptr to the holder) to threads set to implement recursive lock
it_thread = thread_to_holder.emplace(this_thread_id, res).first;
res->it_thread = it_thread;
if (query_id != RWLockImpl::NO_QUERY)
it_query = query_id_to_holder.emplace(query_id, res).first;
res->it_query = it_query;
/// We are first, we should not wait anything
/// If we are not the first client in the group, a notification could be already sent
@ -137,12 +147,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
}
RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
RWLockImpl::LockHolderImpl::~LockHolderImpl()
{
std::unique_lock lock(parent->mutex);
/// Remove weak_ptr to the handler, since there are no owners of the current lock
parent->thread_to_handler.erase(it_handler);
/// Remove weak_ptrs to the holder, since there are no owners of the current lock
parent->thread_to_holder.erase(it_thread);
if (it_query != parent->query_id_to_holder.end())
parent->query_id_to_holder.erase(it_query);
/// Removes myself from client list of our group
it_group->clients.erase(it_client);
@ -161,7 +174,7 @@ RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
}
RWLockImpl::LockHandlerImpl::LockHandlerImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
RWLockImpl::LockHolderImpl::LockHolderImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
RWLockImpl::ClientsContainer::iterator it_client)
: parent{std::move(parent)}, it_group{it_group}, it_client{it_client},
active_client_increment{(*it_client == RWLockImpl::Read) ? CurrentMetrics::RWLockActiveReaders

View File

@ -1,4 +1,5 @@
#pragma once
#include <Core/Types.h>
#include <boost/core/noncopyable.hpp>
#include <list>
#include <vector>
@ -17,7 +18,13 @@ using RWLock = std::shared_ptr<RWLockImpl>;
/// Implements shared lock with FIFO service
/// Can be acquired recursively (several calls from the same thread) in Read mode
/// Can be acquired recursively (several calls for the same query or the same OS thread) in Read mode
///
/// NOTE: it is important to allow acquiring the same lock in Read mode without waiting if it is already
/// acquired by another thread of the same query. Otherwise the following deadlock is possible:
/// - SELECT thread 1 locks in the Read mode
/// - ALTER tries to lock in the Write mode (waits for SELECT thread 1)
/// - SELECT thread 2 tries to lock in the Read mode (waits for ALTER)
class RWLockImpl : public std::enable_shared_from_this<RWLockImpl>
{
public:
@ -29,14 +36,17 @@ public:
static RWLock create() { return RWLock(new RWLockImpl); }
/// Just use LockHandler::reset() to release the lock
class LockHandlerImpl;
friend class LockHandlerImpl;
using LockHandler = std::shared_ptr<LockHandlerImpl>;
/// Just use LockHolder::reset() to release the lock
class LockHolderImpl;
friend class LockHolderImpl;
using LockHolder = std::shared_ptr<LockHolderImpl>;
/// Waits in the queue and returns appropriate lock
LockHandler getLock(Type type);
/// Empty query_id means the lock is acquired out of the query context (e.g. in a background thread).
LockHolder getLock(Type type, const String & query_id);
/// Use as query_id to acquire a lock outside the query context.
inline static const String NO_QUERY = String();
private:
RWLockImpl() = default;
@ -44,7 +54,8 @@ private:
struct Group;
using GroupsContainer = std::list<Group>;
using ClientsContainer = std::list<Type>;
using ThreadToHandler = std::map<std::thread::id, std::weak_ptr<LockHandlerImpl>>;
using ThreadToHolder = std::map<std::thread::id, std::weak_ptr<LockHolderImpl>>;
using QueryIdToHolder = std::map<String, std::weak_ptr<LockHolderImpl>>;
/// Group of clients that should be executed concurrently
/// i.e. a group could contain several readers, but only one writer
@ -61,7 +72,8 @@ private:
mutable std::mutex mutex;
GroupsContainer queue;
ThreadToHandler thread_to_handler;
ThreadToHolder thread_to_holder;
QueryIdToHolder query_id_to_holder;
};
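
Usage note on the new signature: callers pass the current query id so that other threads of the same query can re-acquire the lock in Read mode without deadlocking, and pass RWLockImpl::NO_QUERY when locking outside of any query, as the updated tests below do. Illustrative call sites (hedged, not taken verbatim from the patch; rw_lock and context are assumed to exist):

/// Inside query execution: use the query id so other threads of the same query don't deadlock.
auto read_lock = rw_lock->getLock(RWLockImpl::Read, context.getCurrentQueryId());

/// In a background thread, outside of any query context.
auto bg_lock = rw_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);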

View File

@ -152,7 +152,7 @@ public:
auto it = counter_map.find(key, hash);
if (it != counter_map.end())
{
auto c = it->second;
auto c = it->getSecond();
c->count += increment;
c->error += error;
percolate(c);
@ -189,8 +189,8 @@ public:
min->error = alpha + error;
percolate(min);
it->second = min;
it->first = min->key;
it->getSecond() = min;
it->getFirstMutable() = min->key;
counter_map.reinsert(it, hash);
}
}

View File

@ -137,8 +137,8 @@ struct TasksStatsCounters
profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds,
safeDiff(prev.stat.cpu_run_virtual_total, curr.stat.cpu_run_virtual_total) / 1000U);
/// Too old struct version, do not read new fields
if (curr.stat.version < TASKSTATS_VERSION)
/// Since TASKSTATS_VERSION = 3, extended accounting and IO accounting are available.
if (curr.stat.version < 3)
return;
profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.stat.read_char, curr.stat.read_char));

View File

@ -155,10 +155,10 @@ int main(int argc, char ** argv)
map.emplace(rand(), it, inserted);
if (inserted)
{
new(&it->second) Arr(n);
new(&it->getSecond()) Arr(n);
for (size_t j = 0; j < n; ++j)
it->second[j] = field;
it->getSecond()[j] = field;
}
}

View File

@ -39,7 +39,7 @@ TEST(Common, RWLock_1)
auto type = (std::uniform_int_distribution<>(0, 9)(gen) >= round) ? RWLockImpl::Read : RWLockImpl::Write;
auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
auto lock = fifo_lock->getLock(type);
auto lock = fifo_lock->getLock(type, RWLockImpl::NO_QUERY);
if (type == RWLockImpl::Write)
{
@ -99,7 +99,7 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < 2 * cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Write);
auto lock = fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);
auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);
@ -110,17 +110,17 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < cycles; ++i)
{
auto lock1 = fifo_lock->getLock(RWLockImpl::Read);
auto lock1 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);
auto lock2 = fifo_lock->getLock(RWLockImpl::Read);
auto lock2 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write);});
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);});
}
fifo_lock->getLock(RWLockImpl::Write);
fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);
});
t1.join();
@ -143,7 +143,7 @@ TEST(Common, RWLock_PerfTest_Readers)
{
for (auto i = 0; i < cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Read);
auto lock = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
}
};

View File

@ -21,13 +21,13 @@ int main(int, char **)
bool inserted;
cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;
cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;
for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

View File

@ -82,14 +82,14 @@ void aggregate12(Map & map, Source::const_iterator begin, Source::const_iterator
{
if (*it == *prev_it)
{
++found->second;
++found->getSecond();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++found->second;
++found->getSecond();
}
}
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
{
if (*it == *prev_it)
{
++found->second;
++found->getSecond();
continue;
}
prev_it = it;
bool inserted;
map.emplace(*it, found, inserted);
++found->second;
++found->getSecond();
}
}
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
{
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
maps[0].impls[bucket][it->first] += it->second;
maps[0].impls[bucket][it->getFirst()] += it->getSecond();
}
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
Map::iterator found = local_map.find(*it);
if (found != local_map.end())
++found->second;
++found->getSecond();
else if (local_map.size() < threshold)
++local_map[*it]; /// TODO You could do one lookup, not two.
else
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
Map::iterator found;
bool inserted;
local_map.emplace(*it, found, inserted);
++found->second;
++found->getSecond();
if (inserted && local_map.size() == threshold)
{
std::lock_guard<Mutex> lock(mutex);
for (auto & value_type : local_map)
global_map[value_type.first] += value_type.second;
global_map[value_type.getFirst()] += value_type.getSecond();
local_map.clear();
}
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
Map::iterator found = local_map.find(*it);
if (found != local_map.end())
++found->second;
++found->getSecond();
else
{
size_t hash_value = global_map.hash(*it);
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->first] += it->second;
maps[0][it->getFirst()] += it->getSecond();
watch.stop();
double time_merged = watch.elapsedSeconds();
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
for (size_t i = 1; i < num_threads; ++i)
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
maps[0][it->first] += it->second;
maps[0][it->getFirst()] += it->getSecond();
watch.stop();
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
continue;
finish = false;
maps[0][iterators[i]->first] += iterators[i]->second;
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond();
++iterators[i];
}
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->first] += it->second;
global_map[it->getFirst()] += it->getSecond();
pool.wait();
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->first] += it->second;
global_map[it->getFirst()] += it->getSecond();
pool.wait();
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
for (size_t i = 0; i < num_threads; ++i)
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
global_map[it->first] += it->second;
global_map[it->getFirst()] += it->getSecond();
pool.wait();

View File

@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted);
if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{
if (it != begin && *it == prev_key)
{
updater(place->second);
updater(place->getSecond());
continue;
}
prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted);
if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->first], it->second);
merger((*source_maps[0])[it->getFirst()], it->getSecond());
}
result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue;
finish = false;
merger((*result_map)[iterators[i]->first], iterators[i]->second);
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond());
++iterators[i];
}

View File

@ -20,13 +20,13 @@ int main(int, char **)
bool inserted;
cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;
cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;
for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);
@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye.";
for (auto x : cont)
std::cerr << x.first << " -> " << x.second << std::endl;
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl;
DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

View File

@ -163,7 +163,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
StoragePtr storage = StorageMemory::create(data.second, ColumnsDescription{columns});
storage->startup();
context.addExternalTable(data.second, storage);
BlockOutputStreamPtr output = storage->write(ASTPtr(), settings);
BlockOutputStreamPtr output = storage->write(ASTPtr(), context);
/// Write data
data.first->readPrefix();

View File

@ -10,7 +10,7 @@
namespace DB
{
inline void readBinary(Array & x, ReadBuffer & buf)
void readBinary(Array & x, ReadBuffer & buf)
{
size_t size;
UInt8 type;
@ -151,12 +151,8 @@ namespace DB
DB::String res = applyVisitor(DB::FieldVisitorToString(), DB::Field(x));
buf.write(res.data(), res.size());
}
}
namespace DB
{
inline void readBinary(Tuple & x_def, ReadBuffer & buf)
void readBinary(Tuple & x_def, ReadBuffer & buf)
{
auto & x = x_def.toUnderType();
size_t size;

View File

@ -222,7 +222,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}
@ -249,7 +249,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}
}

View File

@ -19,10 +19,14 @@ namespace ErrorCodes
CreatingSetsBlockInputStream::CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits)
: subqueries_for_sets(subqueries_for_sets_),
network_transfer_limits(network_transfer_limits)
const Context & context_)
: subqueries_for_sets(subqueries_for_sets_)
, context(context_)
{
const Settings & settings = context.getSettingsRef();
network_transfer_limits = SizeLimits(
settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);
for (auto & elem : subqueries_for_sets)
{
if (elem.second.source)
@ -92,7 +96,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)
BlockOutputStreamPtr table_out;
if (subquery.table)
table_out = subquery.table->write({}, {});
table_out = subquery.table->write({}, context);
bool done_with_set = !subquery.set;
bool done_with_join = !subquery.join;

View File

@ -20,7 +20,7 @@ public:
CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits);
const Context & context_);
String getName() const override { return "CreatingSets"; }
@ -35,6 +35,7 @@ protected:
private:
SubqueriesForSets subqueries_for_sets;
const Context & context;
bool created = false;
SizeLimits network_transfer_limits;

View File

@ -247,6 +247,26 @@ void IBlockInputStream::checkQuota(Block & block)
}
}
static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds)
{
/// How much time to wait for the average speed to become `max_speed_in_seconds`.
UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds;
if (desired_microseconds > total_elapsed_microseconds)
{
UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds;
::timespec sleep_ts;
sleep_ts.tv_sec = sleep_microseconds / 1000000;
sleep_ts.tv_nsec = sleep_microseconds % 1000000 * 1000;
/// NOTE: Returns early in case of a signal. This is considered normal.
/// NOTE: This behavior also affects the killing of queries.
::nanosleep(&sleep_ts, nullptr);
ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds);
}
}
void IBlockInputStream::progressImpl(const Progress & value)
{
@ -313,8 +333,9 @@ void IBlockInputStream::progressImpl(const Progress & value)
last_profile_events_update_time = total_elapsed_microseconds;
}
if ((limits.min_execution_speed || (total_rows && limits.timeout_before_checking_execution_speed != 0))
&& (static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
if ((limits.min_execution_speed || limits.max_execution_speed || limits.min_execution_speed_bytes ||
limits.max_execution_speed_bytes || (total_rows && limits.timeout_before_checking_execution_speed != 0)) &&
(static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
{
/// Do not count sleeps in throttlers
UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
@ -328,6 +349,11 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ " rows/sec., minimum: " + toString(limits.min_execution_speed),
ErrorCodes::TOO_SLOW);
if (limits.min_execution_speed_bytes && progress.bytes / elapsed_seconds < limits.min_execution_speed_bytes)
throw Exception("Query is executing too slow: " + toString(progress.bytes / elapsed_seconds)
+ " bytes/sec., minimum: " + toString(limits.min_execution_speed_bytes),
ErrorCodes::TOO_SLOW);
/// If the predicted execution time is longer than `max_execution_time`.
if (limits.max_execution_time != 0 && total_rows)
{
@ -339,6 +365,12 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ ". Estimated rows to process: " + toString(total_rows),
ErrorCodes::TOO_SLOW);
}
if (limits.max_execution_speed && progress.rows / elapsed_seconds >= limits.max_execution_speed)
limitProgressingSpeed(progress.rows, limits.max_execution_speed, total_elapsed_microseconds);
if (limits.max_execution_speed_bytes && progress.bytes / elapsed_seconds >= limits.max_execution_speed_bytes)
limitProgressingSpeed(progress.bytes, limits.max_execution_speed_bytes, total_elapsed_microseconds);
}
}
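
The throttling added above works by sleeping just long enough for the average speed since the query started to fall back to the configured maximum. A minimal standalone sketch of the same arithmetic, using std::this_thread instead of nanosleep (names here are illustrative, not ClickHouse's):

```cpp
#include <chrono>
#include <cstdint>
#include <thread>

/// Sleep so that total_progress / total_elapsed does not exceed max_per_second,
/// mirroring limitProgressingSpeed() above.
void throttleToMaxSpeed(uint64_t total_progress, uint64_t max_per_second, uint64_t total_elapsed_us)
{
    /// How long the work should have taken at the allowed rate.
    const uint64_t desired_us = total_progress * 1000000 / max_per_second;
    if (desired_us > total_elapsed_us)
        std::this_thread::sleep_for(std::chrono::microseconds(desired_us - total_elapsed_us));
}

int main()
{
    /// 2 million rows processed in 0.5 s with a 1M rows/s cap -> sleep ~1.5 s more.
    throttleToMaxSpeed(2000000, 1000000, 500000);
}
```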

View File

@ -212,6 +212,9 @@ public:
/// in rows per second
size_t min_execution_speed = 0;
size_t max_execution_speed = 0;
size_t min_execution_speed_bytes = 0;
size_t max_execution_speed_bytes = 0;
/// Verify that the speed is not too low after the specified time has elapsed.
Poco::Timespan timeout_before_checking_execution_speed = 0;
};
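
A hedged sketch of how these four limits are meant to combine, mirroring the checks added to IBlockInputStream::progressImpl above: the min_* limits abort a query that runs too slowly, the max_* limits throttle one that runs too fast. Types and names below are simplified stand-ins:

```cpp
#include <cassert>
#include <cstddef>

enum class SpeedAction { None, Abort, Throttle };

struct SpeedLimits
{
    size_t min_execution_speed = 0;        /// rows per second; slower than this -> TOO_SLOW exception
    size_t max_execution_speed = 0;        /// rows per second; faster than this -> sleep to throttle
    size_t min_execution_speed_bytes = 0;  /// the same pair of limits, in bytes per second
    size_t max_execution_speed_bytes = 0;
};

/// Decision for the row-based limits; the byte-based ones are checked the same way.
SpeedAction checkRowSpeed(const SpeedLimits & limits, size_t rows, double elapsed_seconds)
{
    const double speed = rows / elapsed_seconds;
    if (limits.min_execution_speed && speed < limits.min_execution_speed)
        return SpeedAction::Abort;
    if (limits.max_execution_speed && speed >= limits.max_execution_speed)
        return SpeedAction::Throttle;
    return SpeedAction::None;
}

int main()
{
    SpeedLimits limits;
    limits.min_execution_speed = 1000;
    limits.max_execution_speed = 1000000;
    assert(checkRowSpeed(limits, 500, 1.0) == SpeedAction::Abort);
    assert(checkRowSpeed(limits, 5000000, 1.0) == SpeedAction::Throttle);
}
```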

View File

@ -20,7 +20,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
* Although any insertion into the table is now done via PushingToViewsBlockOutputStream,
* it's clear that this is not the best place for this functionality.
*/
addTableLock(storage->lockStructure(true));
addTableLock(storage->lockStructure(true, context.getCurrentQueryId()));
/// If the "root" table deduplactes blocks, there are no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@ -45,7 +45,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_table);
if (StoragePtr inner_table = materialized_view.tryGetTargetTable())
addTableLock(inner_table->lockStructure(true));
addTableLock(inner_table->lockStructure(true, context.getCurrentQueryId()));
auto query = materialized_view.getInnerQuery();
BlockOutputStreamPtr out = std::make_shared<PushingToViewsBlockOutputStream>(
@ -57,7 +57,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
/* Do not push to destination table if the flag is set */
if (!no_destination)
{
output = storage->write(query_ptr, context.getSettingsRef());
output = storage->write(query_ptr, context);
replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
}

View File

@ -74,7 +74,7 @@ void DataTypeEnum<Type>::fillMaps()
if (!name_to_value_pair.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and '" + name_to_value_pair.first->first.toString() + "' = " + toString(name_to_value_pair.first->second),
+ " and '" + name_to_value_pair.first->getFirst().toString() + "' = " + toString(name_to_value_pair.first->getSecond()),
ErrorCodes::SYNTAX_ERROR};
const auto value_to_name_pair = value_to_name_map.insert(

View File

@ -81,7 +81,7 @@ public:
if (it == std::end(name_to_value_map))
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR};
return it->second;
return it->getSecond();
}
Field castToName(const Field & value_or_name) const override;

View File

@ -222,7 +222,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });
/// request new values
update(

View File

@ -342,7 +342,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });
/// request new values
update(
@ -468,7 +468,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{
return pair.second.front();
return pair.getSecond().front();
});
update(
@ -500,7 +500,7 @@ private:
{
const StringRef key = keys_array[row];
const auto it = map.find(key);
const auto string_ref = it != std::end(map) ? it->second : get_default(row);
const auto string_ref = it != std::end(map) ? it->getSecond() : get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
}
@ -607,7 +607,7 @@ private:
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.second)
if (key_found_pair.getSecond())
{
++found_num;
continue;
@ -615,7 +615,7 @@ private:
++not_found_num;
auto key = key_found_pair.first;
auto key = key_found_pair.getFirst();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;

View File

@ -611,7 +611,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);
const auto it = attr.find(key);
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));
/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -779,7 +779,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.first);
keys.push_back(key.getFirst());
return keys;
}

View File

@ -2,12 +2,15 @@
#include <future>
#include <thread>
#include <ext/scope_guard.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Common/ShellCommand.h>
#include <Common/ThreadPool.h>
#include <common/logger_useful.h>
#include <common/LocalDateTime.h>
#include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
@ -46,7 +49,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(
Block & sample_block,
const Context & context)
: log(&Logger::get("ExecutableDictionarySource"))
, update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, command{config.getString(config_prefix + ".command")}
, update_field{config.getString(config_prefix + ".update_field", "")}
@ -68,31 +70,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
{
}
std::string ExecutableDictionarySource::getUpdateFieldAndDate()
{
if (update_time != std::chrono::system_clock::from_time_t(0))
{
auto tmp_time = update_time;
update_time = std::chrono::system_clock::now();
time_t hr_time = std::chrono::system_clock::to_time_t(tmp_time) - 1;
char buffer[80];
struct tm * timeinfo;
timeinfo = localtime(&hr_time);
strftime(buffer, 80, "\"%Y-%m-%d %H:%M:%S\"", timeinfo);
std::string str_time(buffer);
return command + " " + update_field + " " + str_time;
///Example case: command -T "2018-02-12 12:44:04"
///should return all entries after mentioned date
///if executable is eligible to return entries according to date.
///Where "-T" is passed as update_field.
}
else
{
std::string str_time("\"0000-00-00 00:00:00\""); ///for initial load
return command + " " + update_field + " " + str_time;
}
}
BlockInputStreamPtr ExecutableDictionarySource::loadAll()
{
LOG_TRACE(log, "loadAll " + toString());
@ -103,9 +80,15 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()
BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
{
std::string command_update = getUpdateFieldAndDate();
LOG_TRACE(log, "loadUpdatedAll " + command_update);
auto process = ShellCommand::execute(command_update);
time_t new_update_time = time(nullptr);
SCOPE_EXIT(update_time = new_update_time);
std::string command_with_update_field = command;
if (update_time)
command_with_update_field += " " + update_field + " " + DB::toString(LocalDateTime(update_time - 1));
LOG_TRACE(log, "loadUpdatedAll " + command_with_update_field);
auto process = ShellCommand::execute(command_with_update_field);
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
return std::make_shared<ShellCommandOwningBlockInputStream>(input_stream, std::move(process));
}
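
For illustration, the command string that loadUpdatedAll() now builds can be modelled with the standard library alone. buildUpdateCommand, localtime_r and std::put_time below are assumptions of this sketch; the real code formats the timestamp with DB::toString(LocalDateTime(update_time - 1)) and, unlike the removed strftime code, no longer wraps it in quotes:

```cpp
#include <ctime>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>

/// Append "<update_field> YYYY-MM-DD hh:mm:ss" to the command, one second before the
/// previous update time; on the very first load nothing is appended at all.
std::string buildUpdateCommand(const std::string & command, const std::string & update_field, std::time_t update_time)
{
    if (update_time == 0)
        return command;
    std::time_t from = update_time - 1;
    std::tm tm_buf{};
    localtime_r(&from, &tm_buf);    /// POSIX; assumed available here
    std::ostringstream out;
    out << command << ' ' << update_field << ' ' << std::put_time(&tm_buf, "%Y-%m-%d %H:%M:%S");
    return out.str();
}

int main()
{
    std::cout << buildUpdateCommand("./fetch_dictionary.sh", "-T", std::time(nullptr)) << '\n';
}
```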

View File

@ -12,6 +12,7 @@ class Logger;
namespace DB
{
/// Allows loading dictionaries from executable
class ExecutableDictionarySource final : public IDictionarySource
{
@ -27,6 +28,10 @@ public:
BlockInputStreamPtr loadAll() override;
/** The logic of this method is flawed, absolutely incorrect and ignorant.
* It may lead to skipping some values due to clock sync or timezone changes.
* The intended usage of "update_field" is totally different.
*/
BlockInputStreamPtr loadUpdatedAll() override;
BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
@ -43,13 +48,10 @@ public:
std::string toString() const override;
private:
std::string getUpdateFieldAndDate();
Poco::Logger * log;
std::chrono::time_point<std::chrono::system_clock> update_time;
time_t update_time = 0;
const DictionaryStructure dict_struct;
const std::string command;
const std::string update_field;

View File

@ -83,7 +83,7 @@ void HashedDictionary::isInImpl(const ChildType & child_ids, const AncestorType
{
auto it = attr.find(id);
if (it != std::end(attr))
id = it->second;
id = it->getSecond();
else
break;
}
@ -605,7 +605,7 @@ void HashedDictionary::getItemsImpl(
for (const auto i : ext::range(0, rows))
{
const auto it = attr.find(ids[i]);
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));
}
query_count.fetch_add(rows, std::memory_order_relaxed);
@ -707,7 +707,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
PaddedPODArray<Key> ids;
ids.reserve(attr.size());
for (const auto & value : attr)
ids.push_back(value.first);
ids.push_back(value.getFirst());
return ids;
}

View File

@ -137,7 +137,7 @@ void RangeHashedDictionary::getString(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
@ -408,7 +408,7 @@ void RangeHashedDictionary::getItemsImpl(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
@ -435,7 +435,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K
if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();
const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
@ -508,7 +508,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i
if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();
const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
@ -620,9 +620,9 @@ void RangeHashedDictionary::getIdsAndDates(
for (const auto & key : attr)
{
for (const auto & value : key.second)
for (const auto & value : key.getSecond())
{
ids.push_back(key.first);
ids.push_back(key.getFirst());
start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right);

View File

@ -64,9 +64,9 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz
if (prev_positions.size() > key_index
&& prev_positions[key_index] != name_map.end()
&& name == prev_positions[key_index]->first)
&& name == prev_positions[key_index]->getFirst())
{
return prev_positions[key_index]->second;
return prev_positions[key_index]->getSecond();
}
else
{
@ -77,7 +77,7 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz
if (key_index < prev_positions.size())
prev_positions[key_index] = it;
return it->second;
return it->getSecond();
}
else
return UNKNOWN_FIELD;
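
The lookup above relies on a small cache of map iterators indexed by key position, exploiting the fact that JSONEachRow rows usually list their fields in the same order. A standalone sketch of the same pattern with std::unordered_map (names are hypothetical):

```cpp
#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

using NameMap = std::unordered_map<std::string, size_t>;
static const size_t UNKNOWN_FIELD = size_t(-1);

/// Remember the iterator found for the k-th key of the previous row and try it first
/// for the k-th key of the next row.
size_t columnIndexCached(const NameMap & name_map,
                         std::vector<NameMap::const_iterator> & prev_positions,
                         const std::string & name, size_t key_index)
{
    if (key_index < prev_positions.size()
        && prev_positions[key_index] != name_map.end()
        && name == prev_positions[key_index]->first)
        return prev_positions[key_index]->second;   /// cache hit

    const auto it = name_map.find(name);
    if (it == name_map.end())
        return UNKNOWN_FIELD;
    if (key_index < prev_positions.size())
        prev_positions[key_index] = it;             /// remember for the next row
    return it->second;
}

int main()
{
    NameMap name_map{{"a", 0}, {"b", 1}};
    std::vector<NameMap::const_iterator> prev_positions(2, name_map.cend());
    assert(columnIndexCached(name_map, prev_positions, "b", 0) == 1);  /// fills the cache
    assert(columnIndexCached(name_map, prev_positions, "b", 0) == 1);  /// served from the cache
    assert(columnIndexCached(name_map, prev_positions, "c", 1) == UNKNOWN_FIELD);
}
```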

View File

@ -128,7 +128,7 @@ bool TSKVRowInputStream::read(MutableColumns & columns, RowReadExtension & ext)
}
else
{
index = it->second;
index = it->getSecond();
if (read_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);

View File

@ -65,7 +65,7 @@ FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName &
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size());
auto table_lock = storage_join->lockStructure(false);
auto table_lock = storage_join->lockStructure(false, context.getCurrentQueryId());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;

View File

@ -1142,9 +1142,30 @@ public:
const auto & col_with_type_and_name_right = block.getByPosition(arguments[1]);
const IColumn * col_left_untyped = col_with_type_and_name_left.column.get();
const IColumn * col_right_untyped = col_with_type_and_name_right.column.get();
const DataTypePtr & left_type = col_with_type_and_name_left.type;
const DataTypePtr & right_type = col_with_type_and_name_right.type;
/// The case when arguments are the same (tautological comparison). Return constant.
/// NOTE: Nullable types are a special case. (BTW, this function uses the default implementation for Nullable, so Nullable types cannot appear here. Check just in case.)
/// NOTE: We consider NaN comparison to be implementation-specific (and in our implementation NaNs are sometimes equal and sometimes not).
if (left_type->equals(*right_type) && !left_type->isNullable() && col_left_untyped == col_right_untyped)
{
/// Always true: =, <=, >=
if constexpr (std::is_same_v<Op<int, int>, EqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, LessOrEqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, GreaterOrEqualsOp<int, int>>)
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 1u);
return;
}
else
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0u);
return;
}
}
WhichDataType which_left{left_type};
WhichDataType which_right{right_type};

View File

@ -425,15 +425,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable
for (const auto & pair : map)
{
if (pair.second == args)
if (pair.getSecond() == args)
{
++result_offset;
if constexpr (is_numeric_column)
result_data.insertValue(pair.first);
result_data.insertValue(pair.getFirst());
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
result_data.insertData(pair.first.data, pair.first.size);
result_data.insertData(pair.getFirst().data, pair.getFirst().size);
else
result_data.deserializeAndInsertFromArena(pair.first.data);
result_data.deserializeAndInsertFromArena(pair.getFirst().data);
if (all_nullable)
null_map.push_back(0);

View File

@ -0,0 +1,121 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>
namespace DB
{
/// flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5] - flatten array.
class FunctionFlatten : public IFunction
{
public:
static constexpr auto name = "flatten";
static FunctionPtr create(const Context &) { return std::make_shared<FunctionFlatten>(); }
size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isArray(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() +
" of argument of function " + getName() +
", expected Array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
DataTypePtr nested_type = arguments[0];
while (isArray(nested_type))
nested_type = checkAndGetDataType<DataTypeArray>(nested_type.get())->getNestedType();
return std::make_shared<DataTypeArray>(nested_type);
}
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
/** We create an array column whose elements are the deepest elements of the nested arrays,
* and construct its offsets by selecting entries from the deepest offsets using the values of the ancestor offsets.
*
Example 1:
Source column: Array(Array(UInt8)):
Row 1: [[1, 2, 3], [4, 5]], Row 2: [[6], [7, 8]]
data: [1, 2, 3], [4, 5], [6], [7, 8]
offsets: 2, 4
data.data: 1 2 3 4 5 6 7 8
data.offsets: 3 5 6 8
Result column: Array(UInt8):
Row 1: [1, 2, 3, 4, 5], Row 2: [6, 7, 8]
data: 1 2 3 4 5 6 7 8
offsets: 5 8
Result offsets are selected from the deepest offsets (data.offsets) using the previous level's offsets (offsets) as indices, decremented by one:
3 5 6 8
^ ^
Example 2:
Source column: Array(Array(Array(UInt8))):
Row 1: [[], [[1], [], [2, 3]]], Row 2: [[[4]]]
deepest data: 1 2 3 4
offsets1: 2 3
offsets2: 0 3 4
- ^ ^ - select by prev offsets
offsets3: 1 1 3 4
- ^ ^ - select by prev offsets
result offsets: 3, 4
result: Row 1: [1, 2, 3], Row2: [4]
*/
const ColumnArray * src_col = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get());
if (!src_col)
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " in argument of function 'flatten'",
ErrorCodes::ILLEGAL_COLUMN);
const IColumn::Offsets & src_offsets = src_col->getOffsets();
ColumnArray::ColumnOffsets::MutablePtr result_offsets_column;
const IColumn::Offsets * prev_offsets = &src_offsets;
const IColumn * prev_data = &src_col->getData();
while (const ColumnArray * next_col = checkAndGetColumn<ColumnArray>(prev_data))
{
if (!result_offsets_column)
result_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);
IColumn::Offsets & result_offsets = result_offsets_column->getData();
const IColumn::Offsets * next_offsets = &next_col->getOffsets();
for (size_t i = 0; i < input_rows_count; ++i)
result_offsets[i] = (*next_offsets)[(*prev_offsets)[i] - 1]; /// -1 array subscript is Ok, see PaddedPODArray
prev_offsets = &result_offsets;
prev_data = &next_col->getData();
}
block.getByPosition(result).column = ColumnArray::create(
prev_data->getPtr(),
result_offsets_column ? std::move(result_offsets_column) : src_col->getOffsetsPtr());
}
private:
String getName() const override
{
return name;
}
};
void registerFunctionFlatten(FunctionFactory & factory)
{
factory.registerFunction<FunctionFlatten>();
}
}
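
The long comment inside executeImpl() describes flattening purely as a selection over offset arrays. A small model of one such step using plain std::vector; the real offsets are PaddedPODArray, which tolerates the index -1 produced by empty rows, so this sketch assumes non-empty rows:

```cpp
#include <cassert>
#include <cstddef>
#include <vector>

using Offsets = std::vector<size_t>;

/// One flattening step: result[i] = next[prev[i] - 1], i.e. for every outer row take the
/// position where its last inner array ends in the deeper data.
Offsets flattenOffsets(const Offsets & prev, const Offsets & next)
{
    Offsets result(prev.size());
    for (size_t i = 0; i < prev.size(); ++i)
        result[i] = next[prev[i] - 1];
    return result;
}

int main()
{
    /// Example 1 from the comment: rows [[1,2,3],[4,5]] and [[6],[7,8]].
    Offsets outer{2, 4};        /// row boundaries among the inner arrays
    Offsets inner{3, 5, 6, 8};  /// boundaries of the inner arrays in the flat data 1 2 3 4 5 6 7 8
    assert(flattenOffsets(outer, inner) == (Offsets{5, 8}));
}
```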

View File

@ -30,6 +30,7 @@ void registerFunctionArrayEnumerateUniqRanked(FunctionFactory &);
void registerFunctionArrayEnumerateDenseRanked(FunctionFactory &);
void registerFunctionArrayUniq(FunctionFactory &);
void registerFunctionArrayDistinct(FunctionFactory &);
void registerFunctionFlatten(FunctionFactory &);
void registerFunctionArrayWithConstant(FunctionFactory &);
void registerFunctionsArray(FunctionFactory & factory)
@ -61,6 +62,7 @@ void registerFunctionsArray(FunctionFactory & factory)
registerFunctionArrayEnumerateDenseRanked(factory);
registerFunctionArrayUniq(factory);
registerFunctionArrayDistinct(factory);
registerFunctionFlatten(factory);
registerFunctionArrayWithConstant(factory);
}

View File

@ -508,7 +508,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default;
}
@ -524,7 +524,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default[i];
}
@ -540,7 +540,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = src[i];
}
@ -557,7 +557,7 @@ private:
for (size_t i = 0; i < size; ++i)
{
auto it = table.find(src[i]);
StringRef ref = it != table.end() ? it->second : dst_default;
StringRef ref = it != table.end() ? it->getSecond() : dst_default;
dst_data.resize(current_dst_offset + ref.size);
memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
current_dst_offset += ref.size;
@ -581,7 +581,7 @@ private:
StringRef ref;
if (it != table.end())
ref = it->second;
ref = it->getSecond();
else
{
ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
@ -611,7 +611,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default;
}
@ -632,7 +632,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default[i];
}
@ -655,7 +655,7 @@ private:
auto it = table.find(src_ref);
StringRef dst_ref = it != table.end() ? it->second : (with_default ? dst_default : src_ref);
StringRef dst_ref = it != table.end() ? it->getSecond() : (with_default ? dst_default : src_ref);
dst_data.resize(current_dst_offset + dst_ref.size);
memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
current_dst_offset += dst_ref.size;
@ -697,7 +697,7 @@ private:
StringRef dst_ref;
if (it != table.end())
dst_ref = it->second;
dst_ref = it->getSecond();
else
{
dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);

View File

@ -54,7 +54,10 @@ add_check (hashing_read_buffer)
add_executable (io_operators io_operators.cpp)
target_link_libraries (io_operators PRIVATE clickhouse_common_io)
if (OS_LINUX)
add_executable (write_int write_int.cpp)
target_link_libraries (write_int PRIVATE clickhouse_common_io)
if (OS_LINUX OR OS_FREEBSD)
add_executable(write_buffer_aio write_buffer_aio.cpp)
target_link_libraries (write_buffer_aio PRIVATE clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY})

View File

@ -1120,11 +1120,11 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(
for (const auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->insertResultInto(
value.second + offsets_of_aggregate_states[i],
value.getSecond() + offsets_of_aggregate_states[i],
*final_aggregate_columns[i]);
}
@ -1151,13 +1151,13 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(
for (auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);
/// reserved, so push_back does not throw exceptions
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_columns[i]->push_back(value.second + offsets_of_aggregate_states[i]);
aggregate_columns[i]->push_back(value.getSecond() + offsets_of_aggregate_states[i]);
value.second = nullptr;
value.getSecond() = nullptr;
}
}
@ -1495,26 +1495,26 @@ void NO_INLINE Aggregator::mergeDataImpl(
{
typename Table::iterator res_it;
bool inserted;
table_dst.emplace(it->first, res_it, inserted, it.getHash());
table_dst.emplace(it->getFirst(), res_it, inserted, it.getHash());
if (!inserted)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_it->second + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
res_it->getSecond() + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(
it->second + offsets_of_aggregate_states[i]);
it->getSecond() + offsets_of_aggregate_states[i]);
}
else
{
res_it->second = it->second;
res_it->getSecond() = it->getSecond();
}
it->second = nullptr;
it->getSecond() = nullptr;
}
table_src.clearAndShrink();
@ -1534,22 +1534,22 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(
for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it)
{
typename Table::iterator res_it = table_dst.find(it->first, it.getHash());
typename Table::iterator res_it = table_dst.find(it->getFirst(), it.getHash());
AggregateDataPtr res_data = table_dst.end() == res_it
? overflows
: res_it->second;
: res_it->getSecond();
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);
it->second = nullptr;
it->getSecond() = nullptr;
}
table_src.clearAndShrink();
@ -1567,23 +1567,23 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(
for (auto it = table_src.begin(); it != table_src.end(); ++it)
{
decltype(it) res_it = table_dst.find(it->first, it.getHash());
decltype(it) res_it = table_dst.find(it->getFirst(), it.getHash());
if (table_dst.end() == res_it)
continue;
AggregateDataPtr res_data = res_it->second;
AggregateDataPtr res_data = res_it->getSecond();
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);
it->second = nullptr;
it->getSecond() = nullptr;
}
table_src.clearAndShrink();
@ -2428,7 +2428,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
{
for (auto elem : table)
{
AggregateDataPtr & data = elem.second;
AggregateDataPtr & data = elem.getSecond();
/** If an exception (usually a lack of memory, the MemoryTracker throws) arose
* after inserting the key into a hash table, but before creating all states of aggregate functions,

View File

@ -10,6 +10,7 @@
#include <common/StringRef.h>
#include <Common/Arena.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/TwoLevelHashMap.h>
#include <Common/ThreadPool.h>
@ -64,8 +65,8 @@ class IBlockOutputStream;
using AggregatedDataWithoutKey = AggregateDataPtr;
using AggregatedDataWithUInt8Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<8>>;
using AggregatedDataWithUInt16Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<16>>;
using AggregatedDataWithUInt8Key = FixedHashMap<UInt8, AggregateDataPtr>;
using AggregatedDataWithUInt16Key = FixedHashMap<UInt16, AggregateDataPtr>;
using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
@ -178,7 +179,7 @@ struct AggregationMethodOneNumber
// Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/)
{
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.first));
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.getFirst()));
}
};
@ -206,7 +207,7 @@ struct AggregationMethodString
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};
@ -234,7 +235,7 @@ struct AggregationMethodFixedString
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};
@ -326,7 +327,7 @@ struct AggregationMethodKeysFixed
/// corresponding key is nullable. Update the null map accordingly.
size_t bucket = i / 8;
size_t offset = i % 8;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.first)[bucket] >> offset) & 1;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.getFirst())[bucket] >> offset) & 1;
null_map->insertValue(val);
is_null = val == 1;
}
@ -338,7 +339,7 @@ struct AggregationMethodKeysFixed
else
{
size_t size = key_sizes[i];
observed_column->insertData(reinterpret_cast<const char *>(&value.first) + pos, size);
observed_column->insertData(reinterpret_cast<const char *>(&value.getFirst()) + pos, size);
pos += size;
}
}
@ -373,7 +374,7 @@ struct AggregationMethodSerialized
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
auto pos = value.first.data;
auto pos = value.getFirst().data;
for (auto & column : key_columns)
pos = column->deserializeAndInsertFromArena(pos);
}
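
Throughout this commit, it->first / it->second become it->getFirst() / it->getSecond(): the FixedHashMap introduced here (and the reworked hash table cells generally) no longer expose their contents as a std::pair. An illustrative cell type showing only the accessor shape the call sites now depend on, not the real ClickHouse classes:

```cpp
#include <cassert>
#include <cstdint>

/// Simplified stand-in for a fixed hash map cell: the key and the mapped value are
/// separate members rather than a std::pair, hence the accessor methods.
template <typename Key, typename Mapped>
struct CellSketch
{
    Key key{};
    Mapped mapped{};

    const Key & getFirst() const { return key; }
    Mapped & getSecond() { return mapped; }
    const Mapped & getSecond() const { return mapped; }
};

int main()
{
    CellSketch<uint8_t, uint64_t> cell;
    cell.getSecond() = 42;      /// the pattern used all over this commit: it->getSecond() = ...
    assert(cell.getFirst() == 0);
    assert(cell.getSecond() == 42);
}
```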

View File

@ -295,7 +295,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason,
{
/// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
/// We can try to create the fail node using the FQDN: if it equals the host name in the cluster config, the attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
/// Otherwise, that node will be ignored by DDLQueryStatusInputStream.
tryLogCurrentException(log, "Cannot parse DDL task " + entry_name + ", will try to send error status");
@ -1020,12 +1020,12 @@ void DDLWorker::runCleanupThread()
}
class DDLQueryStatusInputSream : public IBlockInputStream
class DDLQueryStatusInputStream : public IBlockInputStream
{
public:
DDLQueryStatusInputSream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputSream"))
DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputStream"))
{
sample = Block{
{std::make_shared<DataTypeString>(), "host"},
@ -1046,7 +1046,7 @@ public:
String getName() const override
{
return "DDLQueryStatusInputSream";
return "DDLQueryStatusInputStream";
}
Block getHeader() const override { return sample; }
@ -1146,7 +1146,7 @@ public:
return sample.cloneEmpty();
}
~DDLQueryStatusInputSream() override = default;
~DDLQueryStatusInputStream() override = default;
private:
@ -1289,7 +1289,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont
if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
return io;
auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context);
io.in = std::move(stream);
return io;
}

View File

@ -115,7 +115,7 @@ private:
ThreadGroupStatusPtr thread_group;
friend class DDLQueryStatusInputSream;
friend class DDLQueryStatusInputStream;
friend struct DDLTask;
};

View File

@ -304,7 +304,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{
const IAST & args = *func->arguments;
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0), context))
{
const ASTPtr & arg = args.children.at(1);
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))

View File

@ -5,7 +5,7 @@
namespace DB
{
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_)
: tables(tables_)
{
}
@ -16,13 +16,21 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)
if (!identifier.compound())
{
if (!tables.empty())
best_table = &tables[0];
for (const auto & [table, names] : tables)
{
if (std::find(names.begin(), names.end(), identifier.name) != names.end())
{
// TODO: make sure no collision ever happens
if (!best_table)
best_table = &table;
}
}
}
else
{
// FIXME: make a better matcher using `names`?
size_t best_match = 0;
for (const DatabaseAndTableWithAlias & table : tables)
for (const auto & [table, names] : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
if (match > best_match)

View File

@ -3,6 +3,7 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
namespace DB
{
@ -12,10 +13,10 @@ struct FindIdentifierBestTableData
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;
const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
std::vector<IdentifierWithTable> identifier_table;
FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_);
void visit(ASTIdentifier & identifier, ASTPtr &);
};

View File

@ -591,7 +591,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!as_table_name.empty())
{
as_storage = context.getTable(as_database_name, as_table_name);
as_storage_lock = as_storage->lockStructure(false);
as_storage_lock = as_storage->lockStructure(false, context.getCurrentQueryId());
}
/// Set and retrieve list of columns.

View File

@ -93,7 +93,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
table = context.getTable(database_name, table_name);
}
auto table_lock = table->lockStructure(false);
auto table_lock = table->lockStructure(false, context.getCurrentQueryId());
columns = table->getColumns().getAll();
column_defaults = table->getColumns().defaults;
column_comments = table->getColumns().comments;

View File

@ -69,7 +69,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
{
database_and_table.second->shutdown();
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = database_and_table.second->lockForAlter();
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
/// Drop table from memory, don't touch data and metadata
database_and_table.first->detachTable(database_and_table.second->getTableName());
}
@ -78,7 +78,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
database_and_table.second->checkTableCanBeDropped();
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = database_and_table.second->lockForAlter();
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
/// Drop table data, don't touch metadata
database_and_table.second->truncate(query_ptr, context);
}
@ -89,7 +89,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
database_and_table.second->shutdown();
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = database_and_table.second->lockForAlter();
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
/// Delete table metadata and table itself from memory
database_and_table.first->removeTable(context, database_and_table.second->getTableName());
@ -126,7 +126,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
if (kind == ASTDropQuery::Kind::Truncate)
{
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = table->lockForAlter();
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
/// Drop table data, don't touch metadata
table->truncate(query_ptr, context);
}
@ -135,7 +135,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
context_handle.tryRemoveExternalTable(table_name);
table->shutdown();
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = table->lockForAlter();
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
/// Delete table data
table->drop();
table->is_dropped = true;

View File

@ -1,13 +1,15 @@
#include <sstream>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/BlockIO.h>
#include <DataTypes/DataTypeString.h>
#include <Parsers/queryToString.h>
#include <Common/typeid_cast.h>
#include <Interpreters/InterpreterExplainQuery.h>
#include <DataStreams/BlockIO.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataTypes/DataTypeString.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/DumpASTNode.h>
#include <Parsers/queryToString.h>
#include <Common/typeid_cast.h>
#include <sstream>
namespace DB
@ -26,7 +28,7 @@ Block InterpreterExplainQuery::getSampleBlock()
Block block;
ColumnWithTypeAndName col;
col.name = "ast";
col.name = "explain";
col.type = std::make_shared<DataTypeString>();
col.column = col.type->createColumn();
block.insert(col);
@ -38,12 +40,21 @@ Block InterpreterExplainQuery::getSampleBlock()
BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
{
const ASTExplainQuery & ast = typeid_cast<const ASTExplainQuery &>(*query);
std::stringstream ss;
dumpAST(ast, ss);
Block sample_block = getSampleBlock();
MutableColumns res_columns = sample_block.cloneEmptyColumns();
std::stringstream ss;
if (ast.getKind() == ASTExplainQuery::ParsedAST)
{
dumpAST(ast, ss);
}
else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
{
InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
}
res_columns[0]->insert(ss.str());
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));

View File

@ -1,5 +1,6 @@
#pragma once
#include <Interpreters/Context.h>
#include <Interpreters/IInterpreter.h>
@ -14,8 +15,8 @@ using ASTPtr = std::shared_ptr<IAST>;
class InterpreterExplainQuery : public IInterpreter
{
public:
InterpreterExplainQuery(const ASTPtr & query_, const Context &)
: query(query_)
InterpreterExplainQuery(const ASTPtr & query_, const Context & context_)
: query(query_), context(context_)
{}
BlockIO execute() override;
@ -24,6 +25,7 @@ public:
private:
ASTPtr query;
Context context;
BlockInputStreamPtr executeImpl();
};

View File

@ -96,7 +96,7 @@ BlockIO InterpreterInsertQuery::execute()
checkAccess(query);
StoragePtr table = getTable(query);
auto table_lock = table->lockStructure(true);
auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
/// We create a pipeline of several streams, into which we will write data.
BlockOutputStreamPtr out;

View File

@ -23,7 +23,7 @@ BlockIO InterpreterOptimizeQuery::execute()
return executeDDLQueryOnCluster(query_ptr, context, {ast.database});
StoragePtr table = context.getTable(ast.database, ast.table);
auto table_lock = table->lockStructure(true);
auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context);
return {};
}

View File

@ -101,7 +101,7 @@ BlockIO InterpreterRenameQuery::execute()
for (const auto & names : unique_tables_from)
if (auto table = context.tryGetTable(names.database_name, names.table_name))
locks.emplace_back(table->lockForAlter());
locks.emplace_back(table->lockForAlter(context.getCurrentQueryId()));
/** All tables are locked. If there is more than one rename in the chain,
* we need to hold a global lock while doing all the renames. Order matters to avoid deadlocks.

View File

@ -80,8 +80,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(
query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
{
}
@ -90,8 +92,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
@ -100,8 +103,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_,
bool only_analyze_)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_)
bool only_analyze_,
bool modify_inplace)
: InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
{
}
@ -131,8 +135,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_)
: query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis.
bool only_analyze_,
bool modify_inplace)
/// NOTE: the query almost always should be cloned because it will be modified during analysis.
: query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
, query(typeid_cast<ASTSelectQuery &>(*query_ptr))
, context(context_)
, to_stage(to_stage_)
@ -170,7 +176,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
{
/// Read from subquery.
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
source_header = interpreter_subquery->getSampleBlock();
}
@ -194,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
}
if (storage)
table_lock = storage->lockStructure(false);
table_lock = storage->lockStructure(false, context.getCurrentQueryId());
syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
@ -217,16 +223,23 @@ InterpreterSelectQuery::InterpreterSelectQuery(
for (const auto & it : query_analyzer->getExternalTables())
if (!context.tryGetExternalTable(it.first))
context.addExternalTable(it.first, it.second);
}
if (!only_analyze || modify_inplace)
{
if (query_analyzer->isRewriteSubqueriesPredicate())
{
/// Rebuild interpreter_subquery when PredicateOptimizer has rewritten the subqueries and the main table is a subquery
if (is_subquery)
interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
only_analyze);
table_expression,
getSubqueryContext(context),
required_columns,
QueryProcessingStage::Complete,
subquery_depth + 1,
only_analyze,
modify_inplace);
}
}
if (interpreter_subquery)
@ -1026,6 +1039,9 @@ void InterpreterSelectQuery::executeFetchColumns(
if (to_stage == QueryProcessingStage::Complete)
{
limits.min_execution_speed = settings.min_execution_speed;
limits.max_execution_speed = settings.max_execution_speed;
limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
}
@ -1474,12 +1490,9 @@ void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
{
const Settings & settings = context.getSettingsRef();
executeUnion(pipeline);
pipeline.firstStream() = std::make_shared<CreatingSetsBlockInputStream>(
pipeline.firstStream(), subqueries_for_sets,
SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
pipeline.firstStream(), subqueries_for_sets, context);
}
void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline)

View File

@ -52,7 +52,8 @@ public:
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
/// Read data not from the table specified in the query, but from the prepared source `input`.
InterpreterSelectQuery(
@ -60,7 +61,8 @@ public:
const Context & context_,
const BlockInputStreamPtr & input_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
/// Read data not from the table specified in the query, but from the specified `storage_`.
InterpreterSelectQuery(
@ -68,7 +70,8 @@ public:
const Context & context_,
const StoragePtr & storage_,
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
bool only_analyze_ = false);
bool only_analyze_ = false,
bool modify_inplace = false);
~InterpreterSelectQuery() override;
@ -82,6 +85,8 @@ public:
void ignoreWithTotals();
ASTPtr getQuery() const { return query_ptr; }
private:
InterpreterSelectQuery(
const ASTPtr & query_ptr_,
@ -91,7 +96,8 @@ private:
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze_);
bool only_analyze_,
bool modify_inplace);
struct Pipeline

View File

@ -10,6 +10,7 @@
#include <Columns/ColumnConst.h>
#include <Common/typeid_cast.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTExpressionList.h>
namespace DB
@ -28,7 +29,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
const Names & required_result_column_names,
QueryProcessingStage::Enum to_stage_,
size_t subquery_depth_,
bool only_analyze)
bool only_analyze,
bool modify_inplace)
: query_ptr(query_ptr_),
context(context_),
to_stage(to_stage_),
@ -81,12 +83,17 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
for (size_t query_num = 0; query_num < num_selects; ++query_num)
{
const Names & current_required_result_column_names = query_num == 0
? required_result_column_names
: required_result_column_names_for_other_selects[query_num];
const Names & current_required_result_column_names
= query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];
nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze));
ast.list_of_selects->children.at(query_num),
context,
current_required_result_column_names,
to_stage,
subquery_depth,
only_analyze,
modify_inplace));
}
/// Determine structure of the result.

View File

@ -22,7 +22,8 @@ public:
const Names & required_result_column_names = Names{},
QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
size_t subquery_depth_ = 0,
bool only_analyze = false);
bool only_analyze = false,
bool modify_inplace = false);
~InterpreterSelectWithUnionQuery() override;
@ -39,6 +40,8 @@ public:
void ignoreWithTotals();
ASTPtr getQuery() const { return query_ptr; }
private:
ASTPtr query_ptr;
Context context;

View File

@ -239,7 +239,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam
table->shutdown();
/// If table was already dropped by anyone, an exception will be thrown
auto table_lock = table->lockForAlter();
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
create_ast = system_context.getCreateTableQuery(database_name, table_name);
database->detachTable(table_name);

View File

@ -300,7 +300,7 @@ void Join::setSampleBlock(const Block & block)
if (column.get() != column_no_lc.get())
{
materialized_columns.emplace_back(std::move(column_no_lc));
key_columns[i] = materialized_columns[i].get();
key_columns[i] = materialized_columns.back().get();
}
/// We will join only keys, where all components are not NULL.
@ -1317,10 +1317,10 @@ private:
for (; it != end; ++it)
{
if (it->second.getUsed())
if (it->getSecond().getUsed())
continue;
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->second, rows_added, columns_left, columns_keys_and_right);
AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_left, columns_keys_and_right);
if (rows_added >= max_block_size)
{

View File

@ -10,6 +10,7 @@
#include <Common/Arena.h>
#include <Common/ColumnsHashing.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
@ -218,8 +219,8 @@ public:
template <typename Mapped>
struct MapsTemplate
{
std::unique_ptr<HashMap<UInt8, Mapped, TrivialHash, HashTableFixedGrower<8>>> key8;
std::unique_ptr<HashMap<UInt16, Mapped, TrivialHash, HashTableFixedGrower<16>>> key16;
std::unique_ptr<FixedHashMap<UInt8, Mapped>> key8;
std::unique_ptr<FixedHashMap<UInt16, Mapped>> key16;
std::unique_ptr<HashMap<UInt32, Mapped, HashCRC32<UInt32>>> key32;
std::unique_ptr<HashMap<UInt64, Mapped, HashCRC32<UInt64>>> key64;
std::unique_ptr<HashMapWithSavedHash<StringRef, Mapped>> key_string;

View File

@ -394,11 +394,7 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(BlockInputStr
const SubqueriesForSets & subqueries_for_sets = stage.analyzer->getSubqueriesForSets();
if (!subqueries_for_sets.empty())
{
const auto & settings = context.getSettingsRef();
in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets,
SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
}
in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets, context);
}
in = std::make_shared<MaterializingBlockInputStream>(in);

View File

@ -59,17 +59,17 @@ bool PredicateExpressionsOptimizer::optimize()
is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
}
return is_rewrite_subqueries;
}
bool PredicateExpressionsOptimizer::optimizeImpl(
ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
{
/// split predicate with `and`
std::vector<ASTPtr> outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
std::vector<DatabaseAndTableWithAlias> database_and_table_with_aliases =
getDatabaseAndTables(*ast_select, context.getCurrentDatabase());
std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(*ast_select, context);
bool is_rewrite_subquery = false;
for (auto & outer_predicate : outer_predicate_expressions)
@ -77,7 +77,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
if (isArrayJoinFunction(outer_predicate))
continue;
auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases);
auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns);
/// TODO: remove origin expression
for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
@ -92,7 +92,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
std::vector<IdentifierWithQualifier> inner_predicate_dependencies =
getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases);
getDependenciesAndQualifiers(inner_predicate, tables_with_columns);
setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
@ -169,7 +169,7 @@ std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(AST
}
std::vector<PredicateExpressionsOptimizer::IdentifierWithQualifier>
PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<DatabaseAndTableWithAlias> & tables)
PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<TableWithColumnNames> & tables)
{
FindIdentifierBestTableVisitor::Data find_data(tables);
FindIdentifierBestTableVisitor(find_data).visit(expression);

View File

@ -2,6 +2,8 @@
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <map>
namespace DB
{
@ -70,11 +72,11 @@ private:
std::vector<ASTPtr> splitConjunctionPredicate(ASTPtr & predicate_expression);
std::vector<IdentifierWithQualifier> getDependenciesAndQualifiers(ASTPtr & expression,
std::vector<DatabaseAndTableWithAlias> & tables_with_aliases);
std::vector<TableWithColumnNames> & tables_with_aliases);
bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
bool optimizeImpl(ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
bool allowPushDown(const ASTSelectQuery * subquery);

View File

@ -8,6 +8,8 @@
#include <Common/Arena.h>
#include <Common/HashTable/HashSet.h>
#include <Common/HashTable/ClearableHashSet.h>
#include <Common/HashTable/FixedClearableHashSet.h>
#include <Common/HashTable/FixedHashSet.h>
#include <Common/UInt128.h>
@ -182,9 +184,8 @@ struct SetMethodHashed
*/
struct NonClearableSet
{
/// TODO Use either bit- or byte-set for these two options.
std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
std::unique_ptr<SetMethodOneNumber<UInt8, FixedHashSet<UInt8>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, FixedHashSet<UInt16>>> key16;
/** The ability to use SmallSet was also tested experimentally,
* as long as the number of elements in the set is small (converting to a full-fledged HashSet when necessary).
@ -209,9 +210,8 @@ struct NonClearableSet
struct ClearableSet
{
/// TODO Use either bit- or byte-set for these two options.
std::unique_ptr<SetMethodOneNumber<UInt8, ClearableHashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, ClearableHashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
std::unique_ptr<SetMethodOneNumber<UInt8, FixedClearableHashSet<UInt8>>> key8;
std::unique_ptr<SetMethodOneNumber<UInt16, FixedClearableHashSet<UInt16>>> key16;
std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
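The replacement of HashSet/ClearableHashSet with TrivialHash and HashTableFixedGrower by FixedHashSet for the 8- and 16-bit keys addresses the TODO above: with at most 2^8 or 2^16 possible keys, the key can serve directly as the bucket index, so no hash function, grower, or collision resolution is needed. A minimal sketch of that idea follows, with hypothetical names (this is not the actual FixedHashSet interface from Common/HashTable):

#include <bitset>
#include <cstdint>
#include <iostream>

// Direct-indexed set: the key itself is the bucket index, so lookup is a
// single array access and the table never needs to grow or rehash.
template <typename Key>
struct FixedSetSketch
{
    static_assert(sizeof(Key) <= 2, "only worthwhile for 8/16-bit keys");
    std::bitset<1ULL << (8 * sizeof(Key))> present;   // 256 or 65536 slots

    void insert(Key key) { present.set(key); }
    bool has(Key key) const { return present.test(key); }
    size_t size() const { return present.count(); }
};

int main()
{
    FixedSetSketch<uint16_t> set;
    set.insert(42);
    set.insert(42);                                    // duplicates are absorbed
    std::cout << set.has(42) << ' ' << set.size() << '\n';   // prints: 1 1
}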

View File

@ -233,7 +233,10 @@ struct Settings
M(SettingSeconds, max_execution_time, 0, "") \
M(SettingOverflowMode<false>, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \
\
M(SettingUInt64, min_execution_speed, 0, "In rows per second.") \
M(SettingUInt64, min_execution_speed, 0, "Minimum number of execution rows per second.") \
M(SettingUInt64, max_execution_speed, 0, "Maximum number of execution rows per second.") \
M(SettingUInt64, min_execution_speed_bytes, 0, "Minimum number of execution bytes per second.") \
M(SettingUInt64, max_execution_speed_bytes, 0, "Maximum number of execution bytes per second.") \
M(SettingSeconds, timeout_before_checking_execution_speed, 0, "Check that the speed is not too low after the specified time has elapsed.") \
\
M(SettingUInt64, max_columns_to_read, 0, "") \
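The four new settings bound query throughput from both sides: min_execution_speed/min_execution_speed_bytes abort a query that runs too slowly, max_execution_speed/max_execution_speed_bytes throttle one that runs too fast, and the check only starts once timeout_before_checking_execution_speed has elapsed. A rough sketch of how such limits can be enforced (illustration only; the names are hypothetical and this is not the actual stream implementation):

#include <chrono>
#include <cstdint>
#include <stdexcept>
#include <thread>

struct SpeedLimitsSketch
{
    uint64_t min_rows_per_sec = 0;    // min_execution_speed (0 disables the check)
    uint64_t max_rows_per_sec = 0;    // max_execution_speed (0 disables the check)
    double check_after_sec = 10.0;    // timeout_before_checking_execution_speed

    void check(uint64_t rows_processed, double elapsed_sec) const
    {
        if (elapsed_sec < check_after_sec)
            return;

        double speed = rows_processed / elapsed_sec;

        if (min_rows_per_sec && speed < min_rows_per_sec)
            throw std::runtime_error("Query is executing too slowly");

        if (max_rows_per_sec && speed > max_rows_per_sec)
        {
            // Sleep until the average speed falls back under the cap.
            double target_sec = static_cast<double>(rows_processed) / max_rows_per_sec;
            std::this_thread::sleep_for(std::chrono::duration<double>(target_sec - elapsed_sec));
        }
    }
};

int main()
{
    SpeedLimitsSketch limits{1000000, 10000000, 10.0};
    limits.check(50000000, 20.0);     // 2.5M rows/s: within the bounds, no action taken
}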

View File

@ -13,23 +13,26 @@
#include <Interpreters/ExternalDictionaries.h>
#include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTOrderByElement.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/parseQuery.h>
#include <Parsers/queryToString.h>
#include <DataTypes/NestedUtils.h>
#include <Common/typeid_cast.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <Common/typeid_cast.h>
#include <functional>
namespace DB
{
@ -107,7 +110,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
visitor.visit(query);
}
bool hasArrayJoin(const ASTPtr & ast)
{
if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@ -591,8 +593,30 @@ Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_c
return originals;
}
void replaceJoinedTable(const ASTTablesInSelectQueryElement* join)
{
if (!join || !join->table_expression)
return;
auto & table_expr = static_cast<ASTTableExpression &>(*join->table_expression.get());
if (table_expr.database_and_table_name)
{
auto & table_id = typeid_cast<ASTIdentifier &>(*table_expr.database_and_table_name.get());
String expr = "(select * from " + table_id.name + ") as " + table_id.shortName();
// FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b",
// we can't replace aliased tables.
// FIXME: long table names include database name, which we can't save within alias.
if (table_id.alias.empty() && table_id.isShort())
{
ParserTableExpression parser;
table_expr = static_cast<ASTTableExpression &>(*parseQuery(parser, expr, 0));
}
}
}
} // namespace
SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
ASTPtr & query,
@ -628,6 +652,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
{
if (const ASTTablesInSelectQueryElement * node = select_query->join())
{
replaceJoinedTable(node);
const auto & joined_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());
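In effect, a short, unaliased table name on the right-hand side of a JOIN is now re-parsed as an equivalent subquery: for a query such as SELECT * FROM t1 ALL INNER JOIN t2 USING (k), the analyzer proceeds as if the join target had been written (select * from t2) as t2. As the FIXME comments note, tables that already carry an alias or a database qualifier are left untouched, because the rewrite could not preserve both exposed names or the database part.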

View File

@ -14,6 +14,10 @@ add_executable (hash_map hash_map.cpp)
target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map PRIVATE dbms clickhouse_compression)
add_executable (hash_map_lookup hash_map_lookup.cpp)
target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
target_link_libraries (hash_map_lookup PRIVATE dbms clickhouse_compression)
add_executable (hash_map3 hash_map3.cpp)
target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR})
target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})

View File

@ -162,8 +162,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value;
std::swap(it->second, value);
new(&it->getSecond()) Value;
std::swap(it->getSecond(), value);
INIT
}
}
@ -193,8 +193,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value;
std::swap(it->second, value);
new(&it->getSecond()) Value;
std::swap(it->getSecond(), value);
INIT
}
}
@ -225,8 +225,8 @@ int main(int argc, char ** argv)
map.emplace(data[i], it, inserted);
if (inserted)
{
new(&it->second) Value;
std::swap(it->second, value);
new(&it->getSecond()) Value;
std::swap(it->getSecond(), value);
INIT
}
}
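The repeated second → getSecond() edits in this and the following benchmark programs are mechanical: hash table cells now expose their key and mapped value through getFirst()/getSecond() accessors instead of a public std::pair, so the same calling code works whether the cell actually stores a pair (as in HashMap) or only the mapped value (as in FixedHashMap, where the key is implied by the bucket position). A rough sketch of the accessor pattern, with hypothetical types (not the actual HashMapCell):

#include <cstdint>
#include <iostream>
#include <utility>

// Cell that stores a full key/value pair; the accessors simply forward to it.
template <typename Key, typename Mapped>
struct PairCellSketch
{
    std::pair<Key, Mapped> kv;
    const Key & getFirst() const { return kv.first; }
    Mapped & getSecond() { return kv.second; }
};

// Cell that only needs the mapped value. In the real FixedHashMap the key is
// not stored at all but recomputed from the cell's position in the bucket
// array; here it is cached in the cell just to keep the sketch self-contained.
template <typename Key, typename Mapped>
struct ValueOnlyCellSketch
{
    Mapped mapped {};
    Key key_from_bucket = 0;
    const Key & getFirst() const { return key_from_bucket; }
    Mapped & getSecond() { return mapped; }
};

// Generic calling code needs only the accessors, exactly like the benchmarks.
template <typename Cell>
void bump(Cell & cell)
{
    ++cell.getSecond();
}

int main()
{
    PairCellSketch<uint64_t, uint64_t> a{{42, 0}};
    ValueOnlyCellSketch<uint16_t, uint64_t> b;
    b.key_from_bucket = 42;
    bump(a);
    bump(b);
    std::cout << a.getFirst() << ' ' << a.getSecond() << ' '
              << b.getFirst() << ' ' << b.getSecond() << '\n';   // 42 1 42 1
}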

View File

@ -38,7 +38,7 @@ public:
if (this->buf[i].isZero(*this))
std::cerr << "[ ]";
else
std::cerr << '[' << this->buf[i].getValue().first.data << ", " << this->buf[i].getValue().second << ']';
std::cerr << '[' << this->buf[i].getValue().getFirst().data << ", " << this->buf[i].getValue().getSecond() << ']';
}
std::cerr << std::endl;
}
@ -85,7 +85,7 @@ int main(int, char **)
std::cerr << "Collisions: " << map.getCollisions() << std::endl;
for (auto x : map)
std::cerr << x.first.toString() << " -> " << x.second << std::endl;
std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl;
return 0;
}

View File

@ -0,0 +1,124 @@
#include <iomanip>
#include <iostream>
#include <vector>
#include <Common/Stopwatch.h>
#define DBMS_HASH_MAP_COUNT_COLLISIONS
#define DBMS_HASH_MAP_DEBUG_RESIZES
#include <Compression/CompressedReadBuffer.h>
#include <Core/Types.h>
#include <IO/ReadBufferFromFile.h>
#include <IO/ReadHelpers.h>
#include <Interpreters/AggregationCommon.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Common/HashTable/HashMap.h>
/** Do this:
for file in ResolutionWidth ResolutionDepth; do
for size in 30000 100000 300000 1000000 5000000; do
echo
BEST_METHOD=0
BEST_RESULT=0
for method in {1..2}; do
echo -ne $file $size $method '';
TOTAL_ELEMS=0
for i in {0..1000}; do
TOTAL_ELEMS=$(( $TOTAL_ELEMS + $size ))
if [[ $TOTAL_ELEMS -gt 25000000 ]]; then break; fi
./hash_map_lookup $size $method < ${file}.bin 2>&1 |
grep HashMap | grep -oE '[0-9\.]+ elem';
done | awk -W interactive '{ if ($1 > x) { x = $1 }; printf(".") } END { print x }' | tee /tmp/hash_map_lookup_res;
CUR_RESULT=$(cat /tmp/hash_map_lookup_res | tr -d '.')
if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
BEST_METHOD=$method
BEST_RESULT=$CUR_RESULT
fi;
done;
echo Best: $BEST_METHOD - $BEST_RESULT
done;
done
*/
template <typename Map>
void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
{
Map map;
typename Map::iterator it;
bool inserted;
Stopwatch watch;
for (size_t i = 0, size = data.size(); i < size; ++i)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->getSecond() = 1;
else
++it->getSecond();
}
for (size_t i = 0, size = data.size(); i < size; ++i)
{
it = map.find(data[i]);
++it->getSecond();
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
<< ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.)"
#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
<< ", collisions: " << map.getCollisions()
#endif
<< std::endl;
}
template <typename Map>
void insert(Map & map, StringRef & k)
{
bool inserted;
typename Map::iterator it;
map.emplace(k, it, inserted, nullptr);
if (inserted)
*it = 1;
else
++*it;
std::cout << *map.find(k) << std::endl;
}
int main(int argc, char ** argv)
{
if (argc < 3)
{
std::cerr << "Usage: program n m\n";
return 1;
}
size_t n = atoi(argv[1]);
size_t m = atoi(argv[2]);
std::vector<UInt16> data(n);
{
Stopwatch watch;
DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
DB::CompressedReadBuffer in2(in1);
for (size_t i = 0; i < n && !in2.eof(); ++i)
{
DB::readBinary(data[i], in2);
}
watch.stop();
std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
<< n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
}
using OldLookup = HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
using NewLookup = FixedHashMap<UInt16, UInt8>;
if (!m || m == 1)
bench<OldLookup>(data, "Old Lookup");
if (!m || m == 2)
bench<NewLookup>(data, "New Lookup");
return 0;
}

View File

@ -337,8 +337,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -366,8 +366,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -396,8 +396,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -426,8 +426,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();

View File

@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{
map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();

View File

@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
{
map.emplace(static_cast<const Key &>(data[i]), it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();

View File

@ -144,8 +144,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -173,8 +173,8 @@ int main(int argc, char ** argv)
{
map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();

View File

@ -67,8 +67,8 @@ int main(int argc, char ** argv)
{
map.emplace(data[i], it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -82,7 +82,7 @@ int main(int argc, char ** argv)
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.second;
sum_counts += kv.getSecond();
++elems;
}
@ -103,8 +103,8 @@ int main(int argc, char ** argv)
{
map.emplace(i, it, inserted);
if (inserted)
it->second = 0;
++it->second;
it->getSecond() = 0;
++it->getSecond();
}
watch.stop();
@ -118,11 +118,11 @@ int main(int argc, char ** argv)
size_t elems = 0;
for (const auto & kv : map)
{
sum_counts += kv.second;
sum_counts += kv.getSecond();
++elems;
if (kv.first > n)
std::cerr << kv.first << std::endl;
if (kv.getFirst() > n)
std::cerr << kv.getFirst() << std::endl;
}
std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;

View File

@ -14,13 +14,15 @@ public:
enum ExplainKind
{
ParsedAST,
AnalyzedSyntax,
};
ASTExplainQuery(ExplainKind kind_ = ParsedAST)
ASTExplainQuery(ExplainKind kind_)
: kind(kind_)
{}
String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
ExplainKind getKind() const { return kind; }
ASTPtr clone() const override { return std::make_shared<ASTExplainQuery>(*this); }
protected:
@ -37,7 +39,9 @@ private:
switch (kind)
{
case ParsedAST: return "ParsedAST";
case AnalyzedSyntax: return "AnalyzedSyntax";
}
__builtin_unreachable();
}
};
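The new AnalyzedSyntax kind extends the explain-style queries so that, presumably, the server can dump the query after SyntaxAnalyzer has rewritten it (including transformations such as the JOIN subquery substitution above), rather than only the raw parsed AST that the existing ParsedAST kind covers.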

Some files were not shown because too many files have changed in this diff.