Merge branch 'master' of github.com:yandex/ClickHouse
Commit: 98d999f95f
@@ -24,7 +24,7 @@ if (OS_LINUX AND COMPILER_CLANG)
     endif ()

     if (LIBCXX_PATH)
-        # include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
+        include_directories (SYSTEM BEFORE "${LIBCXX_PATH}/include" "${LIBCXX_PATH}/include/c++/v1")
         link_directories ("${LIBCXX_PATH}/lib")
     endif ()
 endif ()
@@ -51,10 +51,6 @@ set(SRCS
     ${RDKAFKA_SOURCE_DIR}/snappy.c
     ${RDKAFKA_SOURCE_DIR}/tinycthread.c
     ${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c
-    #${RDKAFKA_SOURCE_DIR}/xxhash.c
-    #${RDKAFKA_SOURCE_DIR}/lz4.c
-    #${RDKAFKA_SOURCE_DIR}/lz4frame.c
-    #${RDKAFKA_SOURCE_DIR}/lz4hc.c
     ${RDKAFKA_SOURCE_DIR}/rdgz.c
 )

@@ -33,6 +33,7 @@
 #include <Common/CurrentThread.h>
 #include <Common/escapeForFileName.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
+#include <Common/ThreadStatus.h>
 #include <Client/Connection.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/Cluster.h>
@@ -2121,6 +2122,7 @@ void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
 void ClusterCopierApp::mainImpl()
 {
     StatusFile status_file(process_path + "/status");
+    ThreadStatus thread_status;

     auto log = &logger();
     LOG_INFO(log, "Starting clickhouse-copier ("
@@ -577,7 +577,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();

             if (histogram.buckets.size() < params.num_buckets_cutoff)
             {
@@ -591,7 +591,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -623,7 +623,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -639,7 +639,7 @@ public:
     {
         for (auto & elem : table)
         {
-            Histogram & histogram = elem.second;
+            Histogram & histogram = elem.getSecond();
             if (!histogram.total)
                 continue;

@@ -674,7 +674,7 @@ public:
         while (true)
         {
             it = table.find(hashContext(code_points.data() + code_points.size() - context_size, code_points.data() + code_points.size()));
-            if (table.end() != it && it->second.total + it->second.count_end != 0)
+            if (table.end() != it && it->getSecond().total + it->getSecond().count_end != 0)
                 break;

             if (context_size == 0)
@@ -708,7 +708,7 @@ public:
             if (num_bytes_after_desired_size > 0)
                 end_probability_multiplier = std::pow(1.25, num_bytes_after_desired_size);

-            CodePoint code = it->second.sample(determinator, end_probability_multiplier);
+            CodePoint code = it->getSecond().sample(determinator, end_probability_multiplier);

             if (code == END)
                 break;
@@ -724,7 +724,7 @@ bool TCPHandler::receiveData()
             query_context.addExternalTable(external_table_name, storage);
         }
         /// The data will be written directly to the table.
-        state.io.out = storage->write(ASTPtr(), query_context.getSettingsRef());
+        state.io.out = storage->write(ASTPtr(), query_context);
     }
     if (block)
         state.io.out->write(block);
@@ -54,7 +54,7 @@ struct EntropyData
     void merge(const EntropyData & rhs)
     {
         for (const auto & pair : rhs.map)
-            map[pair.first] += pair.second;
+            map[pair.getFirst()] += pair.getSecond();
     }

     void serialize(WriteBuffer & buf) const
@@ -68,7 +68,7 @@ struct EntropyData
         while (reader.next())
         {
             const auto & pair = reader.get();
-            map[pair.first] = pair.second;
+            map[pair.getFirst()] = pair.getSecond();
         }
     }

@@ -76,12 +76,12 @@ struct EntropyData
     {
         UInt64 total_value = 0;
         for (const auto & pair : map)
-            total_value += pair.second;
+            total_value += pair.getSecond();

         Float64 shannon_entropy = 0;
         for (const auto & pair : map)
         {
-            Float64 frequency = Float64(pair.second) / total_value;
+            Float64 frequency = Float64(pair.getSecond()) / total_value;
             shannon_entropy -= frequency * log2(frequency);
         }
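The loop above accumulates the Shannon entropy H = -sum(p_i * log2(p_i)) over the observed value frequencies. For reference, a minimal standalone sketch of the same computation, using a plain std::unordered_map rather than ClickHouse's HashMap (so field access stays pair.first / pair.second; the function name is hypothetical):

    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <unordered_map>

    /// Shannon entropy in bits: H = -sum(p_i * log2(p_i)) over observed frequencies.
    double shannonEntropy(const std::unordered_map<uint64_t, uint64_t> & counts)
    {
        uint64_t total = 0;
        for (const auto & pair : counts)
            total += pair.second;

        if (total == 0)
            return 0.0;

        double entropy = 0.0;
        for (const auto & pair : counts)
        {
            double frequency = double(pair.second) / total;
            entropy -= frequency * std::log2(frequency);
        }
        return entropy;
    }

    int main()
    {
        /// Two equiprobable values -> exactly 1 bit of entropy.
        std::unordered_map<uint64_t, uint64_t> counts{{0, 5}, {1, 5}};
        std::cout << shannonEntropy(counts) << '\n'; // prints 1
    }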
@@ -94,7 +94,7 @@ public:

         size_t i = 0;
         for (auto it = set.begin(); it != set.end(); ++it, ++i)
-            data_to[old_size + i] = *it;
+            data_to[old_size + i] = it->getValue();
     }

     const char * getHeaderFilePath() const override { return __FILE__; }
@@ -150,7 +150,7 @@ public:

         for (const auto & elem : set)
         {
-            writeStringBinary(elem, buf);
+            writeStringBinary(elem.getValue(), buf);
         }
     }

@@ -185,7 +185,7 @@ public:
         else
         {
             if (inserted)
-                it->data = arena->insert(str_serialized.data, str_serialized.size);
+                it->getValueMutable().data = arena->insert(str_serialized.data, str_serialized.size);
         }
     }

@@ -198,9 +198,9 @@ public:
         State::Set::iterator it;
         for (auto & rhs_elem : rhs_set)
         {
-            cur_set.emplace(rhs_elem, it, inserted);
-            if (inserted && it->size)
-                it->data = arena->insert(it->data, it->size);
+            cur_set.emplace(rhs_elem.getValue(), it, inserted);
+            if (inserted && it->getValue().size)
+                it->getValueMutable().data = arena->insert(it->getValue().data, it->getValue().size);
         }
     }

@@ -215,7 +215,7 @@ public:

         for (auto & elem : set)
         {
-            deserializeAndInsert(elem, data_to);
+            deserializeAndInsert(elem.getValue(), data_to);
         }
     }
@@ -33,6 +33,8 @@ namespace ErrorCodes
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }

+template <typename> class QuantileTiming;
+

 /** Generic aggregate function for calculation of quantiles.
   * It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@@ -82,6 +84,14 @@ public:
     {
         if (!returns_many && levels.size() > 1)
             throw Exception("Aggregate function " + getName() + " require one parameter or less", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        if constexpr (std::is_same_v<Data, QuantileTiming<Value>>)
+        {
+            /// QuantileTiming only supports integers (it works only for unsigned integers but signed are also accepted for convenience).
+            if (!isInteger(argument_type))
+                throw Exception("Argument for function " + std::string(Name::name) + " must be integer, but it has type "
+                    + argument_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        }
     }

     String getName() const override { return Name::name; }
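The added check uses if constexpr, so the integer-type validation is compiled only when the aggregate function is instantiated with QuantileTiming as its Data structure; every other quantile implementation pays nothing. A generic sketch of this pattern, with hypothetical stand-in types (not the ClickHouse classes):

    #include <stdexcept>
    #include <string>
    #include <type_traits>

    /// Stand-in for a data structure that only works with integer arguments.
    template <typename Value> struct QuantileTimingLike {};

    /// A validation branch that exists for only one template instantiation.
    /// For any other Data type the branch is discarded at compile time.
    template <typename Data, typename Value>
    void checkArgumentType(bool argument_is_integer, const std::string & type_name)
    {
        if constexpr (std::is_same_v<Data, QuantileTimingLike<Value>>)
        {
            if (!argument_is_integer)
                throw std::invalid_argument("argument must be integer, but it has type " + type_name);
        }
    }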
@@ -48,7 +48,7 @@ struct QuantileExactWeighted
     void merge(const QuantileExactWeighted & rhs)
     {
         for (const auto & pair : rhs.map)
-            map[pair.first] += pair.second;
+            map[pair.getFirst()] += pair.getSecond();
     }

     void serialize(WriteBuffer & buf) const
@@ -62,7 +62,7 @@ struct QuantileExactWeighted
         while (reader.next())
         {
             const auto & pair = reader.get();
-            map[pair.first] = pair.second;
+            map[pair.getFirst()] = pair.getSecond();
         }
     }

@@ -83,12 +83,12 @@ struct QuantileExactWeighted
         UInt64 sum_weight = 0;
         for (const auto & pair : map)
         {
-            sum_weight += pair.second;
-            array[i] = pair;
+            sum_weight += pair.getSecond();
+            array[i] = pair.getValue();
             ++i;
         }

-        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });

         UInt64 threshold = std::ceil(sum_weight * level);
         UInt64 accumulated = 0;
@@ -97,7 +97,7 @@ struct QuantileExactWeighted
         const Pair * end = array + size;
         while (it < end)
         {
-            accumulated += it->second;
+            accumulated += it->getSecond();

             if (accumulated >= threshold)
                 break;
@@ -108,7 +108,7 @@ struct QuantileExactWeighted
         if (it == end)
             --it;

-        return it->first;
+        return it->getFirst();
     }

     /// Get the `size` values of `levels` quantiles. Write `size` results starting with `result` address.
@@ -133,12 +133,12 @@ struct QuantileExactWeighted
         UInt64 sum_weight = 0;
         for (const auto & pair : map)
         {
-            sum_weight += pair.second;
-            array[i] = pair;
+            sum_weight += pair.getSecond();
+            array[i] = pair.getValue();
             ++i;
         }

-        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.first < b.first; });
+        std::sort(array, array + size, [](const Pair & a, const Pair & b) { return a.getFirst() < b.getFirst(); });

         UInt64 accumulated = 0;

@@ -150,11 +150,11 @@ struct QuantileExactWeighted
         while (it < end)
         {
-            accumulated += it->second;
+            accumulated += it->getSecond();

             while (accumulated >= threshold)
             {
-                result[indices[level_index]] = it->first;
+                result[indices[level_index]] = it->getFirst();
                 ++level_index;

                 if (level_index == num_levels)
@@ -168,7 +168,7 @@ struct QuantileExactWeighted
         while (level_index < num_levels)
         {
-            result[indices[level_index]] = array[size - 1].first;
+            result[indices[level_index]] = array[size - 1].getFirst();
             ++level_index;
         }
     }
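The weighted-quantile algorithm these hunks touch works by sorting (value, weight) pairs by value, then scanning until the accumulated weight reaches ceil(sum_weight * level). A self-contained sketch with std::vector and std::pair (hypothetical helper, not ClickHouse code; assumes a non-empty input):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <utility>
    #include <vector>

    /// Exact weighted quantile, same scheme as the diff above: sort by value,
    /// then walk until the running weight crosses ceil(sum_weight * level).
    uint64_t weightedQuantile(std::vector<std::pair<uint64_t, uint64_t>> pairs, double level)
    {
        std::sort(pairs.begin(), pairs.end()); // sorts by value (pair.first)

        uint64_t sum_weight = 0;
        for (const auto & p : pairs)
            sum_weight += p.second;

        uint64_t threshold = uint64_t(std::ceil(sum_weight * level));
        uint64_t accumulated = 0;
        for (const auto & p : pairs)
        {
            accumulated += p.second;
            if (accumulated >= threshold)
                return p.first;
        }
        return pairs.back().first; // level == 1 edge case
    }

    int main()
    {
        /// Values 1, 2, 3 with weights 1, 1, 8: the heavy weight pulls the median to 3.
        std::cout << weightedQuantile({{1, 1}, {2, 1}, {3, 8}}, 0.5) << '\n'; // prints 3
    }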
@@ -33,7 +33,7 @@ namespace

         data.resize(hash_map.size());
         for (auto val : hash_map)
-            data[val.second] = val.first;
+            data[val.getSecond()] = val.getFirst();

         for (auto & ind : index)
             ind = hash_map[ind];
@@ -414,7 +414,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::insert(const StringRef & data)
         column->popBack(1);
     }

-    return *iterator;
+    return iterator->getValue();
 }

 template <typename IndexType, typename ColumnType>
@@ -429,7 +429,7 @@ UInt64 ReverseIndex<IndexType, ColumnType>::getInsertionPoint(const StringRef &
     auto hash = getHash(data);
     iterator = index->find(data, hash);

-    return iterator == index->end() ? size() + base_index : *iterator;
+    return iterator == index->end() ? size() + base_index : iterator->getValue();
 }

 }
@@ -56,7 +56,7 @@ struct HashMethodOneNumber
     /// Get StringRef from value which can be inserted into column.
     static StringRef getValueRef(const Value & value)
     {
-        return StringRef(reinterpret_cast<const char *>(&value.first), sizeof(value.first));
+        return StringRef(reinterpret_cast<const char *>(&value.getFirst()), sizeof(value.getFirst()));
     }
 };

@@ -85,7 +85,7 @@ struct HashMethodString
         return StringRef(chars + offsets[row - 1], offsets[row] - offsets[row - 1] - 1);
     }

-    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
+    static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }

 protected:
     friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@@ -122,7 +122,7 @@ struct HashMethodFixedString

     StringRef getKey(size_t row, Arena &) const { return StringRef(&(*chars)[row * n], n); }

-    static StringRef getValueRef(const Value & value) { return StringRef(value.first.data, value.first.size); }
+    static StringRef getValueRef(const Value & value) { return StringRef(value.getFirst().data, value.getFirst().size); }

 protected:
     friend class columns_hashing_impl::HashMethodBase<Self, Value, Mapped, use_cache>;
@@ -356,8 +356,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod
     {
         if constexpr (has_mapped)
         {
-            new(&it->second) Mapped();
-            Base::onNewKey(it->first, pool);
+            new(&it->getSecond()) Mapped();
+            Base::onNewKey(it->getFirstMutable(), pool);
         }
         else
             Base::onNewKey(*it, pool);
@@ -365,8 +365,8 @@ struct HashMethodSingleLowCardinalityColumn : public SingleColumnMethod

         if constexpr (has_mapped)
         {
-            mapped_cache[row] = it->second;
-            return EmplaceResult(it->second, mapped_cache[row], inserted);
+            mapped_cache[row] = it->getSecond();
+            return EmplaceResult(it->getSecond(), mapped_cache[row], inserted);
         }
         else
             return EmplaceResult(inserted);
@@ -39,7 +39,7 @@ struct LastElementCache
     bool check(const Value & value_) { return !empty && value == value_; }

     template <typename Key>
-    bool check(const Key & key) { return !empty && value.first == key; }
+    bool check(const Key & key) { return !empty && value.getFirst() == key; }
 };

 template <typename Data>
@@ -147,9 +147,8 @@ protected:
         if constexpr (has_mapped)
         {
             /// Init PairNoInit elements.
-            cache.value.second = Mapped();
-            using Key = decltype(cache.value.first);
-            cache.value.first = Key();
+            cache.value.getSecond() = Mapped();
+            cache.value.getFirstMutable() = {};
         }
         else
             cache.value = Value();
@@ -171,7 +170,7 @@ protected:
         static_cast<Derived &>(*this).onExistingKey(key, pool);

         if constexpr (has_mapped)
-            return EmplaceResult(cache.value.second, cache.value.second, false);
+            return EmplaceResult(cache.value.getSecond(), cache.value.getSecond(), false);
         else
             return EmplaceResult(false);
     }
@@ -183,33 +182,33 @@ protected:

         [[maybe_unused]] Mapped * cached = nullptr;
         if constexpr (has_mapped)
-            cached = &it->second;
+            cached = &it->getSecond();

         if (inserted)
         {
             if constexpr (has_mapped)
             {
-                new(&it->second) Mapped();
-                static_cast<Derived &>(*this).onNewKey(it->first, pool);
+                new(&it->getSecond()) Mapped();
+                static_cast<Derived &>(*this).onNewKey(it->getFirstMutable(), pool);
             }
             else
-                static_cast<Derived &>(*this).onNewKey(*it, pool);
+                static_cast<Derived &>(*this).onNewKey(it->getValueMutable(), pool);
         }
         else
             static_cast<Derived &>(*this).onExistingKey(key, pool);

         if constexpr (consecutive_keys_optimization)
         {
-            cache.value = *it;
+            cache.value = it->getValue();
             cache.found = true;
             cache.empty = false;

             if constexpr (has_mapped)
-                cached = &cache.value.second;
+                cached = &cache.value.getSecond();
         }

         if constexpr (has_mapped)
-            return EmplaceResult(it->second, *cached, inserted);
+            return EmplaceResult(it->getSecond(), *cached, inserted);
         else
             return EmplaceResult(inserted);
     }
@@ -222,7 +221,7 @@ protected:
         if (cache.check(key))
         {
             if constexpr (has_mapped)
-                return FindResult(&cache.value.second, cache.found);
+                return FindResult(&cache.value.getSecond(), cache.found);
             else
                 return FindResult(cache.found);
         }
@@ -237,18 +236,18 @@ protected:
             cache.empty = false;

             if (found)
-                cache.value = *it;
+                cache.value = it->getValue();
             else
             {
                 if constexpr (has_mapped)
-                    cache.value.first = key;
+                    cache.value.getFirstMutable() = key;
                 else
                     cache.value = key;
             }
         }

         if constexpr (has_mapped)
-            return FindResult(found ? &it->second : nullptr, found);
+            return FindResult(found ? &it->getSecond() : nullptr, found);
         else
             return FindResult(found);
     }
@@ -137,12 +137,12 @@ public:
         if (rhs.getContainerType() == details::ContainerType::SMALL)
         {
             for (const auto & x : rhs.small)
-                insert(x);
+                insert(x.getValue());
         }
         else if (rhs.getContainerType() == details::ContainerType::MEDIUM)
         {
             for (const auto & x : rhs.getContainer<Medium>())
-                insert(x);
+                insert(x.getValue());
         }
         else if (rhs.getContainerType() == details::ContainerType::LARGE)
             getContainer<Large>().merge(rhs.getContainer<Large>());
@@ -234,7 +234,7 @@ private:
         auto tmp_medium = std::make_unique<Medium>();

         for (const auto & x : small)
-            tmp_medium->insert(x);
+            tmp_medium->insert(x.getValue());

         medium = tmp_medium.release();
         setContainerType(details::ContainerType::MEDIUM);
@@ -253,12 +253,12 @@ private:
         if (container_type == details::ContainerType::SMALL)
         {
             for (const auto & x : small)
-                tmp_large->insert(x);
+                tmp_large->insert(x.getValue());
         }
         else if (container_type == details::ContainerType::MEDIUM)
         {
             for (const auto & x : getContainer<Medium>())
-                tmp_large->insert(x);
+                tmp_large->insert(x.getValue());

             destroy();
         }
@@ -37,9 +37,9 @@ public:
         this->emplace(x, it, inserted);

         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }

     void clear()
dbms/src/Common/HashTable/FixedClearableHashMap.h (new file, 73 lines)
@@ -0,0 +1,73 @@
#pragma once

#include <Common/HashTable/ClearableHashMap.h>
#include <Common/HashTable/FixedHashMap.h>


template <typename Key, typename TMapped>
struct FixedClearableHashMapCell
{
    using Mapped = TMapped;
    using State = ClearableHashSetState;

    using value_type = PairNoInit<Key, Mapped>;
    UInt32 version;
    Mapped mapped;

    FixedClearableHashMapCell() {}
    FixedClearableHashMapCell(const Key &, const State & state) : version(state.version) {}
    FixedClearableHashMapCell(const value_type & value_, const State & state) : version(state.version), mapped(value_.second) {}

    Mapped & getSecond() { return mapped; }
    const Mapped & getSecond() const { return mapped; }
    bool isZero(const State & state) const { return version != state.version; }
    void setZero() { version = 0; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & value) { mapped = value.getSecond(); }

    struct CellExt
    {
        CellExt() {}
        CellExt(Key && key_, FixedClearableHashMapCell * ptr_) : key(key_), ptr(ptr_) {}
        void update(Key && key_, FixedClearableHashMapCell * ptr_)
        {
            key = key_;
            ptr = ptr_;
        }
        Key key;
        FixedClearableHashMapCell * ptr;
        Key & getFirstMutable() { return key; }
        const Key & getFirst() const { return key; }
        Mapped & getSecond() { return ptr->mapped; }
        const Mapped & getSecond() const { return ptr->mapped; }
        const value_type getValue() const { return {key, ptr->mapped}; }
    };
};


template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedClearableHashMap : public FixedHashMap<Key, FixedClearableHashMapCell<Key, Mapped>, Allocator>
{
public:
    using key_type = Key;
    using mapped_type = Mapped;
    using value_type = typename FixedClearableHashMap::cell_type::value_type;

    mapped_type & operator[](Key x)
    {
        typename FixedClearableHashMap::iterator it;
        bool inserted;
        this->emplace(x, it, inserted);

        if (inserted)
            new (&it->getSecond()) mapped_type();

        return it->getSecond();
    }

    void clear()
    {
        ++this->version;
        this->m_size = 0;
    }
};
dbms/src/Common/HashTable/FixedClearableHashSet.h (new file, 45 lines)
@@ -0,0 +1,45 @@
#pragma once

#include <Common/HashTable/ClearableHashSet.h>
#include <Common/HashTable/FixedHashTable.h>


template <typename Key>
struct FixedClearableHashTableCell
{
    using State = ClearableHashSetState;

    using value_type = Key;
    UInt32 version;

    FixedClearableHashTableCell() {}
    FixedClearableHashTableCell(const Key &, const State & state) : version(state.version) {}

    bool isZero(const State & state) const { return version != state.version; }
    void setZero() { version = 0; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & /*value*/) {}

    struct CellExt
    {
        Key key;
        value_type & getValueMutable() { return key; }
        const value_type & getValue() const { return key; }
        void update(Key && key_, FixedClearableHashTableCell *) { key = key_; }
    };
};


template <typename Key, typename Allocator = HashTableAllocator>
class FixedClearableHashSet : public FixedHashTable<Key, FixedClearableHashTableCell<Key>, Allocator>
{
public:
    using key_type = Key;
    using value_type = typename FixedClearableHashSet::cell_type::value_type;

    void clear()
    {
        ++this->version;
        this->m_size = 0;
    }
};
dbms/src/Common/HashTable/FixedHashMap.h (new file, 72 lines)
@@ -0,0 +1,72 @@
#pragma once

#include <Common/HashTable/FixedHashTable.h>
#include <Common/HashTable/HashMap.h>


template <typename Key, typename TMapped, typename TState = HashTableNoState>
struct FixedHashMapCell
{
    using Mapped = TMapped;
    using State = TState;

    using value_type = PairNoInit<Key, Mapped>;
    bool full;
    Mapped mapped;

    FixedHashMapCell() {}
    FixedHashMapCell(const Key &, const State &) : full(true) {}
    FixedHashMapCell(const value_type & value_, const State &) : full(true), mapped(value_.second) {}

    Mapped & getSecond() { return mapped; }
    const Mapped & getSecond() const { return mapped; }
    bool isZero(const State &) const { return !full; }
    void setZero() { full = false; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & value) { mapped = value.getSecond(); }

    /// Similar to FixedHashSetCell except that we need to contain a pointer to the Mapped field.
    /// Note that we have to assemble a continuous layout for the value_type on each call of getValue().
    struct CellExt
    {
        CellExt() {}
        CellExt(Key && key_, const FixedHashMapCell * ptr_) : key(key_), ptr(const_cast<FixedHashMapCell *>(ptr_)) {}
        void update(Key && key_, const FixedHashMapCell * ptr_)
        {
            key = key_;
            ptr = const_cast<FixedHashMapCell *>(ptr_);
        }
        Key key;
        FixedHashMapCell * ptr;

        Key & getFirstMutable() { return key; }
        const Key & getFirst() const { return key; }
        Mapped & getSecond() { return ptr->mapped; }
        const Mapped & getSecond() const { return ptr->mapped; }
        const value_type getValue() const { return {key, ptr->mapped}; }
    };
};


template <typename Key, typename Mapped, typename Allocator = HashTableAllocator>
class FixedHashMap : public FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>
{
public:
    using Base = FixedHashTable<Key, FixedHashMapCell<Key, Mapped>, Allocator>;
    using key_type = Key;
    using mapped_type = Mapped;
    using value_type = typename Base::cell_type::value_type;

    using Base::Base;

    mapped_type & ALWAYS_INLINE operator[](Key x)
    {
        typename Base::iterator it;
        bool inserted;
        this->emplace(x, it, inserted);
        if (inserted)
            new (&it->getSecond()) mapped_type();

        return it->getSecond();
    }
};
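A hedged usage sketch for the new map (not part of the diff; assumes the ClickHouse source tree and its UInt8/UInt64 typedefs): for a UInt8 key the table is just 256 cells, and operator[] default-constructs the mapped value on first access, so byte-frequency counting becomes a single array-indexed increment per input byte.

    #include <cstdio>
    #include <Common/HashTable/FixedHashMap.h>

    /// Hypothetical example: count byte frequencies with the new map.
    void countBytes(const unsigned char * data, size_t size)
    {
        FixedHashMap<UInt8, UInt64> counts;
        for (size_t i = 0; i < size; ++i)
            ++counts[data[i]]; // direct addressing, no hashing

        /// Iteration yields CellExt, hence getFirst()/getSecond() rather than .first/.second.
        for (const auto & cell : counts)
            std::printf("%u -> %llu\n", unsigned(cell.getFirst()), (unsigned long long)cell.getSecond());
    }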
dbms/src/Common/HashTable/FixedHashSet.h (new file, 25 lines)
@@ -0,0 +1,25 @@
#pragma once

#include <Common/HashTable/FixedHashTable.h>

template <typename Key, typename Allocator = HashTableAllocator>
class FixedHashSet : public FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>
{
public:
    using Base = FixedHashTable<Key, FixedHashTableCell<Key>, Allocator>;
    using Self = FixedHashSet;

    void merge(const Self & rhs)
    {
        for (size_t i = 0; i < Base::BUFFER_SIZE; ++i)
            if (Base::buf[i].isZero(*this) && !rhs.buf[i].isZero(*this))
                Base::buf[i] = rhs.buf[i];
    }

    /// NOTE: Currently this method isn't used. When it does, the ReadBuffer should
    /// contain the Key explicitly.
    // void readAndMerge(DB::ReadBuffer & rb)
    // {

    // }
};
dbms/src/Common/HashTable/FixedHashTable.h (new file, 420 lines)
@@ -0,0 +1,420 @@
#pragma once

#include <Common/HashTable/HashTable.h>

template <typename Key, typename TState = HashTableNoState>
struct FixedHashTableCell
{
    using State = TState;

    using value_type = Key;
    bool full;

    FixedHashTableCell() {}
    FixedHashTableCell(const Key &, const State &) : full(true) {}

    bool isZero(const State &) const { return !full; }
    void setZero() { full = false; }
    static constexpr bool need_zero_value_storage = false;
    void setMapped(const value_type & /*value*/) {}

    /// This Cell is only stored inside an iterator. It's used to accommodate the fact
    /// that the iterator-based API always provides a reference to a continuous memory
    /// containing the Key. As a result, we have to instantiate a real Key field.
    /// All methods that return a mutable reference to the Key field are named with
    /// a -Mutable suffix, indicating this is uncommon usage. As this is only for lookup
    /// tables, it's totally fine to discard the Key mutations.
    struct CellExt
    {
        Key key;

        value_type & getValueMutable() { return key; }
        const value_type & getValue() const { return key; }
        void update(Key && key_, FixedHashTableCell *) { key = key_; }
    };
};


/** Used as a lookup table for small keys such as UInt8, UInt16. It's different
  * from a HashTable in that keys are not stored in the Cell buf, but inferred
  * inside each iterator. There are a bunch of changes to make it faster than
  * using HashTable: a) It doesn't have a conflict chain; b) There is no key
  * comparison; c) The number of cycles for checking whether a cell is empty is
  * halved; d) Memory layout is tighter, especially the Clearable variants.
  *
  * NOTE: For Set variants this should always be better. For Map variants
  * however, as we need to assemble the real cell inside each iterator, there
  * might be some cases we fall short.
  *
  * TODO: Deprecate the cell API so that end users don't rely on the structure
  * of cell. Instead iterators should be used for operations such as cell
  * transfer, key updates (e.g. StringRef) and serde. This will allow
  * TwoLevelHashSet(Map) to contain different types of sets (maps).
  */
template <typename Key, typename Cell, typename Allocator>
class FixedHashTable : private boost::noncopyable, protected Allocator, protected Cell::State
{
    static constexpr size_t BUFFER_SIZE = 1ULL << (sizeof(Key) * 8);

protected:
    friend class const_iterator;
    friend class iterator;
    friend class Reader;

    using Self = FixedHashTable;
    using cell_type = Cell;

    size_t m_size = 0; /// Amount of elements
    Cell * buf;        /// A piece of memory for all elements except the element with zero key.

    void alloc() { buf = reinterpret_cast<Cell *>(Allocator::alloc(BUFFER_SIZE * sizeof(Cell))); }

    void free()
    {
        if (buf)
        {
            Allocator::free(buf, getBufferSizeInBytes());
            buf = nullptr;
        }
    }

    void destroyElements()
    {
        if (!std::is_trivially_destructible_v<Cell>)
            for (iterator it = begin(), it_end = end(); it != it_end; ++it)
                it.ptr->~Cell();
    }


    template <typename Derived, bool is_const>
    class iterator_base
    {
        using Container = std::conditional_t<is_const, const Self, Self>;
        using cell_type = std::conditional_t<is_const, const Cell, Cell>;

        Container * container;
        cell_type * ptr;

        friend class FixedHashTable;

    public:
        iterator_base() {}
        iterator_base(Container * container_, cell_type * ptr_) : container(container_), ptr(ptr_)
        {
            cell.update(ptr - container->buf, ptr);
        }

        bool operator==(const iterator_base & rhs) const { return ptr == rhs.ptr; }
        bool operator!=(const iterator_base & rhs) const { return ptr != rhs.ptr; }

        Derived & operator++()
        {
            ++ptr;

            /// Skip empty cells in the main buffer.
            auto buf_end = container->buf + container->BUFFER_SIZE;
            while (ptr < buf_end && ptr->isZero(*container))
                ++ptr;

            return static_cast<Derived &>(*this);
        }

        auto & operator*()
        {
            if (cell.key != ptr - container->buf)
                cell.update(ptr - container->buf, ptr);
            return cell;
        }
        auto * operator->()
        {
            if (cell.key != ptr - container->buf)
                cell.update(ptr - container->buf, ptr);
            return &cell;
        }

        auto getPtr() const { return ptr; }
        size_t getHash() const { return ptr - container->buf; }
        size_t getCollisionChainLength() const { return 0; }
        typename cell_type::CellExt cell;
    };


public:
    using key_type = Key;
    using value_type = typename Cell::value_type;

    size_t hash(const Key & x) const { return x; }

    FixedHashTable() { alloc(); }

    FixedHashTable(FixedHashTable && rhs) : buf(nullptr) { *this = std::move(rhs); }

    ~FixedHashTable()
    {
        destroyElements();
        free();
    }

    FixedHashTable & operator=(FixedHashTable && rhs)
    {
        destroyElements();
        free();

        std::swap(buf, rhs.buf);
        std::swap(m_size, rhs.m_size);

        Allocator::operator=(std::move(rhs));
        Cell::State::operator=(std::move(rhs));

        return *this;
    }

    class Reader final : private Cell::State
    {
    public:
        Reader(DB::ReadBuffer & in_) : in(in_) {}

        Reader(const Reader &) = delete;
        Reader & operator=(const Reader &) = delete;

        bool next()
        {
            if (!is_initialized)
            {
                Cell::State::read(in);
                DB::readVarUInt(size, in);
                is_initialized = true;
            }

            if (read_count == size)
            {
                is_eof = true;
                return false;
            }

            cell.read(in);
            ++read_count;

            return true;
        }

        inline const value_type & get() const
        {
            if (!is_initialized || is_eof)
                throw DB::Exception("No available data", DB::ErrorCodes::NO_AVAILABLE_DATA);

            return cell.getValue();
        }

    private:
        DB::ReadBuffer & in;
        Cell cell;
        size_t read_count = 0;
        size_t size;
        bool is_eof = false;
        bool is_initialized = false;
    };


    class iterator : public iterator_base<iterator, false>
    {
    public:
        using iterator_base<iterator, false>::iterator_base;
    };

    class const_iterator : public iterator_base<const_iterator, true>
    {
    public:
        using iterator_base<const_iterator, true>::iterator_base;
    };


    const_iterator begin() const
    {
        if (!buf)
            return end();

        const Cell * ptr = buf;
        auto buf_end = buf + BUFFER_SIZE;
        while (ptr < buf_end && ptr->isZero(*this))
            ++ptr;

        return const_iterator(this, ptr);
    }

    const_iterator cbegin() const { return begin(); }

    iterator begin()
    {
        if (!buf)
            return end();

        Cell * ptr = buf;
        auto buf_end = buf + BUFFER_SIZE;
        while (ptr < buf_end && ptr->isZero(*this))
            ++ptr;

        return iterator(this, ptr);
    }

    const_iterator end() const { return const_iterator(this, buf + BUFFER_SIZE); }
    const_iterator cend() const { return end(); }
    iterator end() { return iterator(this, buf + BUFFER_SIZE); }


protected:
    void ALWAYS_INLINE emplaceImpl(Key x, iterator & it, bool & inserted)
    {
        it = iterator(this, &buf[x]);

        if (!buf[x].isZero(*this))
        {
            inserted = false;
            return;
        }

        new (&buf[x]) Cell(x, *this);
        inserted = true;
        ++m_size;
    }


public:
    std::pair<iterator, bool> ALWAYS_INLINE insert(const value_type & x)
    {
        std::pair<iterator, bool> res;
        emplaceImpl(Cell::getKey(x), res.first, res.second);
        if (res.second)
            res.first.ptr->setMapped(x);

        return res;
    }


    void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted) { emplaceImpl(x, it, inserted); }
    void ALWAYS_INLINE emplace(Key x, iterator & it, bool & inserted, size_t) { emplaceImpl(x, it, inserted); }

    template <typename ObjectToCompareWith>
    iterator ALWAYS_INLINE find(ObjectToCompareWith x)
    {
        return !buf[x].isZero(*this) ? iterator(this, &buf[x]) : end();
    }

    template <typename ObjectToCompareWith>
    const_iterator ALWAYS_INLINE find(ObjectToCompareWith x) const
    {
        return !buf[x].isZero(*this) ? const_iterator(this, &buf[x]) : end();
    }

    template <typename ObjectToCompareWith>
    iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value)
    {
        return !buf[hash_value].isZero(*this) ? iterator(this, &buf[hash_value]) : end();
    }

    template <typename ObjectToCompareWith>
    const_iterator ALWAYS_INLINE find(ObjectToCompareWith, size_t hash_value) const
    {
        return !buf[hash_value].isZero(*this) ? const_iterator(this, &buf[hash_value]) : end();
    }

    bool ALWAYS_INLINE has(Key x) const { return !buf[x].isZero(*this); }
    bool ALWAYS_INLINE has(Key, size_t hash_value) const { return !buf[hash_value].isZero(*this); }

    void write(DB::WriteBuffer & wb) const
    {
        Cell::State::write(wb);
        DB::writeVarUInt(m_size, wb);

        for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
            if (!ptr->isZero(*this))
            {
                DB::writeVarUInt(ptr - buf, wb);
                ptr->write(wb);
            }
    }

    void writeText(DB::WriteBuffer & wb) const
    {
        Cell::State::writeText(wb);
        DB::writeText(m_size, wb);

        for (auto ptr = buf, buf_end = buf + BUFFER_SIZE; ptr < buf_end; ++ptr)
        {
            if (!ptr->isZero(*this))
            {
                DB::writeChar(',', wb);
                DB::writeText(ptr - buf, wb);
                DB::writeChar(',', wb);
                ptr->writeText(wb);
            }
        }
    }

    void read(DB::ReadBuffer & rb)
    {
        Cell::State::read(rb);
        destroyElements();
        DB::readVarUInt(m_size, rb);
        free();
        alloc();

        for (size_t i = 0; i < m_size; ++i)
        {
            size_t place_value = 0;
            DB::readVarUInt(place_value, rb);
            Cell x;
            x.read(rb);
            new (&buf[place_value]) Cell(x, *this);
        }
    }

    void readText(DB::ReadBuffer & rb)
    {
        Cell::State::readText(rb);
        destroyElements();
        DB::readText(m_size, rb);
        free();
        alloc();

        for (size_t i = 0; i < m_size; ++i)
        {
            size_t place_value = 0;
            DB::assertChar(',', rb);
            DB::readText(place_value, rb);
            Cell x;
            DB::assertChar(',', rb);
            x.readText(rb);
            new (&buf[place_value]) Cell(x, *this);
        }
    }

    size_t size() const { return m_size; }

    bool empty() const { return 0 == m_size; }

    void clear()
    {
        destroyElements();
        m_size = 0;

        memset(static_cast<void *>(buf), 0, BUFFER_SIZE * sizeof(*buf));
    }

    /// After executing this function, the table can only be destroyed,
    /// and also you can use the methods `size`, `empty`, `begin`, `end`.
    void clearAndShrink()
    {
        destroyElements();
        m_size = 0;
        free();
    }

    size_t getBufferSizeInBytes() const { return BUFFER_SIZE * sizeof(Cell); }

    size_t getBufferSizeInCells() const { return BUFFER_SIZE; }

#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
    size_t getCollisions() const { return 0; }
#endif
};
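The idea behind FixedHashTable is direct addressing: for a UInt8 or UInt16 key, the key itself is the bucket index, so there is no hash function, no probing, and no stored key at all. A minimal self-contained illustration, independent of the classes above (hypothetical names, compilable on its own):

    #include <array>
    #include <cstdint>
    #include <cstdio>

    /// Minimal FixedHashTable idea for uint8_t keys: the key *is* the index,
    /// so the "table" is a flat array of 256 cells, each holding only an
    /// occupancy flag plus the mapped value. Keys are never stored; an
    /// iterator can recompute the key as (cell - buf).
    struct FixedMapU8
    {
        struct Cell { bool full = false; uint64_t mapped = 0; };
        std::array<Cell, 256> buf{}; // 1 << (sizeof(uint8_t) * 8) cells

        uint64_t & operator[](uint8_t key)
        {
            Cell & cell = buf[key]; // direct addressing: no hash, no probing
            cell.full = true;
            return cell.mapped;
        }

        bool has(uint8_t key) const { return buf[key].full; }
    };

    int main()
    {
        FixedMapU8 counts;
        for (uint8_t byte : {1, 1, 2})
            ++counts[byte];
        std::printf("%d %llu\n", counts.has(1), (unsigned long long)counts[1]); // 1 2
    }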
@@ -15,11 +15,14 @@ struct NoInitTag {}

 /// A pair that does not initialize the elements, if not needed.
 template <typename First, typename Second>
-struct PairNoInit
+class PairNoInit
 {
     First first;
     Second second;

+    template <typename, typename, typename, typename>
+    friend class HashMapCell;
+
+public:
     PairNoInit() {}

     template <typename First_>
@@ -29,6 +32,11 @@ struct PairNoInit
     template <typename First_, typename Second_>
     PairNoInit(First_ && first_, Second_ && second_)
         : first(std::forward<First_>(first_)), second(std::forward<Second_>(second_)) {}
+
+    First & getFirstMutable() { return first; }
+    const First & getFirst() const { return first; }
+    Second & getSecond() { return second; }
+    const Second & getSecond() const { return second; }
 };


@@ -45,10 +53,14 @@ struct HashMapCell
     HashMapCell(const Key & key_, const State &) : value(key_, NoInitTag()) {}
     HashMapCell(const value_type & value_, const State &) : value(value_) {}

-    value_type & getValue() { return value; }
+    Key & getFirstMutable() { return value.first; }
+    const Key & getFirst() const { return value.first; }
+    Mapped & getSecond() { return value.second; }
+    const Mapped & getSecond() const { return value.second; }
+
+    value_type & getValueMutable() { return value; }
     const value_type & getValue() const { return value; }

     static Key & getKey(value_type & value) { return value.first; }
     static const Key & getKey(const value_type & value) { return value.first; }

     bool keyEquals(const Key & key_) const { return value.first == key_; }
@@ -111,8 +123,8 @@ struct HashMapCellWithSavedHash : public HashMapCell<Key, TMapped, Hash, TState>

     using Base::Base;

-    bool keyEquals(const Key & key_) const { return this->value.first == key_; }
-    bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.first == key_; }
+    bool keyEquals(const Key & key_) const { return this->value.getFirst() == key_; }
+    bool keyEquals(const Key & key_, size_t hash_) const { return saved_hash == hash_ && this->value.getFirst() == key_; }
     bool keyEquals(const Key & key_, size_t hash_, const typename Base::State &) const { return keyEquals(key_, hash_); }

     void setHash(size_t hash_value) { saved_hash = hash_value; }
@@ -158,9 +170,9 @@ public:
       * the compiler can not guess about this, and generates the `load`, `increment`, `store` code.
       */
         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }
 };
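These hunks explain the .first/.second to getFirst()/getSecond() churn throughout the rest of the diff: a hash-table cell is no longer guaranteed to physically store a pair (FixedHashMap cells store only the mapped value and reconstruct the key from the bucket index), so generic code must go through accessors that each cell type implements for itself. A simplified sketch of the two cell shapes behind one interface (hypothetical types, not the ClickHouse classes):

    #include <cstdint>

    /// Classic HashMap-style cell: key and value stored together.
    struct PairCell
    {
        uint32_t first;
        uint64_t second;
        const uint32_t & getFirst() const { return first; }
        uint64_t & getSecond() { return second; }
    };

    /// FixedHashMap-style cell view: the key is the bucket index, not stored.
    struct FixedCellRef
    {
        uint32_t key;      /// materialized by the iterator from (cell - buf)
        uint64_t * mapped; /// points into the table's real cell
        const uint32_t & getFirst() const { return key; }
        uint64_t & getSecond() { return *mapped; }
    };

    /// Generic code compiles against either cell shape; with .first/.second
    /// it could only ever work with the first one.
    template <typename Cell>
    uint64_t bump(Cell & cell) { return ++cell.getSecond(); }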
@@ -98,11 +98,10 @@ struct HashTableCell
     /// HashTableCell(const value_type & value_, const State & state) : key(value_) {}

     /// Get what the value_type of the container will be.
-    value_type & getValue() { return key; }
+    value_type & getValueMutable() { return key; }
     const value_type & getValue() const { return key; }

     /// Get the key.
     static Key & getKey(value_type & value) { return value; }
     static const Key & getKey(const value_type & value) { return value; }

     /// Are the keys at the cells equal?
@@ -459,8 +458,8 @@ protected:
         return static_cast<Derived &>(*this);
     }

-    auto & operator* () const { return ptr->getValue(); }
-    auto * operator->() const { return &ptr->getValue(); }
+    auto & operator* () const { return *ptr; }
+    auto * operator->() const { return ptr; }

     auto getPtr() const { return ptr; }
     size_t getHash() const { return ptr->getHash(*container); }
@@ -148,8 +148,8 @@ public:
         return *this;
     }

-    value_type & operator* () const { return ptr->getValue(); }
-    value_type * operator->() const { return &ptr->getValue(); }
+    Cell & operator* () const { return *ptr; }
+    Cell * operator->() const { return ptr; }

     Cell * getPtr() const { return ptr; }
 };
@@ -176,8 +176,8 @@ public:
         return *this;
     }

-    const value_type & operator* () const { return ptr->getValue(); }
-    const value_type * operator->() const { return &ptr->getValue(); }
+    const Cell & operator* () const { return *ptr; }
+    const Cell * operator->() const { return ptr; }

     const Cell * getPtr() const { return ptr; }
 };
@@ -399,8 +399,8 @@ public:
         typename SmallMapTable::iterator it;
         bool inserted;
         this->emplace(x, it, inserted);
-        new(&it->second) mapped_type();
-        return it->second;
+        new(&it->getSecond()) mapped_type();
+        return it->getSecond();
     }
 };
@@ -29,9 +29,9 @@ public:
         this->emplace(x, it, inserted);

         if (inserted)
-            new(&it->second) mapped_type();
+            new(&it->getSecond()) mapped_type();

-        return it->second;
+        return it->getSecond();
     }
 };
@@ -98,7 +98,7 @@ public:
         /// It is assumed that the zero key (stored separately) is first in iteration order.
         if (it != src.end() && it.getPtr()->isZero(src))
         {
-            insert(*it);
+            insert(it->getValue());
             ++it;
         }

@@ -141,8 +141,8 @@ public:
         return *this;
     }

-    value_type & operator* () const { return *current_it; }
-    value_type * operator->() const { return &*current_it; }
+    Cell & operator* () const { return *current_it; }
+    Cell * operator->() const { return current_it.getPtr(); }

     Cell * getPtr() const { return current_it.getPtr(); }
     size_t getHash() const { return current_it.getHash(); }
@@ -179,8 +179,8 @@ public:
         return *this;
     }

-    const value_type & operator* () const { return *current_it; }
-    const value_type * operator->() const { return &*current_it; }
+    const Cell & operator* () const { return *current_it; }
+    const Cell * operator->() const { return current_it->getPtr(); }

     const Cell * getPtr() const { return current_it.getPtr(); }
     size_t getHash() const { return current_it.getHash(); }
@@ -45,7 +45,7 @@ private:
         Large * tmp_large = new Large;

         for (const auto & x : small)
-            tmp_large->insert(x);
+            tmp_large->insert(x.getValue());

         large = tmp_large;
     }
@@ -99,7 +99,7 @@ public:
         else
         {
             for (const auto & x : rhs.small)
-                insert(x);
+                insert(x.getValue());
         }
     }
@@ -33,27 +33,28 @@ namespace ErrorCodes
 }


-class RWLockImpl::LockHandlerImpl
+class RWLockImpl::LockHolderImpl
 {
     RWLock parent;
     GroupsContainer::iterator it_group;
     ClientsContainer::iterator it_client;
-    ThreadToHandler::iterator it_handler;
+    ThreadToHolder::iterator it_thread;
+    QueryIdToHolder::iterator it_query;
     CurrentMetrics::Increment active_client_increment;

-    LockHandlerImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);
+    LockHolderImpl(RWLock && parent, GroupsContainer::iterator it_group, ClientsContainer::iterator it_client);

 public:

-    LockHandlerImpl(const LockHandlerImpl & other) = delete;
+    LockHolderImpl(const LockHolderImpl & other) = delete;

-    ~LockHandlerImpl();
+    ~LockHolderImpl();

     friend class RWLockImpl;
 };


-RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
+RWLockImpl::LockHolder RWLockImpl::getLock(RWLockImpl::Type type, const String & query_id)
 {
     Stopwatch watch(CLOCK_MONOTONIC_COARSE);
     CurrentMetrics::Increment waiting_client_increment((type == Read) ? CurrentMetrics::RWLockWaitingReaders
@@ -66,28 +67,33 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
                        : ProfileEvents::RWLockWritersWaitMilliseconds, watch.elapsedMilliseconds());
     };

-    auto this_thread_id = std::this_thread::get_id();
     GroupsContainer::iterator it_group;
     ClientsContainer::iterator it_client;

     std::unique_lock lock(mutex);

-    /// Check if the same thread is acquiring previously acquired lock
-    auto it_handler = thread_to_handler.find(this_thread_id);
-    if (it_handler != thread_to_handler.end())
+    /// Check if the same query is acquiring previously acquired lock
+    LockHolder existing_holder_ptr;
+
+    auto this_thread_id = std::this_thread::get_id();
+    auto it_thread = thread_to_holder.find(this_thread_id);
+
+    auto it_query = query_id_to_holder.end();
+    if (query_id != RWLockImpl::NO_QUERY)
+        it_query = query_id_to_holder.find(query_id);
+
+    if (it_thread != thread_to_holder.end())
+        existing_holder_ptr = it_thread->second.lock();
+    else if (it_query != query_id_to_holder.end())
+        existing_holder_ptr = it_query->second.lock();
+
+    if (existing_holder_ptr)
     {
-        auto handler_ptr = it_handler->second.lock();
-
-        /// Lock may be released in another thread, but not yet deleted inside |~LogHandlerImpl()|
-
-        if (handler_ptr)
-        {
-            /// XXX: it means we can't upgrade lock from read to write - with proper waiting!
-            if (type != Read || handler_ptr->it_group->type != Read)
-                throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
-
-            return handler_ptr;
-        }
+        /// XXX: it means we can't upgrade lock from read to write - with proper waiting!
+        if (type != Read || existing_holder_ptr->it_group->type != Read)
+            throw Exception("Attempt to acquire exclusive lock recursively", ErrorCodes::LOGICAL_ERROR);
+
+        return existing_holder_ptr;
     }

     if (type == Type::Write || queue.empty() || queue.back().type == Type::Write)
@@ -115,11 +121,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
         throw;
     }

-    LockHandler res(new LockHandlerImpl(shared_from_this(), it_group, it_client));
+    LockHolder res(new LockHolderImpl(shared_from_this(), it_group, it_client));

-    /// Insert myself (weak_ptr to the handler) to threads set to implement recursive lock
-    it_handler = thread_to_handler.emplace(this_thread_id, res).first;
-    res->it_handler = it_handler;
+    /// Insert myself (weak_ptr to the holder) to threads set to implement recursive lock
+    it_thread = thread_to_holder.emplace(this_thread_id, res).first;
+    res->it_thread = it_thread;
+
+    if (query_id != RWLockImpl::NO_QUERY)
+        it_query = query_id_to_holder.emplace(query_id, res).first;
+    res->it_query = it_query;

     /// We are first, we should not wait anything
     /// If we are not the first client in the group, a notification could be already sent
@@ -137,12 +147,15 @@ RWLockImpl::LockHandler RWLockImpl::getLock(RWLockImpl::Type type)
 }


-RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
+RWLockImpl::LockHolderImpl::~LockHolderImpl()
 {
     std::unique_lock lock(parent->mutex);

-    /// Remove weak_ptr to the handler, since there are no owners of the current lock
-    parent->thread_to_handler.erase(it_handler);
+    /// Remove weak_ptrs to the holder, since there are no owners of the current lock
+    parent->thread_to_holder.erase(it_thread);
+
+    if (it_query != parent->query_id_to_holder.end())
+        parent->query_id_to_holder.erase(it_query);

     /// Removes myself from client list of our group
     it_group->clients.erase(it_client);
@@ -161,7 +174,7 @@ RWLockImpl::LockHandlerImpl::~LockHandlerImpl()
 }


-RWLockImpl::LockHandlerImpl::LockHandlerImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
+RWLockImpl::LockHolderImpl::LockHolderImpl(RWLock && parent, RWLockImpl::GroupsContainer::iterator it_group,
                                              RWLockImpl::ClientsContainer::iterator it_client)
     : parent{std::move(parent)}, it_group{it_group}, it_client{it_client},
       active_client_increment{(*it_client == RWLockImpl::Read) ? CurrentMetrics::RWLockActiveReaders
@@ -1,4 +1,5 @@
 #pragma once
+#include <Core/Types.h>
 #include <boost/core/noncopyable.hpp>
 #include <list>
 #include <vector>
@@ -17,7 +18,13 @@ using RWLock = std::shared_ptr<RWLockImpl>;


 /// Implements shared lock with FIFO service
-/// Can be acquired recursively (several calls from the same thread) in Read mode
+/// Can be acquired recursively (several calls for the same query or the same OS thread) in Read mode
+///
+/// NOTE: it is important to allow acquiring the same lock in Read mode without waiting if it is already
+/// acquired by another thread of the same query. Otherwise the following deadlock is possible:
+/// - SELECT thread 1 locks in the Read mode
+/// - ALTER tries to lock in the Write mode (waits for SELECT thread 1)
+/// - SELECT thread 2 tries to lock in the Read mode (waits for ALTER)
 class RWLockImpl : public std::enable_shared_from_this<RWLockImpl>
 {
 public:
@@ -29,14 +36,17 @@ public:

     static RWLock create() { return RWLock(new RWLockImpl); }

-    /// Just use LockHandler::reset() to release the lock
-    class LockHandlerImpl;
-    friend class LockHandlerImpl;
-    using LockHandler = std::shared_ptr<LockHandlerImpl>;
+    /// Just use LockHolder::reset() to release the lock
+    class LockHolderImpl;
+    friend class LockHolderImpl;
+    using LockHolder = std::shared_ptr<LockHolderImpl>;

     /// Waits in the queue and returns appropriate lock
-    LockHandler getLock(Type type);
+    /// Empty query_id means the lock is acquired out of the query context (e.g. in a background thread).
+    LockHolder getLock(Type type, const String & query_id);
+
+    /// Use as query_id to acquire a lock outside the query context.
+    inline static const String NO_QUERY = String();

 private:
     RWLockImpl() = default;
@@ -44,7 +54,8 @@ private:
     struct Group;
     using GroupsContainer = std::list<Group>;
     using ClientsContainer = std::list<Type>;
-    using ThreadToHandler = std::map<std::thread::id, std::weak_ptr<LockHandlerImpl>>;
+    using ThreadToHolder = std::map<std::thread::id, std::weak_ptr<LockHolderImpl>>;
+    using QueryIdToHolder = std::map<String, std::weak_ptr<LockHolderImpl>>;

     /// Group of clients that should be executed concurrently
     /// i.e. a group could contain several readers, but only one writer
@@ -61,7 +72,8 @@ private:

     mutable std::mutex mutex;
     GroupsContainer queue;
-    ThreadToHandler thread_to_handler;
+    ThreadToHolder thread_to_holder;
+    QueryIdToHolder query_id_to_holder;
 };
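With query_id threaded through getLock, any thread working on the same query can re-acquire the Read lock without queueing behind a pending Write request, which is exactly the deadlock described in the new header comment. A hypothetical caller sketch (the include path, function name, and surrounding code are assumptions, not from the diff; namespace qualifiers are omitted as in the tests below):

    #include <Common/RWLock.h>

    /// Two threads of one query can both hold the table lock in Read mode,
    /// keyed by the query id; a background task passes RWLockImpl::NO_QUERY
    /// and only gets thread-scoped recursion.
    void processPart(RWLock table_lock, const String & query_id)
    {
        auto read_lock = table_lock->getLock(RWLockImpl::Read, query_id);
        /// ... read from the table. A second getLock(Read, query_id) from any
        /// thread of this query returns the existing holder instead of waiting.
        read_lock.reset(); /// release explicitly, or let the holder go out of scope
    }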
@@ -152,7 +152,7 @@ public:
         auto it = counter_map.find(key, hash);
         if (it != counter_map.end())
         {
-            auto c = it->second;
+            auto c = it->getSecond();
             c->count += increment;
             c->error += error;
             percolate(c);
@@ -189,8 +189,8 @@ public:
             min->error = alpha + error;
             percolate(min);

-            it->second = min;
-            it->first = min->key;
+            it->getSecond() = min;
+            it->getFirstMutable() = min->key;
             counter_map.reinsert(it, hash);
         }
     }
@@ -137,8 +137,8 @@ struct TasksStatsCounters
         profile_events.increment(ProfileEvents::OSCPUVirtualTimeMicroseconds,
             safeDiff(prev.stat.cpu_run_virtual_total, curr.stat.cpu_run_virtual_total) / 1000U);

-        /// Too old struct version, do not read new fields
-        if (curr.stat.version < TASKSTATS_VERSION)
+        /// Since TASKSTATS_VERSION = 3 extended accounting and IO accounting is available.
+        if (curr.stat.version < 3)
             return;

         profile_events.increment(ProfileEvents::OSReadChars, safeDiff(prev.stat.read_char, curr.stat.read_char));
@@ -155,10 +155,10 @@ int main(int argc, char ** argv)
             map.emplace(rand(), it, inserted);
             if (inserted)
             {
-                new(&it->second) Arr(n);
+                new(&it->getSecond()) Arr(n);

                 for (size_t j = 0; j < n; ++j)
-                    it->second[j] = field;
+                    it->getSecond()[j] = field;
             }
         }
@ -39,7 +39,7 @@ TEST(Common, RWLock_1)
auto type = (std::uniform_int_distribution<>(0, 9)(gen) >= round) ? RWLockImpl::Read : RWLockImpl::Write;
auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));

auto lock = fifo_lock->getLock(type);
auto lock = fifo_lock->getLock(type, RWLockImpl::NO_QUERY);

if (type == RWLockImpl::Write)
{
@ -99,7 +99,7 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < 2 * cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Write);
auto lock = fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);

auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);
@ -110,17 +110,17 @@ TEST(Common, RWLock_Recursive)
{
for (int i = 0; i < cycles; ++i)
{
auto lock1 = fifo_lock->getLock(RWLockImpl::Read);
auto lock1 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);

auto sleep_for = std::chrono::duration<int, std::micro>(std::uniform_int_distribution<>(1, 100)(gen));
std::this_thread::sleep_for(sleep_for);

auto lock2 = fifo_lock->getLock(RWLockImpl::Read);
auto lock2 = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);

EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write);});
EXPECT_ANY_THROW({fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);});
}

fifo_lock->getLock(RWLockImpl::Write);
fifo_lock->getLock(RWLockImpl::Write, RWLockImpl::NO_QUERY);
});

t1.join();
@ -143,7 +143,7 @@ TEST(Common, RWLock_PerfTest_Readers)
{
for (auto i = 0; i < cycles; ++i)
{
auto lock = fifo_lock->getLock(RWLockImpl::Read);
auto lock = fifo_lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
}
};

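As the updated tests show, RWLockImpl::getLock() now takes the id of the query acquiring the lock, with RWLockImpl::NO_QUERY for callers that run outside of any query. A hedged sketch of the usage, assuming the same create() factory that produces the tests' fifo_lock:

auto lock = RWLockImpl::create();
{
    /// Shared (read) lock; several readers may hold it concurrently.
    auto read_holder = lock->getLock(RWLockImpl::Read, RWLockImpl::NO_QUERY);
    /// The holder releases the lock when it is destroyed.
}
auto write_holder = lock->getLock(RWLockImpl::Write, "some-query-id"); /// exclusive; the query id here is illustrative
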
@ -21,13 +21,13 @@ int main(int, char **)
bool inserted;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

|
||||
{
|
||||
if (*it == *prev_it)
|
||||
{
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
continue;
|
||||
}
|
||||
prev_it = it;
|
||||
|
||||
bool inserted;
|
||||
map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,14 +107,14 @@ void aggregate22(MapTwoLevel & map, Source::const_iterator begin, Source::const_
|
||||
{
|
||||
if (*it == *prev_it)
|
||||
{
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
continue;
|
||||
}
|
||||
prev_it = it;
|
||||
|
||||
bool inserted;
|
||||
map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
}
|
||||
}
|
||||
|
||||
@ -126,7 +126,7 @@ void merge2(MapTwoLevel * maps, size_t num_threads, size_t bucket)
|
||||
{
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].impls[bucket].begin(); it != maps[i].impls[bucket].end(); ++it)
|
||||
maps[0].impls[bucket][it->first] += it->second;
|
||||
maps[0].impls[bucket][it->getFirst()] += it->getSecond();
|
||||
}
|
||||
|
||||
void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_iterator begin, Source::const_iterator end)
|
||||
@ -138,7 +138,7 @@ void aggregate3(Map & local_map, Map & global_map, Mutex & mutex, Source::const_
|
||||
Map::iterator found = local_map.find(*it);
|
||||
|
||||
if (found != local_map.end())
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
else if (local_map.size() < threshold)
|
||||
++local_map[*it]; /// TODO You could do one lookup, not two.
|
||||
else
|
||||
@ -163,13 +163,13 @@ void aggregate33(Map & local_map, Map & global_map, Mutex & mutex, Source::const
|
||||
Map::iterator found;
|
||||
bool inserted;
|
||||
local_map.emplace(*it, found, inserted);
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
|
||||
if (inserted && local_map.size() == threshold)
|
||||
{
|
||||
std::lock_guard<Mutex> lock(mutex);
|
||||
for (auto & value_type : local_map)
|
||||
global_map[value_type.first] += value_type.second;
|
||||
global_map[value_type.getFirst()] += value_type.getSecond();
|
||||
|
||||
local_map.clear();
|
||||
}
|
||||
@ -198,7 +198,7 @@ void aggregate4(Map & local_map, MapTwoLevel & global_map, Mutex * mutexes, Sour
|
||||
Map::iterator found = local_map.find(*it);
|
||||
|
||||
if (found != local_map.end())
|
||||
++found->second;
|
||||
++found->getSecond();
|
||||
else
|
||||
{
|
||||
size_t hash_value = global_map.hash(*it);
|
||||
@ -311,7 +311,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
|
||||
maps[0][it->first] += it->second;
|
||||
maps[0][it->getFirst()] += it->getSecond();
|
||||
|
||||
watch.stop();
|
||||
double time_merged = watch.elapsedSeconds();
|
||||
@ -365,7 +365,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 1; i < num_threads; ++i)
|
||||
for (auto it = maps[i].begin(); it != maps[i].end(); ++it)
|
||||
maps[0][it->first] += it->second;
|
||||
maps[0][it->getFirst()] += it->getSecond();
|
||||
|
||||
watch.stop();
|
||||
|
||||
@ -435,7 +435,7 @@ int main(int argc, char ** argv)
|
||||
continue;
|
||||
|
||||
finish = false;
|
||||
maps[0][iterators[i]->first] += iterators[i]->second;
|
||||
maps[0][iterators[i]->getFirst()] += iterators[i]->getSecond();
|
||||
++iterators[i];
|
||||
}
|
||||
|
||||
@ -623,7 +623,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
@ -689,7 +689,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
@ -760,7 +760,7 @@ int main(int argc, char ** argv)
|
||||
|
||||
for (size_t i = 0; i < num_threads; ++i)
|
||||
for (auto it = local_maps[i].begin(); it != local_maps[i].end(); ++it)
|
||||
global_map[it->first] += it->second;
|
||||
global_map[it->getFirst()] += it->getSecond();
|
||||
|
||||
pool.wait();
|
||||
|
||||
|
@ -51,9 +51,9 @@ struct AggregateIndependent
map.emplace(*it, place, inserted);

if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -93,7 +93,7 @@ struct AggregateIndependentWithSequentialKeysOptimization
{
if (it != begin && *it == prev_key)
{
updater(place->second);
updater(place->getSecond());
continue;
}
prev_key = *it;
@ -102,9 +102,9 @@ struct AggregateIndependentWithSequentialKeysOptimization
map.emplace(*it, place, inserted);

if (inserted)
creator(place->second);
creator(place->getSecond());
else
updater(place->second);
updater(place->getSecond());
}
});
}
@ -131,7 +131,7 @@ struct MergeSequential
auto begin = source_maps[i]->begin();
auto end = source_maps[i]->end();
for (auto it = begin; it != end; ++it)
merger((*source_maps[0])[it->first], it->second);
merger((*source_maps[0])[it->getFirst()], it->getSecond());
}

result_map = source_maps[0];
@ -161,7 +161,7 @@ struct MergeSequentialTransposed /// In practice not better than usual.
continue;

finish = false;
merger((*result_map)[iterators[i]->first], iterators[i]->second);
merger((*result_map)[iterators[i]->getFirst()], iterators[i]->getSecond());
++iterators[i];
}

@ -20,13 +20,13 @@ int main(int, char **)
bool inserted;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

cont.emplace(3, it, inserted);
std::cerr << inserted << ", " << *it << std::endl;
std::cerr << inserted << ", " << it->getValue() << std::endl;

for (auto x : cont)
std::cerr << x << std::endl;
std::cerr << x.getValue() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);
@ -42,7 +42,7 @@ int main(int, char **)
cont[1] = "Goodbye.";

for (auto x : cont)
std::cerr << x.first << " -> " << x.second << std::endl;
std::cerr << x.getFirst() << " -> " << x.getSecond() << std::endl;

DB::WriteBufferFromOwnString wb;
cont.writeText(wb);

@ -163,7 +163,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header,
StoragePtr storage = StorageMemory::create(data.second, ColumnsDescription{columns});
storage->startup();
context.addExternalTable(data.second, storage);
BlockOutputStreamPtr output = storage->write(ASTPtr(), settings);
BlockOutputStreamPtr output = storage->write(ASTPtr(), context);

/// Write data
data.first->readPrefix();

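The write() change above is one instance of a signature change applied across this commit: IStorage::write() now receives the Context rather than just the Settings, so implementations can reach query-scoped state and pull the settings themselves. Sketch of the call shape, with names as in the surrounding code:

/// Before: BlockOutputStreamPtr output = storage->write(ASTPtr(), settings);
/// After: pass the whole context; implementations can still obtain settings via context.getSettingsRef().
BlockOutputStreamPtr output = storage->write(ASTPtr(), context);
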
@ -10,7 +10,7 @@

namespace DB
{
inline void readBinary(Array & x, ReadBuffer & buf)
void readBinary(Array & x, ReadBuffer & buf)
{
size_t size;
UInt8 type;
@ -151,12 +151,8 @@ namespace DB
DB::String res = applyVisitor(DB::FieldVisitorToString(), DB::Field(x));
buf.write(res.data(), res.size());
}
}


namespace DB
{
inline void readBinary(Tuple & x_def, ReadBuffer & buf)
void readBinary(Tuple & x_def, ReadBuffer & buf)
{
auto & x = x_def.toUnderType();
size_t size;

@ -222,7 +222,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}

@ -249,7 +249,7 @@ int main(int argc, char ** argv)
size_t i = 0;
for (auto it = set.begin(); i < elems_show && it != set.end(); ++it, ++i)
{
devnull.write(it->first.data, it->first.size);
devnull.write(it->getFirst().data, it->getFirst().size);
devnull << std::endl;
}
}

@ -19,10 +19,14 @@ namespace ErrorCodes
CreatingSetsBlockInputStream::CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits)
: subqueries_for_sets(subqueries_for_sets_),
network_transfer_limits(network_transfer_limits)
const Context & context_)
: subqueries_for_sets(subqueries_for_sets_)
, context(context_)
{
const Settings & settings = context.getSettingsRef();
network_transfer_limits = SizeLimits(
settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);

for (auto & elem : subqueries_for_sets)
{
if (elem.second.source)
@ -92,7 +96,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery)

BlockOutputStreamPtr table_out;
if (subquery.table)
table_out = subquery.table->write({}, {});
table_out = subquery.table->write({}, context);

bool done_with_set = !subquery.set;
bool done_with_join = !subquery.join;

@ -20,7 +20,7 @@ public:
CreatingSetsBlockInputStream(
const BlockInputStreamPtr & input,
const SubqueriesForSets & subqueries_for_sets_,
const SizeLimits & network_transfer_limits);
const Context & context_);

String getName() const override { return "CreatingSets"; }

@ -35,6 +35,7 @@ protected:

private:
SubqueriesForSets subqueries_for_sets;
const Context & context;
bool created = false;

SizeLimits network_transfer_limits;

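With the constructor change above, callers no longer compute the transfer limits; the stream derives them from the Context it is given. A sketch of the simplified caller-side construction (the caller's change is not shown in this diff, so the shape is assumed):

/// Previously the caller built the limits itself:
/// SizeLimits limits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode);
/// Now passing the context is enough:
auto stream = std::make_shared<CreatingSetsBlockInputStream>(input, subqueries_for_sets, context);
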
@ -247,6 +247,26 @@ void IBlockInputStream::checkQuota(Block & block)
}
}

static void limitProgressingSpeed(size_t total_progress_size, size_t max_speed_in_seconds, UInt64 total_elapsed_microseconds)
{
/// How much time to wait for the average speed to become `max_speed_in_seconds`.
UInt64 desired_microseconds = total_progress_size * 1000000 / max_speed_in_seconds;

if (desired_microseconds > total_elapsed_microseconds)
{
UInt64 sleep_microseconds = desired_microseconds - total_elapsed_microseconds;
::timespec sleep_ts;
sleep_ts.tv_sec = sleep_microseconds / 1000000;
sleep_ts.tv_nsec = sleep_microseconds % 1000000 * 1000;

/// NOTE: Returns early in case of a signal. This is considered normal.
/// NOTE: This behavior also affects killing of queries.
::nanosleep(&sleep_ts, nullptr);

ProfileEvents::increment(ProfileEvents::ThrottlerSleepMicroseconds, sleep_microseconds);
}
}


void IBlockInputStream::progressImpl(const Progress & value)
{
@ -313,8 +333,9 @@ void IBlockInputStream::progressImpl(const Progress & value)
last_profile_events_update_time = total_elapsed_microseconds;
}

if ((limits.min_execution_speed || (total_rows && limits.timeout_before_checking_execution_speed != 0))
&& (static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
if ((limits.min_execution_speed || limits.max_execution_speed || limits.min_execution_speed_bytes ||
limits.max_execution_speed_bytes || (total_rows && limits.timeout_before_checking_execution_speed != 0)) &&
(static_cast<Int64>(total_elapsed_microseconds) > limits.timeout_before_checking_execution_speed.totalMicroseconds()))
{
/// Do not count sleeps in throttlers
UInt64 throttler_sleep_microseconds = CurrentThread::getProfileEvents()[ProfileEvents::ThrottlerSleepMicroseconds];
@ -328,6 +349,11 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ " rows/sec., minimum: " + toString(limits.min_execution_speed),
ErrorCodes::TOO_SLOW);

if (limits.min_execution_speed_bytes && progress.bytes / elapsed_seconds < limits.min_execution_speed_bytes)
throw Exception("Query is executing too slow: " + toString(progress.bytes / elapsed_seconds)
+ " bytes/sec., minimum: " + toString(limits.min_execution_speed_bytes),
ErrorCodes::TOO_SLOW);

/// If the predicted execution time is longer than `max_execution_time`.
if (limits.max_execution_time != 0 && total_rows)
{
@ -339,6 +365,12 @@ void IBlockInputStream::progressImpl(const Progress & value)
+ ". Estimated rows to process: " + toString(total_rows),
ErrorCodes::TOO_SLOW);
}

if (limits.max_execution_speed && progress.rows / elapsed_seconds >= limits.max_execution_speed)
limitProgressingSpeed(progress.rows, limits.max_execution_speed, total_elapsed_microseconds);

if (limits.max_execution_speed_bytes && progress.bytes / elapsed_seconds >= limits.max_execution_speed_bytes)
limitProgressingSpeed(progress.bytes, limits.max_execution_speed_bytes, total_elapsed_microseconds);
}
}

@ -212,6 +212,9 @@ public:

/// in rows per second
size_t min_execution_speed = 0;
size_t max_execution_speed = 0;
size_t min_execution_speed_bytes = 0;
size_t max_execution_speed_bytes = 0;
/// Verify that the speed is not too low after the specified time has elapsed.
Poco::Timespan timeout_before_checking_execution_speed = 0;
};

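A worked example of the throttling arithmetic in limitProgressingSpeed() above: with max_execution_speed = 1000000 rows/sec, after reading 5000000 rows in 3 seconds (3000000 microseconds) the desired elapsed time is 5000000 * 1000000 / 1000000 = 5000000 microseconds, so the thread sleeps for the remaining 2000000 microseconds, pulling the average speed back down to the cap. The same computation applies to max_execution_speed_bytes with bytes in place of rows.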
@ -20,7 +20,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
* Although any insertion into the table is now done via PushingToViewsBlockOutputStream,
* it's clear that this is not the best place for this functionality.
*/
addTableLock(storage->lockStructure(true));
addTableLock(storage->lockStructure(true, context.getCurrentQueryId()));

/// If the "root" table deduplicates blocks, there is no need to make deduplication for children
/// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks
@ -45,7 +45,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
auto & materialized_view = dynamic_cast<const StorageMaterializedView &>(*dependent_table);

if (StoragePtr inner_table = materialized_view.tryGetTargetTable())
addTableLock(inner_table->lockStructure(true));
addTableLock(inner_table->lockStructure(true, context.getCurrentQueryId()));

auto query = materialized_view.getInnerQuery();
BlockOutputStreamPtr out = std::make_shared<PushingToViewsBlockOutputStream>(
@ -57,7 +57,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
/* Do not push to destination table if the flag is set */
if (!no_destination)
{
output = storage->write(query_ptr, context.getSettingsRef());
output = storage->write(query_ptr, context);
replicated_output = dynamic_cast<ReplicatedMergeTreeBlockOutputStream *>(output.get());
}
}

@ -74,7 +74,7 @@ void DataTypeEnum<Type>::fillMaps()

if (!name_to_value_pair.second)
throw Exception{"Duplicate names in enum: '" + name_and_value.first + "' = " + toString(name_and_value.second)
+ " and '" + name_to_value_pair.first->first.toString() + "' = " + toString(name_to_value_pair.first->second),
+ " and '" + name_to_value_pair.first->getFirst().toString() + "' = " + toString(name_to_value_pair.first->getSecond()),
ErrorCodes::SYNTAX_ERROR};

const auto value_to_name_pair = value_to_name_map.insert(

@ -81,7 +81,7 @@ public:
if (it == std::end(name_to_value_map))
throw Exception{"Unknown element '" + field_name.toString() + "' for type " + getName(), ErrorCodes::LOGICAL_ERROR};

return it->second;
return it->getSecond();
}

Field castToName(const Field & value_or_name) const override;

@ -222,7 +222,7 @@ void ComplexKeyCacheDictionary::has(const Columns & key_columns, const DataTypes

std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });

/// request new values
update(

@ -342,7 +342,7 @@ private:

std::vector<size_t> required_rows(outdated_keys.size());
std::transform(
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.second.front(); });
std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair) { return pair.getSecond().front(); });

/// request new values
update(
@ -468,7 +468,7 @@ private:
std::vector<size_t> required_rows(outdated_keys.size());
std::transform(std::begin(outdated_keys), std::end(outdated_keys), std::begin(required_rows), [](auto & pair)
{
return pair.second.front();
return pair.getSecond().front();
});

update(
@ -500,7 +500,7 @@ private:
{
const StringRef key = keys_array[row];
const auto it = map.find(key);
const auto string_ref = it != std::end(map) ? it->second : get_default(row);
const auto string_ref = it != std::end(map) ? it->getSecond() : get_default(row);
out->insertData(string_ref.data, string_ref.size);
}
}
@ -607,7 +607,7 @@ private:
/// Check which ids have not been found and require setting null_value
for (const auto & key_found_pair : remaining_keys)
{
if (key_found_pair.second)
if (key_found_pair.getSecond())
{
++found_num;
continue;
@ -615,7 +615,7 @@ private:

++not_found_num;

auto key = key_found_pair.first;
auto key = key_found_pair.getFirst();
const auto hash = StringRefHash{}(key);
const auto find_result = findCellIdx(key, now, hash);
const auto & cell_idx = find_result.cell_idx;

@ -611,7 +611,7 @@ void ComplexKeyHashedDictionary::getItemsImpl(
const auto key = placeKeysInPool(i, key_columns, keys, temporary_keys_pool);

const auto it = attr.find(key);
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));

/// free memory allocated for the key
temporary_keys_pool.rollback(key.size);
@ -779,7 +779,7 @@ std::vector<StringRef> ComplexKeyHashedDictionary::getKeys(const Attribute & att
std::vector<StringRef> keys;
keys.reserve(attr.size());
for (const auto & key : attr)
keys.push_back(key.first);
keys.push_back(key.getFirst());

return keys;
}

@ -2,12 +2,15 @@

#include <future>
#include <thread>
#include <ext/scope_guard.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/OwningBlockInputStream.h>
#include <Interpreters/Context.h>
#include <IO/WriteHelpers.h>
#include <Common/ShellCommand.h>
#include <Common/ThreadPool.h>
#include <common/logger_useful.h>
#include <common/LocalDateTime.h>
#include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
@ -46,7 +49,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(
Block & sample_block,
const Context & context)
: log(&Logger::get("ExecutableDictionarySource"))
, update_time{std::chrono::system_clock::from_time_t(0)}
, dict_struct{dict_struct_}
, command{config.getString(config_prefix + ".command")}
, update_field{config.getString(config_prefix + ".update_field", "")}
@ -68,31 +70,6 @@ ExecutableDictionarySource::ExecutableDictionarySource(const ExecutableDictionar
{
}

std::string ExecutableDictionarySource::getUpdateFieldAndDate()
{
if (update_time != std::chrono::system_clock::from_time_t(0))
{
auto tmp_time = update_time;
update_time = std::chrono::system_clock::now();
time_t hr_time = std::chrono::system_clock::to_time_t(tmp_time) - 1;
char buffer[80];
struct tm * timeinfo;
timeinfo = localtime(&hr_time);
strftime(buffer, 80, "\"%Y-%m-%d %H:%M:%S\"", timeinfo);
std::string str_time(buffer);
return command + " " + update_field + " " + str_time;
///Example case: command -T "2018-02-12 12:44:04"
///should return all entries after mentioned date
///if executable is eligible to return entries according to date.
///Where "-T" is passed as update_field.
}
else
{
std::string str_time("\"0000-00-00 00:00:00\""); ///for initial load
return command + " " + update_field + " " + str_time;
}
}

BlockInputStreamPtr ExecutableDictionarySource::loadAll()
{
LOG_TRACE(log, "loadAll " + toString());
@ -103,9 +80,15 @@ BlockInputStreamPtr ExecutableDictionarySource::loadAll()

BlockInputStreamPtr ExecutableDictionarySource::loadUpdatedAll()
{
std::string command_update = getUpdateFieldAndDate();
LOG_TRACE(log, "loadUpdatedAll " + command_update);
auto process = ShellCommand::execute(command_update);
time_t new_update_time = time(nullptr);
SCOPE_EXIT(update_time = new_update_time);

std::string command_with_update_field = command;
if (update_time)
command_with_update_field += " " + update_field + " " + DB::toString(LocalDateTime(update_time - 1));

LOG_TRACE(log, "loadUpdatedAll " + command_with_update_field);
auto process = ShellCommand::execute(command_with_update_field);
auto input_stream = context.getInputFormat(format, process->out, sample_block, max_block_size);
return std::make_shared<ShellCommandOwningBlockInputStream>(input_stream, std::move(process));
}

@ -12,6 +12,7 @@ class Logger;

namespace DB
{

/// Allows loading dictionaries from executable
class ExecutableDictionarySource final : public IDictionarySource
{
@ -27,6 +28,10 @@ public:

BlockInputStreamPtr loadAll() override;

/** The logic of this method is flawed, absolutely incorrect and ignorant.
* It may lead to skipping some values due to clock sync or timezone changes.
* The intended usage of "update_field" is totally different.
*/
BlockInputStreamPtr loadUpdatedAll() override;

BlockInputStreamPtr loadIds(const std::vector<UInt64> & ids) override;
@ -43,13 +48,10 @@ public:

std::string toString() const override;


private:
std::string getUpdateFieldAndDate();

Poco::Logger * log;

std::chrono::time_point<std::chrono::system_clock> update_time;
time_t update_time = 0;
const DictionaryStructure dict_struct;
const std::string command;
const std::string update_field;

|
||||
{
|
||||
auto it = attr.find(id);
|
||||
if (it != std::end(attr))
|
||||
id = it->second;
|
||||
id = it->getSecond();
|
||||
else
|
||||
break;
|
||||
}
|
||||
@ -605,7 +605,7 @@ void HashedDictionary::getItemsImpl(
|
||||
for (const auto i : ext::range(0, rows))
|
||||
{
|
||||
const auto it = attr.find(ids[i]);
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(it->second) : get_default(i));
|
||||
set_value(i, it != attr.end() ? static_cast<OutputType>(it->getSecond()) : get_default(i));
|
||||
}
|
||||
|
||||
query_count.fetch_add(rows, std::memory_order_relaxed);
|
||||
@ -707,7 +707,7 @@ PaddedPODArray<HashedDictionary::Key> HashedDictionary::getIds(const Attribute &
|
||||
PaddedPODArray<Key> ids;
|
||||
ids.reserve(attr.size());
|
||||
for (const auto & value : attr)
|
||||
ids.push_back(value.first);
|
||||
ids.push_back(value.getFirst());
|
||||
|
||||
return ids;
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ void RangeHashedDictionary::getString(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<StringRef> & v)
{
@ -408,7 +408,7 @@ void RangeHashedDictionary::getItemsImpl(
if (it != std::end(attr))
{
const auto date = dates[i];
const auto & ranges_and_values = it->second;
const auto & ranges_and_values = it->getSecond();
const auto val_it
= std::find_if(std::begin(ranges_and_values), std::end(ranges_and_values), [date](const Value<AttributeType> & v)
{
@ -435,7 +435,7 @@ void RangeHashedDictionary::setAttributeValueImpl(Attribute & attribute, const K

if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();

const auto insert_it
= std::lower_bound(std::begin(values), std::end(values), range, [](const Value<T> & lhs, const Range & rhs_range)
@ -508,7 +508,7 @@ void RangeHashedDictionary::setAttributeValue(Attribute & attribute, const Key i

if (it != map.end())
{
auto & values = it->second;
auto & values = it->getSecond();

const auto insert_it = std::lower_bound(
std::begin(values), std::end(values), range, [](const Value<StringRef> & lhs, const Range & rhs_range)
@ -620,9 +620,9 @@ void RangeHashedDictionary::getIdsAndDates(

for (const auto & key : attr)
{
for (const auto & value : key.second)
for (const auto & value : key.getSecond())
{
ids.push_back(key.first);
ids.push_back(key.getFirst());
start_dates.push_back(value.range.left);
end_dates.push_back(value.range.right);

@ -64,9 +64,9 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz

if (prev_positions.size() > key_index
&& prev_positions[key_index] != name_map.end()
&& name == prev_positions[key_index]->first)
&& name == prev_positions[key_index]->getFirst())
{
return prev_positions[key_index]->second;
return prev_positions[key_index]->getSecond();
}
else
{
@ -77,7 +77,7 @@ inline size_t JSONEachRowRowInputStream::columnIndex(const StringRef & name, siz
if (key_index < prev_positions.size())
prev_positions[key_index] = it;

return it->second;
return it->getSecond();
}
else
return UNKNOWN_FIELD;

@ -128,7 +128,7 @@ bool TSKVRowInputStream::read(MutableColumns & columns, RowReadExtension & ext)
}
else
{
index = it->second;
index = it->getSecond();

if (read_columns[index])
throw Exception("Duplicate field found while parsing TSKV format: " + name_ref.toString(), ErrorCodes::INCORRECT_DATA);

@ -65,7 +65,7 @@ FunctionBasePtr FunctionBuilderJoinGet::buildImpl(const ColumnsWithTypeAndName &
auto join = storage_join->getJoin();
DataTypes data_types(arguments.size());

auto table_lock = storage_join->lockStructure(false);
auto table_lock = storage_join->lockStructure(false, context.getCurrentQueryId());
for (size_t i = 0; i < arguments.size(); ++i)
data_types[i] = arguments[i].type;

@ -1142,9 +1142,30 @@ public:
const auto & col_with_type_and_name_right = block.getByPosition(arguments[1]);
const IColumn * col_left_untyped = col_with_type_and_name_left.column.get();
const IColumn * col_right_untyped = col_with_type_and_name_right.column.get();

const DataTypePtr & left_type = col_with_type_and_name_left.type;
const DataTypePtr & right_type = col_with_type_and_name_right.type;

/// The case when arguments are the same (tautological comparison). Return constant.
/// NOTE: Nullable types are a special case. (BTW, this function uses the default implementation for Nullable, so Nullable types cannot be here. Check just in case.)
/// NOTE: We consider NaN comparison to be implementation specific (and in our implementation NaNs are sometimes equal, sometimes not).
if (left_type->equals(*right_type) && !left_type->isNullable() && col_left_untyped == col_right_untyped)
{
/// Always true: =, <=, >=
if constexpr (std::is_same_v<Op<int, int>, EqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, LessOrEqualsOp<int, int>>
|| std::is_same_v<Op<int, int>, GreaterOrEqualsOp<int, int>>)
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 1u);
return;
}
else
{
block.getByPosition(result).column = DataTypeUInt8().createColumnConst(input_rows_count, 0u);
return;
}
}

WhichDataType which_left{left_type};
WhichDataType which_right{right_type};

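In SQL terms, the short-circuit added above means that for a non-Nullable column x, a self-comparison such as x = x, x <= x or x >= x is now folded to a constant 1 column without reading any data, and x != x, x < x, x > x to a constant 0 column, because both arguments are literally the same column object.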
@ -425,15 +425,15 @@ ColumnPtr FunctionArrayIntersect::execute(const UnpackedArrays & arrays, Mutable

for (const auto & pair : map)
{
if (pair.second == args)
if (pair.getSecond() == args)
{
++result_offset;
if constexpr (is_numeric_column)
result_data.insertValue(pair.first);
result_data.insertValue(pair.getFirst());
else if constexpr (std::is_same<ColumnType, ColumnString>::value || std::is_same<ColumnType, ColumnFixedString>::value)
result_data.insertData(pair.first.data, pair.first.size);
result_data.insertData(pair.getFirst().data, pair.getFirst().size);
else
result_data.deserializeAndInsertFromArena(pair.first.data);
result_data.deserializeAndInsertFromArena(pair.getFirst().data);

if (all_nullable)
null_map.push_back(0);

dbms/src/Functions/flatten.cpp (new file, 121 lines)
@ -0,0 +1,121 @@
#include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h>
#include <Functions/FunctionHelpers.h>
#include <DataTypes/DataTypeArray.h>
#include <Columns/ColumnArray.h>

namespace DB
{

/// flatten([[1, 2, 3], [4, 5]]) = [1, 2, 3, 4, 5] - flatten array.
class FunctionFlatten : public IFunction
{
public:
static constexpr auto name = "flatten";

static FunctionPtr create(const Context &) { return std::make_shared<FunctionFlatten>(); }

size_t getNumberOfArguments() const override { return 1; }
bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (!isArray(arguments[0]))
throw Exception("Illegal type " + arguments[0]->getName() +
" of argument of function " + getName() +
", expected Array", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

DataTypePtr nested_type = arguments[0];
while (isArray(nested_type))
nested_type = checkAndGetDataType<DataTypeArray>(nested_type.get())->getNestedType();

return std::make_shared<DataTypeArray>(nested_type);
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override
{
/** We create an array column whose elements are the deepest elements of the nested arrays,
* and construct offsets by selecting elements of the deepest offsets by values of ancestor offsets.
*
Example 1:

Source column: Array(Array(UInt8)):
Row 1: [[1, 2, 3], [4, 5]], Row 2: [[6], [7, 8]]
data: [1, 2, 3], [4, 5], [6], [7, 8]
offsets: 2, 4
data.data: 1 2 3 4 5 6 7 8
data.offsets: 3 5 6 8

Result column: Array(UInt8):
Row 1: [1, 2, 3, 4, 5], Row 2: [6, 7, 8]
data: 1 2 3 4 5 6 7 8
offsets: 5 8

Result offsets are selected from the deepest offsets (data.offsets) by the previous level's offsets (indices are decremented by one):
3 5 6 8
^ ^

Example 2:

Source column: Array(Array(Array(UInt8))):
Row 1: [[], [[1], [], [2, 3]]], Row 2: [[[4]]]

most deep data: 1 2 3 4

offsets1: 2 3
offsets2: 0 3 4
- ^ ^ - select by prev offsets
offsets3: 1 1 3 4
- ^ ^ - select by prev offsets

result offsets: 3, 4
result: Row 1: [1, 2, 3], Row 2: [4]
*/

const ColumnArray * src_col = checkAndGetColumn<ColumnArray>(block.getByPosition(arguments[0]).column.get());

if (!src_col)
throw Exception("Illegal column " + block.getByPosition(arguments[0]).column->getName() + " in argument of function 'flatten'",
ErrorCodes::ILLEGAL_COLUMN);

const IColumn::Offsets & src_offsets = src_col->getOffsets();

ColumnArray::ColumnOffsets::MutablePtr result_offsets_column;
const IColumn::Offsets * prev_offsets = &src_offsets;
const IColumn * prev_data = &src_col->getData();

while (const ColumnArray * next_col = checkAndGetColumn<ColumnArray>(prev_data))
{
if (!result_offsets_column)
result_offsets_column = ColumnArray::ColumnOffsets::create(input_rows_count);

IColumn::Offsets & result_offsets = result_offsets_column->getData();

const IColumn::Offsets * next_offsets = &next_col->getOffsets();

for (size_t i = 0; i < input_rows_count; ++i)
result_offsets[i] = (*next_offsets)[(*prev_offsets)[i] - 1]; /// -1 array subscript is Ok, see PaddedPODArray

prev_offsets = &result_offsets;
prev_data = &next_col->getData();
}

block.getByPosition(result).column = ColumnArray::create(
prev_data->getPtr(),
result_offsets_column ? std::move(result_offsets_column) : src_col->getOffsetsPtr());
}

private:
String getName() const override
{
return name;
}
};


void registerFunctionFlatten(FunctionFactory & factory)
{
factory.registerFunction<FunctionFlatten>();
}

}

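Usage follows the comment at the top of the class: SELECT flatten([[1, 2, 3], [4, 5]]) returns [1, 2, 3, 4, 5], and deeper nesting collapses all the way down, e.g. flatten([[[1], [], [2, 3]], [[4]]]) returns [1, 2, 3, 4]. As in the examples above, only the offsets are recomputed level by level; the deepest data column is reused as-is without copying elements.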
@ -30,6 +30,7 @@ void registerFunctionArrayEnumerateUniqRanked(FunctionFactory &);
void registerFunctionArrayEnumerateDenseRanked(FunctionFactory &);
void registerFunctionArrayUniq(FunctionFactory &);
void registerFunctionArrayDistinct(FunctionFactory &);
void registerFunctionFlatten(FunctionFactory &);
void registerFunctionArrayWithConstant(FunctionFactory &);

void registerFunctionsArray(FunctionFactory & factory)
@ -61,6 +62,7 @@ void registerFunctionsArray(FunctionFactory & factory)
registerFunctionArrayEnumerateDenseRanked(factory);
registerFunctionArrayUniq(factory);
registerFunctionArrayDistinct(factory);
registerFunctionFlatten(factory);
registerFunctionArrayWithConstant(factory);
}

@ -508,7 +508,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default;
}
@ -524,7 +524,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i])); /// little endian.
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i])); /// little endian.
else
dst[i] = dst_default[i];
}
@ -540,7 +540,7 @@ private:
{
auto it = table.find(src[i]);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = src[i];
}
@ -557,7 +557,7 @@ private:
for (size_t i = 0; i < size; ++i)
{
auto it = table.find(src[i]);
StringRef ref = it != table.end() ? it->second : dst_default;
StringRef ref = it != table.end() ? it->getSecond() : dst_default;
dst_data.resize(current_dst_offset + ref.size);
memcpy(&dst_data[current_dst_offset], ref.data, ref.size);
current_dst_offset += ref.size;
@ -581,7 +581,7 @@ private:
StringRef ref;

if (it != table.end())
ref = it->second;
ref = it->getSecond();
else
{
ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);
@ -611,7 +611,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default;
}
@ -632,7 +632,7 @@ private:
current_src_offset = src_offsets[i];
auto it = table.find(ref);
if (it != table.end())
memcpy(&dst[i], &it->second, sizeof(dst[i]));
memcpy(&dst[i], &it->getSecond(), sizeof(dst[i]));
else
dst[i] = dst_default[i];
}
@ -655,7 +655,7 @@ private:

auto it = table.find(src_ref);

StringRef dst_ref = it != table.end() ? it->second : (with_default ? dst_default : src_ref);
StringRef dst_ref = it != table.end() ? it->getSecond() : (with_default ? dst_default : src_ref);
dst_data.resize(current_dst_offset + dst_ref.size);
memcpy(&dst_data[current_dst_offset], dst_ref.data, dst_ref.size);
current_dst_offset += dst_ref.size;
@ -697,7 +697,7 @@ private:
StringRef dst_ref;

if (it != table.end())
dst_ref = it->second;
dst_ref = it->getSecond();
else
{
dst_ref.data = reinterpret_cast<const char *>(&dst_default_data[current_dst_default_offset]);

@ -54,7 +54,10 @@ add_check (hashing_read_buffer)
add_executable (io_operators io_operators.cpp)
target_link_libraries (io_operators PRIVATE clickhouse_common_io)

if (OS_LINUX)
add_executable (write_int write_int.cpp)
target_link_libraries (write_int PRIVATE clickhouse_common_io)

if (OS_LINUX OR OS_FREEBSD)
add_executable(write_buffer_aio write_buffer_aio.cpp)
target_link_libraries (write_buffer_aio PRIVATE clickhouse_common_io ${Boost_FILESYSTEM_LIBRARY})

@ -1120,11 +1120,11 @@ void NO_INLINE Aggregator::convertToBlockImplFinal(

for (const auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->insertResultInto(
value.second + offsets_of_aggregate_states[i],
value.getSecond() + offsets_of_aggregate_states[i],
*final_aggregate_columns[i]);
}

@ -1151,13 +1151,13 @@ void NO_INLINE Aggregator::convertToBlockImplNotFinal(

for (auto & value : data)
{
method.insertKeyIntoColumns(value, key_columns, key_sizes);
method.insertKeyIntoColumns(value.getValue(), key_columns, key_sizes);

/// reserved, so push_back does not throw exceptions
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_columns[i]->push_back(value.second + offsets_of_aggregate_states[i]);
aggregate_columns[i]->push_back(value.getSecond() + offsets_of_aggregate_states[i]);

value.second = nullptr;
value.getSecond() = nullptr;
}
}

@ -1495,26 +1495,26 @@ void NO_INLINE Aggregator::mergeDataImpl(
{
typename Table::iterator res_it;
bool inserted;
table_dst.emplace(it->first, res_it, inserted, it.getHash());
table_dst.emplace(it->getFirst(), res_it, inserted, it.getHash());

if (!inserted)
{
for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_it->second + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
res_it->getSecond() + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(
it->second + offsets_of_aggregate_states[i]);
it->getSecond() + offsets_of_aggregate_states[i]);
}
else
{
res_it->second = it->second;
res_it->getSecond() = it->getSecond();
}

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -1534,22 +1534,22 @@ void NO_INLINE Aggregator::mergeDataNoMoreKeysImpl(

for (auto it = table_src.begin(), end = table_src.end(); it != end; ++it)
{
typename Table::iterator res_it = table_dst.find(it->first, it.getHash());
typename Table::iterator res_it = table_dst.find(it->getFirst(), it.getHash());

AggregateDataPtr res_data = table_dst.end() == res_it
? overflows
: res_it->second;
: res_it->getSecond();

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -1567,23 +1567,23 @@ void NO_INLINE Aggregator::mergeDataOnlyExistingKeysImpl(

for (auto it = table_src.begin(); it != table_src.end(); ++it)
{
decltype(it) res_it = table_dst.find(it->first, it.getHash());
decltype(it) res_it = table_dst.find(it->getFirst(), it.getHash());

if (table_dst.end() == res_it)
continue;

AggregateDataPtr res_data = res_it->second;
AggregateDataPtr res_data = res_it->getSecond();

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->merge(
res_data + offsets_of_aggregate_states[i],
it->second + offsets_of_aggregate_states[i],
it->getSecond() + offsets_of_aggregate_states[i],
arena);

for (size_t i = 0; i < params.aggregates_size; ++i)
aggregate_functions[i]->destroy(it->second + offsets_of_aggregate_states[i]);
aggregate_functions[i]->destroy(it->getSecond() + offsets_of_aggregate_states[i]);

it->second = nullptr;
it->getSecond() = nullptr;
}

table_src.clearAndShrink();
@ -2428,7 +2428,7 @@ void NO_INLINE Aggregator::destroyImpl(Table & table) const
{
for (auto elem : table)
{
AggregateDataPtr & data = elem.second;
AggregateDataPtr & data = elem.getSecond();

/** If an exception (usually a lack of memory, the MemoryTracker throws) arose
* after inserting the key into a hash table, but before creating all states of aggregate functions,

@ -10,6 +10,7 @@

#include <common/StringRef.h>
#include <Common/Arena.h>
#include <Common/HashTable/FixedHashMap.h>
#include <Common/HashTable/HashMap.h>
#include <Common/HashTable/TwoLevelHashMap.h>
#include <Common/ThreadPool.h>
@ -64,8 +65,8 @@ class IBlockOutputStream;

using AggregatedDataWithoutKey = AggregateDataPtr;

using AggregatedDataWithUInt8Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<8>>;
using AggregatedDataWithUInt16Key = HashMap<UInt64, AggregateDataPtr, TrivialHash, HashTableFixedGrower<16>>;
using AggregatedDataWithUInt8Key = FixedHashMap<UInt8, AggregateDataPtr>;
using AggregatedDataWithUInt16Key = FixedHashMap<UInt16, AggregateDataPtr>;

using AggregatedDataWithUInt64Key = HashMap<UInt64, AggregateDataPtr, HashCRC32<UInt64>>;
using AggregatedDataWithStringKey = HashMapWithSavedHash<StringRef, AggregateDataPtr>;
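The FixedHashMap switch above is the substantive optimization in this hunk: for UInt8/UInt16 keys the table degenerates to a flat array of 256 or 65536 cells, so emplace/find become direct indexing with no hash function or collision probing. Conceptually (a sketch of the idea, not the in-tree implementation):

/// FixedHashMap<UInt8, AggregateDataPtr> behaves roughly like:
AggregateDataPtr cells[256] = {};      /// one slot per possible key value
AggregateDataPtr & slot = cells[key];  /// "lookup" is a plain array subscript
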
@ -178,7 +179,7 @@ struct AggregationMethodOneNumber
// Insert the key from the hash table into columns.
static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes & /*key_sizes*/)
{
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.first));
static_cast<ColumnVectorHelper *>(key_columns[0].get())->insertRawData<sizeof(FieldType)>(reinterpret_cast<const char *>(&value.getFirst()));
}
};

@ -206,7 +207,7 @@ struct AggregationMethodString

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};

@ -234,7 +235,7 @@ struct AggregationMethodFixedString

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
key_columns[0]->insertData(value.first.data, value.first.size);
key_columns[0]->insertData(value.getFirst().data, value.getFirst().size);
}
};

@ -326,7 +327,7 @@ struct AggregationMethodKeysFixed
/// corresponding key is nullable. Update the null map accordingly.
size_t bucket = i / 8;
size_t offset = i % 8;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.first)[bucket] >> offset) & 1;
UInt8 val = (reinterpret_cast<const UInt8 *>(&value.getFirst())[bucket] >> offset) & 1;
null_map->insertValue(val);
is_null = val == 1;
}
@ -338,7 +339,7 @@ struct AggregationMethodKeysFixed
else
{
size_t size = key_sizes[i];
observed_column->insertData(reinterpret_cast<const char *>(&value.first) + pos, size);
observed_column->insertData(reinterpret_cast<const char *>(&value.getFirst()) + pos, size);
pos += size;
}
}
@ -373,7 +374,7 @@ struct AggregationMethodSerialized

static void insertKeyIntoColumns(const typename Data::value_type & value, MutableColumns & key_columns, const Sizes &)
{
auto pos = value.first.data;
auto pos = value.getFirst().data;
for (auto & column : key_columns)
pos = column->deserializeAndInsertFromArena(pos);
}

@ -295,7 +295,7 @@ bool DDLWorker::initAndCheckTask(const String & entry_name, String & out_reason,
{
/// What should we do if we even cannot parse host name and therefore cannot properly submit execution status?
/// We can try to create a fail node using the FQDN; if it equals the host name in the cluster config, the attempt will be successful.
/// Otherwise, that node will be ignored by DDLQueryStatusInputSream.
/// Otherwise, that node will be ignored by DDLQueryStatusInputStream.

tryLogCurrentException(log, "Cannot parse DDL task " + entry_name + ", will try to send error status");

@ -1020,12 +1020,12 @@ void DDLWorker::runCleanupThread()
}


class DDLQueryStatusInputSream : public IBlockInputStream
class DDLQueryStatusInputStream : public IBlockInputStream
{
public:

DDLQueryStatusInputSream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputSream"))
DDLQueryStatusInputStream(const String & zk_node_path, const DDLLogEntry & entry, const Context & context)
: node_path(zk_node_path), context(context), watch(CLOCK_MONOTONIC_COARSE), log(&Logger::get("DDLQueryStatusInputStream"))
{
sample = Block{
{std::make_shared<DataTypeString>(), "host"},
@ -1046,7 +1046,7 @@ public:

String getName() const override
{
return "DDLQueryStatusInputSream";
return "DDLQueryStatusInputStream";
}

Block getHeader() const override { return sample; }
@ -1146,7 +1146,7 @@ public:
return sample.cloneEmpty();
}

~DDLQueryStatusInputSream() override = default;
~DDLQueryStatusInputStream() override = default;

private:

@ -1289,7 +1289,7 @@ BlockIO executeDDLQueryOnCluster(const ASTPtr & query_ptr_, const Context & cont
if (context.getSettingsRef().distributed_ddl_task_timeout == 0)
return io;

auto stream = std::make_shared<DDLQueryStatusInputSream>(node_path, entry, context);
auto stream = std::make_shared<DDLQueryStatusInputStream>(node_path, entry, context);
io.in = std::move(stream);
return io;
}

@ -115,7 +115,7 @@ private:

ThreadGroupStatusPtr thread_group;

friend class DDLQueryStatusInputSream;
friend class DDLQueryStatusInputStream;
friend struct DDLTask;
};

@ -304,7 +304,7 @@ void ExpressionAnalyzer::makeSetsForIndexImpl(const ASTPtr & node)
{
const IAST & args = *func->arguments;

if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0)))
if (storage && storage->mayBenefitFromIndexForIn(args.children.at(0), context))
{
const ASTPtr & arg = args.children.at(1);
if (typeid_cast<ASTSubquery *>(arg.get()) || isIdentifier(arg))

@ -5,7 +5,7 @@
namespace DB
{

FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_)
FindIdentifierBestTableData::FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_)
: tables(tables_)
{
}
@ -16,13 +16,21 @@ void FindIdentifierBestTableData::visit(ASTIdentifier & identifier, ASTPtr &)

if (!identifier.compound())
{
if (!tables.empty())
best_table = &tables[0];
for (const auto & [table, names] : tables)
{
if (std::find(names.begin(), names.end(), identifier.name) != names.end())
{
// TODO: make sure no collision ever happens
if (!best_table)
best_table = &table;
}
}
}
else
{
// FIXME: make a better matcher using `names`?
size_t best_match = 0;
for (const DatabaseAndTableWithAlias & table : tables)
for (const auto & [table, names] : tables)
{
if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, table))
if (match > best_match)

@ -3,6 +3,7 @@
#include <Parsers/IAST.h>
#include <Parsers/ASTIdentifier.h>
#include <Interpreters/InDepthNodeVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>

namespace DB
{
@ -12,10 +13,10 @@ struct FindIdentifierBestTableData
using TypeToVisit = ASTIdentifier;
using IdentifierWithTable = std::pair<ASTIdentifier *, const DatabaseAndTableWithAlias *>;

const std::vector<DatabaseAndTableWithAlias> & tables;
const std::vector<TableWithColumnNames> & tables;
std::vector<IdentifierWithTable> identifier_table;

FindIdentifierBestTableData(const std::vector<DatabaseAndTableWithAlias> & tables_);
FindIdentifierBestTableData(const std::vector<TableWithColumnNames> & tables_);

void visit(ASTIdentifier & identifier, ASTPtr &);
};

@ -591,7 +591,7 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create)
if (!as_table_name.empty())
{
as_storage = context.getTable(as_database_name, as_table_name);
as_storage_lock = as_storage->lockStructure(false);
as_storage_lock = as_storage->lockStructure(false, context.getCurrentQueryId());
}

/// Set and retrieve list of columns.

@ -93,7 +93,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
table = context.getTable(database_name, table_name);
}

auto table_lock = table->lockStructure(false);
auto table_lock = table->lockStructure(false, context.getCurrentQueryId());
columns = table->getColumns().getAll();
column_defaults = table->getColumns().defaults;
column_comments = table->getColumns().comments;

@ -69,7 +69,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
{
|
||||
database_and_table.second->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table from memory, don't touch data and metadata
|
||||
database_and_table.first->detachTable(database_and_table.second->getTableName());
|
||||
}
|
||||
@ -78,7 +78,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
database_and_table.second->checkTableCanBeDropped();
|
||||
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table data, don't touch metadata
|
||||
database_and_table.second->truncate(query_ptr, context);
|
||||
}
|
||||
@ -89,7 +89,7 @@ BlockIO InterpreterDropQuery::executeToTable(String & database_name_, String & t
|
||||
database_and_table.second->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
|
||||
auto table_lock = database_and_table.second->lockForAlter();
|
||||
auto table_lock = database_and_table.second->lockForAlter(context.getCurrentQueryId());
|
||||
/// Delete table metadata and table itself from memory
|
||||
|
||||
database_and_table.first->removeTable(context, database_and_table.second->getTableName());
|
||||
@ -126,7 +126,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
|
||||
if (kind == ASTDropQuery::Kind::Truncate)
|
||||
{
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = table->lockForAlter();
|
||||
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
|
||||
/// Drop table data, don't touch metadata
|
||||
table->truncate(query_ptr, context);
|
||||
}
|
||||
@ -135,7 +135,7 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(String & table_name, ASTDr
|
||||
context_handle.tryRemoveExternalTable(table_name);
|
||||
table->shutdown();
|
||||
/// If table was already dropped by anyone, an exception will be thrown
|
||||
auto table_lock = table->lockForAlter();
|
||||
auto table_lock = table->lockForAlter(context.getCurrentQueryId());
|
||||
/// Delete table data
|
||||
table->drop();
|
||||
table->is_dropped = true;
|
||||
|
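A recurring change throughout this commit: every lockStructure()/lockForAlter() call site now passes context.getCurrentQueryId(), so a table lock records which query holds it. A hedged sketch of the idea (hypothetical types, not the actual IStorage or RWLock API):

#include <shared_mutex>
#include <string>

struct TableLockHolder
{
    std::shared_lock<std::shared_timed_mutex> lock;
    std::string owner_query_id;   /// recorded so a stuck lock can be traced back to a query
};

struct Storage
{
    std::shared_timed_mutex structure_mutex;

    /// Analogue of lockStructure(false, query_id): take a shared lock and remember the owner.
    TableLockHolder lockStructure(const std::string & query_id)
    {
        return {std::shared_lock<std::shared_timed_mutex>(structure_mutex), query_id};
    }
};

With the owner id attached, diagnostics such as "table is locked by query X" become possible when a DROP or ALTER waits on a long-running SELECT.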
@@ -1,13 +1,15 @@
-#include <sstream>
-
-#include <DataStreams/OneBlockInputStream.h>
-#include <DataStreams/BlockIO.h>
-#include <DataTypes/DataTypeString.h>
-#include <Parsers/queryToString.h>
-#include <Common/typeid_cast.h>
 #include <Interpreters/InterpreterExplainQuery.h>
 
+#include <DataStreams/BlockIO.h>
+#include <DataStreams/OneBlockInputStream.h>
+#include <DataTypes/DataTypeString.h>
+#include <Interpreters/InterpreterSelectWithUnionQuery.h>
+#include <Parsers/ASTExplainQuery.h>
+#include <Parsers/DumpASTNode.h>
+#include <Parsers/queryToString.h>
+#include <Common/typeid_cast.h>
+
+#include <sstream>
 
 
 namespace DB
@@ -26,7 +28,7 @@ Block InterpreterExplainQuery::getSampleBlock()
     Block block;
 
     ColumnWithTypeAndName col;
-    col.name = "ast";
+    col.name = "explain";
     col.type = std::make_shared<DataTypeString>();
     col.column = col.type->createColumn();
     block.insert(col);
@@ -38,12 +40,21 @@ Block InterpreterExplainQuery::getSampleBlock()
 BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
 {
     const ASTExplainQuery & ast = typeid_cast<const ASTExplainQuery &>(*query);
 
-    std::stringstream ss;
-    dumpAST(ast, ss);
-
     Block sample_block = getSampleBlock();
     MutableColumns res_columns = sample_block.cloneEmptyColumns();
 
+    std::stringstream ss;
+
+    if (ast.getKind() == ASTExplainQuery::ParsedAST)
+    {
+        dumpAST(ast, ss);
+    }
+    else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
+    {
+        InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
+        interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
+    }
+
     res_columns[0]->insert(ss.str());
 
     return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
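The new AnalyzedSyntax kind makes EXPLAIN print the query after the syntax analyzer's rewrites rather than a raw AST dump. A condensed sketch of that flow, using only calls that appear in this diff and assuming the surrounding ClickHouse headers (the two trailing bools in the constructor are only_analyze and modify_inplace):

#include <sstream>

/// Sketch: run the interpreter in analyze-only mode so the rewrites happen,
/// let it modify the AST in place, then pretty-print the rewritten query.
std::string explainSyntax(const ASTPtr & select_query, const Context & context)
{
    InterpreterSelectWithUnionQuery interpreter(
        select_query, context, {}, QueryProcessingStage::FetchColumns,
        /* subquery_depth = */ 0, /* only_analyze = */ true, /* modify_inplace = */ true);

    std::stringstream ss;
    interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
    return ss.str();
}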
@@ -1,5 +1,6 @@
 #pragma once
 
+#include <Interpreters/Context.h>
 #include <Interpreters/IInterpreter.h>
 
 
@@ -14,8 +15,8 @@ using ASTPtr = std::shared_ptr<IAST>;
 class InterpreterExplainQuery : public IInterpreter
 {
 public:
-    InterpreterExplainQuery(const ASTPtr & query_, const Context &)
-        : query(query_)
+    InterpreterExplainQuery(const ASTPtr & query_, const Context & context_)
+        : query(query_), context(context_)
     {}
 
     BlockIO execute() override;
@@ -24,6 +25,7 @@ public:
 
 private:
     ASTPtr query;
+    Context context;
 
     BlockInputStreamPtr executeImpl();
 };
@@ -96,7 +96,7 @@ BlockIO InterpreterInsertQuery::execute()
     checkAccess(query);
     StoragePtr table = getTable(query);
 
-    auto table_lock = table->lockStructure(true);
+    auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
 
     /// We create a pipeline of several streams, into which we will write data.
     BlockOutputStreamPtr out;
@@ -23,7 +23,7 @@ BlockIO InterpreterOptimizeQuery::execute()
         return executeDDLQueryOnCluster(query_ptr, context, {ast.database});
 
     StoragePtr table = context.getTable(ast.database, ast.table);
-    auto table_lock = table->lockStructure(true);
+    auto table_lock = table->lockStructure(true, context.getCurrentQueryId());
     table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context);
     return {};
 }
@@ -101,7 +101,7 @@ BlockIO InterpreterRenameQuery::execute()
 
     for (const auto & names : unique_tables_from)
         if (auto table = context.tryGetTable(names.database_name, names.table_name))
-            locks.emplace_back(table->lockForAlter());
+            locks.emplace_back(table->lockForAlter(context.getCurrentQueryId()));
 
     /** All tables are locked. If there are more than one rename in chain,
       * we need to hold global lock while doing all renames. Order matters to avoid deadlocks.
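The comment above is the classic lock-ordering rule. A small self-contained illustration of why RENAME acquires its table locks in a fixed order (hypothetical types; the real code locks IStorage objects):

#include <algorithm>
#include <mutex>
#include <string>
#include <vector>

struct NamedMutex
{
    std::string name;
    std::mutex m;
};

/// Acquire all table locks in one canonical order (here: sorted by name), so two
/// concurrent rename chains touching the same tables can never wait on each other
/// in a cycle -- the cheapest way to rule out deadlock.
void lockAllInOrder(std::vector<NamedMutex *> & tables)
{
    std::sort(tables.begin(), tables.end(),
              [](const NamedMutex * a, const NamedMutex * b) { return a->name < b->name; });
    for (auto * t : tables)
        t->m.lock();   /// release in reverse order when finished
}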
@@ -80,8 +80,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(
+        query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
 {
 }
 
@@ -90,8 +92,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Context & context_,
     const BlockInputStreamPtr & input_,
     QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
 {
 }
 
@@ -100,8 +103,9 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Context & context_,
     const StoragePtr & storage_,
     QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_)
+    bool only_analyze_,
+    bool modify_inplace)
+    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
 {
 }
 
@@ -131,8 +135,10 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze_)
-    : query_ptr(query_ptr_->clone()) /// Note: the query is cloned because it will be modified during analysis.
+    bool only_analyze_,
+    bool modify_inplace)
+    /// NOTE: the query almost always should be cloned because it will be modified during analysis.
+    : query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
     , query(typeid_cast<ASTSelectQuery &>(*query_ptr))
     , context(context_)
     , to_stage(to_stage_)
@@ -170,7 +176,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     {
         /// Read from subquery.
         interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-            table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
+            table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
 
         source_header = interpreter_subquery->getSampleBlock();
     }
@@ -194,7 +200,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     }
 
     if (storage)
-        table_lock = storage->lockStructure(false);
+        table_lock = storage->lockStructure(false, context.getCurrentQueryId());
 
     syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
         query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
@@ -217,16 +223,23 @@ InterpreterSelectQuery::InterpreterSelectQuery(
         for (const auto & it : query_analyzer->getExternalTables())
             if (!context.tryGetExternalTable(it.first))
                 context.addExternalTable(it.first, it.second);
     }
 
+    if (!only_analyze || modify_inplace)
     {
         if (query_analyzer->isRewriteSubqueriesPredicate())
         {
            /// remake interpreter_subquery when PredicateOptimizer is rewrite subqueries and main table is subquery
            if (is_subquery)
                interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-                    table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1,
-                    only_analyze);
+                    table_expression,
+                    getSubqueryContext(context),
+                    required_columns,
+                    QueryProcessingStage::Complete,
+                    subquery_depth + 1,
+                    only_analyze,
+                    modify_inplace);
        }
 
     }
 
     if (interpreter_subquery)
@@ -1026,6 +1039,9 @@ void InterpreterSelectQuery::executeFetchColumns(
     if (to_stage == QueryProcessingStage::Complete)
     {
         limits.min_execution_speed = settings.min_execution_speed;
+        limits.max_execution_speed = settings.max_execution_speed;
+        limits.min_execution_speed_bytes = settings.min_execution_speed_bytes;
+        limits.max_execution_speed_bytes = settings.max_execution_speed_bytes;
         limits.timeout_before_checking_execution_speed = settings.timeout_before_checking_execution_speed;
     }
 
@@ -1474,12 +1490,9 @@ void InterpreterSelectQuery::executeExtremes(Pipeline & pipeline)
 
 void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(Pipeline & pipeline, SubqueriesForSets & subqueries_for_sets)
 {
-    const Settings & settings = context.getSettingsRef();
-
     executeUnion(pipeline);
     pipeline.firstStream() = std::make_shared<CreatingSetsBlockInputStream>(
-        pipeline.firstStream(), subqueries_for_sets,
-        SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
+        pipeline.firstStream(), subqueries_for_sets, context);
 }
 
 void InterpreterSelectQuery::unifyStreams(Pipeline & pipeline)
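The new modify_inplace flag threads through every InterpreterSelectQuery constructor and decides whether analysis works on a defensive clone of the AST or mutates the caller's tree. A tiny sketch of the copy-on-analyze choice, with a minimal stand-in node type (hypothetical, not DB::IAST):

#include <memory>
#include <string>

struct Node
{
    std::string text;
    std::shared_ptr<Node> clone() const { return std::make_shared<Node>(*this); }
};

/// Mirrors `query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())` above:
/// analysis rewrites the tree, so by default the interpreter clones it; EXPLAIN-style
/// callers pass modify_inplace = true because they want the rewrites to stay visible
/// on their own AST, retrievable afterwards via getQuery().
std::shared_ptr<Node> astForAnalysis(const std::shared_ptr<Node> & query, bool modify_inplace)
{
    return modify_inplace ? query : query->clone();
}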
@@ -52,7 +52,8 @@ public:
         const Names & required_result_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     /// Read data not from the table specified in the query, but from the prepared source `input`.
     InterpreterSelectQuery(
@@ -60,7 +61,8 @@ public:
         const Context & context_,
         const BlockInputStreamPtr & input_,
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     /// Read data not from the table specified in the query, but from the specified `storage_`.
     InterpreterSelectQuery(
@@ -68,7 +70,8 @@ public:
         const Context & context_,
         const StoragePtr & storage_,
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false);
+        bool only_analyze_ = false,
+        bool modify_inplace = false);
 
     ~InterpreterSelectQuery() override;
 
@@ -82,6 +85,8 @@ public:
 
     void ignoreWithTotals();
 
+    ASTPtr getQuery() const { return query_ptr; }
+
 private:
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
@@ -91,7 +96,8 @@ private:
         const Names & required_result_column_names,
         QueryProcessingStage::Enum to_stage_,
         size_t subquery_depth_,
-        bool only_analyze_);
+        bool only_analyze_,
+        bool modify_inplace);
 
 
     struct Pipeline
@@ -10,6 +10,7 @@
 #include <Columns/ColumnConst.h>
 #include <Common/typeid_cast.h>
 #include <Parsers/queryToString.h>
+#include <Parsers/ASTExpressionList.h>
 
 
 namespace DB
@@ -28,7 +29,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     const Names & required_result_column_names,
     QueryProcessingStage::Enum to_stage_,
     size_t subquery_depth_,
-    bool only_analyze)
+    bool only_analyze,
+    bool modify_inplace)
     : query_ptr(query_ptr_),
     context(context_),
     to_stage(to_stage_),
@@ -81,12 +83,17 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
 
     for (size_t query_num = 0; query_num < num_selects; ++query_num)
     {
-        const Names & current_required_result_column_names = query_num == 0
-            ? required_result_column_names
-            : required_result_column_names_for_other_selects[query_num];
+        const Names & current_required_result_column_names
+            = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];
 
         nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
-            ast.list_of_selects->children.at(query_num), context, current_required_result_column_names, to_stage, subquery_depth, only_analyze));
+            ast.list_of_selects->children.at(query_num),
+            context,
+            current_required_result_column_names,
+            to_stage,
+            subquery_depth,
+            only_analyze,
+            modify_inplace));
     }
 
     /// Determine structure of the result.
@@ -22,7 +22,8 @@ public:
         const Names & required_result_column_names = Names{},
         QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
         size_t subquery_depth_ = 0,
-        bool only_analyze = false);
+        bool only_analyze = false,
+        bool modify_inplace = false);
 
     ~InterpreterSelectWithUnionQuery() override;
 
@@ -39,6 +40,8 @@ public:
 
     void ignoreWithTotals();
 
+    ASTPtr getQuery() const { return query_ptr; }
+
 private:
     ASTPtr query_ptr;
     Context context;
@@ -239,7 +239,7 @@ StoragePtr InterpreterSystemQuery::tryRestartReplica(const String & database_nam
     table->shutdown();
 
     /// If table was already dropped by anyone, an exception will be thrown
-    auto table_lock = table->lockForAlter();
+    auto table_lock = table->lockForAlter(context.getCurrentQueryId());
     create_ast = system_context.getCreateTableQuery(database_name, table_name);
 
     database->detachTable(table_name);
@@ -300,7 +300,7 @@ void Join::setSampleBlock(const Block & block)
         if (column.get() != column_no_lc.get())
         {
             materialized_columns.emplace_back(std::move(column_no_lc));
-            key_columns[i] = materialized_columns[i].get();
+            key_columns[i] = materialized_columns.back().get();
         }
 
     /// We will join only keys, where all components are not NULL.
@@ -1317,10 +1317,10 @@ private:
 
         for (; it != end; ++it)
        {
-            if (it->second.getUsed())
+            if (it->getSecond().getUsed())
                 continue;
 
-            AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->second, rows_added, columns_left, columns_keys_and_right);
+            AdderNonJoined<STRICTNESS, typename Map::mapped_type>::add(it->getSecond(), rows_added, columns_left, columns_keys_and_right);
 
             if (rows_added >= max_block_size)
             {
@@ -10,6 +10,7 @@
 #include <Common/Arena.h>
 #include <Common/ColumnsHashing.h>
 #include <Common/HashTable/HashMap.h>
+#include <Common/HashTable/FixedHashMap.h>
 
 #include <Columns/ColumnString.h>
 #include <Columns/ColumnFixedString.h>
@@ -218,8 +219,8 @@
     template <typename Mapped>
     struct MapsTemplate
     {
-        std::unique_ptr<HashMap<UInt8, Mapped, TrivialHash, HashTableFixedGrower<8>>> key8;
-        std::unique_ptr<HashMap<UInt16, Mapped, TrivialHash, HashTableFixedGrower<16>>> key16;
+        std::unique_ptr<FixedHashMap<UInt8, Mapped>> key8;
+        std::unique_ptr<FixedHashMap<UInt16, Mapped>> key16;
         std::unique_ptr<HashMap<UInt32, Mapped, HashCRC32<UInt32>>> key32;
         std::unique_ptr<HashMap<UInt64, Mapped, HashCRC32<UInt64>>> key64;
         std::unique_ptr<HashMapWithSavedHash<StringRef, Mapped>> key_string;
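For 8- and 16-bit join keys, the generic HashMap (TrivialHash plus a fixed grower) is replaced by FixedHashMap: the key space is so small that the whole table can be a flat array indexed by the key itself. A hedged sketch of the data structure (illustrative only, not the real ClickHouse template):

#include <array>
#include <cstddef>

template <typename Key, typename Mapped>
struct FixedArrayMap
{
    /// One slot per possible key value: 256 for UInt8, 65536 for UInt16.
    static constexpr size_t SIZE = size_t(1) << (sizeof(Key) * 8);

    std::array<Mapped, SIZE> values{};
    std::array<bool, SIZE> occupied{};

    /// No hashing, no probing, no resizes: emplace is a single indexed store.
    Mapped * emplace(Key key, bool & inserted)
    {
        inserted = !occupied[key];
        occupied[key] = true;
        return &values[key];
    }

    Mapped * find(Key key) { return occupied[key] ? &values[key] : nullptr; }
};

Lookup degenerates to one indexed load, which is exactly what the new hash_map_lookup benchmark further down compares against the old scheme.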
@@ -394,11 +394,7 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(BlockInputStr
 
         const SubqueriesForSets & subqueries_for_sets = stage.analyzer->getSubqueriesForSets();
         if (!subqueries_for_sets.empty())
-        {
-            const auto & settings = context.getSettingsRef();
-            in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets,
-                SizeLimits(settings.max_rows_to_transfer, settings.max_bytes_to_transfer, settings.transfer_overflow_mode));
-        }
+            in = std::make_shared<CreatingSetsBlockInputStream>(in, subqueries_for_sets, context);
     }
 
     in = std::make_shared<MaterializingBlockInputStream>(in);
@@ -59,17 +59,17 @@ bool PredicateExpressionsOptimizer::optimize()
         is_rewrite_subqueries |= optimizeImpl(ast_select->where_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_WHERE);
         is_rewrite_subqueries |= optimizeImpl(ast_select->prewhere_expression, all_subquery_projection_columns, OptimizeKind::PUSH_TO_PREWHERE);
     }
 
     return is_rewrite_subqueries;
 }
 
 bool PredicateExpressionsOptimizer::optimizeImpl(
-    ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
+    ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind expression_kind)
 {
     /// split predicate with `and`
     std::vector<ASTPtr> outer_predicate_expressions = splitConjunctionPredicate(outer_expression);
 
-    std::vector<DatabaseAndTableWithAlias> database_and_table_with_aliases =
-        getDatabaseAndTables(*ast_select, context.getCurrentDatabase());
+    std::vector<TableWithColumnNames> tables_with_columns = getDatabaseAndTablesWithColumnNames(*ast_select, context);
 
     bool is_rewrite_subquery = false;
     for (auto & outer_predicate : outer_predicate_expressions)
@@ -77,7 +77,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
         if (isArrayJoinFunction(outer_predicate))
             continue;
 
-        auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, database_and_table_with_aliases);
+        auto outer_predicate_dependencies = getDependenciesAndQualifiers(outer_predicate, tables_with_columns);
 
         /// TODO: remove origin expression
         for (const auto & [subquery, projection_columns] : subqueries_projection_columns)
@@ -92,7 +92,7 @@ bool PredicateExpressionsOptimizer::optimizeImpl(
             cleanExpressionAlias(inner_predicate); /// clears the alias name contained in the outer predicate
 
             std::vector<IdentifierWithQualifier> inner_predicate_dependencies =
-                getDependenciesAndQualifiers(inner_predicate, database_and_table_with_aliases);
+                getDependenciesAndQualifiers(inner_predicate, tables_with_columns);
 
             setNewAliasesForInnerPredicate(projection_columns, inner_predicate_dependencies);
 
@@ -169,7 +169,7 @@ std::vector<ASTPtr> PredicateExpressionsOptimizer::splitConjunctionPredicate(AST
 }
 
 std::vector<PredicateExpressionsOptimizer::IdentifierWithQualifier>
-PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<DatabaseAndTableWithAlias> & tables)
+PredicateExpressionsOptimizer::getDependenciesAndQualifiers(ASTPtr & expression, std::vector<TableWithColumnNames> & tables)
 {
     FindIdentifierBestTableVisitor::Data find_data(tables);
     FindIdentifierBestTableVisitor(find_data).visit(expression);
@@ -2,6 +2,8 @@
 
 #include <Interpreters/DatabaseAndTableWithAlias.h>
 
+#include <map>
+
 namespace DB
 {
 
@@ -70,11 +72,11 @@ private:
     std::vector<ASTPtr> splitConjunctionPredicate(ASTPtr & predicate_expression);
 
     std::vector<IdentifierWithQualifier> getDependenciesAndQualifiers(ASTPtr & expression,
-        std::vector<DatabaseAndTableWithAlias> & tables_with_aliases);
+        std::vector<TableWithColumnNames> & tables_with_aliases);
 
     bool optimizeExpression(const ASTPtr & outer_expression, ASTPtr & subquery_expression, ASTSelectQuery * subquery);
 
-    bool optimizeImpl(ASTPtr & outer_expression, SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
+    bool optimizeImpl(ASTPtr & outer_expression, const SubqueriesProjectionColumns & subqueries_projection_columns, OptimizeKind optimize_kind);
 
     bool allowPushDown(const ASTSelectQuery * subquery);
 
@@ -8,6 +8,8 @@
 #include <Common/Arena.h>
 #include <Common/HashTable/HashSet.h>
 #include <Common/HashTable/ClearableHashSet.h>
+#include <Common/HashTable/FixedClearableHashSet.h>
+#include <Common/HashTable/FixedHashSet.h>
 #include <Common/UInt128.h>
 
 
@@ -182,9 +184,8 @@ struct SetMethodHashed
   */
 struct NonClearableSet
 {
-    /// TODO Use either bit- or byte-set for these two options.
-    std::unique_ptr<SetMethodOneNumber<UInt8, HashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
-    std::unique_ptr<SetMethodOneNumber<UInt16, HashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
+    std::unique_ptr<SetMethodOneNumber<UInt8, FixedHashSet<UInt8>>> key8;
+    std::unique_ptr<SetMethodOneNumber<UInt16, FixedHashSet<UInt16>>> key16;
 
     /** Also for the experiment was tested the ability to use SmallSet,
       * as long as the number of elements in the set is small (and, if necessary, converted to a full-fledged HashSet).
@@ -209,9 +210,8 @@ struct NonClearableSet
 
 struct ClearableSet
 {
-    /// TODO Use either bit- or byte-set for these two options.
-    std::unique_ptr<SetMethodOneNumber<UInt8, ClearableHashSet<UInt8, TrivialHash, HashTableFixedGrower<8>>>> key8;
-    std::unique_ptr<SetMethodOneNumber<UInt16, ClearableHashSet<UInt16, TrivialHash, HashTableFixedGrower<16>>>> key16;
+    std::unique_ptr<SetMethodOneNumber<UInt8, FixedClearableHashSet<UInt8>>> key8;
+    std::unique_ptr<SetMethodOneNumber<UInt16, FixedClearableHashSet<UInt16>>> key16;
 
     std::unique_ptr<SetMethodOneNumber<UInt32, ClearableHashSet<UInt32, HashCRC32<UInt32>>>> key32;
     std::unique_ptr<SetMethodOneNumber<UInt64, ClearableHashSet<UInt64, HashCRC32<UInt64>>>> key64;
@@ -233,7 +233,10 @@ struct Settings
     M(SettingSeconds, max_execution_time, 0, "") \
     M(SettingOverflowMode<false>, timeout_overflow_mode, OverflowMode::THROW, "What to do when the limit is exceeded.") \
     \
-    M(SettingUInt64, min_execution_speed, 0, "In rows per second.") \
+    M(SettingUInt64, min_execution_speed, 0, "Minimum number of execution rows per second.") \
+    M(SettingUInt64, max_execution_speed, 0, "Maximum number of execution rows per second.") \
+    M(SettingUInt64, min_execution_speed_bytes, 0, "Minimum number of execution bytes per second.") \
+    M(SettingUInt64, max_execution_speed_bytes, 0, "Maximum number of execution bytes per second.") \
     M(SettingSeconds, timeout_before_checking_execution_speed, 0, "Check that the speed is not too low after the specified time has elapsed.") \
     \
     M(SettingUInt64, max_columns_to_read, 0, "") \
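Four of these settings form a min/max band for query throughput, checked only once timeout_before_checking_execution_speed has elapsed. A hedged sketch of how such limits could be enforced (illustrative only; the real checks live in the execution-limits code, which is not part of this diff):

#include <chrono>
#include <cstdint>
#include <stdexcept>
#include <string>
#include <thread>

struct SpeedLimits
{
    uint64_t min_rows_per_sec = 0;   /// min_execution_speed
    uint64_t max_rows_per_sec = 0;   /// max_execution_speed
    double grace_period_sec = 0;     /// timeout_before_checking_execution_speed

    void check(uint64_t rows_read, double elapsed_sec) const
    {
        if (elapsed_sec <= grace_period_sec)
            return;   /// too early to judge the speed

        const double speed = rows_read / elapsed_sec;
        if (min_rows_per_sec && speed < min_rows_per_sec)
            throw std::runtime_error("query is too slow: " + std::to_string(speed) + " rows/sec");

        if (max_rows_per_sec && speed > max_rows_per_sec)
        {
            /// Throttle: sleep until the average speed falls back under the cap.
            const double desired_elapsed = double(rows_read) / max_rows_per_sec;
            std::this_thread::sleep_for(std::chrono::duration<double>(desired_elapsed - elapsed_sec));
        }
    }
};

The *_bytes variants apply the same band to bytes per second instead of rows.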
@@ -13,23 +13,26 @@
 #include <Interpreters/ExternalDictionaries.h>
 #include <Interpreters/OptimizeIfWithConstantConditionVisitor.h>
 
-#include <Parsers/ASTSelectQuery.h>
-#include <Parsers/ASTLiteral.h>
-#include <Parsers/ASTOrderByElement.h>
-#include <Parsers/ASTTablesInSelectQuery.h>
 #include <Parsers/ASTExpressionList.h>
 #include <Parsers/ASTFunction.h>
+#include <Parsers/ASTLiteral.h>
+#include <Parsers/ASTOrderByElement.h>
+#include <Parsers/ASTSelectQuery.h>
+#include <Parsers/ASTTablesInSelectQuery.h>
+#include <Parsers/ParserTablesInSelectQuery.h>
+#include <Parsers/parseQuery.h>
 #include <Parsers/queryToString.h>
 
 #include <DataTypes/NestedUtils.h>
 
-#include <Common/typeid_cast.h>
 #include <Core/NamesAndTypes.h>
-#include <Storages/IStorage.h>
 #include <IO/WriteHelpers.h>
+#include <Storages/IStorage.h>
+#include <Common/typeid_cast.h>
 
 #include <functional>
 
 
 namespace DB
 {
 
@@ -107,7 +110,6 @@ void translateQualifiedNames(ASTPtr & query, const ASTSelectQuery & select_query
     visitor.visit(query);
 }
 
-
 bool hasArrayJoin(const ASTPtr & ast)
 {
     if (const ASTFunction * function = typeid_cast<const ASTFunction *>(&*ast))
@@ -591,8 +593,30 @@ Names qualifyOccupiedNames(NamesAndTypesList & columns, const NameSet & source_c
     return originals;
 }
 
+void replaceJoinedTable(const ASTTablesInSelectQueryElement * join)
+{
+    if (!join || !join->table_expression)
+        return;
+
+    auto & table_expr = static_cast<ASTTableExpression &>(*join->table_expression.get());
+    if (table_expr.database_and_table_name)
+    {
+        auto & table_id = typeid_cast<ASTIdentifier &>(*table_expr.database_and_table_name.get());
+        String expr = "(select * from " + table_id.name + ") as " + table_id.shortName();
+
+        // FIXME: since the expression "a as b" exposes both "a" and "b" names, which is not equivalent to "(select * from a) as b",
+        //        we can't replace aliased tables.
+        // FIXME: long table names include database name, which we can't save within alias.
+        if (table_id.alias.empty() && table_id.isShort())
+        {
+            ParserTableExpression parser;
+            table_expr = static_cast<ASTTableExpression &>(*parseQuery(parser, expr, 0));
+        }
+    }
+}
+
 } // namespace
 
 
 SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     ASTPtr & query,
@@ -628,6 +652,8 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     {
         if (const ASTTablesInSelectQueryElement * node = select_query->join())
         {
+            replaceJoinedTable(node);
+
             const auto & joined_expression = static_cast<const ASTTableExpression &>(*node->table_expression);
             DatabaseAndTableWithAlias table(joined_expression, context.getCurrentDatabase());
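replaceJoinedTable() rewrites a plain right-hand table of a JOIN into an equivalent subquery carrying the same alias. A small illustration of the rewrite as it affects the query text (simplified to strings; the real code rewrites the AST via ParserTableExpression):

#include <iostream>
#include <string>

std::string rewriteJoinedTable(const std::string & table_name)
{
    return "(select * from " + table_name + ") as " + table_name;
}

int main()
{
    /// SELECT ... FROM t1 JOIN t2 ON ...  is analyzed as if it were
    /// SELECT ... FROM t1 JOIN (select * from t2) as t2 ON ...
    std::cout << rewriteJoinedTable("t2") << '\n';
}

Per the FIXMEs in the function, aliased or database-qualified tables are left untouched, because for those the wrapped form would not be equivalent.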
@@ -14,6 +14,10 @@ add_executable (hash_map hash_map.cpp)
 target_include_directories (hash_map SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
 target_link_libraries (hash_map PRIVATE dbms clickhouse_compression)
 
+add_executable (hash_map_lookup hash_map_lookup.cpp)
+target_include_directories (hash_map_lookup SYSTEM BEFORE PRIVATE ${SPARCEHASH_INCLUDE_DIR})
+target_link_libraries (hash_map_lookup PRIVATE dbms clickhouse_compression)
+
 add_executable (hash_map3 hash_map3.cpp)
 target_include_directories(hash_map3 SYSTEM BEFORE PRIVATE ${METROHASH_INCLUDE_DIR})
 target_link_libraries (hash_map3 PRIVATE dbms ${FARMHASH_LIBRARIES} ${METROHASH_LIBRARIES})
@@ -162,8 +162,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -193,8 +193,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -225,8 +225,8 @@ int main(int argc, char ** argv)
             map.emplace(data[i], it, inserted);
             if (inserted)
             {
-                new(&it->second) Value;
-                std::swap(it->second, value);
+                new(&it->getSecond()) Value;
+                std::swap(it->getSecond(), value);
                 INIT
             }
         }
@@ -38,7 +38,7 @@ public:
             if (this->buf[i].isZero(*this))
                 std::cerr << "[ ]";
             else
-                std::cerr << '[' << this->buf[i].getValue().first.data << ", " << this->buf[i].getValue().second << ']';
+                std::cerr << '[' << this->buf[i].getValue().getFirst().data << ", " << this->buf[i].getValue().getSecond() << ']';
         }
         std::cerr << std::endl;
     }
@@ -85,7 +85,7 @@ int main(int, char **)
     std::cerr << "Collisions: " << map.getCollisions() << std::endl;
 
     for (auto x : map)
-        std::cerr << x.first.toString() << " -> " << x.second << std::endl;
+        std::cerr << x.getFirst().toString() << " -> " << x.getSecond() << std::endl;
 
     return 0;
 }
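The mechanical it->second to it->getSecond() churn across these tests reflects a hash-table cell API change: iterators now hand out accessor methods instead of a raw std::pair, which frees each table to choose its own cell layout. A hedged sketch of the indirection (hypothetical cell types, not the real ClickHouse ones):

#include <utility>

/// A classic cell stores the pair and forwards to it...
template <typename Key, typename Mapped>
struct PairCell
{
    std::pair<Key, Mapped> value;
    const Key & getFirst() const { return value.first; }
    Mapped & getSecond() { return value.second; }
};

/// ...while a cell in a fixed (direct-addressed) table can store no key at all:
/// the key is implied by the cell's position, so only the mapped value remains.
template <typename Mapped>
struct KeylessCell
{
    Mapped mapped;
    Mapped & getSecond() { return mapped; }
};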
dbms/src/Interpreters/tests/hash_map_lookup.cpp (new file, 124 lines)
@@ -0,0 +1,124 @@
+#include <iomanip>
+#include <iostream>
+#include <vector>
+
+#include <Common/Stopwatch.h>
+
+#define DBMS_HASH_MAP_COUNT_COLLISIONS
+#define DBMS_HASH_MAP_DEBUG_RESIZES
+
+#include <Compression/CompressedReadBuffer.h>
+#include <Core/Types.h>
+#include <IO/ReadBufferFromFile.h>
+#include <IO/ReadHelpers.h>
+#include <Interpreters/AggregationCommon.h>
+#include <Common/HashTable/FixedHashMap.h>
+#include <Common/HashTable/HashMap.h>
+
+/** Do this:
+for file in ResolutionWidth ResolutionDepth; do
+    for size in 30000 100000 300000 1000000 5000000; do
+        echo
+        BEST_METHOD=0
+        BEST_RESULT=0
+        for method in {1..2}; do
+            echo -ne $file $size $method '';
+            TOTAL_ELEMS=0
+            for i in {0..1000}; do
+                TOTAL_ELEMS=$(( $TOTAL_ELEMS + $size ))
+                if [[ $TOTAL_ELEMS -gt 25000000 ]]; then break; fi
+                ./hash_map_lookup $size $method < ${file}.bin 2>&1 |
+                    grep HashMap | grep -oE '[0-9\.]+ elem';
+            done | awk -W interactive '{ if ($1 > x) { x = $1 }; printf(".") } END { print x }' | tee /tmp/hash_map_lookup_res;
+            CUR_RESULT=$(cat /tmp/hash_map_lookup_res | tr -d '.')
+            if [[ $CUR_RESULT -gt $BEST_RESULT ]]; then
+                BEST_METHOD=$method
+                BEST_RESULT=$CUR_RESULT
+            fi;
+        done;
+        echo Best: $BEST_METHOD - $BEST_RESULT
+    done;
+done
+*/
+
+template <typename Map>
+void NO_INLINE bench(const std::vector<UInt16> & data, const char * name)
+{
+    Map map;
+    typename Map::iterator it;
+    bool inserted;
+
+    Stopwatch watch;
+    for (size_t i = 0, size = data.size(); i < size; ++i)
+    {
+        map.emplace(data[i], it, inserted);
+        if (inserted)
+            it->getSecond() = 1;
+        else
+            ++it->getSecond();
+    }
+
+    for (size_t i = 0, size = data.size(); i < size; ++i)
+    {
+        it = map.find(data[i]);
+        ++it->getSecond();
+    }
+    watch.stop();
+    std::cerr << std::fixed << std::setprecision(2) << "HashMap (" << name << "). Size: " << map.size()
+        << ", elapsed: " << watch.elapsedSeconds() << " (" << data.size() / watch.elapsedSeconds() << " elem/sec.)"
+#ifdef DBMS_HASH_MAP_COUNT_COLLISIONS
+        << ", collisions: " << map.getCollisions()
+#endif
+        << std::endl;
+}
+
+template <typename Map>
+void insert(Map & map, StringRef & k)
+{
+    bool inserted;
+    typename Map::iterator it;
+    map.emplace(k, it, inserted, nullptr);
+    if (inserted)
+        *it = 1;
+    else
+        ++*it;
+    std::cout << *map.find(k) << std::endl;
+}
+
+int main(int argc, char ** argv)
+{
+    if (argc < 3)
+    {
+        std::cerr << "Usage: program n m\n";
+        return 1;
+    }
+
+    size_t n = atoi(argv[1]);
+    size_t m = atoi(argv[2]);
+
+    std::vector<UInt16> data(n);
+
+    {
+        Stopwatch watch;
+        DB::ReadBufferFromFileDescriptor in1(STDIN_FILENO);
+        DB::CompressedReadBuffer in2(in1);
+        for (size_t i = 0; i < n && !in2.eof(); ++i)
+        {
+            DB::readBinary(data[i], in2);
+        }
+
+        watch.stop();
+        std::cerr << std::fixed << std::setprecision(2) << "Vector. Size: " << n << ", elapsed: " << watch.elapsedSeconds() << " ("
+            << n / watch.elapsedSeconds() << " elem/sec.)" << std::endl;
+    }
+
+    using OldLookup = HashMap<UInt16, UInt8, TrivialHash, HashTableFixedGrower<16>>;
+    using NewLookup = FixedHashMap<UInt16, UInt8>;
+
+    if (!m || m == 1)
+        bench<OldLookup>(data, "Old Lookup");
+    if (!m || m == 2)
+        bench<NewLookup>(data, "New Lookup");
+    return 0;
+}
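Usage follows from main(): the first argument is the number of UInt16 values to read from the compressed stdin stream, and the second selects the method, 1 for the old TrivialHash-based HashMap, 2 for the new FixedHashMap, 0 for both. The driver script in the file comment sweeps input sizes, repeats each run, and keeps the best elem/sec figure per configuration.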
@@ -337,8 +337,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -366,8 +366,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -396,8 +396,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -426,8 +426,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -595,8 +595,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
    {
        map.emplace(static_cast<const Key &>(data[i]), it, inserted);
        if (inserted)
-            it->second = 0;
-        ++it->second;
+            it->getSecond() = 0;
+        ++it->getSecond();
    }
 
    watch.stop();
@@ -442,8 +442,8 @@ void NO_INLINE bench(const std::vector<StringRef> & data, const char * name)
    {
        map.emplace(static_cast<const Key &>(data[i]), it, inserted);
        if (inserted)
-            it->second = 0;
-        ++it->second;
+            it->getSecond() = 0;
+        ++it->getSecond();
    }
 
    watch.stop();
@@ -144,8 +144,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -173,8 +173,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(SmallStringRef(data[i].data, data[i].size), it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -67,8 +67,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(data[i], it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -82,7 +82,7 @@ int main(int argc, char ** argv)
        size_t elems = 0;
        for (const auto & kv : map)
        {
-            sum_counts += kv.second;
+            sum_counts += kv.getSecond();
            ++elems;
        }
 
@@ -103,8 +103,8 @@ int main(int argc, char ** argv)
        {
            map.emplace(i, it, inserted);
            if (inserted)
-                it->second = 0;
-            ++it->second;
+                it->getSecond() = 0;
+            ++it->getSecond();
        }
 
        watch.stop();
@@ -118,11 +118,11 @@ int main(int argc, char ** argv)
        size_t elems = 0;
        for (const auto & kv : map)
        {
-            sum_counts += kv.second;
+            sum_counts += kv.getSecond();
            ++elems;
 
-            if (kv.first > n)
-                std::cerr << kv.first << std::endl;
+            if (kv.getFirst() > n)
+                std::cerr << kv.getFirst() << std::endl;
        }
 
        std::cerr << "sum_counts: " << sum_counts << ", elems: " << elems << std::endl;
@@ -14,13 +14,15 @@ public:
    enum ExplainKind
    {
        ParsedAST,
+        AnalyzedSyntax,
    };
 
-    ASTExplainQuery(ExplainKind kind_ = ParsedAST)
+    ASTExplainQuery(ExplainKind kind_)
        : kind(kind_)
    {}
 
    String getID(char delim) const override { return "Explain" + (delim + toString(kind)); }
+    ExplainKind getKind() const { return kind; }
    ASTPtr clone() const override { return std::make_shared<ASTExplainQuery>(*this); }
 
 protected:
@@ -37,7 +39,9 @@ private:
        switch (kind)
        {
            case ParsedAST: return "ParsedAST";
+            case AnalyzedSyntax: return "AnalyzedSyntax";
        }
 
        __builtin_unreachable();
    }
 };
Some files were not shown because too many files have changed in this diff.