mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-13 09:52:38 +00:00
Support short-circuit default evaluation (getColumnOrDefaultShortCircuit) in HashedDictionary
This commit is contained in:
parent
11f63d59a5
commit
3bb196f612
@ -156,6 +156,7 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types [[maybe_unused]],
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DataTypePtr & result_type_short_circuit) const
|
||||
{
|
||||
|
@ -86,6 +86,7 @@ public:
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DataTypePtr & result_type_short_circuit) const override;
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <Columns/ColumnNullable.h>
|
||||
#include <Columns/MaskOperations.h>
|
||||
#include <Functions/FunctionHelpers.h>
|
||||
|
||||
#include <atomic>
|
||||
@ -144,6 +145,14 @@ public:
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr & default_values_column) const override;
|
||||
|
||||
/// Short-circuit variant of the attribute lookup: overrides the base-class virtual of the same name.
/// Unlike the overload above that receives a ready `default_values_column`, the default is passed
/// as a `ColumnWithTypeAndName` together with the type it has to be cast to.
///
/// @param attribute_name             Attribute to read.
/// @param result_type                Type used to resolve the attribute in the dictionary structure.
/// @param key_columns                Columns holding the keys to look up.
/// @param key_types                  Types of `key_columns`.
/// @param default_argument           Default-value argument (column + type + name).
/// @param result_type_short_circuit  Type the evaluated default is cast to.
/// @return Column with one value per key.
/// NOTE(review): `default_argument` is presumably a not-yet-executed (lazy) column — confirm against the caller.
ColumnPtr getColumnOrDefaultShortCircuit(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DataTypePtr & result_type_short_circuit) const override;
|
||||
|
||||
/// Overrides the base-class `hasKeys`; returns a UInt8 column.
/// NOTE(review): presumably one 0/1 flag per row of `key_columns` — confirm against the out-of-line definition.
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
|
||||
|
||||
/// True only when the dictionary uses a simple key and its structure defines a hierarchical attribute index.
bool hasHierarchy() const override { return dictionary_key_type == DictionaryKeyType::Simple && dict_struct.hierarchical_attribute_index.has_value(); }
|
||||
@ -242,6 +251,15 @@ private:
|
||||
ValueSetter && set_value,
|
||||
DefaultValueExtractor & default_value_extractor) const;
|
||||
|
||||
/// Per-attribute worker behind getColumnOrDefaultShortCircuit.
///
/// @tparam AttributeType            Value type used to select the attribute's containers.
/// @tparam is_nullable              Whether the attribute has a nullable set that must be consulted.
/// @tparam DictionaryAttributeType  Attribute type handed to the default-value extractor.
/// @tparam ValueSetter              Callable invoked as set_value(row, value, is_null).
///
/// @param attribute                  Attribute storage to search.
/// @param keys_extractor             Source of the keys; taken by non-const reference because extraction mutates it.
/// @param set_value                  Receives the value for each row.
/// @param default_argument           Default-value argument (column + type + name).
/// @param dictionary_attribute       Attribute description; supplies `null_value`.
/// @param result_type_short_circuit  Type the evaluated default is cast to.
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
|
||||
void getItemsShortCircuitImpl(
|
||||
const Attribute & attribute,
|
||||
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
|
||||
ValueSetter && set_value,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DictionaryAttribute & dictionary_attribute,
|
||||
const DataTypePtr & result_type_short_circuit) const;
|
||||
|
||||
/// Non-const helper taking an attribute index and a callable.
/// NOTE(review): the definition is outside this view; presumably it resolves the containers of the
/// attribute at `attribute_index` and passes them to `get_containers_func` — confirm.
template <typename GetContainersFunc>
|
||||
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
|
||||
|
||||
@ -374,7 +392,7 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types [[maybe_unused]],
|
||||
const DataTypes & key_types,
|
||||
const ColumnPtr & default_values_column) const
|
||||
{
|
||||
if (dictionary_key_type == DictionaryKeyType::Complex)
|
||||
@ -476,6 +494,122 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
|
||||
return result;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
||||
ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDefaultShortCircuit(
|
||||
const std::string & attribute_name,
|
||||
const DataTypePtr & result_type,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DataTypePtr & result_type_short_circuit) const
|
||||
{
|
||||
if (dictionary_key_type == DictionaryKeyType::Complex)
|
||||
dict_struct.validateKeyTypes(key_types);
|
||||
|
||||
ColumnPtr result;
|
||||
|
||||
DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
|
||||
DictionaryKeysExtractor<dictionary_key_type> extractor(key_columns, arena_holder.getComplexKeyArena());
|
||||
|
||||
const size_t size = extractor.getKeysSize();
|
||||
|
||||
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
|
||||
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
|
||||
auto & attribute = attributes[attribute_index];
|
||||
|
||||
bool is_attribute_nullable = attribute.is_nullable_sets.has_value();
|
||||
|
||||
ColumnUInt8::MutablePtr col_null_map_to;
|
||||
ColumnUInt8::Container * vec_null_map_to = nullptr;
|
||||
if (is_attribute_nullable)
|
||||
{
|
||||
col_null_map_to = ColumnUInt8::create(size, false);
|
||||
vec_null_map_to = &col_null_map_to->getData();
|
||||
}
|
||||
|
||||
auto type_call = [&](const auto & dictionary_attribute_type)
|
||||
{
|
||||
using Type = std::decay_t<decltype(dictionary_attribute_type)>;
|
||||
using AttributeType = typename Type::AttributeType;
|
||||
using ValueType = DictionaryValueType<AttributeType>;
|
||||
using ColumnProvider = DictionaryAttributeColumnProvider<AttributeType>;
|
||||
|
||||
auto column = ColumnProvider::getColumn(dictionary_attribute, size);
|
||||
|
||||
if constexpr (std::is_same_v<ValueType, Array>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](const size_t, const Array & value, bool) { out->insert(value); },
|
||||
default_argument,
|
||||
dictionary_attribute,
|
||||
result_type_short_circuit);
|
||||
}
|
||||
else if constexpr (std::is_same_v<ValueType, StringRef>)
|
||||
{
|
||||
auto * out = column.get();
|
||||
|
||||
if (is_attribute_nullable)
|
||||
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, StringRef value, bool is_null)
|
||||
{
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
out->insertData(value.data, value.size);
|
||||
},
|
||||
default_argument,
|
||||
dictionary_attribute,
|
||||
result_type_short_circuit);
|
||||
else
|
||||
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); },
|
||||
default_argument,
|
||||
dictionary_attribute,
|
||||
result_type_short_circuit);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto & out = column->getData();
|
||||
|
||||
if (is_attribute_nullable)
|
||||
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, const auto value, bool is_null)
|
||||
{
|
||||
(*vec_null_map_to)[row] = is_null;
|
||||
out[row] = value;
|
||||
},
|
||||
default_argument,
|
||||
dictionary_attribute,
|
||||
result_type_short_circuit);
|
||||
else
|
||||
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
|
||||
attribute,
|
||||
extractor,
|
||||
[&](size_t row, const auto value, bool) { out[row] = value; },
|
||||
default_argument,
|
||||
dictionary_attribute,
|
||||
result_type_short_circuit);
|
||||
}
|
||||
|
||||
result = std::move(column);
|
||||
};
|
||||
|
||||
callOnDictionaryAttributeType(attribute.type, type_call);
|
||||
|
||||
if (is_attribute_nullable)
|
||||
result = ColumnNullable::create(result, std::move(col_null_map_to));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
||||
ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
|
||||
{
|
||||
@ -1015,6 +1149,83 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
||||
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
|
||||
void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCircuitImpl(
|
||||
const Attribute & attribute,
|
||||
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
|
||||
ValueSetter && set_value,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DictionaryAttribute & dictionary_attribute,
|
||||
const DataTypePtr & result_type_short_circuit) const
|
||||
{
|
||||
const auto & attribute_containers = std::get<CollectionsHolder<AttributeType>>(attribute.containers);
|
||||
const size_t keys_size = keys_extractor.getKeysSize();
|
||||
auto cond_col = ColumnVector<UInt8>::create(keys_size);
|
||||
auto & cond = cond_col->getData();
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
auto shard = getShard(key);
|
||||
|
||||
const auto & container = attribute_containers[shard];
|
||||
const auto it = container.find(key);
|
||||
|
||||
cond[key_index] = (it != container.end()) ? 1 : 0;
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
IColumn::Filter mask(keys_size, 1);
|
||||
auto mask_info = extractMask(mask, std::move(cond_col));
|
||||
inverseMask(mask, mask_info);
|
||||
ColumnWithTypeAndName column_before_cast = default_argument;
|
||||
maskedExecute(column_before_cast, mask, mask_info);
|
||||
|
||||
ColumnWithTypeAndName column_to_cast = {
|
||||
column_before_cast.column->convertToFullColumnIfConst(),
|
||||
column_before_cast.type,
|
||||
column_before_cast.name};
|
||||
auto result = castColumnAccurate(column_to_cast, result_type_short_circuit);
|
||||
|
||||
DictionaryDefaultValueExtractor<DictionaryAttributeType> default_value_extractor(
|
||||
dictionary_attribute.null_value, result);
|
||||
|
||||
size_t keys_found = 0;
|
||||
|
||||
for (size_t key_index = 0; key_index < keys_size; ++key_index)
|
||||
{
|
||||
auto key = keys_extractor.extractCurrentKey();
|
||||
auto shard = getShard(key);
|
||||
|
||||
const auto & container = attribute_containers[shard];
|
||||
const auto it = container.find(key);
|
||||
|
||||
if (it != container.end())
|
||||
{
|
||||
set_value(key_index, getValueFromCell(it), false);
|
||||
++keys_found;
|
||||
}
|
||||
else
|
||||
{
|
||||
if constexpr (is_nullable)
|
||||
{
|
||||
bool is_value_nullable = ((*attribute.is_nullable_sets)[shard].find(key) != nullptr) || default_value_extractor.isNullAt(key_index);
|
||||
set_value(key_index, default_value_extractor[key_index], is_value_nullable);
|
||||
}
|
||||
else
|
||||
{
|
||||
set_value(key_index, default_value_extractor[key_index], false);
|
||||
}
|
||||
}
|
||||
|
||||
keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
||||
query_count.fetch_add(keys_size, std::memory_order_relaxed);
|
||||
found_count.fetch_add(keys_found, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
|
||||
void HashedDictionary<dictionary_key_type, sparse, sharded>::loadData()
|
||||
{
|
||||
|
@ -259,6 +259,7 @@ public:
|
||||
const std::string & attribute_name [[maybe_unused]],
|
||||
const DataTypePtr & result_type [[maybe_unused]],
|
||||
const Columns & key_columns [[maybe_unused]],
|
||||
const DataTypes & key_types [[maybe_unused]],
|
||||
const ColumnWithTypeAndName & default_argument [[maybe_unused]],
|
||||
const DataTypePtr & result_type_short_circuit [[maybe_unused]]) const
|
||||
{
|
||||
@ -276,6 +277,7 @@ public:
|
||||
const Strings & attribute_names,
|
||||
const DataTypes & result_types,
|
||||
const Columns & key_columns,
|
||||
const DataTypes & key_types,
|
||||
const ColumnWithTypeAndName & default_argument,
|
||||
const DataTypes & result_short_circuit_types) const
|
||||
{
|
||||
@ -289,8 +291,8 @@ public:
|
||||
const auto & attribute_name = attribute_names[i];
|
||||
const auto & result_type = result_types[i];
|
||||
const auto & result_short_circuit_type = result_short_circuit_types[i];
|
||||
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name,
|
||||
result_type, key_columns, default_argument, result_short_circuit_type));
|
||||
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name, result_type,
|
||||
key_columns, key_types, default_argument, result_short_circuit_type));
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -321,13 +321,13 @@ public:
|
||||
/// Returns the `name` member.
String getName() const override { return name; }
|
||||
|
||||
/// Always reports the function as variadic.
bool isVariadic() const override { return true; }
|
||||
bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
|
||||
{
|
||||
settings.enable_lazy_execution_for_first_argument = false;
|
||||
settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
|
||||
settings.force_enable_lazy_execution = false;
|
||||
return true;
|
||||
}
|
||||
// bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
|
||||
// {
|
||||
// settings.enable_lazy_execution_for_first_argument = false;
|
||||
// settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
|
||||
// settings.force_enable_lazy_execution = false;
|
||||
// return true;
|
||||
// }
|
||||
/// Unconditionally returns true; the argument list is ignored.
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
|
||||
/// Always returns 0.
/// NOTE(review): presumably a placeholder because isVariadic() returns true — confirm against the base-class contract.
size_t getNumberOfArguments() const override { return 0; }
|
||||
|
||||
@ -634,8 +634,8 @@ private:
|
||||
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
|
||||
{
|
||||
result_columns = dictionary->getColumnsOrDefaultShortCircuit(
|
||||
attribute_names, result_tuple_type.getElements(), key_columns, last_argument,
|
||||
result_short_circuit_tuple_type.getElements());
|
||||
attribute_names, result_tuple_type.getElements(), key_columns, key_types,
|
||||
last_argument, result_short_circuit_tuple_type.getElements());
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -655,7 +655,7 @@ private:
|
||||
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
|
||||
{
|
||||
result = dictionary->getColumnOrDefaultShortCircuit(
|
||||
attribute_names[0], result_type, key_columns, last_argument, result_type_short_circuit);
|
||||
attribute_names[0], result_type, key_columns, key_types, last_argument, result_type_short_circuit);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user