support hash dict short circuit

This commit is contained in:
jsc0218 2023-12-20 02:53:07 +00:00
parent 11f63d59a5
commit 3bb196f612
5 changed files with 228 additions and 13 deletions

View File

@ -156,6 +156,7 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]],
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const
{

View File

@ -86,6 +86,7 @@ public:
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const override;

View File

@ -27,6 +27,7 @@
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnNullable.h>
#include <Columns/MaskOperations.h>
#include <Functions/FunctionHelpers.h>
#include <atomic>
@ -144,6 +145,14 @@ public:
const DataTypes & key_types,
const ColumnPtr & default_values_column) const override;
/// Returns the column of `attribute_name` values for the keys in `key_columns`.
/// Rows whose key is not found take their value from `default_argument`, after it has been
/// cast to `result_type_short_circuit`.
/// `key_types` is validated against the dictionary structure for complex-key dictionaries.
ColumnPtr getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
/// A hierarchy is reported only for simple-key dictionaries whose structure
/// has a hierarchical attribute index set.
bool hasHierarchy() const override
{
    if (dictionary_key_type != DictionaryKeyType::Simple)
        return false;

    return dict_struct.hierarchical_attribute_index.has_value();
}
@ -242,6 +251,15 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
/// Looks up every key produced by `keys_extractor` in the containers of `attribute` and reports
/// one value per row through `set_value(row, value, is_null)`.
/// Rows whose key is not found receive a value taken from `default_argument`, which is cast to
/// `result_type_short_circuit`; `dictionary_attribute` supplies the attribute's null value.
/// `is_nullable` selects whether the attribute's nullable key sets are consulted for missing keys.
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
void getItemsShortCircuitImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value,
const ColumnWithTypeAndName & default_argument,
const DictionaryAttribute & dictionary_attribute,
const DataTypePtr & result_type_short_circuit) const;
template <typename GetContainersFunc>
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
@ -374,7 +392,7 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
const std::string & attribute_name,
const DataTypePtr & result_type,
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]],
const DataTypes & key_types,
const ColumnPtr & default_values_column) const
{
if (dictionary_key_type == DictionaryKeyType::Complex)
@ -476,6 +494,122 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
return result;
}
/// Builds the result column for `attribute_name` over the keys in `key_columns`.
/// The per-row work is delegated to getItemsShortCircuitImpl, which takes the value for rows
/// whose key is missing from `default_argument` (cast to `result_type_short_circuit`).
/// For a nullable attribute the produced column is wrapped into ColumnNullable together with
/// the null map filled by the value setters.
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDefaultShortCircuit(
    const std::string & attribute_name,
    const DataTypePtr & result_type,
    const Columns & key_columns,
    const DataTypes & key_types,
    const ColumnWithTypeAndName & default_argument,
    const DataTypePtr & result_type_short_circuit) const
{
    if (dictionary_key_type == DictionaryKeyType::Complex)
        dict_struct.validateKeyTypes(key_types);

    DictionaryKeysArenaHolder<dictionary_key_type> arena_holder;
    DictionaryKeysExtractor<dictionary_key_type> keys_extractor(key_columns, arena_holder.getComplexKeyArena());
    const size_t rows = keys_extractor.getKeysSize();

    const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
    const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
    const auto & attribute = attributes[attribute_index];

    const bool nullable = attribute.is_nullable_sets.has_value();

    /// The null map exists only for nullable attributes; `null_map` stays nullptr otherwise.
    ColumnUInt8::MutablePtr null_map_column;
    ColumnUInt8::Container * null_map = nullptr;
    if (nullable)
    {
        null_map_column = ColumnUInt8::create(rows, false);
        null_map = &null_map_column->getData();
    }

    ColumnPtr result;

    auto fill_column = [&](const auto & dictionary_attribute_type)
    {
        using Type = std::decay_t<decltype(dictionary_attribute_type)>;
        using AttributeType = typename Type::AttributeType;
        using ValueType = DictionaryValueType<AttributeType>;

        auto column = DictionaryAttributeColumnProvider<AttributeType>::getColumn(dictionary_attribute, rows);

        /// Every call below differs only in the nullability flag and the setter,
        /// so the shared arguments are bound once here.
        auto run = [&](auto nullable_tag, auto && setter)
        {
            getItemsShortCircuitImpl<ValueType, decltype(nullable_tag)::value, AttributeType>(
                attribute,
                keys_extractor,
                std::forward<decltype(setter)>(setter),
                default_argument,
                dictionary_attribute,
                result_type_short_circuit);
        };

        if constexpr (std::is_same_v<ValueType, Array>)
        {
            auto * out = column.get();
            run(std::false_type{}, [&](const size_t, const Array & value, bool) { out->insert(value); });
        }
        else if constexpr (std::is_same_v<ValueType, StringRef>)
        {
            auto * out = column.get();

            if (nullable)
            {
                run(std::true_type{},
                    [&](size_t row, StringRef value, bool is_null)
                    {
                        (*null_map)[row] = is_null;
                        out->insertData(value.data, value.size);
                    });
            }
            else
            {
                run(std::false_type{}, [&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); });
            }
        }
        else
        {
            auto & out = column->getData();

            if (nullable)
            {
                run(std::true_type{},
                    [&](size_t row, const auto value, bool is_null)
                    {
                        (*null_map)[row] = is_null;
                        out[row] = value;
                    });
            }
            else
            {
                run(std::false_type{}, [&](size_t row, const auto value, bool) { out[row] = value; });
            }
        }

        result = std::move(column);
    };

    callOnDictionaryAttributeType(attribute.type, fill_column);

    if (nullable)
        result = ColumnNullable::create(result, std::move(null_map_column));

    return result;
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
ColumnUInt8::Ptr HashedDictionary<dictionary_key_type, sparse, sharded>::hasKeys(const Columns & key_columns, const DataTypes & key_types) const
{
@ -1015,6 +1149,83 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
/// Looks up every key produced by `keys_extractor` in the containers of `attribute` and reports
/// one value per row through `set_value(row, value, is_null)`.
///
/// The keys are walked twice:
///  1. the first pass records, per row, whether the key is present in the attribute container;
///  2. the second pass emits the stored value for present keys and a default value for the rest.
/// Between the passes `default_argument` is passed through maskedExecute with the inverted
/// presence mask, converted to a full column and cast to `result_type_short_circuit`. The cast
/// column, together with `dictionary_attribute.null_value`, backs the default value extractor.
///
/// `query_count` is increased by the number of keys, `found_count` by the number of keys that
/// were present in the attribute container.
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCircuitImpl(
    const Attribute & attribute,
    DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
    ValueSetter && set_value,
    const ColumnWithTypeAndName & default_argument,
    const DictionaryAttribute & dictionary_attribute,
    const DataTypePtr & result_type_short_circuit) const
{
    const auto & attribute_containers = std::get<CollectionsHolder<AttributeType>>(attribute.containers);
    const size_t keys_size = keys_extractor.getKeysSize();

    /// Pass 1: cond[row] is 1 when the key of that row is present in the container, 0 otherwise.
    /// NOTE(review): keys that are only in `attribute.is_nullable_sets` count as missing here, so
    /// `default_argument` is processed for them as well although pass 2 marks them as null anyway.
    /// Confirm whether such keys should be excluded from the default mask.
    auto cond_col = ColumnVector<UInt8>::create(keys_size);
    auto & cond = cond_col->getData();

    for (size_t key_index = 0; key_index < keys_size; ++key_index)
    {
        auto key = keys_extractor.extractCurrentKey();
        auto shard = getShard(key);

        const auto & container = attribute_containers[shard];
        const auto it = container.find(key);

        cond[key_index] = (it != container.end()) ? 1 : 0;

        keys_extractor.rollbackCurrentKey();
    }

    /// Pass 1 pulled `keys_size` keys out of the extractor. Rewind it, otherwise pass 2 would ask
    /// the already exhausted extractor for another `keys_size` keys.
    keys_extractor.reset();

    /// Invert the presence mask so that it selects the rows whose key is missing, then run the
    /// default argument under that mask (presumably so that a lazy default is evaluated for
    /// those rows only — see maskedExecute).
    IColumn::Filter mask(keys_size, 1);
    auto mask_info = extractMask(mask, std::move(cond_col));
    inverseMask(mask, mask_info);

    ColumnWithTypeAndName column_before_cast = default_argument;
    maskedExecute(column_before_cast, mask, mask_info);

    ColumnWithTypeAndName column_to_cast = {
        column_before_cast.column->convertToFullColumnIfConst(),
        column_before_cast.type,
        column_before_cast.name};
    auto result = castColumnAccurate(column_to_cast, result_type_short_circuit);

    DictionaryDefaultValueExtractor<DictionaryAttributeType> default_value_extractor(
        dictionary_attribute.null_value, result);

    /// Pass 2: emit exactly one value per row, in row order.
    size_t keys_found = 0;

    for (size_t key_index = 0; key_index < keys_size; ++key_index)
    {
        auto key = keys_extractor.extractCurrentKey();
        auto shard = getShard(key);

        const auto & container = attribute_containers[shard];
        const auto it = container.find(key);

        if (it != container.end())
        {
            set_value(key_index, getValueFromCell(it), false);
            ++keys_found;
        }
        else
        {
            if constexpr (is_nullable)
            {
                /// A missing row is null when the key is in the nullable set of its shard
                /// or when the default value for this row is itself null.
                bool is_value_nullable = ((*attribute.is_nullable_sets)[shard].find(key) != nullptr) || default_value_extractor.isNullAt(key_index);
                set_value(key_index, default_value_extractor[key_index], is_value_nullable);
            }
            else
            {
                set_value(key_index, default_value_extractor[key_index], false);
            }
        }

        keys_extractor.rollbackCurrentKey();
    }

    query_count.fetch_add(keys_size, std::memory_order_relaxed);
    found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
void HashedDictionary<dictionary_key_type, sparse, sharded>::loadData()
{

View File

@ -259,6 +259,7 @@ public:
const std::string & attribute_name [[maybe_unused]],
const DataTypePtr & result_type [[maybe_unused]],
const Columns & key_columns [[maybe_unused]],
const DataTypes & key_types [[maybe_unused]],
const ColumnWithTypeAndName & default_argument [[maybe_unused]],
const DataTypePtr & result_type_short_circuit [[maybe_unused]]) const
{
@ -276,6 +277,7 @@ public:
const Strings & attribute_names,
const DataTypes & result_types,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypes & result_short_circuit_types) const
{
@ -289,8 +291,8 @@ public:
const auto & attribute_name = attribute_names[i];
const auto & result_type = result_types[i];
const auto & result_short_circuit_type = result_short_circuit_types[i];
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name,
result_type, key_columns, default_argument, result_short_circuit_type));
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name, result_type,
key_columns, key_types, default_argument, result_short_circuit_type));
}
return result;

View File

@ -321,13 +321,13 @@ public:
String getName() const override { return name; }
bool isVariadic() const override { return true; }
bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
{
settings.enable_lazy_execution_for_first_argument = false;
settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
settings.force_enable_lazy_execution = false;
return true;
}
// bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
// {
// settings.enable_lazy_execution_for_first_argument = false;
// settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
// settings.force_enable_lazy_execution = false;
// return true;
// }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
size_t getNumberOfArguments() const override { return 0; }
@ -634,8 +634,8 @@ private:
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
{
result_columns = dictionary->getColumnsOrDefaultShortCircuit(
attribute_names, result_tuple_type.getElements(), key_columns, last_argument,
result_short_circuit_tuple_type.getElements());
attribute_names, result_tuple_type.getElements(), key_columns, key_types,
last_argument, result_short_circuit_tuple_type.getElements());
}
else
{
@ -655,7 +655,7 @@ private:
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
{
result = dictionary->getColumnOrDefaultShortCircuit(
attribute_names[0], result_type, key_columns, last_argument, result_type_short_circuit);
attribute_names[0], result_type, key_columns, key_types, last_argument, result_type_short_circuit);
}
else
{