This commit is contained in:
jsc0218 2023-12-22 22:12:32 +00:00
parent a3a080f916
commit e25eda91b0
5 changed files with 172 additions and 140 deletions

View File

@ -154,11 +154,10 @@ ColumnPtr FlatDictionary::getColumn(
ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const DataTypePtr & atribute_type,
const Columns & key_columns,
const DataTypes & key_types [[maybe_unused]],
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const
IColumn::Filter & default_mask) const
{
ColumnPtr result;
@ -167,7 +166,7 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
auto size = ids.size();
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, atribute_type);
size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
const auto & attribute = attributes[attribute_index];
@ -195,20 +194,18 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
{
auto * out = column.get();
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType, false>(
attribute,
ids,
[&](size_t, const Array & value, bool) { out->insert(value); },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
getItemsShortCircuitImpl<ValueType, true>(
attribute,
ids,
[&](size_t row, StringRef value, bool is_null)
@ -216,24 +213,20 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
else
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType, false>(
attribute,
ids,
[&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
getItemsShortCircuitImpl<ValueType, true>(
attribute,
ids,
[&](size_t row, const auto value, bool is_null)
@ -241,17 +234,13 @@ ColumnPtr FlatDictionary::getColumnOrDefaultShortCircuit(
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
else
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType, false>(
attribute,
ids,
[&](size_t row, const auto value, bool) { out[row] = value; },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
result = std::move(column);
@ -683,42 +672,39 @@ void FlatDictionary::getItemsImpl(
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType,
typename ValueSetter>
template <typename AttributeType, bool is_nullable, typename ValueSetter>
void FlatDictionary::getItemsShortCircuitImpl(
const Attribute & attribute,
const PaddedPODArray<UInt64> & keys,
ValueSetter && set_value,
const ColumnWithTypeAndName & default_argument,
const DictionaryAttribute & dictionary_attribute,
const DataTypePtr & result_type_short_circuit) const
IColumn::Filter & default_mask) const
{
const auto rows = keys.size();
auto cond_col = ColumnVector<UInt8>::create(rows);
auto & cond = cond_col->getData();
default_mask.resize(rows);
const auto & container = std::get<ContainerType<AttributeType>>(attribute.container);
size_t keys_found = 0;
for (size_t row = 0; row < rows; ++row)
{
const auto key = keys[row];
cond[row] = (key < loaded_keys.size() && loaded_keys[key]) ? 1 : 0;
if (key < loaded_keys.size() && loaded_keys[key])
{
default_mask[row] = 1;
if constexpr (is_nullable)
set_value(keys_found, container[key], attribute.is_nullable_set->find(key) != nullptr);
else
set_value(keys_found, container[key], false);
++keys_found;
}
else
default_mask[row] = 0;
}
IColumn::Filter mask(rows, 1);
auto mask_info = extractMask(mask, std::move(cond_col));
inverseMask(mask, mask_info);
ColumnWithTypeAndName column_before_cast = default_argument;
maskedExecute(column_before_cast, mask, mask_info);
ColumnWithTypeAndName column_to_cast = {
column_before_cast.column->convertToFullColumnIfConst(),
column_before_cast.type,
column_before_cast.name};
auto result = castColumnAccurate(column_to_cast, result_type_short_circuit);
DictionaryDefaultValueExtractor<DictionaryAttributeType> default_value_extractor(
dictionary_attribute.null_value, result);
getItemsImpl<AttributeType, is_nullable>(attribute, keys, set_value, default_value_extractor);
query_count.fetch_add(rows, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <typename T>

View File

@ -84,11 +84,10 @@ public:
ColumnPtr getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const override;
IColumn::Filter & default_mask) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
@ -173,14 +172,12 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
template <typename AttributeType, bool is_nullable, typename ValueSetter>
void getItemsShortCircuitImpl(
const Attribute & attribute,
const PaddedPODArray<UInt64> & keys,
ValueSetter && set_value,
const ColumnWithTypeAndName & default_argument,
const DictionaryAttribute & dictionary_attribute,
const DataTypePtr & result_type_short_circuit) const;
IColumn::Filter & default_mask) const;
template <typename T>
void resize(Attribute & attribute, UInt64 key);

View File

@ -148,11 +148,10 @@ public:
ColumnPtr getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const override;
IColumn::Filter & default_mask) const override;
ColumnUInt8::Ptr hasKeys(const Columns & key_columns, const DataTypes & key_types) const override;
@ -252,14 +251,12 @@ private:
ValueSetter && set_value,
DefaultValueExtractor & default_value_extractor) const;
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
template <typename AttributeType, typename ValueSetter>
void getItemsShortCircuitImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value,
const ColumnWithTypeAndName & default_argument,
const DictionaryAttribute & dictionary_attribute,
const DataTypePtr & result_type_short_circuit) const;
IColumn::Filter & default_mask) const;
template <typename GetContainersFunc>
void getAttributeContainers(size_t attribute_index, GetContainersFunc && get_containers_func);
@ -498,11 +495,10 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumn(
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDefaultShortCircuit(
const std::string & attribute_name,
const DataTypePtr & result_type,
const DataTypePtr & attribute_type,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypePtr & result_type_short_circuit) const
IColumn::Filter & default_mask) const
{
if (dictionary_key_type == DictionaryKeyType::Complex)
dict_struct.validateKeyTypes(key_types);
@ -514,7 +510,7 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
const size_t size = extractor.getKeysSize();
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, result_type);
const auto & dictionary_attribute = dict_struct.getAttribute(attribute_name, attribute_type);
const size_t attribute_index = dict_struct.attribute_name_to_index.find(attribute_name)->second;
auto & attribute = attributes[attribute_index];
@ -541,20 +537,18 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
{
auto * out = column.get();
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType>(
attribute,
extractor,
[&](const size_t, const Array & value, bool) { out->insert(value); },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
else if constexpr (std::is_same_v<ValueType, StringRef>)
{
auto * out = column.get();
if (is_attribute_nullable)
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
getItemsShortCircuitImpl<ValueType>(
attribute,
extractor,
[&](size_t row, StringRef value, bool is_null)
@ -562,24 +556,20 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
(*vec_null_map_to)[row] = is_null;
out->insertData(value.data, value.size);
},
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
else
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType>(
attribute,
extractor,
[&](size_t, StringRef value, bool) { out->insertData(value.data, value.size); },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
else
{
auto & out = column->getData();
if (is_attribute_nullable)
getItemsShortCircuitImpl<ValueType, true, AttributeType>(
getItemsShortCircuitImpl<ValueType>(
attribute,
extractor,
[&](size_t row, const auto value, bool is_null)
@ -587,17 +577,13 @@ ColumnPtr HashedDictionary<dictionary_key_type, sparse, sharded>::getColumnOrDef
(*vec_null_map_to)[row] = is_null;
out[row] = value;
},
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
else
getItemsShortCircuitImpl<ValueType, false, AttributeType>(
getItemsShortCircuitImpl<ValueType>(
attribute,
extractor,
[&](size_t row, const auto value, bool) { out[row] = value; },
default_argument,
dictionary_attribute,
result_type_short_circuit);
default_mask);
}
result = std::move(column);
@ -1151,19 +1137,17 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsImpl(
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>
template <typename AttributeType, bool is_nullable, typename DictionaryAttributeType, typename ValueSetter>
template <typename AttributeType, typename ValueSetter>
void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCircuitImpl(
const Attribute & attribute,
DictionaryKeysExtractor<dictionary_key_type> & keys_extractor,
ValueSetter && set_value,
const ColumnWithTypeAndName & default_argument,
const DictionaryAttribute & dictionary_attribute,
const DataTypePtr & result_type_short_circuit) const
IColumn::Filter & default_mask) const
{
const auto & attribute_containers = std::get<CollectionsHolder<AttributeType>>(attribute.containers);
const size_t keys_size = keys_extractor.getKeysSize();
auto cond_col = ColumnVector<UInt8>::create(keys_size);
auto & cond = cond_col->getData();
default_mask.resize(keys_size);
size_t keys_found = 0;
for (size_t key_index = 0; key_index < keys_size; ++key_index)
{
@ -1173,28 +1157,21 @@ void HashedDictionary<dictionary_key_type, sparse, sharded>::getItemsShortCircui
const auto & container = attribute_containers[shard];
const auto it = container.find(key);
cond[key_index] = (it != container.end()) ? 1 : 0;
if (it != container.end())
{
set_value(key_index, getValueFromCell(it), false);
default_mask[key_index] = 1;
++keys_found;
}
else
default_mask[key_index] = 0;
keys_extractor.rollbackCurrentKey();
}
keys_extractor.reset();
IColumn::Filter mask(keys_size, 1);
auto mask_info = extractMask(mask, std::move(cond_col));
inverseMask(mask, mask_info);
ColumnWithTypeAndName column_before_cast = default_argument;
maskedExecute(column_before_cast, mask, mask_info);
ColumnWithTypeAndName column_to_cast = {
column_before_cast.column->convertToFullColumnIfConst(),
column_before_cast.type,
column_before_cast.name};
auto result = castColumnAccurate(column_to_cast, result_type_short_circuit);
DictionaryDefaultValueExtractor<DictionaryAttributeType> default_value_extractor(
dictionary_attribute.null_value, result);
getItemsImpl<AttributeType, is_nullable>(attribute, keys_extractor, set_value, default_value_extractor);
query_count.fetch_add(keys_size, std::memory_order_relaxed);
found_count.fetch_add(keys_found, std::memory_order_relaxed);
}
template <DictionaryKeyType dictionary_key_type, bool sparse, bool sharded>

View File

@ -257,11 +257,10 @@ public:
*/
virtual ColumnPtr getColumnOrDefaultShortCircuit(
const std::string & attribute_name [[maybe_unused]],
const DataTypePtr & result_type [[maybe_unused]],
const DataTypePtr & attribute_type [[maybe_unused]],
const Columns & key_columns [[maybe_unused]],
const DataTypes & key_types [[maybe_unused]],
const ColumnWithTypeAndName & default_argument [[maybe_unused]],
const DataTypePtr & result_type_short_circuit [[maybe_unused]]) const
IColumn::Filter & default_mask [[maybe_unused]]) const
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED,
"Method getColumnOrDefault is not supported for {} dictionary.",
@ -275,11 +274,10 @@ public:
*/
virtual Columns getColumnsOrDefaultShortCircuit(
const Strings & attribute_names,
const DataTypes & result_types,
const DataTypes & attribute_types,
const Columns & key_columns,
const DataTypes & key_types,
const ColumnWithTypeAndName & default_argument,
const DataTypes & result_short_circuit_types) const
IColumn::Filter & default_mask) const
{
size_t attribute_names_size = attribute_names.size();
@ -289,10 +287,9 @@ public:
for (size_t i = 0; i < attribute_names_size; ++i)
{
const auto & attribute_name = attribute_names[i];
const auto & result_type = result_types[i];
const auto & result_short_circuit_type = result_short_circuit_types[i];
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name, result_type,
key_columns, key_types, default_argument, result_short_circuit_type));
const auto & attribute_type = attribute_types[i];
result.emplace_back(getColumnOrDefaultShortCircuit(attribute_name,
attribute_type, key_columns, key_types, default_mask));
}
return result;

View File

@ -14,6 +14,7 @@
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Columns/MaskOperations.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnConst.h>
@ -22,6 +23,7 @@
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnFunction.h>
#include <Functions/FunctionFactory.h>
#include <Access/Common/AccessFlags.h>
@ -63,7 +65,7 @@ namespace ErrorCodes
*/
class FunctionDictHelper : WithContext
class FunctionDictHelper : public WithContext
{
public:
explicit FunctionDictHelper(ContextPtr context_) : WithContext(context_) {}
@ -321,13 +323,16 @@ public:
/// Returns the `name` identifier of this function.
String getName() const override { return name; }
/// Always reports true: the function declares itself variadic.
bool isVariadic() const override { return true; }
// bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
// {
// settings.enable_lazy_execution_for_first_argument = false;
// settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
// settings.force_enable_lazy_execution = false;
// return true;
// }
/// Reports whether this function takes part in short-circuit evaluation.
///
/// Only the `getOrDefault` flavour opts in. Every other flavour returns false and
/// leaves `settings` untouched. When opting in, all three lazy-execution flags of
/// `settings` are switched off before returning true.
bool isShortCircuit(ShortCircuitSettings & settings, size_t /*number_of_arguments*/) const override
{
    if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault)
    {
        settings.enable_lazy_execution_for_first_argument = false;
        settings.enable_lazy_execution_for_common_descendants_of_arguments = false;
        settings.force_enable_lazy_execution = false;
        return true;
    }
    else
    {
        return false;
    }
}
/// Always reports true, regardless of the argument types passed in.
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
/// Always returns 0.
/// NOTE(review): presumably 0 is the placeholder used by variadic functions — confirm against IFunction.
size_t getNumberOfArguments() const override { return 0; }
@ -607,40 +612,104 @@ public:
private:
/// Evaluates the default-value argument under a mask and shapes the result for merging.
///
/// `default_mask` is consumed on entry: its buffer is moved into a UInt8 column. On exit it
/// holds the mask filled by extractInvertedMask() (presumably the inverse of the incoming
/// one — confirm against Columns/MaskOperations.h).
///
/// `last_argument` is copied, executed via maskedExecute() with that mask, converted to a
/// full column if it is constant, cast to `result_type` with castColumnAccurate() and then
/// expanded with the new `default_mask`.
///
/// Returns a pair: { the cast and expanded defaults column,
///                   the UInt8 column that now owns the mask as it was on entry }.
std::pair<ColumnPtr, ColumnPtr> getDefaultsShortCircuit(
    IColumn::Filter & default_mask,
    const DataTypePtr & result_type,
    const ColumnWithTypeAndName & last_argument) const
{
    /// The size must be read before the mask buffer is moved away below.
    auto row_count = default_mask.size();

    /// Move the incoming mask into a UInt8 column.
    auto incoming_mask_holder = ColumnUInt8::create();
    incoming_mask_holder->getData() = std::move(default_mask);
    ColumnPtr incoming_mask_column = std::move(incoming_mask_holder);

    IColumn::Filter execution_mask(row_count, 1);
    auto execution_mask_info = extractInvertedMask(execution_mask, incoming_mask_column);

    /// Work on a copy so the caller's argument is left as it was.
    ColumnWithTypeAndName evaluated_default = last_argument;
    maskedExecute(evaluated_default, execution_mask, execution_mask_info);

    /// From here on the caller sees the mask produced by extractInvertedMask().
    default_mask = std::move(execution_mask);

    ColumnWithTypeAndName materialized_default = {
        evaluated_default.column->convertToFullColumnIfConst(),
        evaluated_default.type,
        evaluated_default.name};

    auto defaults = IColumn::mutate(castColumnAccurate(materialized_default, result_type));
    defaults->expand(default_mask, false);

    return {std::move(defaults), incoming_mask_column};
}
/// Merges the values fetched from the dictionary with the computed defaults.
///
/// `result_column` is taken over, expanded with `inverted_mask` (the `inverted` flag set to
/// true) and then combined with `defaults_column` through the SQL function `if`, with
/// `mask_column` as the condition: the expanded dictionary values are the "then" branch and
/// `defaults_column` is the "else" branch. The outcome, typed as `result_type` and sized
/// by `inverted_mask.size()`, is written back into `result_column`.
void restoreShortCircuitColumn(
    ColumnPtr & result_column,
    ColumnPtr defaults_column,
    ColumnPtr mask_column,
    IColumn::Filter & inverted_mask,
    const DataTypePtr & result_type) const
{
    auto dictionary_values = IColumn::mutate(std::move(result_column));
    dictionary_values->expand(inverted_mask, true);

    auto if_resolver = FunctionFactory::instance().get("if", helper.getContext());

    /// Argument order follows if(condition, then, else).
    ColumnsWithTypeAndName select_args =
    {
        {mask_column, std::make_shared<DataTypeUInt8>(), {}},
        {std::move(dictionary_values), result_type, {}},
        {defaults_column, result_type, {}},
    };

    auto row_count = inverted_mask.size();
    auto bound_if = if_resolver->build(select_args);
    result_column = bound_if->execute(select_args, result_type, row_count);
}
ColumnPtr executeDictionaryRequest(
std::shared_ptr<const IDictionary> & dictionary,
const Strings & attribute_names,
const Columns & key_columns,
const DataTypes & key_types,
const DataTypePtr & result_type,
const DataTypePtr & attribute_type,
const Columns & default_cols,
size_t collect_values_limit,
const ColumnWithTypeAndName & last_argument,
const DataTypePtr & result_type_short_circuit) const
const DataTypePtr & result_type) const
{
ColumnPtr result;
if (attribute_names.size() > 1)
{
const auto & result_tuple_type = assert_cast<const DataTypeTuple &>(*result_type);
const auto & result_short_circuit_tuple_type = assert_cast<const DataTypeTuple &>(*result_type_short_circuit);
const auto & attribute_tuple_type = assert_cast<const DataTypeTuple &>(*attribute_type);
Columns result_columns;
if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll)
{
result_columns = dictionary->getColumnsAllValues(
attribute_names, result_tuple_type.getElements(), key_columns, key_types, default_cols, collect_values_limit);
attribute_names, attribute_tuple_type.getElements(), key_columns, key_types, default_cols, collect_values_limit);
}
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
{
IColumn::Filter default_mask;
result_columns = dictionary->getColumnsOrDefaultShortCircuit(
attribute_names, result_tuple_type.getElements(), key_columns, key_types,
last_argument, result_short_circuit_tuple_type.getElements());
attribute_names, attribute_tuple_type.getElements(), key_columns, key_types, default_mask);
auto [defaults_column, mask_column] =
getDefaultsShortCircuit(default_mask, result_type, last_argument);
const auto & tuple_defaults = assert_cast<const ColumnTuple &>(*defaults_column);
const auto & result_tuple_type = assert_cast<const DataTypeTuple &>(*result_type);
for (size_t col = 0; col < result_columns.size(); ++col)
{
restoreShortCircuitColumn(
result_columns[col],
tuple_defaults.getColumnPtr(col),
mask_column,
default_mask,
result_tuple_type.getElements()[col]);
}
}
else
{
result_columns = dictionary->getColumns(
attribute_names, result_tuple_type.getElements(), key_columns, key_types, default_cols);
attribute_names, attribute_tuple_type.getElements(), key_columns, key_types, default_cols);
}
result = ColumnTuple::create(std::move(result_columns));
@ -650,17 +719,23 @@ private:
if constexpr (dictionary_get_function_type == DictionaryGetFunctionType::getAll)
{
result = dictionary->getColumnAllValues(
attribute_names[0], result_type, key_columns, key_types, default_cols.front(), collect_values_limit);
attribute_names[0], attribute_type, key_columns, key_types, default_cols.front(), collect_values_limit);
}
else if (dictionary_get_function_type == DictionaryGetFunctionType::getOrDefault && default_cols.empty())
{
IColumn::Filter default_mask;
result = dictionary->getColumnOrDefaultShortCircuit(
attribute_names[0], result_type, key_columns, key_types, last_argument, result_type_short_circuit);
attribute_names[0], attribute_type, key_columns, key_types, default_mask);
auto [defaults_column, mask_column] =
getDefaultsShortCircuit(default_mask, result_type, last_argument);
restoreShortCircuitColumn(result, defaults_column, mask_column, default_mask, result_type);
}
else
{
result = dictionary->getColumn(
attribute_names[0], result_type, key_columns, key_types, default_cols.front());
attribute_names[0], attribute_type, key_columns, key_types, default_cols.front());
}
}