Support Dynamic in functions toFloat64/toUInt32/etc

avogar 2024-12-09 18:07:20 +00:00
parent e83921d2cb
commit 18e8af493c
3 changed files with 300 additions and 132 deletions


@@ -1678,8 +1678,8 @@ struct ConvertImpl
            && !(std::is_same_v<DataTypeDateTime64, FromDataType> || std::is_same_v<DataTypeDateTime64, ToDataType>)
            && (!IsDataTypeDecimalOrNumber<FromDataType> || !IsDataTypeDecimalOrNumber<ToDataType>))
        {
-           throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {} of first argument of function {}",
-               named_from.column->getName(), Name::name);
+           throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Illegal column {}/{} of first argument of function {}",
+               named_from.column->getName(), typeid(FromDataType).name(), Name::name);
        }

        const ColVecFrom * col_from = checkAndGetColumn<ColVecFrom>(named_from.column.get());
@@ -1993,6 +1993,124 @@ struct ConvertImplGenericFromString
    }
};

+struct ConvertImplFromDynamicToColumn
+{
+    static ColumnPtr execute(
+        const ColumnsWithTypeAndName & arguments,
+        const DataTypePtr & result_type,
+        size_t input_rows_count,
+        const std::function<ColumnPtr(ColumnsWithTypeAndName &, const DataTypePtr)> & nested_convert)
+    {
+        /// When casting Dynamic to regular column we should cast all variants from current Dynamic column
+        /// and construct the result based on discriminators.
+        const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments.front().column.get());
+        const auto & variant_column = column_dynamic.getVariantColumn();
+        const auto & variant_info = column_dynamic.getVariantInfo();
+
+        /// First, cast usual variants to result type.
+        const auto & variant_types = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
+        std::vector<ColumnPtr> cast_variant_columns;
+        cast_variant_columns.reserve(variant_types.size());
+        for (size_t i = 0; i != variant_types.size(); ++i)
+        {
+            /// Skip shared variant, it will be processed later.
+            if (i == column_dynamic.getSharedVariantDiscriminator())
+            {
+                cast_variant_columns.push_back(nullptr);
+                continue;
+            }
+
+            ColumnsWithTypeAndName new_args = arguments;
+            new_args[0] = {variant_column.getVariantPtrByGlobalDiscriminator(i), variant_types[i], ""};
+            cast_variant_columns.push_back(nested_convert(new_args, result_type));
+        }
+
+        /// Second, collect all variants stored in shared variant and cast them to result type.
+        std::vector<MutableColumnPtr> variant_columns_from_shared_variant;
+        DataTypes variant_types_from_shared_variant;
+        /// We will need to know what variant to use when we see discriminator of a shared variant.
+        /// To do it, we remember what variant was extracted from each row and what was its offset.
+        PaddedPODArray<UInt64> shared_variant_indexes;
+        PaddedPODArray<UInt64> shared_variant_offsets;
+        std::unordered_map<String, UInt64> shared_variant_to_index;
+        const auto & shared_variant = column_dynamic.getSharedVariant();
+        const auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator();
+        const auto & local_discriminators = variant_column.getLocalDiscriminators();
+        const auto & offsets = variant_column.getOffsets();
+        if (!shared_variant.empty())
+        {
+            shared_variant_indexes.reserve(input_rows_count);
+            shared_variant_offsets.reserve(input_rows_count);
+            FormatSettings format_settings;
+            const auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(shared_variant_discr);
+            for (size_t i = 0; i != input_rows_count; ++i)
+            {
+                if (local_discriminators[i] == shared_variant_local_discr)
+                {
+                    auto value = shared_variant.getDataAt(offsets[i]);
+                    ReadBufferFromMemory buf(value.data, value.size);
+                    auto type = decodeDataType(buf);
+                    auto type_name = type->getName();
+                    auto it = shared_variant_to_index.find(type_name);
+                    /// Check if we didn't create column for this variant yet.
+                    if (it == shared_variant_to_index.end())
+                    {
+                        it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first;
+                        variant_columns_from_shared_variant.push_back(type->createColumn());
+                        variant_types_from_shared_variant.push_back(type);
+                    }
+
+                    shared_variant_indexes.push_back(it->second);
+                    shared_variant_offsets.push_back(variant_columns_from_shared_variant[it->second]->size());
+                    type->getDefaultSerialization()->deserializeBinary(*variant_columns_from_shared_variant[it->second], buf, format_settings);
+                }
+                else
+                {
+                    shared_variant_indexes.emplace_back();
+                    shared_variant_offsets.emplace_back();
+                }
+            }
+        }
+
+        /// Cast all extracted variants into result type.
+        std::vector<ColumnPtr> cast_shared_variant_columns;
+        cast_shared_variant_columns.reserve(variant_types_from_shared_variant.size());
+        for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i)
+        {
+            ColumnsWithTypeAndName new_args = arguments;
+            new_args[0] = {variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""};
+            cast_shared_variant_columns.push_back(nested_convert(new_args, result_type));
+        }
+
+        /// Construct result column from all cast variants.
+        auto res = result_type->createColumn();
+        res->reserve(input_rows_count);
+        for (size_t i = 0; i != input_rows_count; ++i)
+        {
+            auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
+            if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
+            {
+                res->insertDefault();
+            }
+            else if (global_discr == shared_variant_discr)
+            {
+                if (cast_shared_variant_columns[shared_variant_indexes[i]])
+                    res->insertFrom(*cast_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
+                else
+                    res->insertDefault();
+            }
+            else
+            {
+                if (cast_variant_columns[global_discr])
+                    res->insertFrom(*cast_variant_columns[global_discr], offsets[i]);
+                else
+                    res->insertDefault();
+            }
+        }
+
+        return res;
+    }
+};
/// Declared early because used below.
struct NameToDate { static constexpr auto name = "toDate"; };
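
The new ConvertImplFromDynamicToColumn::execute follows a cast-once-then-gather pattern: every variant sub-column of the Dynamic column is converted to the target type exactly once (through the injected nested_convert callback), and the result is then assembled row by row from the discriminators, inserting a default value where no converted value exists. Below is a minimal, self-contained sketch of that pattern; plain standard-library containers stand in for ClickHouse's IColumn machinery, and all names are invented for illustration only.

#include <cstdint>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Toy model: each row stores (discriminator, offset into that variant's values).
// Discriminator 0 = Int64 variant, 1 = String variant, -1 = NULL.
struct ToyDynamicColumn
{
    std::vector<int8_t> discriminators;
    std::vector<size_t> offsets;
    std::vector<int64_t> int_values;      // variant 0
    std::vector<std::string> str_values;  // variant 1
};

// Cast every variant sub-column to Float64 once, then gather by discriminator.
std::vector<double> toFloat64(const ToyDynamicColumn & col)
{
    // "Cast" variant 0 in one batch.
    std::vector<double> cast_ints(col.int_values.begin(), col.int_values.end());

    // "Cast" variant 1; a value that cannot be parsed stays empty (default later).
    std::vector<std::optional<double>> cast_strs;
    cast_strs.reserve(col.str_values.size());
    for (const auto & s : col.str_values)
    {
        try { cast_strs.push_back(std::stod(s)); }
        catch (...) { cast_strs.push_back(std::nullopt); }
    }

    // Assemble the result row by row using discriminators and offsets.
    std::vector<double> res;
    res.reserve(col.discriminators.size());
    for (size_t i = 0; i != col.discriminators.size(); ++i)
    {
        if (col.discriminators[i] == 0)
            res.push_back(cast_ints[col.offsets[i]]);
        else if (col.discriminators[i] == 1)
            res.push_back(cast_strs[col.offsets[i]].value_or(0.0));  // default on failed cast
        else
            res.push_back(0.0);  // NULL discriminator -> default value
    }
    return res;
}

int main()
{
    ToyDynamicColumn col;
    col.int_values = {1, 2};
    col.str_values = {"3.5", "not a number"};
    col.discriminators = {0, 1, 0, 1, -1};
    col.offsets       = {0, 0, 1, 1, 0};
    for (double v : toFloat64(col))
        std::cout << v << '\n';   // 1 3.5 2 0 0
}

Casting each sub-column as one batch keeps the per-row loop down to index lookups, which is the same reason the real code prepares cast_variant_columns and cast_shared_variant_columns before the final gather.
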
@@ -2326,6 +2444,16 @@ private:
        if (context)
            date_time_overflow_behavior = context->getSettingsRef()[Setting::date_time_overflow_behavior].value;

+       if (isDynamic(from_type))
+       {
+           auto nested_convert = [this](ColumnsWithTypeAndName & args, const DataTypePtr & to_type) -> ColumnPtr
+           {
+               return executeInternal(args, to_type, args[0].column->size());
+           };
+
+           return ConvertImplFromDynamicToColumn::execute(arguments, result_type, input_rows_count, nested_convert);
+       }
+
        auto call = [&](const auto & types, BehaviourOnErrorFromString from_string_tag) -> bool
        {
            using Types = std::decay_t<decltype(types)>;
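
Here executeInternal short-circuits when the first argument is Dynamic: it hands the column to ConvertImplFromDynamicToColumn together with a lambda that simply re-enters executeInternal for each variant sub-column, so every variant is converted by the same code path as a plain column of that type. The CAST machinery further below reuses the same helper with a different callback. A rough sketch of this callback-injection shape, with toy types and invented names rather than the real ClickHouse interfaces:

#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Toy stand-in for a variant sub-column: a type name plus its values as strings.
struct ToyVariant { std::string type; std::vector<std::string> values; };

// The assembler knows nothing about individual conversions; it receives them as a
// callback, so different call sites (function execution vs. CAST wrappers) can plug
// in their own logic. A callback may return nullopt to say "no conversion exists".
using NestedConvert = std::function<std::optional<std::vector<double>>(const ToyVariant &)>;

std::vector<double> convertDynamic(const std::vector<ToyVariant> & variants, const NestedConvert & nested_convert)
{
    std::vector<double> res;
    for (const auto & variant : variants)
    {
        auto converted = nested_convert(variant);
        if (!converted)
        {
            // No conversion for this variant: fall back to default values.
            res.insert(res.end(), variant.values.size(), 0.0);
            continue;
        }
        res.insert(res.end(), converted->begin(), converted->end());
    }
    return res;
}

int main()
{
    std::vector<ToyVariant> variants = {{"Int64", {"1", "2"}}, {"Date", {"2020-01-01"}}};
    // One possible callback: convert numeric variants, refuse everything else.
    auto nested_convert = [](const ToyVariant & v) -> std::optional<std::vector<double>>
    {
        if (v.type != "Int64")
            return std::nullopt;
        std::vector<double> out;
        for (const auto & s : v.values)
            out.push_back(std::stod(s));
        return out;
    };
    for (double x : convertDynamic(variants, nested_convert))
        std::cout << x << '\n';   // 1 2 0
}

Passing the conversion as a std::function keeps the assembly logic in one place while letting each call site decide how a single variant is converted, and whether a failed conversion yields nullptr, as happens for the accurateOrNull cast below.
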
@@ -4692,138 +4820,160 @@ private:
    WrapperType createDynamicToColumnWrapper(const DataTypePtr &) const
    {
+       auto nested_convert = [this](ColumnsWithTypeAndName & args, const DataTypePtr & result_type) -> ColumnPtr
+       {
+           WrapperType wrapper;
+           if (cast_type == CastType::accurateOrNull)
+           {
+               /// Create wrapper only if we support conversion from variant to the resulting type.
+               wrapper = createWrapperIfCanConvert(args[0].type, result_type);
+               if (!wrapper)
+                   return nullptr;
+           }
+           else
+           {
+               wrapper = prepareUnpackDictionaries(args[0].type, result_type);
+           }
+           return wrapper(args, result_type, nullptr, args[0].column->size());
+       };
+
-       return [this]
+       return [nested_convert]
            (ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, const ColumnNullable *, size_t input_rows_count) -> ColumnPtr
        {
+           return ConvertImplFromDynamicToColumn::execute(arguments, result_type, input_rows_count, nested_convert);
            [The new code also keeps the entire previous implementation below as a //-commented block; that verbatim copy is not repeated here.]
-           /// When casting Dynamic to regular column we should cast all variants from current Dynamic column
-           /// and construct the result based on discriminators.
-           const auto & column_dynamic = assert_cast<const ColumnDynamic &>(*arguments.front().column.get());
-           const auto & variant_column = column_dynamic.getVariantColumn();
-           const auto & variant_info = column_dynamic.getVariantInfo();
-
-           /// First, cast usual variants to result type.
-           const auto & variant_types = assert_cast<const DataTypeVariant &>(*variant_info.variant_type).getVariants();
-           std::vector<ColumnPtr> cast_variant_columns;
-           cast_variant_columns.reserve(variant_types.size());
-           for (size_t i = 0; i != variant_types.size(); ++i)
-           {
-               /// Skip shared variant, it will be processed later.
-               if (i == column_dynamic.getSharedVariantDiscriminator())
-               {
-                   cast_variant_columns.push_back(nullptr);
-                   continue;
-               }
-
-               const auto & variant_col = variant_column.getVariantPtrByGlobalDiscriminator(i);
-               ColumnsWithTypeAndName variant = {{variant_col, variant_types[i], ""}};
-               WrapperType variant_wrapper;
-               if (cast_type == CastType::accurateOrNull)
-                   /// Create wrapper only if we support conversion from variant to the resulting type.
-                   variant_wrapper = createWrapperIfCanConvert(variant_types[i], result_type);
-               else
-                   variant_wrapper = prepareUnpackDictionaries(variant_types[i], result_type);
-
-               ColumnPtr cast_variant;
-               /// Check if we have wrapper for this variant.
-               if (variant_wrapper)
-                   cast_variant = variant_wrapper(variant, result_type, nullptr, variant_col->size());
-               cast_variant_columns.push_back(cast_variant);
-           }
-
-           /// Second, collect all variants stored in shared variant and cast them to result type.
-           std::vector<MutableColumnPtr> variant_columns_from_shared_variant;
-           DataTypes variant_types_from_shared_variant;
-           /// We will need to know what variant to use when we see discriminator of a shared variant.
-           /// To do it, we remember what variant was extracted from each row and what was its offset.
-           PaddedPODArray<UInt64> shared_variant_indexes;
-           PaddedPODArray<UInt64> shared_variant_offsets;
-           std::unordered_map<String, UInt64> shared_variant_to_index;
-           const auto & shared_variant = column_dynamic.getSharedVariant();
-           const auto shared_variant_discr = column_dynamic.getSharedVariantDiscriminator();
-           const auto & local_discriminators = variant_column.getLocalDiscriminators();
-           const auto & offsets = variant_column.getOffsets();
-           if (!shared_variant.empty())
-           {
-               shared_variant_indexes.reserve(input_rows_count);
-               shared_variant_offsets.reserve(input_rows_count);
-               FormatSettings format_settings;
-               const auto shared_variant_local_discr = variant_column.localDiscriminatorByGlobal(shared_variant_discr);
-               for (size_t i = 0; i != input_rows_count; ++i)
-               {
-                   if (local_discriminators[i] == shared_variant_local_discr)
-                   {
-                       auto value = shared_variant.getDataAt(offsets[i]);
-                       ReadBufferFromMemory buf(value.data, value.size);
-                       auto type = decodeDataType(buf);
-                       auto type_name = type->getName();
-                       auto it = shared_variant_to_index.find(type_name);
-                       /// Check if we didn't create column for this variant yet.
-                       if (it == shared_variant_to_index.end())
-                       {
-                           it = shared_variant_to_index.emplace(type_name, variant_columns_from_shared_variant.size()).first;
-                           variant_columns_from_shared_variant.push_back(type->createColumn());
-                           variant_types_from_shared_variant.push_back(type);
-                       }
-
-                       shared_variant_indexes.push_back(it->second);
-                       shared_variant_offsets.push_back(variant_columns_from_shared_variant[it->second]->size());
-                       type->getDefaultSerialization()->deserializeBinary(*variant_columns_from_shared_variant[it->second], buf, format_settings);
-                   }
-                   else
-                   {
-                       shared_variant_indexes.emplace_back();
-                       shared_variant_offsets.emplace_back();
-                   }
-               }
-           }
-
-           /// Cast all extracted variants into result type.
-           std::vector<ColumnPtr> cast_shared_variant_columns;
-           cast_shared_variant_columns.reserve(variant_types_from_shared_variant.size());
-           for (size_t i = 0; i != variant_types_from_shared_variant.size(); ++i)
-           {
-               ColumnsWithTypeAndName variant = {{variant_columns_from_shared_variant[i]->getPtr(), variant_types_from_shared_variant[i], ""}};
-               WrapperType variant_wrapper;
-               if (cast_type == CastType::accurateOrNull)
-                   /// Create wrapper only if we support conversion from variant to the resulting type.
-                   variant_wrapper = createWrapperIfCanConvert(variant_types_from_shared_variant[i], result_type);
-               else
-                   variant_wrapper = prepareUnpackDictionaries(variant_types_from_shared_variant[i], result_type);
-
-               ColumnPtr cast_variant;
-               /// Check if we have wrapper for this variant.
-               if (variant_wrapper)
-                   cast_variant = variant_wrapper(variant, result_type, nullptr, variant_columns_from_shared_variant[i]->size());
-               cast_shared_variant_columns.push_back(cast_variant);
-           }
-
-           /// Construct result column from all cast variants.
-           auto res = result_type->createColumn();
-           res->reserve(input_rows_count);
-           for (size_t i = 0; i != input_rows_count; ++i)
-           {
-               auto global_discr = variant_column.globalDiscriminatorByLocal(local_discriminators[i]);
-               if (global_discr == ColumnVariant::NULL_DISCRIMINATOR)
-               {
-                   res->insertDefault();
-               }
-               else if (global_discr == shared_variant_discr)
-               {
-                   if (cast_shared_variant_columns[shared_variant_indexes[i]])
-                       res->insertFrom(*cast_shared_variant_columns[shared_variant_indexes[i]], shared_variant_offsets[i]);
-                   else
-                       res->insertDefault();
-               }
-               else
-               {
-                   if (cast_variant_columns[global_discr])
-                       res->insertFrom(*cast_variant_columns[global_discr], offsets[i]);
-                   else
-                       res->insertDefault();
-               }
-           }
-
-           return res;
        };
    }
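
One part of the moved logic deserves a note: values that live in the shared variant are stored row by row as an encoded data type followed by a binary-serialized value. Both the removed code above and the new ConvertImplFromDynamicToColumn::execute handle this by grouping such rows per decoded type into temporary columns, casting each temporary column once, and remembering an (index, offset) pair per row for the final gather. A small self-contained sketch of that grouping step, with plain text payloads standing in for the binary encoding and invented names throughout:

#include <cstdint>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

// Toy shared variant: every row carries its own type tag plus a textual payload.
struct SharedRow { std::string type; std::string payload; };

int main()
{
    std::vector<SharedRow> shared = {{"UInt64", "4"}, {"Date", "18262"}, {"UInt64", "7"}};

    // Group rows by type into temporary per-type columns, remembering for each
    // source row which temporary column (index) and position (offset) it went to.
    std::unordered_map<std::string, size_t> type_to_index;
    std::vector<std::vector<std::string>> tmp_columns;
    std::vector<size_t> row_index, row_offset;
    for (const auto & row : shared)
    {
        auto it = type_to_index.find(row.type);
        if (it == type_to_index.end())
        {
            it = type_to_index.emplace(row.type, tmp_columns.size()).first;
            tmp_columns.emplace_back();
        }
        row_index.push_back(it->second);
        row_offset.push_back(tmp_columns[it->second].size());
        tmp_columns[it->second].push_back(row.payload);
    }

    // Cast each temporary column once (here: string -> UInt64).
    std::vector<std::vector<uint64_t>> cast_columns;
    for (const auto & col : tmp_columns)
    {
        std::vector<uint64_t> cast;
        for (const auto & s : col)
            cast.push_back(std::stoull(s));
        cast_columns.push_back(cast);
    }

    // Gather the result in the original row order.
    for (size_t i = 0; i != shared.size(); ++i)
        std::cout << cast_columns[row_index[i]][row_offset[i]] << '\n';   // 4 18262 7
}

Grouping first means each distinct type found in the shared variant is converted as one batch instead of dispatching a conversion per row.
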


@@ -0,0 +1,12 @@
1
2
3
4
5
18262
1
2
3
4
5
18262


@@ -0,0 +1,6 @@
set enable_dynamic_type = 1;
create table test (d Dynamic(max_types=3)) engine=Memory;
insert into test values (1::UInt8), (2::UInt16), (3::UInt32), (4::UInt64), ('5'::String), ('2020-01-01'::Date);
select toFloat64(d) from test;
select toUInt32(d) from test;
drop table test;