diff --git a/src/Functions/array/array.cpp b/src/Functions/array/array.cpp index dfe589fb74f..d2aedd57f99 100644 --- a/src/Functions/array/array.cpp +++ b/src/Functions/array/array.cpp @@ -1,11 +1,15 @@ -#include -#include +#include +#include +#include +#include +#include +#include #include #include -#include -#include -#include +#include +#include #include +#include namespace DB @@ -44,11 +48,13 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { - size_t num_elements = arguments.size(); + const size_t num_elements = arguments.size(); if (num_elements == 0) + { /// We should return constant empty array. return result_type->createColumnConstWithDefaultValue(input_rows_count); + } const DataTypePtr & elem_type = static_cast(*result_type).getNestedType(); @@ -60,7 +66,6 @@ public: Columns columns_holder(num_elements); ColumnRawPtrs column_ptrs(num_elements); - for (size_t i = 0; i < num_elements; ++i) { const auto & arg = arguments[i]; @@ -77,35 +82,199 @@ public: } /// Create and fill the result array. - auto out = ColumnArray::create(elem_type->createColumn()); IColumn & out_data = out->getData(); IColumn::Offsets & out_offsets = out->getOffsets(); - out_data.reserve(input_rows_count * num_elements); - out_offsets.resize(input_rows_count); - + /// Fill out_offsets + out_offsets.resize_exact(input_rows_count); IColumn::Offset current_offset = 0; for (size_t i = 0; i < input_rows_count; ++i) { - for (size_t j = 0; j < num_elements; ++j) - out_data.insertFrom(*column_ptrs[j], i); - current_offset += num_elements; out_offsets[i] = current_offset; } + /// Fill out_data + out_data.reserve(input_rows_count * num_elements); + if (num_elements == 1) + out_data.insertRangeFrom(*column_ptrs[0], 0, input_rows_count); + else + execute(column_ptrs, out_data, input_rows_count); return out; } private: + bool execute(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + return executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) + || executeNumber(columns, out_data, input_rows_count) || executeString(columns, out_data, input_rows_count) + || executeNullable(columns, out_data, input_rows_count) || executeTuple(columns, out_data, input_rows_count) + || executeFixedString(columns, out_data, input_rows_count) || executeGeneric(columns, out_data, input_rows_count); + } + + template + bool executeNumber(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + using Container = ColumnVectorOrDecimal::Container; + std::vector containers(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnVectorOrDecimal * concrete_column = checkAndGetColumn>(columns[i]); + if (!concrete_column) + return false; + + containers[i] = &concrete_column->getData(); + } + + ColumnVectorOrDecimal & concrete_out_data = assert_cast &>(out_data); + Container & out_container = concrete_out_data.getData(); + out_container.resize_exact(columns.size() * input_rows_count); + + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + const size_t base = row_i * columns.size(); + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + out_container[base + col_i] = (*containers[col_i])[row_i]; + } + return true; + } + + bool executeString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + size_t total_bytes = 0; + std::vector concrete_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnString * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + total_bytes += concrete_column->getChars().size(); + concrete_columns[i] = concrete_column; + } + + ColumnString & concrete_out_data = assert_cast(out_data); + auto & out_chars = concrete_out_data.getChars(); + auto & out_offsets = concrete_out_data.getOffsets(); + out_chars.resize_exact(total_bytes); + out_offsets.resize_exact(input_rows_count * columns.size()); + + size_t cur_out_offset = 0; + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + const size_t base = row_i * columns.size(); + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + { + StringRef ref = concrete_columns[col_i]->getDataAt(row_i); + memcpySmallAllowReadWriteOverflow15(&out_chars[cur_out_offset], ref.data, ref.size); + out_chars[cur_out_offset + ref.size] = 0; + + cur_out_offset += ref.size + 1; + out_offsets[base + col_i] = cur_out_offset; + } + } + return true; + } + + bool executeFixedString(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + std::vector concrete_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnFixedString * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + concrete_columns[i] = concrete_column; + } + + ColumnFixedString & concrete_out_data = assert_cast(out_data); + auto & out_chars = concrete_out_data.getChars(); + + const size_t n = concrete_out_data.getN(); + size_t total_bytes = n * columns.size() * input_rows_count; + out_chars.resize_exact(total_bytes); + + size_t curr_out_offset = 0; + for (size_t row_i = 0; row_i < input_rows_count; ++row_i) + { + for (size_t col_i = 0; col_i < columns.size(); ++col_i) + { + StringRef ref = concrete_columns[col_i]->getDataAt(row_i); + memcpySmallAllowReadWriteOverflow15(&out_chars[curr_out_offset], ref.data, n); + curr_out_offset += n; + } + } + return true; + } + + bool executeNullable(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + ColumnRawPtrs null_maps(columns.size(), nullptr); + ColumnRawPtrs nested_columns(columns.size(), nullptr); + for (size_t i = 0; i < columns.size(); ++i) + { + const ColumnNullable * concrete_column = checkAndGetColumn(columns[i]); + if (!concrete_column) + return false; + + null_maps[i] = &concrete_column->getNullMapColumn(); + nested_columns[i] = &concrete_column->getNestedColumn(); + } + + ColumnNullable & concrete_out_data = assert_cast(out_data); + auto & out_null_map = concrete_out_data.getNullMapColumn(); + auto & out_nested_column = concrete_out_data.getNestedColumn(); + execute(null_maps, out_null_map, input_rows_count); + execute(nested_columns, out_nested_column, input_rows_count); + return true; + } + + bool executeTuple(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + ColumnTuple * concrete_out_data = typeid_cast(&out_data); + if (!concrete_out_data) + return false; + + const size_t tuple_size = concrete_out_data->tupleSize(); + for (size_t i = 0; i < tuple_size; ++i) + { + ColumnRawPtrs elem_columns(columns.size(), nullptr); + for (size_t j = 0; j < columns.size(); ++j) + { + const ColumnTuple * concrete_column = assert_cast(columns[j]); + elem_columns[j] = &concrete_column->getColumn(i); + } + execute(elem_columns, concrete_out_data->getColumn(i), input_rows_count); + } + return true; + } + + bool executeGeneric(const ColumnRawPtrs & columns, IColumn & out_data, size_t input_rows_count) const + { + for (size_t i = 0; i < input_rows_count; ++i) + for (const auto * column : columns) + out_data.insertFrom(*column, i); + return true; + } + + String getName() const override { return name; } - bool addField(DataTypePtr type_res, const Field & f, Array & arr) const; - bool use_variant_as_common_type = false; }; diff --git a/src/Functions/map.cpp b/src/Functions/map.cpp index 534f7c0d8cd..14672cfd568 100644 --- a/src/Functions/map.cpp +++ b/src/Functions/map.cpp @@ -2,6 +2,8 @@ #include #include #include +#include +#include #include #include #include @@ -13,7 +15,6 @@ #include #include #include -#include namespace DB @@ -36,11 +37,18 @@ class FunctionMap : public IFunction public: static constexpr auto name = "map"; - explicit FunctionMap(bool use_variant_as_common_type_) : use_variant_as_common_type(use_variant_as_common_type_) {} + explicit FunctionMap(ContextPtr context_) + : context(context_) + , use_variant_as_common_type( + context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type) + , function_array(FunctionFactory::instance().get("array", context)) + , function_map_from_arrays(FunctionFactory::instance().get("mapFromArrays", context)) + { + } static FunctionPtr create(ContextPtr context) { - return std::make_shared(context->getSettingsRef().allow_experimental_variant_type && context->getSettingsRef().use_variant_as_common_type); + return std::make_shared(context); } String getName() const override @@ -101,62 +109,38 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override { size_t num_elements = arguments.size(); - if (num_elements == 0) return result_type->createColumnConstWithDefaultValue(input_rows_count); + ColumnsWithTypeAndName key_args; + ColumnsWithTypeAndName value_args; + for (size_t i = 0; i < num_elements; i += 2) + { + key_args.emplace_back(arguments[i]); + value_args.emplace_back(arguments[i+1]); + } + const auto & result_type_map = static_cast(*result_type); const DataTypePtr & key_type = result_type_map.getKeyType(); const DataTypePtr & value_type = result_type_map.getValueType(); + const DataTypePtr & key_array_type = std::make_shared(key_type); + const DataTypePtr & value_array_type = std::make_shared(value_type); - Columns columns_holder(num_elements); - ColumnRawPtrs column_ptrs(num_elements); + /// key_array = array(args[0], args[2]...) + ColumnPtr key_array = function_array->build(key_args)->execute(key_args, key_array_type, input_rows_count); + /// value_array = array(args[1], args[3]...) + ColumnPtr value_array = function_array->build(value_args)->execute(value_args, value_array_type, input_rows_count); - for (size_t i = 0; i < num_elements; ++i) - { - const auto & arg = arguments[i]; - const auto to_type = i % 2 == 0 ? key_type : value_type; - - ColumnPtr preprocessed_column = castColumn(arg, to_type); - preprocessed_column = preprocessed_column->convertToFullColumnIfConst(); - - columns_holder[i] = std::move(preprocessed_column); - column_ptrs[i] = columns_holder[i].get(); - } - - /// Create and fill the result map. - - MutableColumnPtr keys_data = key_type->createColumn(); - MutableColumnPtr values_data = value_type->createColumn(); - MutableColumnPtr offsets = DataTypeNumber().createColumn(); - - size_t total_elements = input_rows_count * num_elements / 2; - keys_data->reserve(total_elements); - values_data->reserve(total_elements); - offsets->reserve(input_rows_count); - - IColumn::Offset current_offset = 0; - for (size_t i = 0; i < input_rows_count; ++i) - { - for (size_t j = 0; j < num_elements; j += 2) - { - keys_data->insertFrom(*column_ptrs[j], i); - values_data->insertFrom(*column_ptrs[j + 1], i); - } - - current_offset += num_elements / 2; - offsets->insert(current_offset); - } - - auto nested_column = ColumnArray::create( - ColumnTuple::create(Columns{std::move(keys_data), std::move(values_data)}), - std::move(offsets)); - - return ColumnMap::create(nested_column); + /// result = mapFromArrays(key_array, value_array) + ColumnsWithTypeAndName map_args{{key_array, key_array_type, ""}, {value_array, value_array_type, ""}}; + return function_map_from_arrays->build(map_args)->execute(map_args, result_type, input_rows_count); } private: + ContextPtr context; bool use_variant_as_common_type = false; + FunctionOverloadResolverPtr function_array; + FunctionOverloadResolverPtr function_map_from_arrays; }; /// mapFromArrays(keys, values) is a function that allows you to make key-value pair from a pair of arrays or maps @@ -173,6 +157,7 @@ public: bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool useDefaultImplementationForNulls() const override { return false; } bool useDefaultImplementationForConstants() const override { return true; } + bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override {