From 9528405fe437192e62e0f0db1231fd571a7ce39a Mon Sep 17 00:00:00 2001 From: Alexey Arno Date: Mon, 29 Aug 2016 19:57:59 +0300 Subject: [PATCH] dbms: Extended CAST to nullable types. Added ALTER TABLE ... MODIFY COLUMN ... for nullable types. A column with a nullable type can have a default value in CREATE TABLE. [#METR-19266] --- .../DB/Functions/FunctionsConversion.h | 88 +++++++++++++++++-- dbms/src/Storages/MergeTree/MergeTreeData.cpp | 25 ++++-- .../MergeTree/MergedBlockOutputStream.cpp | 11 ++- 3 files changed, 109 insertions(+), 15 deletions(-) diff --git a/dbms/include/DB/Functions/FunctionsConversion.h b/dbms/include/DB/Functions/FunctionsConversion.h index a4108841c2b..32acf31fe36 100644 --- a/dbms/include/DB/Functions/FunctionsConversion.h +++ b/dbms/include/DB/Functions/FunctionsConversion.h @@ -16,10 +16,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -1864,7 +1866,7 @@ private: }; /// Prepare nested type conversion - const auto nested_function = prepare(from_nested_type, to_nested_type.get()); + const auto nested_function = prepareImpl(from_nested_type, to_nested_type.get()); return [nested_function, from_nested_type, to_nested_type] ( Block & block, const ColumnNumbers & arguments, const size_t result) @@ -1943,7 +1945,7 @@ private: /// Create conversion wrapper for each element in tuple for (const auto & idx_type : ext::enumerate(from_type->getElements())) - element_wrappers.push_back(prepare(idx_type.second, to_element_types[idx_type.first].get())); + element_wrappers.push_back(prepareImpl(idx_type.second, to_element_types[idx_type.first].get())); auto function_tuple = FunctionTuple::create(context); return [element_wrappers, function_tuple, from_element_types, to_element_types] @@ -2087,7 +2089,52 @@ private: }; } - WrapperType prepare(const DataTypePtr & from_type, const IDataType * const to_type) + WrapperType prepare(const DataTypePtr & from_type, const IDataType * const to_type, bool unwrap_from_nullable, bool wrap_into_nullable) + { + auto wrapper = prepareImpl(from_type, to_type); + + if (wrap_into_nullable) + { + return [wrapper, unwrap_from_nullable] (Block & block, const ColumnNumbers & arguments, const size_t result) + { + /// Create a temporary block on which to perform the operation. + const auto & ret_type = block.getByPosition(result).type; + const auto & nullable_type = static_cast(*ret_type); + const auto & nested_type = nullable_type.getNestedType(); + + Block tmp_block = unwrap_from_nullable ? createBlockWithNestedColumns(block, arguments) : block; + size_t tmp_res = block.columns(); + tmp_block.insert({nullptr, nested_type, ""}); + + /// Perform the requested conversion. + wrapper(tmp_block, arguments, tmp_res); + + /// Wrap the result into a nullable column. + ColumnPtr null_map; + + if (unwrap_from_nullable) + { + /// This is a conversion from a nullable to a nullable type. + /// So we just keep the null map of the input argument. + const auto & col = block.getByPosition(arguments[0]).column; + const auto & nullable_col = static_cast(*col); + null_map = nullable_col.getNullValuesByteMap(); + } + else + { + /// This is a conversion from an ordinary type to a nullable type. + /// So we create a trivial null map. + null_map = std::make_shared(block.rowsInFirstColumn(), 0); + } + + block.getByPosition(result).column = std::make_shared(tmp_block.getByPosition(tmp_res).column, null_map); + }; + } + else + return wrapper; + } + + WrapperType prepareImpl(const DataTypePtr & from_type, const IDataType * const to_type) { if (const auto to_actual_type = typeid_cast(to_type)) return createWrapper(from_type, to_actual_type); @@ -2185,6 +2232,8 @@ public: String getName() const override { return name; } + bool hasSpecialSupportForNulls() const override { return true; } + void getReturnTypeAndPrerequisitesImpl( const ColumnsWithTypeAndName & arguments, DataTypePtr & out_return_type, std::vector & out_prerequisites) override @@ -2201,9 +2250,38 @@ public: out_return_type = DataTypeFactory::instance().get(type_col->getData()); - wrapper_function = prepare(arguments.front().type, out_return_type.get()); + const auto & from_type = arguments.front().type; + const DataTypePtr * from_inner_type; + const IDataType * to_inner_type; - prepareMonotonicityInformation(arguments.front().type, out_return_type.get()); + bool unwrap_from_nullable = from_type->isNullable(); + bool wrap_into_nullable = out_return_type->isNullable(); + + if (wrap_into_nullable) + { + if (unwrap_from_nullable) + { + const auto & nullable_type = static_cast(*from_type); + from_inner_type = &nullable_type.getNestedType(); + } + else + from_inner_type = &from_type; + + const auto & nullable_type = static_cast(*out_return_type); + to_inner_type = nullable_type.getNestedType().get(); + } + else + { + if (unwrap_from_nullable) + throw Exception{"Cannot convert data from a nullable type to a non-nullable type", + ErrorCodes::CANNOT_CONVERT_TYPE}; + + from_inner_type = &from_type; + to_inner_type = out_return_type.get(); + } + + wrapper_function = prepare(*from_inner_type, to_inner_type, unwrap_from_nullable, wrap_into_nullable); + prepareMonotonicityInformation(*from_inner_type, to_inner_type); } void executeImpl(Block & block, const ColumnNumbers & arguments, const size_t result) override diff --git a/dbms/src/Storages/MergeTree/MergeTreeData.cpp b/dbms/src/Storages/MergeTree/MergeTreeData.cpp index 5bc4f7315ff..36085720aa1 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeData.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeData.cpp @@ -585,8 +585,6 @@ void MergeTreeData::checkAlter(const AlterCommands & params) std::begin(new_materialized_columns), std::end(new_materialized_columns)); createConvertExpression(nullptr, getColumnsList(), new_columns, unused_expression, unused_map, unused_bool); - - } void MergeTreeData::createConvertExpression(const DataPartPtr & part, const NamesAndTypesList & old_columns, const NamesAndTypesList & new_columns, @@ -612,10 +610,10 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name for (const NameAndTypePair & column : old_columns) { - bool is_nullable = column.type.get()->isNullable(); - if (!new_types.count(column.name)) { + bool is_nullable = column.type.get()->isNullable(); + if (!part || part->hasColumnFiles(column.name)) { /// Столбец нужно удалить. @@ -634,6 +632,8 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name if (is_nullable) { + /// It is a nullable column so remove its null map + /// and its corresponding marks file. out_rename_map[escaped_column + ".null"] = ""; out_rename_map[escaped_column + ".null_mrk"] = ""; } @@ -696,8 +696,10 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name out_rename_map[escaped_expr + ".bin"] = escaped_column + ".bin"; out_rename_map[escaped_expr + ".mrk"] = escaped_column + ".mrk"; - if (is_nullable) + if (new_type->isNullable()) { + /// The original column, whether it be nullable or not, + /// is converted to a nullable column. out_rename_map[escaped_expr + ".null"] = escaped_column + ".null"; out_rename_map[escaped_expr + ".null_mrk"] = escaped_column + ".null_mrk"; } @@ -878,20 +880,24 @@ void MergeTreeData::AlterDataPartTransaction::commit() String path = data_part->storage.full_path + data_part->name + "/"; + /// NOTE: checking that a file exists before renaming or deleting it + /// is justified by the fact that, when converting an ordinary column + /// to a nullable column, new files are created which did not exist + /// before, i.e. they do not have older versions. + /// 1) Переименуем старые файлы. for (auto it : rename_map) { String name = it.second.empty() ? it.first : it.second; - Poco::File(path + name).renameTo(path + name + ".tmp2"); + if (Poco::File{path + name}.exists()) + Poco::File(path + name).renameTo(path + name + ".tmp2"); } /// 2) Переместим на их место новые и обновим метаданные в оперативке. for (auto it : rename_map) { if (!it.second.empty()) - { Poco::File(path + it.first).renameTo(path + it.second); - } } DataPart & mutable_part = const_cast(*data_part); @@ -902,7 +908,8 @@ void MergeTreeData::AlterDataPartTransaction::commit() for (auto it : rename_map) { String name = it.second.empty() ? it.first : it.second; - Poco::File(path + name + ".tmp2").remove(); + if (Poco::File{path + name + ".tmp2"}.exists()) + Poco::File(path + name + ".tmp2").remove(); } mutable_part.size_in_bytes = MergeTreeData::DataPart::calcTotalSize(path); diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp index fa955282d0c..6ace03e684c 100644 --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace DB { @@ -549,7 +550,15 @@ MergeTreeData::DataPart::Checksums MergedColumnOnlyOutputStream::writeSuffixAndG column_stream.second->finalize(); if (sync) column_stream.second->sync(); - std::string column = escapeForFileName(column_stream.first); + + /// Get the file basename for the given column. If this is an entry + /// for a null map, first remove from its key the ".null" extension + /// that was used to make this key unique. + std::string column = column_stream.first; + if (endsWith(column, NULL_MAP_EXTENSION)) + column = column.substr(0, column.length() - strlen(NULL_MAP_EXTENSION)); + column = escapeForFileName(column); + column_stream.second->addToChecksums(checksums, column); }