From 02a7edf90211b2757e6bc49e6db81860fd1dcb8d Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Sat, 11 Sep 2021 23:24:01 +0300 Subject: [PATCH] allow to include subcolumns in describe query --- src/Core/Settings.h | 1 + src/DataTypes/IDataType.cpp | 29 ++++++-- src/DataTypes/IDataType.h | 3 + src/Interpreters/InterpreterDescribeQuery.cpp | 66 +++++++++++++++---- src/Interpreters/InterpreterDescribeQuery.h | 4 +- ...2026_describe_include_subcolumns.reference | 23 +++++++ .../02026_describe_include_subcolumns.sql | 18 +++++ 7 files changed, 121 insertions(+), 23 deletions(-) create mode 100644 tests/queries/0_stateless/02026_describe_include_subcolumns.reference create mode 100644 tests/queries/0_stateless/02026_describe_include_subcolumns.sql diff --git a/src/Core/Settings.h b/src/Core/Settings.h index f331ad2d7d3..9b29b0b1ee6 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -466,6 +466,7 @@ class IColumn; M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \ M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \ + M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \ \ M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \ M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. 
Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \ diff --git a/src/DataTypes/IDataType.cpp b/src/DataTypes/IDataType.cpp index 4b727a49861..c69f8ce5e8a 100644 --- a/src/DataTypes/IDataType.cpp +++ b/src/DataTypes/IDataType.cpp @@ -87,23 +87,38 @@ ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn & throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName()); } -Names IDataType::getSubcolumnNames() const +void IDataType::forEachSubcolumn(const SubcolumnCallback & callback) const { - NameSet res; - getDefaultSerialization()->enumerateStreams([&res, this](const ISerialization::SubstreamPath & substream_path) + NameSet set; + getDefaultSerialization()->enumerateStreams([&, this](const ISerialization::SubstreamPath & substream_path) { ISerialization::SubstreamPath new_path; /// Iterate over path to try to get intermediate subcolumns for complex nested types. for (const auto & elem : substream_path) { new_path.push_back(elem); - auto subcolumn_name = ISerialization::getSubcolumnNameForStream(new_path); - if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name)) - res.insert(subcolumn_name); + auto name = ISerialization::getSubcolumnNameForStream(new_path); + auto type = tryGetSubcolumnType(name); + + /// Subcolumn names may repeat among several substream paths. 
+ if (!name.empty() && type && !set.count(name)) + { + callback(name, type, substream_path); + set.insert(name); + } } }); +} - return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end())); +Names IDataType::getSubcolumnNames() const +{ + Names res; + forEachSubcolumn([&](const auto & name, const auto &, const auto &) + { + res.push_back(name); + }); + + return res; } void IDataType::insertDefaultInto(IColumn & column) const diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 9d943769c0a..017534c4743 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -80,6 +80,9 @@ public: virtual DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const; DataTypePtr getSubcolumnType(const String & subcolumn_name) const; virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const; + + using SubcolumnCallback = std::function; + void forEachSubcolumn(const SubcolumnCallback & callback) const; Names getSubcolumnNames() const; /// Returns default serialization of data type. 
diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 705e52da72c..5fd9c2539eb 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -1,5 +1,4 @@ #include -#include #include #include #include @@ -15,20 +14,14 @@ #include #include #include +#include +#include namespace DB { -BlockIO InterpreterDescribeQuery::execute() -{ - BlockIO res; - res.in = executeImpl(); - return res; -} - - -Block InterpreterDescribeQuery::getSampleBlock() +Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns) { Block block; @@ -56,11 +49,19 @@ Block InterpreterDescribeQuery::getSampleBlock() col.name = "ttl_expression"; block.insert(col); + if (include_subcolumns) + { + col.name = "is_subcolumn"; + col.type = std::make_shared(); + col.column = col.type->createColumn(); + block.insert(col); + } + return block; } -BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() +BlockIO InterpreterDescribeQuery::execute() { ColumnsDescription columns; @@ -87,7 +88,8 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() columns = metadata_snapshot->getColumns(); } - Block sample_block = getSampleBlock(); + bool include_subcolumns = getContext()->getSettingsRef().describe_include_subcolumns; + Block sample_block = getSampleBlock(include_subcolumns); MutableColumns res_columns = sample_block.cloneEmptyColumns(); for (const auto & column : columns) @@ -117,9 +119,47 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() res_columns[6]->insert(queryToString(column.ttl)); else res_columns[6]->insertDefault(); + + if (include_subcolumns) + res_columns[7]->insertDefault(); } - return std::make_shared(sample_block.cloneWithColumns(std::move(res_columns))); + if (include_subcolumns) + { + for (const auto & column : columns) + { + column.type->forEachSubcolumn([&](const auto & name, const auto & type, const auto & path) + { + 
res_columns[0]->insert(Nested::concatenateName(column.name, name)); + res_columns[1]->insert(type->getName()); + + /// It's not trivial to calculate default expression for subcolumn. + /// So, leave it empty. + res_columns[2]->insertDefault(); + res_columns[3]->insertDefault(); + res_columns[4]->insert(column.comment); + + if (column.codec && ISerialization::isSpecialCompressionAllowed(path)) + res_columns[5]->insert(queryToString(column.codec->as()->arguments)); + else + res_columns[5]->insertDefault(); + + if (column.ttl) + res_columns[6]->insert(queryToString(column.ttl)); + else + res_columns[6]->insertDefault(); + + res_columns[7]->insert(1u); + }); + } + } + + BlockIO res; + size_t num_rows = res_columns[0]->size(); + auto source = std::make_shared(sample_block, Chunk(std::move(res_columns), num_rows)); + res.pipeline.init(Pipe(std::move(source))); + + return res; } } diff --git a/src/Interpreters/InterpreterDescribeQuery.h b/src/Interpreters/InterpreterDescribeQuery.h index 627d1ca0353..c0b380d57ab 100644 --- a/src/Interpreters/InterpreterDescribeQuery.h +++ b/src/Interpreters/InterpreterDescribeQuery.h @@ -16,12 +16,10 @@ public: BlockIO execute() override; - static Block getSampleBlock(); + static Block getSampleBlock(bool include_subcolumns); private: ASTPtr query_ptr; - - BlockInputStreamPtr executeImpl(); }; diff --git a/tests/queries/0_stateless/02026_describe_include_subcolumns.reference b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference new file mode 100644 index 00000000000..ba792ea9f74 --- /dev/null +++ b/tests/queries/0_stateless/02026_describe_include_subcolumns.reference @@ -0,0 +1,23 @@ +┌─name─┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐ +│ d │ Date │ │ │ │ │ │ +│ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ +│ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ +│ arr2 │ Array(Array(String)) │ │ │ │ │ d + 
toIntervalDay(1) │ +│ t │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │ │ │ │ ZSTD(1) │ │ +└──────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘ +┌─name───────┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐ +│ d │ Date │ │ │ │ │ │ 0 │ +│ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ 0 │ +│ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 0 │ +│ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ 0 │ +│ t │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │ │ │ │ ZSTD(1) │ │ 0 │ +│ n.null │ UInt8 │ │ │ It is a nullable column │ │ │ 1 │ +│ arr1.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ arr2.size0 │ UInt64 │ │ │ │ │ d + toIntervalDay(1) │ 1 │ +│ arr2.size1 │ Array(UInt64) │ │ │ │ │ d + toIntervalDay(1) │ 1 │ +│ t.s │ String │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.a │ Array(Tuple(a UInt32, b UInt32)) │ │ │ │ │ │ 1 │ +│ t.a.size0 │ UInt64 │ │ │ │ │ │ 1 │ +│ t.a.a │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ +│ t.a.b │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │ +└────────────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘ diff --git a/tests/queries/0_stateless/02026_describe_include_subcolumns.sql b/tests/queries/0_stateless/02026_describe_include_subcolumns.sql new file mode 100644 index 00000000000..1a58913425c --- /dev/null +++ b/tests/queries/0_stateless/02026_describe_include_subcolumns.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS t_desc_subcolumns; + +CREATE TABLE t_desc_subcolumns +( + d Date, + n Nullable(String) COMMENT 'It is a nullable column', + arr1 Array(UInt32) CODEC(ZSTD), + arr2 Array(Array(String)) TTL d + INTERVAL 1 DAY, + t Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) CODEC(ZSTD) +) +ENGINE = 
MergeTree ORDER BY d; + +DESCRIBE TABLE t_desc_subcolumns FORMAT PrettyCompactNoEscapes; + +DESCRIBE TABLE t_desc_subcolumns FORMAT PrettyCompactNoEscapes +SETTINGS describe_include_subcolumns = 1; + +DROP TABLE t_desc_subcolumns;