Allow including subcolumns in the result of DESCRIBE query

This commit is contained in:
Anton Popov 2021-09-11 23:24:01 +03:00
parent 43102e8427
commit 02a7edf902
7 changed files with 121 additions and 23 deletions

View File

@ -466,6 +466,7 @@ class IColumn;
M(Bool, force_optimize_projection, false, "If projection optimization is enabled, SELECT queries need to use projection", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
M(Bool, insert_null_as_default, true, "Insert DEFAULT values instead of NULL in INSERT SELECT (UNION ALL)", 0) \
M(Bool, describe_include_subcolumns, false, "If true, subcolumns of all table columns will be included into result of DESCRIBE query", 0) \
\
M(Bool, optimize_rewrite_sum_if_to_count_if, true, "Rewrite sumIf() and sum(if()) function countIf() function when logically equivalent", 0) \
M(UInt64, insert_shard_id, 0, "If non zero, when insert into a distributed table, the data will be inserted into the shard `insert_shard_id` synchronously. Possible values range from 1 to `shards_number` of corresponding distributed table", 0) \

View File

@ -87,23 +87,38 @@ ColumnPtr IDataType::getSubcolumn(const String & subcolumn_name, const IColumn &
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "There is no subcolumn {} in type {}", subcolumn_name, getName());
}
Names IDataType::getSubcolumnNames() const
/// Enumerates the streams of the default serialization and invokes `callback`
/// for every distinct subcolumn name that resolves to a type.
/// The callback arguments are: the subcolumn name, the subcolumn type and
/// the substream path that was being enumerated when the name was first seen.
void IDataType::forEachSubcolumn(const SubcolumnCallback & callback) const
{
NameSet res;
getDefaultSerialization()->enumerateStreams([&res, this](const ISerialization::SubstreamPath & substream_path)
NameSet set;
getDefaultSerialization()->enumerateStreams([&, this](const ISerialization::SubstreamPath & substream_path)
{
ISerialization::SubstreamPath new_path;
/// Iterate over path to try to get intermediate subcolumns for complex nested types.
for (const auto & elem : substream_path)
{
new_path.push_back(elem);
auto subcolumn_name = ISerialization::getSubcolumnNameForStream(new_path);
if (!subcolumn_name.empty() && tryGetSubcolumnType(subcolumn_name))
res.insert(subcolumn_name);
auto name = ISerialization::getSubcolumnNameForStream(new_path);
auto type = tryGetSubcolumnType(name);
/// Subcolumn names may repeat among several substream paths.
if (!name.empty() && type && !set.count(name))
{
/// NOTE(review): the whole `substream_path` is handed to the callback,
/// not the prefix accumulated in `new_path` - confirm this is intended.
callback(name, type, substream_path);
set.insert(name);
}
}
});
}
return Names(std::make_move_iterator(res.begin()), std::make_move_iterator(res.end()));
/// Collects the names of all subcolumns reported by forEachSubcolumn(),
/// in the order in which the callback is invoked.
Names IDataType::getSubcolumnNames() const
{
    Names subcolumn_names;

    /// Only the name is needed here; the type and the substream path are ignored.
    auto collect_name = [&subcolumn_names](const auto & subcolumn_name, const auto & /* type */, const auto & /* path */)
    {
        subcolumn_names.push_back(subcolumn_name);
    };

    forEachSubcolumn(collect_name);
    return subcolumn_names;
}
void IDataType::insertDefaultInto(IColumn & column) const

View File

@ -80,6 +80,9 @@ public:
virtual DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const;
DataTypePtr getSubcolumnType(const String & subcolumn_name) const;
virtual ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const;
using SubcolumnCallback = std::function<void(const String &, const DataTypePtr &, const ISerialization::SubstreamPath &)>;
void forEachSubcolumn(const SubcolumnCallback & callback) const;
Names getSubcolumnNames() const;
/// Returns default serialization of data type.

View File

@ -1,5 +1,4 @@
#include <Storages/IStorage.h>
#include <DataStreams/OneBlockInputStream.h>
#include <DataStreams/BlockIO.h>
#include <DataTypes/DataTypeString.h>
#include <Parsers/queryToString.h>
@ -15,20 +14,14 @@
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/TablePropertiesQueriesASTs.h>
#include <DataTypes/NestedUtils.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
namespace DB
{
/// Runs the DESCRIBE query: the stream produced by executeImpl()
/// becomes the input stream of the returned BlockIO.
BlockIO InterpreterDescribeQuery::execute()
{
    BlockIO block_io;
    block_io.in = executeImpl();
    return block_io;
}
Block InterpreterDescribeQuery::getSampleBlock()
Block InterpreterDescribeQuery::getSampleBlock(bool include_subcolumns)
{
Block block;
@ -56,11 +49,19 @@ Block InterpreterDescribeQuery::getSampleBlock()
col.name = "ttl_expression";
block.insert(col);
if (include_subcolumns)
{
col.name = "is_subcolumn";
col.type = std::make_shared<DataTypeUInt8>();
col.column = col.type->createColumn();
block.insert(col);
}
return block;
}
BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
BlockIO InterpreterDescribeQuery::execute()
{
ColumnsDescription columns;
@ -87,7 +88,8 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
columns = metadata_snapshot->getColumns();
}
Block sample_block = getSampleBlock();
bool include_subcolumns = getContext()->getSettingsRef().describe_include_subcolumns;
Block sample_block = getSampleBlock(include_subcolumns);
MutableColumns res_columns = sample_block.cloneEmptyColumns();
for (const auto & column : columns)
@ -117,9 +119,47 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl()
res_columns[6]->insert(queryToString(column.ttl));
else
res_columns[6]->insertDefault();
if (include_subcolumns)
res_columns[7]->insertDefault();
}
return std::make_shared<OneBlockInputStream>(sample_block.cloneWithColumns(std::move(res_columns)));
if (include_subcolumns)
{
for (const auto & column : columns)
{
column.type->forEachSubcolumn([&](const auto & name, const auto & type, const auto & path)
{
res_columns[0]->insert(Nested::concatenateName(column.name, name));
res_columns[1]->insert(type->getName());
/// It's not trivial to calculate default expression for subcolumn.
/// So, leave it empty.
res_columns[2]->insertDefault();
res_columns[3]->insertDefault();
res_columns[4]->insert(column.comment);
if (column.codec && ISerialization::isSpecialCompressionAllowed(path))
res_columns[5]->insert(queryToString(column.codec->as<ASTFunction>()->arguments));
else
res_columns[5]->insertDefault();
if (column.ttl)
res_columns[6]->insert(queryToString(column.ttl));
else
res_columns[6]->insertDefault();
res_columns[7]->insert(1u);
});
}
}
BlockIO res;
size_t num_rows = res_columns[0]->size();
auto source = std::make_shared<SourceFromSingleChunk>(sample_block, Chunk(std::move(res_columns), num_rows));
res.pipeline.init(Pipe(std::move(source)));
return res;
}
}

View File

@ -16,12 +16,10 @@ public:
BlockIO execute() override;
static Block getSampleBlock();
static Block getSampleBlock(bool include_subcolumns);
private:
ASTPtr query_ptr;
BlockInputStreamPtr executeImpl();
};

View File

@ -0,0 +1,23 @@
┌─name─┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┐
│ d │ Date │ │ │ │ │ │
│ n │ Nullable(String) │ │ │ It is a nullable column │ │ │
│ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │
│ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │
│ t │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │ │ │ │ ZSTD(1) │ │
└──────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┘
┌─name───────┬─type────────────────────────────────────────────────┬─default_type─┬─default_expression─┬─comment─────────────────┬─codec_expression─┬─ttl_expression───────┬─is_subcolumn─┐
│ d │ Date │ │ │ │ │ │ 0 │
│ n │ Nullable(String) │ │ │ It is a nullable column │ │ │ 0 │
│ arr1 │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 0 │
│ arr2 │ Array(Array(String)) │ │ │ │ │ d + toIntervalDay(1) │ 0 │
│ t │ Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) │ │ │ │ ZSTD(1) │ │ 0 │
│ n.null │ UInt8 │ │ │ It is a nullable column │ │ │ 1 │
│ arr1.size0 │ UInt64 │ │ │ │ │ │ 1 │
│ arr2.size0 │ UInt64 │ │ │ │ │ d + toIntervalDay(1) │ 1 │
│ arr2.size1 │ Array(UInt64) │ │ │ │ │ d + toIntervalDay(1) │ 1 │
│ t.s │ String │ │ │ │ ZSTD(1) │ │ 1 │
│ t.a │ Array(Tuple(a UInt32, b UInt32)) │ │ │ │ │ │ 1 │
│ t.a.size0 │ UInt64 │ │ │ │ │ │ 1 │
│ t.a.a │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │
│ t.a.b │ Array(UInt32) │ │ │ │ ZSTD(1) │ │ 1 │
└────────────┴─────────────────────────────────────────────────────┴──────────────┴────────────────────┴─────────────────────────┴──────────────────┴──────────────────────┴──────────────┘

View File

@ -0,0 +1,18 @@
-- Test for the describe_include_subcolumns setting: DESCRIBE is run once with
-- the default settings and once with describe_include_subcolumns = 1.
DROP TABLE IF EXISTS t_desc_subcolumns;
-- The columns cover a plain type, Nullable, Array, nested Array and a named Tuple
-- containing an Array of Tuples; COMMENT, CODEC and TTL are attached to different columns.
CREATE TABLE t_desc_subcolumns
(
d Date,
n Nullable(String) COMMENT 'It is a nullable column',
arr1 Array(UInt32) CODEC(ZSTD),
arr2 Array(Array(String)) TTL d + INTERVAL 1 DAY,
t Tuple(s String, a Array(Tuple(a UInt32, b UInt32))) CODEC(ZSTD)
)
ENGINE = MergeTree ORDER BY d;
-- Default settings: the setting is off by default.
DESCRIBE TABLE t_desc_subcolumns FORMAT PrettyCompactNoEscapes;
-- Setting enabled for this query only via the SETTINGS clause.
DESCRIBE TABLE t_desc_subcolumns FORMAT PrettyCompactNoEscapes
SETTINGS describe_include_subcolumns = 1;
DROP TABLE t_desc_subcolumns;