ClickHouse/dbms/src/Storages/System/StorageSystemColumns.cpp

341 lines
13 KiB
C++
Raw Normal View History

#include <optional>
#include <Storages/System/StorageSystemColumns.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataStreams/NullBlockInputStream.h>
2017-11-20 05:22:54 +00:00
#include <Storages/VirtualColumnUtils.h>
#include <Parsers/queryToString.h>
#include <Parsers/ASTSelectQuery.h>
#include <Access/ContextAccess.h>
#include <Databases/IDatabase.h>
2020-01-24 16:13:46 +00:00
#include <Processors/Sources/NullSource.h>
Squashed commit of the following: commit f9b478181cd49224154cc350fb57df7121842f1c Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Mar 19 04:06:36 2016 +0300 Database engines: development [#METR-19997]. commit f7a10a67761ccfd05f3dac32d6444920cd8d4d60 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Mar 19 03:44:37 2016 +0300 Database engines: development [#METR-19997]. commit bd98a8558e98bad2bed278e5762c4e0fc66e6f38 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Mar 19 00:33:59 2016 +0300 Database engines: development [#METR-19997]. commit 19712fd884c22a4e2c2b67474086dea8f44e7c7b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Sat Mar 19 00:03:11 2016 +0300 Database engines: development [#METR-19997]. commit 50274d6df7e91fcc34aab8a8c72347daa2c6512f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 23:24:57 2016 +0300 Database engines: development [#METR-19997]. commit 4a0b99b19b34e90ef8b7be2d199f6232e36ef3f7 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 22:50:36 2016 +0300 Database engines: development [#METR-19997]. commit 44ff3ebba7a3e460a27a89f31ddf199dbea1d182 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 15:09:17 2016 +0300 Database engines: development [#METR-19997]. commit 137c31f3004cfd282473b6acb01cbe1b4ca2aadd Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 03:26:34 2016 +0300 Database engines: development [#METR-19997]. commit aa4c0496d4afe4a691164254be2bd5600542b38a Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 03:22:59 2016 +0300 Database engines: development [#METR-19997]. commit 5a94d1f0607450a2dac28a4d7df8b1393a864c23 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Fri Mar 18 01:02:40 2016 +0300 Database engines: development [#METR-19997]. 
commit 50fd5b52ea1141955a5dfba0dcb191f3289ac25b Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Thu Mar 17 23:23:40 2016 +0300 Database engines: development [#METR-19997]. commit a333d91b058e4f56dd83a6d2878c3c2bd8efc002 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Thu Mar 17 20:29:07 2016 +0300 Database engines: development [#METR-19997]. commit f81d366e7ac8348436f2698d040f8e341743a024 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Thu Mar 17 01:30:23 2016 +0300 Database engines: development [#METR-19997]. commit d0696860c9060827896214c08d147c759ea79376 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Wed Mar 16 21:55:31 2016 +0300 Database engines: development [#METR-19997]. commit 46a168c2ada140a0e95cd8d4b9d8ba9bac855d11 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Wed Mar 16 08:00:58 2016 +0300 Database engines: development [#METR-19997]. commit 20a2bad161454225fc1b5f9b919b842fbebc3231 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Wed Mar 16 06:51:10 2016 +0300 Database engines: development [#METR-19997]. commit ca0a77fcc2a8d0b276eb3743c53551ad3fe16314 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Wed Mar 16 06:02:20 2016 +0300 Reverted erroneous modification [#METR-19997]. commit 1370bdcc4594182f6ef2b146f9afabfe1c295080 Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Wed Mar 16 00:41:34 2016 +0300 Database engines: development [#METR-19997]. commit 16e72c67041cae6471509d3f0f3d4a9aa7b7dc0f Author: Alexey Milovidov <milovidov@yandex-team.ru> Date: Tue Mar 15 00:41:48 2016 +0300 Database engines: development [#METR-19997].
2016-03-19 01:18:49 +00:00
2015-04-24 12:26:23 +00:00
namespace DB
{
/// Error codes used in this file. Only declared here (`extern`); the value is defined elsewhere.
namespace ErrorCodes
{
    /// Compared against Exception::code() when locking a table structure fails.
    extern const int TABLE_IS_DROPPED;
}
/** Creates the storage with the identifier {"system", name_} and declares
  * the fixed list of columns that the table exposes.
  *
  * NOTE(review): ColumnsSource::generate() walks the column mask positionally
  * (database, table, name, type, ...). The mask is built from getSampleBlock(),
  * which presumably follows the order below - keep both places in sync.
  */
StorageSystemColumns::StorageSystemColumns(const std::string & name_)
    : IStorage({"system", name_})
{
    setColumns(ColumnsDescription(
    {
        { "database",                std::make_shared<DataTypeString>() },
        { "table",                   std::make_shared<DataTypeString>() },
        { "name",                    std::make_shared<DataTypeString>() },
        { "type",                    std::make_shared<DataTypeString>() },
        { "default_kind",            std::make_shared<DataTypeString>() },
        { "default_expression",      std::make_shared<DataTypeString>() },
        { "data_compressed_bytes",   std::make_shared<DataTypeUInt64>() },
        { "data_uncompressed_bytes", std::make_shared<DataTypeUInt64>() },
        { "marks_bytes",             std::make_shared<DataTypeUInt64>() },
        { "comment",                 std::make_shared<DataTypeString>() },
        { "is_in_partition_key",     std::make_shared<DataTypeUInt8>() },
        { "is_in_sorting_key",       std::make_shared<DataTypeUInt8>() },
        { "is_in_primary_key",       std::make_shared<DataTypeUInt8>() },
        { "is_in_sampling_key",      std::make_shared<DataTypeUInt8>() },
        { "compression_codec",       std::make_shared<DataTypeString>() },
    }));
}
namespace
2015-04-24 12:26:23 +00:00
{
using Storages = std::map<std::pair<std::string, std::string>, StoragePtr>;
}
2020-01-24 16:13:46 +00:00
class ColumnsSource : public SourceWithProgress
{
public:
2020-01-24 16:13:46 +00:00
ColumnsSource(
std::vector<UInt8> columns_mask_,
Block header_,
2019-08-03 11:02:40 +00:00
UInt64 max_block_size_,
ColumnPtr databases_,
ColumnPtr tables_,
Storages storages_,
const std::shared_ptr<const ContextAccess> & access_,
String query_id_)
2020-01-24 16:13:46 +00:00
: SourceWithProgress(header_)
, columns_mask(std::move(columns_mask_)), max_block_size(max_block_size_)
, databases(std::move(databases_)), tables(std::move(tables_)), storages(std::move(storages_))
, query_id(std::move(query_id_)), total_tables(tables->size()), access(access_)
{
}
String getName() const override { return "Columns"; }
protected:
2020-01-24 16:13:46 +00:00
Chunk generate() override
{
if (db_table_num >= total_tables)
return {};
2020-01-24 16:13:46 +00:00
MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns();
size_t rows_count = 0;
const bool check_access_for_tables = !access->isGranted(AccessType::SHOW_COLUMNS);
2020-01-24 16:20:36 +00:00
while (rows_count < max_block_size && db_table_num < total_tables)
{
const std::string database_name = (*databases)[db_table_num].get<std::string>();
const std::string table_name = (*tables)[db_table_num].get<std::string>();
++db_table_num;
ColumnsDescription columns;
2018-11-27 17:07:10 +00:00
Names cols_required_for_partition_key;
Names cols_required_for_sorting_key;
Names cols_required_for_primary_key;
Names cols_required_for_sampling;
MergeTreeData::ColumnSizeByName column_sizes;
{
StoragePtr storage = storages.at(std::make_pair(database_name, table_name));
TableStructureReadLockHolder table_lock;
try
{
2020-03-28 02:09:29 +00:00
table_lock = storage->lockStructureForShare(query_id);
}
catch (const Exception & e)
{
/** There are case when IStorage::drop was called,
* but we still own the object.
* Then table will throw exception at attempt to lock it.
* Just skip the table.
*/
if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
continue;
else
throw;
}
columns = storage->getColumns();
2018-11-27 17:07:10 +00:00
cols_required_for_partition_key = storage->getColumnsRequiredForPartitionKey();
cols_required_for_sorting_key = storage->getColumnsRequiredForSortingKey();
cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey();
cols_required_for_sampling = storage->getColumnsRequiredForSampling();
column_sizes = storage->getColumnSizes();
}
bool check_access_for_columns = check_access_for_tables && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name);
2020-01-24 16:20:36 +00:00
for (const auto & column : columns)
{
if (column.is_virtual)
continue;
if (check_access_for_columns && !access->isGranted(AccessType::SHOW_COLUMNS, database_name, table_name, column.name))
2020-01-24 16:20:36 +00:00
continue;
size_t src_index = 0;
size_t res_index = 0;
if (columns_mask[src_index++])
res_columns[res_index++]->insert(database_name);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(table_name);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(column.name);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(column.type->getName());
if (column.default_desc.expression)
{
if (columns_mask[src_index++])
res_columns[res_index++]->insert(toString(column.default_desc.kind));
if (columns_mask[src_index++])
res_columns[res_index++]->insert(queryToString(column.default_desc.expression));
}
else
{
if (columns_mask[src_index++])
res_columns[res_index++]->insertDefault();
if (columns_mask[src_index++])
res_columns[res_index++]->insertDefault();
}
{
const auto it = column_sizes.find(column.name);
if (it == std::end(column_sizes))
{
if (columns_mask[src_index++])
res_columns[res_index++]->insertDefault();
if (columns_mask[src_index++])
res_columns[res_index++]->insertDefault();
if (columns_mask[src_index++])
res_columns[res_index++]->insertDefault();
}
else
{
if (columns_mask[src_index++])
res_columns[res_index++]->insert(it->second.data_compressed);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(it->second.data_uncompressed);
if (columns_mask[src_index++])
res_columns[res_index++]->insert(it->second.marks);
}
}
if (columns_mask[src_index++])
res_columns[res_index++]->insert(column.comment);
{
2018-11-19 17:21:34 +00:00
auto find_in_vector = [&key = column.name](const Names& names)
{
return std::find(names.cbegin(), names.cend(), key) != names.end();
};
if (columns_mask[src_index++])
2018-11-27 17:07:10 +00:00
res_columns[res_index++]->insert(find_in_vector(cols_required_for_partition_key));
if (columns_mask[src_index++])
2018-11-27 17:07:10 +00:00
res_columns[res_index++]->insert(find_in_vector(cols_required_for_sorting_key));
if (columns_mask[src_index++])
2018-11-27 17:07:10 +00:00
res_columns[res_index++]->insert(find_in_vector(cols_required_for_primary_key));
if (columns_mask[src_index++])
2018-11-27 17:07:10 +00:00
res_columns[res_index++]->insert(find_in_vector(cols_required_for_sampling));
}
if (columns_mask[src_index++])
2018-12-21 14:40:20 +00:00
{
if (column.codec)
res_columns[res_index++]->insert("CODEC(" + column.codec->getCodecDesc() + ")");
2018-12-21 14:40:20 +00:00
else
res_columns[res_index++]->insertDefault();
2018-12-21 14:40:20 +00:00
}
++rows_count;
}
}
2020-01-24 16:13:46 +00:00
return Chunk(std::move(res_columns), rows_count);
}
private:
std::vector<UInt8> columns_mask;
2019-02-10 16:55:12 +00:00
UInt64 max_block_size;
ColumnPtr databases;
ColumnPtr tables;
Storages storages;
String query_id;
size_t db_table_num = 0;
size_t total_tables;
std::shared_ptr<const ContextAccess> access;
};
/** Builds the pipes that produce the contents of the table.
  *
  * The list of (database, table) pairs is assembled in a helper block and filtered
  * with the query twice: first by `database` alone, then by `database` and `table`.
  * If nothing is left after either filter, a single NullSource is returned; otherwise
  * a single ColumnsSource over the remaining tables.
  */
Pipes StorageSystemColumns::read(
    const Names & column_names,
    const SelectQueryInfo & query_info,
    const Context & context,
    QueryProcessingStage::Enum /*processed_stage*/,
    const size_t max_block_size,
    const unsigned /*num_streams*/)
{
    check(column_names);

    /// Create a mask of what columns are needed in the result.
    NameSet names_set(column_names.begin(), column_names.end());

    Block sample_block = getSampleBlock();
    Block header;

    std::vector<UInt8> columns_mask(sample_block.columns());
    for (size_t i = 0, size = columns_mask.size(); i < size; ++i)
    {
        if (names_set.count(sample_block.getByPosition(i).name))
        {
            columns_mask[i] = 1;
            header.insert(sample_block.getByPosition(i));
        }
    }

    Block block_to_filter;
    Storages storages;
    Pipes pipes;

    {
        Databases databases = DatabaseCatalog::instance().getDatabases();

        /// Add `database` column.
        MutableColumnPtr database_column_mut = ColumnString::create();
        for (const auto & database : databases)
        {
            /// We are skipping "Lazy" database because we cannot afford initialization of all its tables.
            /// This should be documented.
            if (database.second->getEngineName() != "Lazy")
                database_column_mut->insert(database.first);
        }

        block_to_filter.insert(ColumnWithTypeAndName(std::move(database_column_mut), std::make_shared<DataTypeString>(), "database"));

        /// Filter block with `database` column.
        VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context);

        if (!block_to_filter.rows())
        {
            pipes.emplace_back(std::make_shared<NullSource>(header));
            return pipes;
        }

        ColumnPtr & database_column = block_to_filter.getByName("database").column;
        size_t rows = database_column->size();

        /// Add `table` column.
        MutableColumnPtr table_column_mut = ColumnString::create();

        /// offsets[i] is the running total of tables seen in databases 0..i.
        IColumn::Offsets offsets(rows);
        for (size_t i = 0; i < rows; ++i)
        {
            const std::string database_name = (*database_column)[i].get<std::string>();
            const DatabasePtr database = databases.at(database_name);
            offsets[i] = i ? offsets[i - 1] : 0;

            for (auto iterator = database->getTablesWithDictionaryTablesIterator(context); iterator->isValid(); iterator->next())
            {
                const String & table_name = iterator->name();
                storages.emplace(std::piecewise_construct,
                    std::forward_as_tuple(database_name, table_name),
                    std::forward_as_tuple(iterator->table()));
                table_column_mut->insert(table_name);
                ++offsets[i];
            }
        }

        /// Presumably repeats each database name once per table found in it,
        /// so that the `database` and `table` columns have the same number of rows.
        database_column = database_column->replicate(offsets);
        block_to_filter.insert(ColumnWithTypeAndName(std::move(table_column_mut), std::make_shared<DataTypeString>(), "table"));
    }

    /// Filter block with `database` and `table` columns.
    VirtualColumnUtils::filterBlockWithQuery(query_info.query, block_to_filter, context);

    if (!block_to_filter.rows())
    {
        pipes.emplace_back(std::make_shared<NullSource>(header));
        return pipes;
    }

    ColumnPtr filtered_database_column = block_to_filter.getByName("database").column;
    ColumnPtr filtered_table_column = block_to_filter.getByName("table").column;

    pipes.emplace_back(std::make_shared<ColumnsSource>(
        std::move(columns_mask), std::move(header), max_block_size,
        std::move(filtered_database_column), std::move(filtered_table_column), std::move(storages),
        context.getAccess(), context.getCurrentQueryId()));

    return pipes;
}
}