Introduce system.data_skipping_indices table

This commit is contained in:
Dmitry Novik 2021-06-25 02:05:45 +03:00
parent f7571a2d3b
commit 960d0de73d
5 changed files with 263 additions and 0 deletions

View File

@ -0,0 +1,190 @@
#include <Storages/System/StorageSystemDataSkippingIndices.h>
#include <Access/ContextAccess.h>
#include <Columns/ColumnString.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Databases/IDatabase.h>
#include <Storages/VirtualColumnUtils.h>
#include <Interpreters/Context.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Parsers/queryToString.h>
namespace DB
{
/// Registers the fixed schema of the table: one row describes one data skipping index.
StorageSystemDataSkippingIndices::StorageSystemDataSkippingIndices(const StorageID & table_id_)
    : IStorage(table_id_)
{
    /// Column order here is the order in which the source fills values; keep both in sync.
    NamesAndTypesList schema
    {
        { "database", std::make_shared<DataTypeString>() },
        { "table", std::make_shared<DataTypeString>() },
        { "name", std::make_shared<DataTypeString>() },
        { "type", std::make_shared<DataTypeString>() },
        { "expr", std::make_shared<DataTypeString>() },
        { "granularity", std::make_shared<DataTypeUInt64>() },
    };

    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(ColumnsDescription(std::move(schema)));
    setInMemoryMetadata(storage_metadata);
}
/// Source that emits one row per data skipping (secondary) index.
/// It walks the given databases and their tables incrementally: the current database index and
/// the tables iterator are kept as members, so each generate() call resumes where the previous one stopped.
class DataSkippingIndicesSource : public SourceWithProgress
{
public:
    /// columns_mask_   - one flag per column of the full schema (database, table, name, type, expr, granularity);
    ///                   a non-zero flag means a value for that column is written to the output.
    /// header          - structure of the produced chunks.
    /// max_block_size_ - soft limit of rows per chunk; it is checked between tables,
    ///                   so a table with several indices may push a chunk over the limit.
    /// databases_      - column with names of the databases to scan.
    /// context_        - query context; a copy of it is stored in the source.
    DataSkippingIndicesSource(
        std::vector<UInt8> columns_mask_,
        Block header,
        UInt64 max_block_size_,
        ColumnPtr databases_,
        ContextPtr context_)
        : SourceWithProgress(std::move(header))
        , column_mask(std::move(columns_mask_))
        , max_block_size(max_block_size_)
        , databases(std::move(databases_))
        , context(Context::createCopy(context_))
    {}

    String getName() const override { return "DataSkippingIndices"; }

protected:
    /// Produces the next chunk, or an empty Chunk once every database has been processed.
    Chunk generate() override
    {
        if (database_idx >= databases->size())
            return {};

        MutableColumns res_columns = getPort().getHeader().cloneEmptyColumns();

        const auto access = context->getAccess();
        /// Finer-grained checks are only needed when SHOW TABLES is not granted globally.
        const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES);

        size_t rows_count = 0;
        while (rows_count < max_block_size)
        {
            /// The iterator of the current database is exhausted: move on to the next database.
            if (tables_it && !tables_it->isValid())
                ++database_idx;

            /// Find the next database that can still be obtained from the catalog
            /// (tryGetDatabase may return null, such databases are skipped).
            while (database_idx < databases->size() && (!tables_it || !tables_it->isValid()))
            {
                database_name = databases->getDataAt(database_idx).toString();
                database = DatabaseCatalog::instance().tryGetDatabase(database_name);

                if (database)
                    break;
                ++database_idx;
            }

            if (database_idx >= databases->size())
                break;

            if (!tables_it || !tables_it->isValid())
                tables_it = database->getTablesIterator(context);

            const bool check_access_for_tables
                = check_access_for_databases && !access->isGranted(AccessType::SHOW_TABLES, database_name);

            for (; rows_count < max_block_size && tables_it->isValid(); tables_it->next())
            {
                const auto table_name = tables_it->name();
                if (check_access_for_tables && !access->isGranted(AccessType::SHOW_TABLES, database_name, table_name))
                    continue;

                const auto table = tables_it->table();
                if (!table)
                    continue;

                StorageMetadataPtr metadata_snapshot = table->getInMemoryMetadataPtr();
                if (!metadata_snapshot)
                    continue;

                /// Bind by reference: the description is kept alive by metadata_snapshot,
                /// so there is no need to copy all index descriptions for every table.
                const auto & indices = metadata_snapshot->getSecondaryIndices();

                for (const auto & index : indices)
                {
                    ++rows_count;

                    /// src_index walks over all columns of the schema,
                    /// res_index walks only over the columns present in the output.
                    size_t src_index = 0;
                    size_t res_index = 0;

                    // 'database' column
                    if (column_mask[src_index++])
                        res_columns[res_index++]->insert(database_name);
                    // 'table' column
                    if (column_mask[src_index++])
                        res_columns[res_index++]->insert(table_name);
                    // 'name' column
                    if (column_mask[src_index++])
                        res_columns[res_index++]->insert(index.name);
                    // 'type' column
                    if (column_mask[src_index++])
                        res_columns[res_index++]->insert(index.type);
                    // 'expr' column
                    if (column_mask[src_index++])
                    {
                        /// An index without an expression AST gets the column's default value.
                        const auto & expression = index.expression_list_ast;
                        if (expression)
                            res_columns[res_index++]->insert(queryToString(expression));
                        else
                            res_columns[res_index++]->insertDefault();
                    }
                    // 'granularity' column
                    if (column_mask[src_index++])
                        res_columns[res_index++]->insert(index.granularity);
                }
            }
        }

        return Chunk(std::move(res_columns), rows_count);
    }

private:
    std::vector<UInt8> column_mask;
    UInt64 max_block_size;
    ColumnPtr databases;
    ContextPtr context;

    /// Traversal state preserved between generate() calls.
    size_t database_idx = 0;
    DatabasePtr database;
    std::string database_name;
    DatabaseTablesIteratorPtr tables_it;
};
/// Builds a single-source pipe: determines which schema columns were requested, collects the
/// candidate database names, filters them with the query, and hands everything to the source.
Pipe StorageSystemDataSkippingIndices::read(
    const Names & column_names,
    const StorageMetadataPtr & metadata_snapshot,
    SelectQueryInfo & query_info,
    ContextPtr context,
    QueryProcessingStage::Enum /* processed_stage */,
    size_t max_block_size,
    unsigned int /* num_streams */)
{
    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());

    /// Output header plus a mask telling, for every schema column, whether it was requested.
    const NameSet requested_names(column_names.begin(), column_names.end());
    const Block full_sample = metadata_snapshot->getSampleBlock();

    Block header;
    std::vector<UInt8> columns_mask(full_sample.columns());
    for (size_t pos = 0; pos < columns_mask.size(); ++pos)
    {
        const auto & sample_column = full_sample.getByPosition(pos);
        if (requested_names.find(sample_column.name) == requested_names.end())
            continue;

        columns_mask[pos] = 1;
        header.insert(sample_column);
    }

    /// Names of all databases except the temporary one and those with the Lazy engine.
    MutableColumnPtr database_names = ColumnString::create();
    const auto all_databases = DatabaseCatalog::instance().getDatabases();
    for (const auto & [name, db] : all_databases)
    {
        if (name != DatabaseCatalog::TEMPORARY_DATABASE && db->getEngineName() != "Lazy")
            database_names->insert(name);
    }

    /// Let the query's conditions on 'database' narrow the list before any table is touched.
    Block databases_block { ColumnWithTypeAndName(std::move(database_names), std::make_shared<DataTypeString>(), "database") };
    VirtualColumnUtils::filterBlockWithQuery(query_info.query, databases_block, context);

    auto source = std::make_shared<DataSkippingIndicesSource>(
        std::move(columns_mask),
        std::move(header),
        max_block_size,
        std::move(databases_block.getByPosition(0).column),
        context);

    return Pipe(std::move(source));
}
}

View File

@ -0,0 +1,26 @@
#pragma once
#include <common/shared_ptr_helper.h>
#include <Storages/System/IStorageSystemOneBlock.h>
namespace DB
{
/// Storage behind the system table attached as "data_skipping_indices".
/// Instances are created through shared_ptr_helper, hence the protected constructor.
class StorageSystemDataSkippingIndices : public shared_ptr_helper<StorageSystemDataSkippingIndices>, public IStorage
{
    friend struct shared_ptr_helper<StorageSystemDataSkippingIndices>;

public:
    std::string getName() const override { return "SystemDataSkippingIndices"; }

    /// Returns a pipe producing the requested columns of the table.
    Pipe read(
        const Names & column_names,
        const StorageMetadataPtr & metadata_snapshot,
        SelectQueryInfo & query_info,
        ContextPtr context,
        QueryProcessingStage::Enum processed_stage,
        size_t max_block_size,
        unsigned num_streams) override;

protected:
    /// explicit: a StorageID must never be silently converted into a storage object.
    explicit StorageSystemDataSkippingIndices(const StorageID & table_id_);
};
}

View File

@ -9,6 +9,7 @@
#include <Storages/System/StorageSystemClusters.h>
#include <Storages/System/StorageSystemColumns.h>
#include <Storages/System/StorageSystemDatabases.h>
#include <Storages/System/StorageSystemDataSkippingIndices.h>
#include <Storages/System/StorageSystemDataTypeFamilies.h>
#include <Storages/System/StorageSystemDetachedParts.h>
#include <Storages/System/StorageSystemDictionaries.h>
@ -115,6 +116,7 @@ void attachSystemTablesLocal(IDatabase & system_database)
attach<StorageSystemUserDirectories>(system_database, "user_directories");
attach<StorageSystemPrivileges>(system_database, "privileges");
attach<StorageSystemErrors>(system_database, "errors");
attach<StorageSystemDataSkippingIndices>(system_database, "data_skipping_indices");
#if !defined(ARCADIA_BUILD)
attach<StorageSystemLicenses>(system_database, "licenses");
attach<StorageSystemTimeZones>(system_database, "time_zones");

View File

@ -0,0 +1,10 @@
default data_01917 d1_idx minmax d1 1
default data_01917 d1_null_idx minmax assumeNotNull(d1_null) 1
default data_01917_2 memory set frequency * length(name) 5
default data_01917_2 sample_index1 minmax length(name), name 4
default data_01917_2 sample_index2 ngrambf_v1 lower(name), name 4
2
3
d1_idx
d1_null_idx
sample_index1

View File

@ -0,0 +1,35 @@
-- Test for system.data_skipping_indices: creates two tables with skipping indices
-- and checks what the system table reports about them.

-- Remove leftovers of the test tables, if any.
DROP TABLE IF EXISTS data_01917;
DROP TABLE IF EXISTS data_01917_2;
-- Table with two minmax indices: one over a plain column, one over an expression.
CREATE TABLE data_01917
(
key Int,
d1 Int,
d1_null Nullable(Int),
INDEX d1_idx d1 TYPE minmax GRANULARITY 1,
INDEX d1_null_idx assumeNotNull(d1_null) TYPE minmax GRANULARITY 1
)
Engine=MergeTree()
ORDER BY key;
-- Table with three indices of different types (set, minmax, ngrambf_v1),
-- different granularities and multi-element expressions.
CREATE TABLE data_01917_2
(
name String,
frequency UInt64,
INDEX memory (frequency * length(name)) TYPE set(1000) GRANULARITY 5,
INDEX sample_index1 (length(name), name) TYPE minmax GRANULARITY 4,
INDEX sample_index2 (lower(name), name) TYPE ngrambf_v1(3, 256, 2, 0) GRANULARITY 4
)
Engine=MergeTree()
ORDER BY name;
-- All columns of every index row.
-- NOTE(review): there is no filter on database and no ORDER BY here, so the output presumably
-- depends on which other tables exist and on iteration order; confirm the reference stays stable.
SELECT * FROM system.data_skipping_indices;
-- Number of indices per table.
SELECT count(*) FROM system.data_skipping_indices WHERE table = 'data_01917';
SELECT count(*) FROM system.data_skipping_indices WHERE table = 'data_01917_2';
-- Filtering by a column other than database/table.
SELECT name FROM system.data_skipping_indices WHERE type = 'minmax';
-- Cleanup.
DROP TABLE data_01917;
DROP TABLE data_01917_2;