Merge pull request #60205 from CurtizJ/refactor-virtual-columns

Refactoring of virtual columns
Anton Popov 2024-03-07 13:38:35 +01:00 committed by GitHub
commit c103b0084b
162 changed files with 1456 additions and 1842 deletions
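At its core, the refactoring replaces the per-storage getVirtuals() override (which returned a fresh NamesAndTypesList on every call) with a single VirtualColumnsDescription owned by IStorage: each storage builds the description once, registers it with setVirtuals(), and callers read it back through getVirtualsPtr() or getVirtualsList(). A minimal sketch of the new surface, using only calls that appear in the diffs below (the surrounding variables are illustrative):

// Producer side, typically in a storage constructor (see StorageFileLog below):
setVirtuals(createVirtuals(filelog_settings->handle_error_mode));

// Consumer side: pin the shared description and iterate it...
auto virtuals = table->getVirtualsPtr();
Names names;
for (const auto & column : *virtuals)
    names.push_back(column.name);

// ...or take a plain NamesAndTypesList when that is all the caller needs.
NamesAndTypesList list = table->getVirtualsList();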

View File

@ -38,7 +38,6 @@
#include <Storages/StorageInMemoryMetadata.h>
#include <Storages/WindowView/StorageWindowView.h>
#include <Storages/StorageReplicatedMergeTree.h>
#include <Storages/BlockNumberColumn.h>
#include <Interpreters/Context.h>
#include <Interpreters/executeDDLQueryOnCluster.h>
@ -894,24 +893,6 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name));
}
/// Check for the _row_exists lightweight delete column in the column list for the MergeTree family.
if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree"))
{
auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name);
if (search != all_columns.end())
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column '{}' for *MergeTree engines because it "
"is reserved for lightweight delete feature",
LightweightDeleteDescription::FILTER_COLUMN.name);
auto search_block_number = all_columns.find(BlockNumberColumn::name);
if (search_block_number != all_columns.end())
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column '{}' for *MergeTree engines because it "
"is reserved for storing block number",
BlockNumberColumn::name);
}
const auto & settings = getContext()->getSettingsRef();
/// If it's not attach and not materialized view to existing table,
@ -924,9 +905,23 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
}
}
void validateVirtualColumns(const IStorage & storage)
{
auto virtual_columns = storage.getVirtualsPtr();
for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns())
{
if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column",
storage_column.name, storage.getName());
}
}
}
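This one check replaces the hardcoded _row_exists and _block_number guards deleted above: any virtual column a storage declares as VirtualsKind::Persistent is now reserved automatically, whatever its name. A sketch of the lookup it relies on; addEphemeral() is taken from StorageFileLog::createVirtuals() later in this diff, and whether a given name is registered as persistent is up to each storage:

VirtualColumnsDescription desc;
desc.addEphemeral("_file", std::make_shared<DataTypeString>(), "");

// Ephemeral virtuals may be shadowed by a physical column of the same name,
// so the persistent-only lookup misses...
bool reserved = static_cast<bool>(desc.tryGet("_file", VirtualsKind::Persistent)); // false
// ...while the unrestricted lookup still finds the column.
bool visible = static_cast<bool>(desc.tryGet("_file")); // true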
namespace
{
void checkTemporaryTableEngineName(const String& name)
void checkTemporaryTableEngineName(const String & name)
{
if (name.starts_with("Replicated") || name.starts_with("Shared") || name == "KeeperMap")
throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines");
@ -1509,6 +1504,16 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as<ASTCreateQuery &>(), res);
}
validateVirtualColumns(*res);
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column of type Object, "
"because storage {} doesn't support dynamic subcolumns",
res->getName());
}
if (!create.attach && getContext()->getSettingsRef().database_replicated_allow_only_replicated_engine)
{
bool is_replicated_storage = typeid_cast<const StorageReplicatedMergeTree *>(res.get()) != nullptr;
@ -1558,14 +1563,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
/// we can safely destroy the object without a call to "shutdown", because there is guarantee
/// that no background threads/similar resources remain after exception from "startup".
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
{
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot create table with column of type Object, "
"because storage {} doesn't support dynamic subcolumns",
res->getName());
}
res->startup();
return true;
}

View File

@ -15,7 +15,6 @@
#include <Storages/AlterCommands.h>
#include <Storages/IStorage.h>
#include <Storages/MutationCommands.h>
#include <Storages/LightweightDeleteDescription.h>
namespace DB

View File

@ -123,28 +123,29 @@ BlockIO InterpreterDescribeQuery::execute()
void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression)
{
NamesAndTypesList names_and_types;
Block sample_block;
auto select_query = table_expression.subquery->children.at(0);
auto current_context = getContext();
if (settings.allow_experimental_analyzer)
{
SelectQueryOptions select_query_options;
names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList();
sample_block = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock();
}
else
{
names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList();
sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context);
}
for (auto && [name, type] : names_and_types)
columns.emplace_back(std::move(name), std::move(type));
for (auto && column : sample_block)
columns.emplace_back(std::move(column.name), std::move(column.type));
}
void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression)
{
auto current_context = getContext();
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context);
auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true);
for (const auto & column : column_descriptions)
columns.emplace_back(column);
@ -154,14 +155,16 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres
auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName());
if (table)
{
for (const auto & column : table->getVirtuals())
auto virtuals = table->getVirtualsPtr();
for (const auto & column : *virtuals)
{
if (!column_descriptions.has(column.name))
virtual_columns.emplace_back(column.name, column.type);
virtual_columns.push_back(column);
}
}
}
}
void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression)
{
auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name);
@ -176,10 +179,11 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & t
if (settings.describe_include_virtual_columns)
{
for (const auto & column : table->getVirtuals())
auto virtuals = table->getVirtualsPtr();
for (const auto & column : *virtuals)
{
if (!column_descriptions.has(column.name))
virtual_columns.emplace_back(column.name, column.type);
virtual_columns.push_back(column);
}
}
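Both DESCRIBE paths now share the same shape: take the shared description once via getVirtualsPtr(), then skip virtuals shadowed by a physical column of the same name. Note the switch from emplace_back(name, type) to push_back(column): the loop keeps the whole column description rather than just its name and type. The shadowing rule, condensed (column_descriptions and virtual_columns are the members used above):

auto virtuals = table->getVirtualsPtr();
for (const auto & column : *virtuals)
{
    // e.g. _part or _partition_id for MergeTree tables
    if (!column_descriptions.has(column.name))
        virtual_columns.push_back(column);
}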

View File

@ -136,7 +136,7 @@ Block InterpreterInsertQuery::getSampleBlock(
if (auto * window_view = dynamic_cast<StorageWindowView *>(table.get()))
return window_view->getInputHeader();
else if (no_destination)
return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());
return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtualsList());
else
return metadata_snapshot->getSampleBlockNonMaterialized();
}

View File

@ -107,7 +107,7 @@ SELECT
'' AS extra )";
// TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see
// IStorage::getVirtuals(). We can't easily do that via SQL.
// IStorage::getVirtualsList(). We can't easily do that via SQL.
if (query.full)
{

View File

@ -272,7 +272,7 @@ void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr &
auto & table = tables_with_columns.back();
table.addHiddenColumns(storage_columns.getMaterialized());
table.addHiddenColumns(storage_columns.getAliases());
table.addHiddenColumns(storage->getVirtuals());
table.addHiddenColumns(storage->getVirtualsList());
}
else
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList());

View File

@ -7,7 +7,7 @@
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Storages/StorageMergeTree.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/Transforms/ExpressionTransform.h>
#include <Processors/Transforms/CreatingSetsTransform.h>
@ -31,7 +31,6 @@
#include <Processors/QueryPlan/CreatingSetsStep.h>
#include <DataTypes/NestedUtils.h>
#include <Interpreters/PreparedSets.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
#include <Processors/Sources/ThrowingExceptionSource.h>
#include <Analyzer/QueryTreeBuilder.h>
@ -265,7 +264,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command)
alter_command->partition = alter_command->children.emplace_back(command.partition).get();
auto row_exists_predicate = makeASTFunction("equals",
std::make_shared<ASTIdentifier>(LightweightDeleteDescription::FILTER_COLUMN.name),
std::make_shared<ASTIdentifier>(RowExistsColumn::name),
std::make_shared<ASTLiteral>(Field(0)));
if (command.predicate)
@ -350,7 +349,8 @@ bool MutationsInterpreter::Source::isCompactPart() const
static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage)
{
auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical();
for (const auto & column : storage.getVirtuals())
auto virtuals = storage.getVirtualsPtr();
for (const auto & column : *virtuals)
all_columns.push_back(column.name);
return all_columns;
}
@ -435,60 +435,54 @@ static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const
static void validateUpdateColumns(
const MutationsInterpreter::Source & source,
const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns,
const std::unordered_map<String, Names> & column_to_affected_materialized)
const StorageMetadataPtr & metadata_snapshot,
const NameSet & updated_columns,
const std::unordered_map<String, Names> & column_to_affected_materialized,
const ContextPtr & context)
{
auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context);
NameSet key_columns = getKeyColumns(source, metadata_snapshot);
for (const String & column_name : updated_columns)
const auto & storage_columns = storage_snapshot->metadata->getColumns();
const auto & virtual_columns = *storage_snapshot->virtual_columns;
for (const auto & column_name : updated_columns)
{
auto found = false;
for (const auto & col : metadata_snapshot->getColumns().getOrdinary())
{
if (col.name == column_name)
{
found = true;
break;
}
}
/// Allow overriding the value of the lightweight delete filter virtual column
if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
{
if (!source.supportsLightweightDelete())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table");
found = true;
}
/// Don't allow overriding the value of the block number virtual column
if (!found && column_name == BlockNumberColumn::name)
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name));
}
if (!found)
{
for (const auto & col : metadata_snapshot->getColumns().getMaterialized())
{
if (col.name == column_name)
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name));
}
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name));
}
if (key_columns.contains(column_name))
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name));
if (storage_columns.tryGetColumn(GetColumnsOptions::Materialized, column_name))
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name));
auto materialized_it = column_to_affected_materialized.find(column_name);
if (materialized_it != column_to_affected_materialized.end())
{
for (const String & materialized : materialized_it->second)
for (const auto & materialized : materialized_it->second)
{
if (key_columns.contains(materialized))
{
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN,
"Updated column {} affects MATERIALIZED column {}, which is a key column. "
"Cannot UPDATE it.", backQuote(column_name), backQuote(materialized));
}
}
}
if (!storage_columns.tryGetColumn(GetColumnsOptions::Ordinary, column_name))
{
/// Allow overriding the value of the lightweight delete filter virtual column
if (column_name == RowExistsColumn::name)
{
if (!source.supportsLightweightDelete())
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table");
}
else if (virtual_columns.tryGet(column_name))
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name));
}
else
{
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name));
}
}
}
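The rewrite flattens the old found/not-found bookkeeping into one pass: key and materialized columns are rejected first, and only names that are not ordinary columns fall through to the special cases. Distilled into a self-contained sketch (the enum and function are illustrative, not the real signatures; the branch order matches the code above):

enum class UpdateVerdict { Allowed, KeyColumn, Materialized, LightweightOnly, Virtual, Unknown };

UpdateVerdict classifyUpdate(bool is_key, bool is_materialized, bool is_ordinary,
                             bool is_row_exists, bool is_virtual)
{
    if (is_key)          return UpdateVerdict::KeyColumn;       // CANNOT_UPDATE_COLUMN
    if (is_materialized) return UpdateVerdict::Materialized;    // CANNOT_UPDATE_COLUMN
    if (is_ordinary)     return UpdateVerdict::Allowed;
    if (is_row_exists)   return UpdateVerdict::LightweightOnly; // ok iff lightweight delete is supported
    if (is_virtual)      return UpdateVerdict::Virtual;         // NOT_IMPLEMENTED
    return UpdateVerdict::Unknown;                              // NO_SUCH_COLUMN_IN_TABLE
}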
@ -546,8 +540,8 @@ void MutationsInterpreter::prepare(bool dry_run)
/// Add _row_exists column if it is physically present in the part
if (source.hasLightweightDeleteMask())
{
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
available_columns_set.insert(LightweightDeleteDescription::FILTER_COLUMN.name);
all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
available_columns_set.insert(RowExistsColumn::name);
}
NameSet updated_columns;
@ -563,9 +557,7 @@ void MutationsInterpreter::prepare(bool dry_run)
for (const auto & [name, _] : command.column_to_update_expression)
{
if (!available_columns_set.contains(name)
&& name != LightweightDeleteDescription::FILTER_COLUMN.name
&& name != BlockNumberColumn::name)
if (!available_columns_set.contains(name) && name != RowExistsColumn::name)
throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
"Column {} is updated but not requested to read", name);
@ -590,7 +582,7 @@ void MutationsInterpreter::prepare(bool dry_run)
}
}
validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized);
validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized, context);
}
StorageInMemoryMetadata::HasDependencyCallback has_dependency =
@ -666,15 +658,11 @@ void MutationsInterpreter::prepare(bool dry_run)
{
type = physical_column->type;
}
else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
else if (column_name == RowExistsColumn::name)
{
type = LightweightDeleteDescription::FILTER_COLUMN.type;
type = RowExistsColumn::type;
deleted_mask_updated = true;
}
else if (column_name == BlockNumberColumn::name)
{
type = BlockNumberColumn::type;
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name);
@ -1028,7 +1016,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
/// Add _row_exists column if it is present in the part
if (source.hasLightweightDeleteMask() || deleted_mask_updated)
all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN);
all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
bool has_filters = false;
/// Next, for each stage calculate columns changed by this and previous stages.
@ -1038,7 +1026,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
{
for (const auto & column : all_columns)
{
if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated)
if (column.name == RowExistsColumn::name && !deleted_mask_updated)
continue;
prepared_stages[i].output_columns.insert(column.name);
@ -1057,7 +1045,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
/// and so it is not in the list of AllPhysical columns.
for (const auto & [column_name, _] : prepared_stages[i].column_to_updated)
{
if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated)
if (column_name == RowExistsColumn::name && has_filters && !deleted_mask_updated)
continue;
prepared_stages[i].output_columns.insert(column_name);
@ -1148,93 +1136,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
}
}
/// This structure re-implements adding virtual columns while reading from MergeTree part.
/// It would be good to unify it with IMergeTreeSelectAlgorithm.
struct VirtualColumns
{
struct ColumnAndPosition
{
ColumnWithTypeAndName column;
size_t position;
};
using Columns = std::vector<ColumnAndPosition>;
Columns virtuals;
Names columns_to_read;
VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns))
{
for (size_t i = 0; i < columns_to_read.size(); ++i)
{
if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name)
{
if (!part->getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name))
{
ColumnWithTypeAndName mask_column;
mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type;
mask_column.column = mask_column.type->createColumnConst(0, 1);
mask_column.name = std::move(columns_to_read[i]);
virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i});
}
}
else if (columns_to_read[i] == "_partition_id")
{
ColumnWithTypeAndName column;
column.type = std::make_shared<DataTypeString>();
column.column = column.type->createColumnConst(0, part->info.partition_id);
column.name = std::move(columns_to_read[i]);
virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i});
}
else if (columns_to_read[i] == BlockNumberColumn::name)
{
if (!part->getColumns().contains(BlockNumberColumn::name))
{
ColumnWithTypeAndName block_number_column;
block_number_column.type = BlockNumberColumn::type;
block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block);
block_number_column.name = std::move(columns_to_read[i]);
virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i});
}
}
}
if (!virtuals.empty())
{
Names columns_no_virtuals;
columns_no_virtuals.reserve(columns_to_read.size());
size_t next_virtual = 0;
for (size_t i = 0; i < columns_to_read.size(); ++i)
{
if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position)
++next_virtual;
else
columns_no_virtuals.emplace_back(std::move(columns_to_read[i]));
}
columns_to_read.swap(columns_no_virtuals);
}
}
void addVirtuals(QueryPlan & plan)
{
auto dag = std::make_unique<ActionsDAG>(plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
for (auto & column : virtuals)
{
const auto & adding_const = dag->addColumn(std::move(column.column));
auto & outputs = dag->getOutputs();
outputs.insert(outputs.begin() + column.position, &adding_const);
}
auto step = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(dag));
plan.addStep(std::move(step));
}
};
void MutationsInterpreter::Source::read(
Stage & first_stage,
QueryPlan & plan,
@ -1277,16 +1178,12 @@ void MutationsInterpreter::Source::read(
filter = ActionsDAG::buildFilterActionsDAG(nodes);
}
VirtualColumns virtual_columns(std::move(required_columns), part);
createReadFromPartStep(
MergeTreeSequentialSourceType::Mutation,
plan, *data, storage_snapshot, part,
std::move(virtual_columns.columns_to_read),
plan, *data, storage_snapshot,
part, required_columns,
apply_deleted_mask_, filter, context_,
getLogger("MutationsInterpreter"));
virtual_columns.addVirtuals(plan);
}
else
{

View File

@ -56,6 +56,7 @@
#include <Storages/IStorage.h>
#include <Storages/StorageJoin.h>
#include <Common/checkStackSize.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Storages/StorageView.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
@ -990,8 +991,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
{
auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical);
options.withExtendedObjects();
if (storage->supportsSubcolumns())
options.withSubcolumns();
options.withSubcolumns(storage->supportsSubcolumns());
auto columns_from_storage = storage_snapshot->getColumns(options);
@ -1001,8 +1001,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end());
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
auto metadata_column_descriptions = metadata_snapshot->getColumns();
source_columns_ordinary = metadata_column_descriptions.getOrdinary();
source_columns_ordinary = metadata_snapshot->getColumns().getOrdinary();
}
source_columns_set = removeDuplicateColumns(source_columns);
@ -1109,16 +1108,16 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
const auto & partition_desc = storage_snapshot->metadata->getPartitionKey();
if (partition_desc.expression)
{
auto partition_source_columns = partition_desc.expression->getRequiredColumns();
partition_source_columns.push_back("_part");
partition_source_columns.push_back("_partition_id");
partition_source_columns.push_back("_part_uuid");
partition_source_columns.push_back("_partition_value");
auto partition_columns = partition_desc.expression->getRequiredColumns();
NameSet partition_columns_set(partition_columns.begin(), partition_columns.end());
const auto & parititon_virtuals = MergeTreeData::virtuals_useful_for_filter;
partition_columns_set.insert(parititon_virtuals.begin(), parititon_virtuals.end());
optimize_trivial_count = true;
for (const auto & required_column : required)
{
if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column)
== partition_source_columns.end())
if (!partition_columns_set.contains(required_column))
{
optimize_trivial_count = false;
break;
@ -1129,7 +1128,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
NameSet unknown_required_source_columns = required;
for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();)
for (auto it = source_columns.begin(); it != source_columns.end();)
{
const String & column_name = it->name;
unknown_required_source_columns.erase(column_name);
@ -1143,32 +1142,23 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
has_virtual_shard_num = false;
/// If there are virtual columns among the unknown columns, remove them from the list of unknown columns
/// and add them to the columns list, so that they are also considered during further processing.
if (storage)
if (storage_snapshot)
{
const auto storage_virtuals = storage->getVirtuals();
const auto & virtuals = storage_snapshot->virtual_columns;
for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
{
auto column = storage_virtuals.tryGetByName(*it);
if (column)
if (auto column = virtuals->tryGet(*it))
{
source_columns.push_back(*column);
it = unknown_required_source_columns.erase(it);
}
else
++it;
}
if (is_remote_storage)
{
for (const auto & name_type : storage_virtuals)
{
if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()))
{
has_virtual_shard_num = true;
break;
}
++it;
}
}
has_virtual_shard_num = is_remote_storage && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()) && virtuals->has("_shard_num");
}
/// Collect missed object subcolumns

View File

@ -99,7 +99,7 @@ static NamesAndTypesList getColumnsFromTableExpression(
names_and_type_list = columns.getOrdinary();
materialized = columns.getMaterialized();
aliases = columns.getAliases();
virtuals = function_storage->getVirtuals();
virtuals = function_storage->getVirtualsList();
}
else if (table_expression.database_and_table_name)
{
@ -110,7 +110,7 @@ static NamesAndTypesList getColumnsFromTableExpression(
names_and_type_list = columns.getOrdinary();
materialized = columns.getMaterialized();
aliases = columns.getAliases();
virtuals = table->getVirtuals();
virtuals = table->getVirtualsList();
}
return names_and_type_list;

View File

@ -20,7 +20,6 @@
#include <Columns/ColumnArray.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Storages/BlockNumberColumn.h>
namespace DB
@ -280,7 +279,7 @@ void fillMissingColumns(
const NamesAndTypesList & requested_columns,
const NamesAndTypesList & available_columns,
const NameSet & partially_read_columns,
StorageMetadataPtr metadata_snapshot, size_t block_number)
StorageMetadataPtr metadata_snapshot)
{
size_t num_columns = requested_columns.size();
if (num_columns != res_columns.size())
@ -359,14 +358,9 @@ void fillMissingColumns(
}
else
{
if (requested_column->name == BlockNumberColumn::name)
res_columns[i] = type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
else
/// We must turn a constant column into a full column because the interpreter could infer
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
/// We must turn a constant column into a full column because the interpreter could infer
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
}
}
}

View File

@ -46,6 +46,6 @@ void fillMissingColumns(
const NamesAndTypesList & requested_columns,
const NamesAndTypesList & available_columns,
const NameSet & partially_read_columns,
StorageMetadataPtr metadata_snapshot, size_t block_number = 0);
StorageMetadataPtr metadata_snapshot);
}

View File

@ -32,7 +32,7 @@ ASTPtr processColumnTransformers(
tables_with_columns[0].addHiddenColumns(columns.getMaterialized());
tables_with_columns[0].addHiddenColumns(columns.getAliases());
tables_with_columns[0].addHiddenColumns(table->getVirtuals());
tables_with_columns[0].addHiddenColumns(table->getVirtualsList());
NameSet source_columns_set;
for (const auto & identifier : query_columns->children)

View File

@ -12,7 +12,7 @@
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <IO/WriteHelpers.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
namespace DB

View File

@ -431,7 +431,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
{
const auto & keys = aggregating.getParams().keys;
const auto & aggregates = aggregating.getParams().aggregates;
Block key_virtual_columns = reading.getMergeTreeData().getSampleBlockWithVirtualColumns();
Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter();
AggregateProjectionCandidates candidates;

View File

@ -135,7 +135,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
std::list<NormalProjectionCandidate> candidates;
NormalProjectionCandidate * best_candidate = nullptr;
const Names & required_columns = reading->getRealColumnNames();
const Names & required_columns = reading->getAllColumnNames();
const auto & parts = reading->getParts();
const auto & alter_conversions = reading->getAlterConvertionsForParts();
const auto & query_info = reading->getQueryInfo();

View File

@ -261,30 +261,24 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c
ReadFromMergeTree::ReadFromMergeTree(
MergeTreeData::DataPartsVector parts_,
std::vector<AlterConversionsPtr> alter_conversions_,
const Names & column_names_,
Names real_column_names_,
Names virt_column_names_,
Names all_column_names_,
const MergeTreeData & data_,
const SelectQueryInfo & query_info_,
const StorageSnapshotPtr & storage_snapshot_,
const ContextPtr & context_,
size_t max_block_size_,
size_t num_streams_,
bool sample_factor_column_queried_,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read_,
LoggerPtr log_,
AnalysisResultPtr analyzed_result_ptr_,
bool enable_parallel_reading)
: SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader(
storage_snapshot_->getSampleBlockForColumns(real_column_names_),
query_info_.prewhere_info,
data_.getPartitionValueType(),
virt_column_names_)}, column_names_, query_info_, storage_snapshot_, context_)
storage_snapshot_->getSampleBlockForColumns(all_column_names_),
query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_)
, reader_settings(getMergeTreeReaderSettings(context_, query_info_))
, prepared_parts(std::move(parts_))
, alter_conversions_for_parts(std::move(alter_conversions_))
, real_column_names(std::move(real_column_names_))
, virt_column_names(std::move(virt_column_names_))
, all_column_names(std::move(all_column_names_))
, data(data_)
, actions_settings(ExpressionActionsSettings::fromContext(context_))
, metadata_for_reading(storage_snapshot->getMetadataForQuery())
@ -293,20 +287,11 @@ ReadFromMergeTree::ReadFromMergeTree(
.preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes,
.preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes}
, requested_num_streams(num_streams_)
, sample_factor_column_queried(sample_factor_column_queried_)
, max_block_numbers_to_read(std::move(max_block_numbers_to_read_))
, log(std::move(log_))
, analyzed_result_ptr(analyzed_result_ptr_)
, is_parallel_reading_from_replicas(enable_parallel_reading)
{
if (sample_factor_column_queried)
{
/// Only _sample_factor virtual column is added by ReadFromMergeTree
/// Other virtual columns are added by MergeTreeSelectProcessor.
auto type = std::make_shared<DataTypeFloat64>();
output_stream->header.insert({type->createColumn(), type, "_sample_factor"});
}
if (is_parallel_reading_from_replicas)
{
all_ranges_callback = context->getMergeTreeAllRangesCallback();
@ -368,12 +353,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
auto pool = std::make_shared<MergeTreeReadPoolParallelReplicas>(
std::move(extension),
std::move(parts_with_range),
shared_virtual_fields,
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
pool_settings,
context);
@ -387,8 +372,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), data, prewhere_info,
actions_settings, block_size_copy, reader_settings, virt_column_names);
pool, std::move(algorithm), storage_snapshot, prewhere_info,
actions_settings, block_size_copy, reader_settings);
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
pipes.emplace_back(std::move(source));
@ -449,12 +434,12 @@ Pipe ReadFromMergeTree::readFromPool(
{
pool = std::make_shared<MergeTreePrefetchedReadPool>(
std::move(parts_with_range),
shared_virtual_fields,
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
pool_settings,
context);
}
@ -462,12 +447,12 @@ Pipe ReadFromMergeTree::readFromPool(
{
pool = std::make_shared<MergeTreeReadPool>(
std::move(parts_with_range),
shared_virtual_fields,
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
pool_settings,
context);
}
@ -486,8 +471,8 @@ Pipe ReadFromMergeTree::readFromPool(
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), data, prewhere_info,
actions_settings, block_size_copy, reader_settings, virt_column_names);
pool, std::move(algorithm), storage_snapshot, prewhere_info,
actions_settings, block_size_copy, reader_settings);
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
@ -538,12 +523,12 @@ Pipe ReadFromMergeTree::readInOrder(
std::move(extension),
mode,
parts_with_ranges,
shared_virtual_fields,
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
pool_settings,
context);
}
@ -553,12 +538,12 @@ Pipe ReadFromMergeTree::readInOrder(
has_limit_below_one_block,
read_type,
parts_with_ranges,
shared_virtual_fields,
storage_snapshot,
prewhere_info,
actions_settings,
reader_settings,
required_columns,
virt_column_names,
pool_settings,
context);
}
@ -592,8 +577,8 @@ Pipe ReadFromMergeTree::readInOrder(
algorithm = std::make_unique<MergeTreeInOrderSelectAlgorithm>(i);
auto processor = std::make_unique<MergeTreeSelectProcessor>(
pool, std::move(algorithm), data, prewhere_info,
actions_settings, block_size, reader_settings, virt_column_names);
pool, std::move(algorithm), storage_snapshot, prewhere_info,
actions_settings, block_size, reader_settings);
processor->addPartLevelToChunk(isQueryWithFinal());
@ -1302,8 +1287,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
requested_num_streams,
max_block_numbers_to_read,
data,
real_column_names,
sample_factor_column_queried,
all_column_names,
log,
indexes);
}
@ -1489,8 +1473,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & real_column_names,
bool sample_factor_column_queried,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes)
{
@ -1503,8 +1486,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
num_streams,
max_block_numbers_to_read,
data,
real_column_names,
sample_factor_column_queried,
all_column_names,
log,
indexes);
}
@ -1518,8 +1500,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & real_column_names,
bool sample_factor_column_queried,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes)
{
@ -1528,7 +1509,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
size_t total_parts = parts.size();
result.column_names_to_read = real_column_names;
result.column_names_to_read = all_column_names;
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
if (result.column_names_to_read.empty())
@ -1587,7 +1568,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
data,
metadata_snapshot,
context_,
sample_factor_column_queried,
log);
if (result.sampling.read_nothing)
@ -1704,10 +1684,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info
prewhere_info = prewhere_info_value;
output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader(
storage_snapshot->getSampleBlockForColumns(real_column_names),
prewhere_info_value,
data.getPartitionValueType(),
virt_column_names)};
storage_snapshot->getSampleBlockForColumns(all_column_names),
prewhere_info_value)};
updateSortDescriptionForOutputStream(
*output_stream,
@ -1901,6 +1879,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
storage_snapshot->data = std::make_unique<MergeTreeData::SnapshotData>();
result.checkLimits(context->getSettingsRef(), query_info);
shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor);
LOG_DEBUG(
log,
@ -1985,18 +1964,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
};
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
if (sample_factor_column_queried)
{
ColumnWithTypeAndName column;
column.name = "_sample_factor";
column.type = std::make_shared<DataTypeFloat64>();
column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor));
auto adding_column = ActionsDAG::makeAddingColumnActions(std::move(column));
append_actions(std::move(adding_column));
}
if (result_projection)
cur_header = result_projection->updateHeader(cur_header);
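With the _sample_factor blocks removed from both the constructor and the pipeline epilogue, ReadFromMergeTree treats _sample_factor like any other virtual column: its per-query value travels to the reader pools through shared_virtual_fields (declared as VirtualFields in the header diff below, and passed to every pool in the hunks above). The injection shrinks to one line; a sketch, assuming VirtualFields maps a virtual column name to its constant value for the query:

VirtualFields shared_virtual_fields;
shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor);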

View File

@ -110,16 +110,13 @@ public:
ReadFromMergeTree(
MergeTreeData::DataPartsVector parts_,
std::vector<AlterConversionsPtr> alter_conversions_,
const Names & column_names_,
Names real_column_names_,
Names virt_column_names_,
Names all_column_names_,
const MergeTreeData & data_,
const SelectQueryInfo & query_info_,
const StorageSnapshotPtr & storage_snapshot,
const ContextPtr & context_,
size_t max_block_size_,
size_t num_streams_,
bool sample_factor_column_queried_,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read_,
LoggerPtr log_,
AnalysisResultPtr analyzed_result_ptr_,
@ -136,8 +133,7 @@ public:
void describeActions(JSONBuilder::JSONMap & map) const override;
void describeIndexes(JSONBuilder::JSONMap & map) const override;
const Names & getRealColumnNames() const { return real_column_names; }
const Names & getVirtualColumnNames() const { return virt_column_names; }
const Names & getAllColumnNames() const { return all_column_names; }
StorageID getStorageID() const { return data.getStorageID(); }
UInt64 getSelectedParts() const { return selected_parts; }
@ -164,8 +160,7 @@ public:
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & real_column_names,
bool sample_factor_column_queried,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes);
@ -209,8 +204,7 @@ private:
size_t num_streams,
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
const MergeTreeData & data,
const Names & real_column_names,
bool sample_factor_column_queried,
const Names & all_column_names,
LoggerPtr log,
std::optional<Indexes> & indexes);
@ -227,8 +221,7 @@ private:
MergeTreeData::DataPartsVector prepared_parts;
std::vector<AlterConversionsPtr> alter_conversions_for_parts;
Names real_column_names;
Names virt_column_names;
Names all_column_names;
const MergeTreeData & data;
ExpressionActionsSettings actions_settings;
@ -239,7 +232,6 @@ private:
size_t requested_num_streams;
size_t output_streams_limit = 0;
const bool sample_factor_column_queried;
/// Used for aggregation optimization (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently).
bool output_each_partition_through_separate_port = false;
@ -280,7 +272,9 @@ private:
RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection);
ReadFromMergeTree::AnalysisResult getAnalysisResult() const;
AnalysisResultPtr analyzed_result_ptr;
VirtualFields shared_virtual_fields;
bool is_parallel_reading_from_replicas;
std::optional<MergeTreeAllRangesCallback> all_ranges_callback;

View File

@ -451,7 +451,7 @@ Chain buildPushingToViewsChain(
/// If we don't write directly to the destination
/// then expect that we're inserting with precalculated virtual columns
auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals())
auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtualsList())
: metadata_snapshot->getSampleBlock();
/** TODO This is a very important line. At any insertion into the table one of chains should own lock.
@ -597,7 +597,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat
views_data.source_storage_id,
views_data.source_metadata_snapshot->getColumns(),
std::move(block),
views_data.source_storage->getVirtuals()));
*views_data.source_storage->getVirtualsPtr()));
QueryPipelineBuilder pipeline;

View File

@ -31,8 +31,6 @@
#include <Parsers/queryToString.h>
#include <Storages/AlterCommands.h>
#include <Storages/IStorage.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Common/typeid_cast.h>
#include <Common/randomSeed.h>
@ -965,8 +963,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada
/// Dropping an alias is a metadata-only alter; in any other case a mutation is required.
if (type == DROP_COLUMN)
return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) ||
column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name;
return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name);
if (type != MODIFY_COLUMN || data_type == nullptr)
return false;
@ -1256,7 +1253,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
{
const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata();
const auto & metadata = table->getInMemoryMetadata();
auto virtuals = table->getVirtualsPtr();
auto all_columns = metadata.columns;
/// Default expression for all added/modified columns
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
@ -1292,16 +1291,20 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
if (command.data_type->hasDynamicSubcolumns())
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs");
if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast<MergeTreeData>(table))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
"this column name is reserved for lightweight delete feature", backQuote(column_name));
if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
"this column name is reserved for _block_number persisting feature", backQuote(column_name));
if (virtuals->tryGet(column_name, VirtualsKind::Persistent))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name));
if (command.codec)
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec);
{
const auto & settings = context->getSettingsRef();
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
command.codec, command.data_type,
!settings.allow_suspicious_codecs,
settings.allow_experimental_codecs,
settings.enable_deflate_qpl_codec,
settings.enable_zstd_qat_codec);
}
all_columns.add(ColumnDescription(column_name, command.data_type));
}
@ -1415,9 +1418,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
}
else if (command.type == AlterCommand::DROP_COLUMN)
{
if (all_columns.has(command.column_name) ||
all_columns.hasNested(command.column_name) ||
(command.clear && column_name == LightweightDeleteDescription::FILTER_COLUMN.name))
if (all_columns.has(command.column_name) || all_columns.hasNested(command.column_name))
{
if (!command.clear) /// CLEAR column is Ok even if there are dependencies.
{
@ -1501,16 +1502,12 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
}
if (all_columns.has(command.rename_to))
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: "
"column with this name already exists", backQuote(command.rename_to));
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
"Cannot rename to {}: column with this name already exists", backQuote(command.rename_to));
if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast<MergeTreeData>(table))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
"this column name is reserved for lightweight delete feature", backQuote(command.rename_to));
if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
"this column name is reserved for _block_number persisting feature", backQuote(command.rename_to));
if (virtuals->tryGet(command.rename_to, VirtualsKind::Persistent))
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
"Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to));
if (modified_columns.contains(column_name))
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} "

View File

@ -1,23 +0,0 @@
#include <Storages/BlockNumberColumn.h>
#include <Compression/CompressionCodecMultiple.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
CompressionCodecPtr getCompressionCodecForBlockNumberColumn()
{
std::vector <CompressionCodecPtr> codecs;
codecs.reserve(2);
auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory();
codecs.emplace_back(getCompressionCodecDelta(data_bytes_size));
codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {}));
return std::make_shared<CompressionCodecMultiple>(codecs);
}
const String BlockNumberColumn::name = "_block_number";
const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn();
}

View File

@ -1,16 +0,0 @@
#pragma once
#include <Core/NamesAndTypes.h>
#include <DataTypes/DataTypesNumber.h>
#include <Compression/CompressionFactory.h>
namespace DB
{
struct BlockNumberColumn
{
static const String name;
static const DataTypePtr type;
static const CompressionCodecPtr compression_codec;
};
}

View File

@ -31,15 +31,11 @@
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Storages/BlockNumberColumn.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
namespace ErrorCodes
{
extern const int NO_SUCH_COLUMN_IN_TABLE;
@ -482,6 +478,10 @@ NamesAndTypesList ColumnsDescription::get(const GetColumnsOptions & options) con
NamesAndTypesList res;
switch (options.kind)
{
case GetColumnsOptions::None:
{
break;
}
case GetColumnsOptions::All:
{
res = getAll();
@ -559,6 +559,12 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co
return *it;
}
const ColumnDescription * ColumnsDescription::tryGet(const String & column_name) const
{
auto it = columns.get<1>().find(column_name);
return it == columns.get<1>().end() ? nullptr : &(*it);
}
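tryGet() is the non-throwing counterpart of get() just above, for callers that want to probe for a column without catching an exception. A usage sketch (apply_codec is a hypothetical consumer):

if (const ColumnDescription * desc = columns.tryGet("value"); desc && desc->codec)
    apply_codec(desc->codec); // the column's codec AST, if one was specified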
static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind)
{
switch (kind)
@ -572,7 +578,8 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind)
case ColumnDefaultKind::Ephemeral:
return GetColumnsOptions::Ephemeral;
}
UNREACHABLE();
return GetColumnsOptions::None;
}
NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const
@ -784,33 +791,6 @@ bool ColumnsDescription::hasCompressionCodec(const String & column_name) const
return it != columns.get<1>().end() && it->codec != nullptr;
}
CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
const auto it = columns.get<1>().find(column_name);
if (it == columns.get<1>().end() || !it->codec)
return default_codec;
return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec);
}
CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const
{
assert (column_name != BlockNumberColumn::name);
return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
}
ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
assert (column_name != BlockNumberColumn::name);
const auto it = columns.get<1>().find(column_name);
if (it == columns.get<1>().end() || !it->codec)
return default_codec->getFullCodecDesc();
return it->codec;
}
ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const
{
ColumnTTLs ret;

View File

@ -29,10 +29,19 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
enum class VirtualsKind : UInt8
{
None = 0,
Ephemeral = 1,
Persistent = 2,
All = Ephemeral | Persistent,
};
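VirtualsKind is a bit mask (note All = Ephemeral | Persistent), so a filter carried in GetColumnsOptions can be tested with the underlying integer; a short sketch:

GetColumnsOptions options(GetColumnsOptions::All);
options.withVirtuals(VirtualsKind::Persistent);

// true when the options ask for persistent virtuals, alone or as part of All
bool wants_persistent = (static_cast<UInt8>(options.virtuals_kind)
                       & static_cast<UInt8>(VirtualsKind::Persistent)) != 0;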
struct GetColumnsOptions
{
enum Kind : UInt8
{
None = 0,
Ordinary = 1,
Materialized = 2,
Aliases = 4,
@ -51,9 +60,9 @@ struct GetColumnsOptions
return *this;
}
GetColumnsOptions & withVirtuals(bool value = true)
GetColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All)
{
with_virtuals = value;
virtuals_kind = value;
return *this;
}
@ -63,17 +72,11 @@ struct GetColumnsOptions
return *this;
}
GetColumnsOptions & withSystemColumns(bool value = true)
{
with_system_columns = value;
return *this;
}
Kind kind;
VirtualsKind virtuals_kind = VirtualsKind::None;
bool with_subcolumns = false;
bool with_virtuals = false;
bool with_extended_objects = false;
bool with_system_columns = false;
};
/// Description of a single table column (in CREATE TABLE for example).
@ -160,6 +163,7 @@ public:
bool hasNested(const String & column_name) const;
bool hasSubcolumn(const String & column_name) const;
const ColumnDescription & get(const String & column_name) const;
const ColumnDescription * tryGet(const String & column_name) const;
template <typename F>
void modify(const String & column_name, F && f)
@ -213,9 +217,6 @@ public:
/// Does column has non default specified compression codec
bool hasCompressionCodec(const String & column_name) const;
CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
CompressionCodecPtr getCodecOrDefault(const String & column_name) const;
ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
String toString() const;
static ColumnsDescription parse(const String & str);
@ -269,4 +270,5 @@ private:
/// don't have strange constructions in default expression like SELECT query or
/// arrayJoin function.
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context);
}

View File

@ -31,7 +31,7 @@ FileLogSource::FileLogSource(
, max_streams_number(max_streams_number_)
, handle_error_mode(handle_error_mode_)
, non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized())
, virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames()))
, virtual_header(storage_snapshot->virtual_columns->getSampleBlock())
{
consumer = std::make_unique<FileLogConsumer>(storage, max_block_size, poll_time_out, context, stream_number_, max_streams_number_);

View File

@ -147,6 +147,7 @@ StorageFileLog::StorageFileLog(
storage_metadata.setColumns(columns_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
setVirtuals(createVirtuals(filelog_settings->handle_error_mode));
if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath()))
{
@ -203,6 +204,22 @@ StorageFileLog::StorageFileLog(
}
}
VirtualColumnsDescription StorageFileLog::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
VirtualColumnsDescription desc;
desc.addEphemeral("_filename", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), "");
if (handle_error_mode == StreamingHandleErrorMode::STREAM)
{
desc.addEphemeral("_raw_record", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
}
return desc;
}
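StorageFileLog is the template for the migration repeated across this PR: the getVirtuals() override (removed at the bottom of this file) becomes a static createVirtuals() whose result the constructor hands to setVirtuals(). Extending the set is one more builder call; a sketch with a hypothetical extra column (the third argument is the column comment):

VirtualColumnsDescription desc;
desc.addEphemeral("_filename", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
// Hypothetical extra virtual, not part of this PR:
desc.addEphemeral("_ingest_time", std::make_shared<DataTypeDateTime>(), "time the record was polled");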
void StorageFileLog::loadMetaFiles(bool attach)
{
/// Attach table
@ -1009,19 +1026,4 @@ bool StorageFileLog::updateFileInfos()
return events.empty() || file_infos.file_names.empty();
}
NamesAndTypesList StorageFileLog::getVirtuals() const
{
auto virtuals = NamesAndTypesList{
{"_filename", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_offset", std::make_shared<DataTypeUInt64>()}};
if (filelog_settings->handle_error_mode == StreamingHandleErrorMode::STREAM)
{
virtuals.push_back({"_raw_record", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
}
return virtuals;
}
}

View File

@ -102,8 +102,6 @@ public:
String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; }
String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; }
NamesAndTypesList getVirtuals() const override;
static UInt64 getInode(const String & file_name);
void openFilesAndSetPos();
@ -212,6 +210,8 @@ private:
UInt64 inode = 0;
};
ReadMetadataResult readMetadata(const String & filename) const;
static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
};
}

View File

@ -241,8 +241,7 @@ StorageHDFS::StorageHDFS(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}
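File-like storages (HDFS here, the cluster variant and Hive below) drop their hand-rolled _path/_file lists and the per-storage virtual_columns member in favor of one shared helper plus the IStorage-owned description. The recurring pair, both lines taken from the hunks in this file:

// constructor: register the common file-like virtuals (_path, _file, ...)
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));

// call sites: read the shared list back instead of a member field
auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->getVirtualsList(), context);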
namespace
@ -975,7 +974,7 @@ void StorageHDFS::read(
size_t max_block_size,
size_t num_streams)
{
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns);
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_));
bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
&& context_->getSettingsRef().optimize_count_from_files;
@ -1011,7 +1010,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
else if (storage->is_path_with_globs)
{
/// Iterate through disclosed globs and make a source for each file
auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->virtual_columns, context);
auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->getVirtualsList(), context);
iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
{
return glob_iterator->next();
@ -1019,7 +1018,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
}
else
{
auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->virtual_columns, context);
auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->getVirtualsList(), context);
iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
{
return uris_iterator->next();
@ -1179,16 +1178,6 @@ void registerStorageHDFS(StorageFactory & factory)
});
}
NamesAndTypesList StorageHDFS::getVirtuals() const
{
return virtual_columns;
}
Names StorageHDFS::getVirtualColumnNames()
{
return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames();
}
SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx)
{
static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS));

View File

@ -69,9 +69,6 @@ public:
ContextPtr local_context,
TableExclusiveLockHolder &) override;
NamesAndTypesList getVirtuals() const override;
static Names getVirtualColumnNames();
bool supportsPartitionBy() const override { return true; }
/// Check if the format is column-oriented.
@ -114,7 +111,6 @@ private:
const bool distributed_processing;
ASTPtr partition_by;
bool is_path_with_globs;
NamesAndTypesList virtual_columns;
LoggerPtr log = getLogger("StorageHDFS");
};

View File

@ -72,8 +72,7 @@ StorageHDFSCluster::StorageHDFSCluster(
storage_metadata.setConstraints(constraints_);
setInMemoryMetadata(storage_metadata);
virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}
void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)
@ -89,18 +88,11 @@ void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB:
RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
{
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, virtual_columns, context);
auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, getVirtualsList(), context);
auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
}
NamesAndTypesList StorageHDFSCluster::getVirtuals() const
{
return NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
}
}
#endif

View File

@ -32,8 +32,6 @@ public:
std::string getName() const override { return "HDFSCluster"; }
NamesAndTypesList getVirtuals() const override;
RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;
bool supportsSubcolumns() const override { return true; }
@ -45,7 +43,6 @@ private:
String uri;
String format_name;
NamesAndTypesList virtual_columns;
};

View File

@ -45,6 +45,7 @@
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/StorageFactory.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/VirtualColumnUtils.h>
namespace CurrentMetrics
{
@ -444,6 +445,7 @@ StorageHive::StorageHive(
storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());
setInMemoryMetadata(storage_metadata);
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}
void StorageHive::lazyInitialize()
@ -1020,13 +1022,6 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive");
}
NamesAndTypesList StorageHive::getVirtuals() const
{
return NamesAndTypesList{
{"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
}
std::optional<UInt64> StorageHive::totalRows(const Settings & settings) const
{
/// query_info is not used when prune_level == PruneLevel::None

View File

@ -54,8 +54,6 @@ public:
SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool async_insert) override;
NamesAndTypesList getVirtuals() const override;
bool supportsSubsetOfColumns() const;
std::optional<UInt64> totalRows(const Settings & settings) const override;

View File

@ -27,10 +27,17 @@ namespace ErrorCodes
extern const int CANNOT_RESTORE_TABLE;
}
IStorage::IStorage(StorageID storage_id_)
: storage_id(std::move(storage_id_))
, metadata(std::make_unique<StorageInMemoryMetadata>())
, virtuals(std::make_unique<VirtualColumnsDescription>())
{
}
bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
{
/// Virtual column may be overridden by a real column
return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name);
return !metadata_snapshot->getColumns().has(column_name) && virtuals.get()->has(column_name);
}
RWLockImpl::LockHolder IStorage::tryLockTimed(
@ -237,11 +244,6 @@ void IStorage::renameInMemory(const StorageID & new_table_id)
storage_id = new_table_id;
}
NamesAndTypesList IStorage::getVirtuals() const
{
return {};
}
Names IStorage::getAllRegisteredNames() const
{
Names result;

View File

@ -11,6 +11,7 @@
#include <Storages/IStorage_fwd.h>
#include <Storages/SelectQueryDescription.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Storages/VirtualColumnsDescription.h>
#include <Storages/TableLockHolder.h>
#include <Storages/StorageSnapshot.h>
#include <Common/ActionLock.h>
@ -98,9 +99,7 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
public:
IStorage() = delete;
/// Storage metadata can be set separately in setInMemoryMetadata method
explicit IStorage(StorageID storage_id_)
: storage_id(std::move(storage_id_))
, metadata(std::make_unique<StorageInMemoryMetadata>()) {}
explicit IStorage(StorageID storage_id_);
IStorage(const IStorage &) = delete;
IStorage & operator=(const IStorage &) = delete;
@ -215,6 +214,10 @@ public:
metadata.set(std::make_unique<StorageInMemoryMetadata>(metadata_));
}
void setVirtuals(VirtualColumnsDescription virtuals_)
{
virtuals.set(std::make_unique<VirtualColumnsDescription>(std::move(virtuals_)));
}
/// Return list of virtual columns (like _part, _table, etc). In the vast
/// majority of cases virtual columns are static constant part of Storage
@ -226,7 +229,9 @@ public:
/// virtual column will be overridden and inaccessible.
///
/// By default return empty list of columns.
virtual NamesAndTypesList getVirtuals() const;
VirtualsDescriptionPtr getVirtualsPtr() const { return virtuals.get(); }
NamesAndTypesList getVirtualsList() const { return virtuals.get()->getNamesAndTypesList(); }
Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); }
Names getAllRegisteredNames() const override;
@ -263,15 +268,16 @@ public:
virtual bool supportsTrivialCountOptimization() const { return false; }
private:
StorageID storage_id;
mutable std::mutex id_mutex;
/// Multiversion storage metadata. Allows to read/write storage metadata
/// without locks.
/// Multiversion storage metadata. Allows to read/write storage metadata without locks.
MultiVersionStorageMetadataPtr metadata;
/// Description of virtual columns. Optional, may be set in constructor.
MultiVersionVirtualsDescriptionPtr virtuals;
protected:
RWLockImpl::LockHolder tryLockTimed(
const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const;
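
The description now sits next to the table metadata as a multiversion pointer: setVirtuals() publishes an immutable snapshot, and readers inspect it without taking a lock. A hedged usage sketch (the storage variable and the _table column are illustrative):

#include <Storages/VirtualColumnsDescription.h>
#include <DataTypes/DataTypeString.h>

/// Publishing, typically from a storage constructor:
VirtualColumnsDescription desc;
desc.addEphemeral("_table", std::make_shared<DataTypeString>(), "Name of the table");
storage.setVirtuals(std::move(desc));

/// Reading: each accessor works on a consistent snapshot.
auto snapshot = storage.getVirtualsPtr();  /// whole description, kept alive by the caller
auto list = storage.getVirtualsList();     /// NamesAndTypesList, for old-style consumers
auto header = storage.getVirtualsHeader(); /// empty-column Block, e.g. for source headers
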

View File

@ -45,7 +45,7 @@ KafkaSource::KafkaSource(
, max_block_size(max_block_size_)
, commit_in_suffix(commit_in_suffix_)
, non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized())
, virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtualColumnNames()))
, virtual_header(storage.getVirtualsHeader())
, handle_error_mode(storage.getStreamingHandleErrorMode())
{
}

View File

@ -363,6 +363,8 @@ StorageKafka::StorageKafka(
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
setInMemoryMetadata(storage_metadata);
setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode));
auto task_count = thread_per_consumer ? num_consumers : 1;
for (size_t i = 0; i < task_count; ++i)
{
@ -384,6 +386,28 @@ StorageKafka::StorageKafka(
});
}
VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
VirtualColumnsDescription desc;
desc.addEphemeral("_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_key", std::make_shared<DataTypeString>(), "");
desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), "");
desc.addEphemeral("_partition", std::make_shared<DataTypeUInt64>(), "");
desc.addEphemeral("_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>()), "");
desc.addEphemeral("_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3)), "");
desc.addEphemeral("_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "");
if (handle_error_mode == StreamingHandleErrorMode::STREAM)
{
desc.addEphemeral("_raw_message", std::make_shared<DataTypeString>(), "");
desc.addEphemeral("_error", std::make_shared<DataTypeString>(), "");
}
return desc;
}
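
Every Kafka virtual is ephemeral: produced by the source at read time, never persisted, and the error columns exist only when kafka_handle_error_mode = 'stream'. Keeping this in a single description removes the risk of the two hand-maintained lists below drifting apart. A sketch of how it is consumed (cf. the KafkaSource change above; the storage variable is illustrative):

/// The source takes its virtual header straight from the storage
/// instead of rebuilding it from a list of names:
Block virtual_header = storage.getVirtualsHeader();

/// Mode-dependent columns are present only when configured:
bool streams_errors = storage.getStreamingHandleErrorMode() == StreamingHandleErrorMode::STREAM;
chassert(storage.getVirtualsPtr()->has("_raw_message") == streams_errors);
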
SettingsChanges StorageKafka::createSettingsAdjustments()
{
SettingsChanges result;
@ -1194,43 +1218,4 @@ void registerStorageKafka(StorageFactory & factory)
factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}
NamesAndTypesList StorageKafka::getVirtuals() const
{
auto result = NamesAndTypesList{
{"_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"_key", std::make_shared<DataTypeString>()},
{"_offset", std::make_shared<DataTypeUInt64>()},
{"_partition", std::make_shared<DataTypeUInt64>()},
{"_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
{"_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3))},
{"_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
{"_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}};
if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM)
{
result.push_back({"_raw_message", std::make_shared<DataTypeString>()});
result.push_back({"_error", std::make_shared<DataTypeString>()});
}
return result;
}
Names StorageKafka::getVirtualColumnNames() const
{
auto result = Names {
"_topic",
"_key",
"_offset",
"_partition",
"_timestamp",
"_timestamp_ms",
"_headers.name",
"_headers.value",
};
if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM)
{
result.push_back({"_raw_message"});
result.push_back({"_error"});
}
return result;
}
}

View File

@ -74,8 +74,6 @@ public:
const auto & getFormatName() const { return format_name; }
NamesAndTypesList getVirtuals() const override;
Names getVirtualColumnNames() const;
StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; }
struct SafeConsumers
@ -158,6 +156,8 @@ private:
bool checkDependencies(const StorageID & table_id);
void cleanConsumers();
static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
};
}

View File

@ -1,9 +0,0 @@
#include <Storages/LightweightDeleteDescription.h>
#include <DataTypes/DataTypesNumber.h>
namespace DB
{
const NameAndTypePair LightweightDeleteDescription::FILTER_COLUMN {"_row_exists", std::make_shared<DataTypeUInt8>()};
}

View File

@ -1,13 +0,0 @@
#pragma once
#include <Core/NamesAndTypes.h>
#include "Storages/TTLDescription.h"
namespace DB
{
struct LightweightDeleteDescription
{
static const NameAndTypePair FILTER_COLUMN;
};
}

View File

@ -218,6 +218,10 @@ StorageLiveView::StorageLiveView(
setInMemoryMetadata(storage_metadata);
VirtualColumnsDescription virtuals;
virtuals.addEphemeral("_version", std::make_shared<DataTypeUInt64>(), "");
setVirtuals(std::move(virtuals));
if (!query.select)
throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName());
@ -236,13 +240,6 @@ StorageLiveView::~StorageLiveView()
shutdown(false);
}
NamesAndTypesList StorageLiveView::getVirtuals() const
{
return NamesAndTypesList{
NameAndTypePair("_version", std::make_shared<DataTypeUInt64>())
};
}
void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const
{
auto table_id = getStorageID();

View File

@ -74,8 +74,6 @@ public:
bool supportsFinal() const override { return true; }
NamesAndTypesList getVirtuals() const override;
void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override;
void drop() override;

View File

@ -1451,6 +1451,11 @@ bool IMergeTreeDataPart::supportLightweightDeleteMutate() const
parent_part == nullptr && projection_parts.empty();
}
bool IMergeTreeDataPart::hasLightweightDelete() const
{
return columns.contains(RowExistsColumn::name);
}
void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const
{
TransactionID expected_tid = txn ? txn->tid : Tx::PrehistoricTID;

View File

@ -1,12 +1,12 @@
#pragma once
#include <unordered_map>
#include <IO/WriteSettings.h>
#include <Core/Block.h>
#include <base/types.h>
#include <base/defines.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/MergeTree/AlterConversions.h>
#include <Storages/MergeTree/IDataPartStorage.h>
#include <Storages/MergeTree/MergeTreeDataPartState.h>
@ -48,6 +48,8 @@ class MarkCache;
class UncompressedCache;
class MergeTreeTransaction;
struct MergeTreeReadTaskInfo;
using MergeTreeReadTaskInfoPtr = std::shared_ptr<const MergeTreeReadTaskInfo>;
enum class DataPartRemovalState
{
@ -69,6 +71,7 @@ public:
using Checksums = MergeTreeDataPartChecksums;
using Checksum = MergeTreeDataPartChecksums::Checksum;
using ValueSizeMap = std::map<std::string, double>;
using VirtualFields = std::unordered_map<String, Field>;
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
@ -95,6 +98,7 @@ public:
const NamesAndTypesList & columns_,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,
@ -493,7 +497,7 @@ public:
bool supportLightweightDeleteMutate() const;
/// True if here is lightweight deleted mask file in part.
bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
bool hasLightweightDelete() const;
void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);

View File

@ -1,7 +1,8 @@
#include <Storages/MergeTree/IMergeTreeReader.h>
#include <Storages/MergeTree/MergeTreeReadTask.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNested.h>
#include <Common/escapeForFileName.h>
#include <Compression/CachedCompressedReadBuffer.h>
#include <Columns/ColumnArray.h>
@ -19,12 +20,13 @@ namespace
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
IMergeTreeReader::IMergeTreeReader(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
const NamesAndTypesList & columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,
@ -47,6 +49,7 @@ IMergeTreeReader::IMergeTreeReader(
, part_columns(data_part_info_for_read->isWidePart()
? data_part_info_for_read->getColumnsDescriptionWithCollectedNested()
: data_part_info_for_read->getColumnsDescription())
, virtual_fields(virtual_fields_)
{
columns_to_read.reserve(requested_columns.size());
serializations.reserve(requested_columns.size());
@ -63,7 +66,49 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints()
return avg_value_size_hints;
}
void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const
void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const
{
chassert(columns.size() == requested_columns.size());
const auto * loaded_part_info = typeid_cast<const LoadedMergeTreeDataPartInfoForReader *>(data_part_info_for_read.get());
if (!loaded_part_info)
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader");
const auto & data_part = loaded_part_info->getDataPart();
const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns();
const auto & virtual_columns = storage_snapshot->virtual_columns;
auto it = requested_columns.begin();
for (size_t pos = 0; pos < columns.size(); ++pos, ++it)
{
if (columns[pos] || storage_columns.has(it->name))
continue;
auto virtual_column = virtual_columns->tryGet(it->name);
if (!virtual_column)
continue;
if (!it->type->equals(*virtual_column->type))
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Data type for virtual column {} mismatched. Requested type: {}, virtual column type: {}",
it->name, it->type->getName(), virtual_column->type->getName());
}
if (it->name == "_part_offset")
throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name);
Field field;
if (auto field_it = virtual_fields.find(it->name); field_it != virtual_fields.end())
field = field_it->second;
else
field = getFieldForConstVirtualColumn(it->name, *data_part);
columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst();
}
}
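
For a const virtual the reader needs only a single Field per part: either one precomputed at an earlier stage and passed in through virtual_fields, or one derived from the part itself via getFieldForConstVirtualColumn(). Expanding that field into a column is the usual const-column trick; a sketch for a String-typed virtual with an assumed part name:

/// Sketch: materialize a constant virtual (e.g. _part) for `rows` rows.
Field value = String("all_1_1_0"); /// assumed value
auto type = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());

/// createColumnConst() wraps the field; convertToFullColumnIfConst() expands it
/// so that downstream filtering can treat it like any column read from disk.
ColumnPtr column = type->createColumnConst(rows, value)->convertToFullColumnIfConst();
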
void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const
{
try
{
@ -72,7 +117,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e
res_columns, num_rows,
Nested::convertToSubcolumns(requested_columns),
Nested::convertToSubcolumns(available_columns),
partially_read_columns, storage_snapshot->metadata, block_number);
partially_read_columns, storage_snapshot->metadata);
should_evaluate_missing_defaults = std::any_of(
res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; });

View File

@ -9,8 +9,6 @@
namespace DB
{
class IDataType;
/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
@ -18,11 +16,13 @@ class IMergeTreeReader : private boost::noncopyable
{
public:
using ValueSizeMap = std::map<std::string, double>;
using VirtualFields = std::unordered_map<String, Field>;
using DeserializeBinaryBulkStateMap = std::map<std::string, ISerialization::DeserializeBinaryBulkStatePtr>;
IMergeTreeReader(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
const NamesAndTypesList & columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,
@ -42,10 +42,13 @@ public:
const ValueSizeMap & getAvgValueSizeHints() const;
/// Add virtual columns that are not present in the block.
void fillVirtualColumns(Columns & columns, size_t rows) const;
/// Add columns from ordered_names that are not present in the block.
/// Missing columns are added in the order specified by ordered_names.
/// num_rows is needed in case if all res_columns are nullptr.
void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const;
void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const;
/// Evaluate defaulted columns if necessary.
void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const;
@ -113,6 +116,9 @@ private:
/// Actual columns description in part.
const ColumnsDescription & part_columns;
/// Fields of virtual columns that were filled in previous stages.
VirtualFields virtual_fields;
};
}
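
Order matters in the read path: fillVirtualColumns() runs right after the physical read, so the virtual slots are already populated when fillMissingColumns() decides which columns still need defaults. A condensed sketch of the per-chunk sequence, mirroring the MergeTreeRangeReader changes further below:

reader->fillVirtualColumns(columns, num_rows); /// const virtuals from the part / virtual_fields

bool should_evaluate_missing_defaults = false;
reader->fillMissingColumns(columns, should_evaluate_missing_defaults, num_rows);
if (should_evaluate_missing_defaults)
    reader->evaluateMissingDefaults({}, columns); /// DEFAULT/MATERIALIZED expressions
reader->performRequiredConversions(columns);      /// on-the-fly ALTER conversions
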

View File

@ -8,7 +8,6 @@
#include <Common/logger_useful.h>
#include <Common/ActionBlocker.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <DataTypes/ObjectUtils.h>
@ -1075,14 +1074,18 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()
if (global_ctx->deduplicate)
{
/// We don't want to deduplicate by block number column
/// so if deduplicate_by_columns is empty, add all columns except _block_number
if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty())
const auto & virtuals = *global_ctx->data->getVirtualsPtr();
/// We don't want to deduplicate by persistent virtual columns.
/// If deduplicate_by_columns is empty, add all columns except virtuals.
if (global_ctx->deduplicate_by_columns.empty())
{
for (const auto & col : global_ctx->merging_column_names)
for (const auto & column_name : global_ctx->merging_column_names)
{
if (col != BlockNumberColumn::name)
global_ctx->deduplicate_by_columns.emplace_back(col);
if (virtuals.tryGet(column_name, VirtualsKind::Persistent))
continue;
global_ctx->deduplicate_by_columns.emplace_back(column_name);
}
}

View File

@ -15,7 +15,7 @@
#include <QueryPipeline/QueryPipeline.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/ColumnSizeEstimator.h>
#include <Storages/MergeTree/FutureMergedMutatedPart.h>
#include <Storages/MergeTree/IExecutableTask.h>

View File

@ -6,6 +6,7 @@
#include <Core/NamesAndTypes.h>
#include <Common/checkStackSize.h>
#include <Common/typeid_cast.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Columns/ColumnConst.h>
#include <IO/WriteBufferFromString.h>
@ -106,16 +107,14 @@ NameSet injectRequiredColumns(
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical)
.withExtendedObjects()
.withSystemColumns();
if (with_subcolumns)
options.withSubcolumns();
.withVirtuals()
.withSubcolumns(with_subcolumns);
for (size_t i = 0; i < columns.size(); ++i)
{
/// We are going to fetch only physical columns and system columns
/// We are going to fetch physical columns and system columns first
if (!storage_snapshot->tryGetColumn(options, columns[i]))
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]);
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]);
have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
columns[i], storage_snapshot, alter_conversions,
@ -258,11 +257,10 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum
}
MergeTreeReadTask::Columns getReadTaskColumns(
MergeTreeReadTaskColumns getReadTaskColumns(
const IMergeTreeDataPartInfoForReader & data_part_info_for_reader,
const StorageSnapshotPtr & storage_snapshot,
const Names & required_columns,
const Names & system_columns,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings,
const MergeTreeReaderSettings & reader_settings,
@ -270,28 +268,30 @@ MergeTreeReadTask::Columns getReadTaskColumns(
{
Names column_to_read_after_prewhere = required_columns;
/// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part
for (const auto & name : system_columns)
if (data_part_info_for_reader.getColumns().contains(name))
column_to_read_after_prewhere.push_back(name);
/// Inject columns required for defaults evaluation
injectRequiredColumns(
data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere);
MergeTreeReadTask::Columns result;
MergeTreeReadTaskColumns result;
auto options = GetColumnsOptions(GetColumnsOptions::All)
.withExtendedObjects()
.withSystemColumns();
.withVirtuals()
.withSubcolumns(with_subcolumns);
if (with_subcolumns)
options.withSubcolumns();
static const NameSet columns_to_read_at_first_step = {"_part_offset"};
NameSet columns_from_previous_steps;
auto add_step = [&](const PrewhereExprStep & step)
{
Names step_column_names;
if (columns_from_previous_steps.empty())
{
for (const auto & required_column : required_columns)
if (columns_to_read_at_first_step.contains(required_column))
step_column_names.push_back(required_column);
}
/// Computation results from previous steps might be used in the current step as well. In such a case these
/// computed columns will be present in the current step inputs. They don't need to be read from the disk so
/// exclude them from the list of columns to read. This filtering must be done before injecting required

View File

@ -1,6 +1,5 @@
#pragma once
#include <optional>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreeReadTask.h>
@ -22,11 +21,10 @@ NameSet injectRequiredColumns(
bool with_subcolumns,
Names & columns);
MergeTreeReadTask::Columns getReadTaskColumns(
MergeTreeReadTaskColumns getReadTaskColumns(
const IMergeTreeDataPartInfoForReader & data_part_info_for_reader,
const StorageSnapshotPtr & storage_snapshot,
const Names & required_columns,
const Names & system_columns,
const PrewhereInfoPtr & prewhere_info,
const ExpressionActionsSettings & actions_settings,
const MergeTreeReaderSettings & reader_settings,

View File

@ -67,7 +67,7 @@
#include <Processors/QueryPlan/QueryIdHolder.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Storages/AlterCommands.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/Freeze.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
@ -430,6 +430,29 @@ MergeTreeData::MergeTreeData(
};
}
VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMetadata & metadata)
{
VirtualColumnsDescription desc;
desc.addEphemeral("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of part");
desc.addEphemeral("_part_index", std::make_shared<DataTypeUInt64>(), "Sequential index of the part in the query result");
desc.addEphemeral("_part_uuid", std::make_shared<DataTypeUUID>(), "Unique part identifier (if enabled MergeTree setting assign_part_uuids)");
desc.addEphemeral("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of partition");
desc.addEphemeral("_sample_factor", std::make_shared<DataTypeFloat64>(), "Sample factor (from the query)");
desc.addEphemeral("_part_offset", std::make_shared<DataTypeUInt64>(), "Number of row in the part");
if (metadata.hasPartitionKey())
{
auto partition_types = metadata.partition_key.sample_block.getDataTypes();
desc.addEphemeral("_partition_value", std::make_shared<DataTypeTuple>(std::move(partition_types)), "Value (a tuple) of a PARTITION BY expression");
}
desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that shows whether the row exists or is deleted");
desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original number of the block that was assigned at insert");
return desc;
}
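
The ephemeral/persistent split is what lets the rest of the code stop hard-coding _row_exists and _block_number by name: persistent virtuals are written into parts (optionally with their own codec, as for _block_number), ephemeral ones exist only at query time. Callers filter by kind, as in this sketch (the storage variable is illustrative):

auto virtuals = storage.getVirtualsPtr();

/// Set only for _row_exists and _block_number; empty for _part, _part_offset, ...
/// This is how merges and checkPartDynamicColumns() below recognize on-disk virtuals.
auto persistent = virtuals->tryGet(BlockNumberColumn::name, VirtualsKind::Persistent);

/// Plain tryGet() sees both kinds.
auto any = virtuals->tryGet("_part_offset");
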
StoragePolicyPtr MergeTreeData::getStoragePolicy() const
{
auto settings = getSettings();
@ -677,6 +700,7 @@ void MergeTreeData::setProperties(
{
checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context);
setInMemoryMetadata(new_metadata);
setVirtuals(createVirtuals(new_metadata));
}
namespace
@ -1002,73 +1026,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
/// TODO Checks for Graphite mode.
}
const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"};
DataTypePtr MergeTreeData::getPartitionValueType() const
Block MergeTreeData::getHeaderWithVirtualsForFilter() const
{
DataTypePtr partition_value_type;
auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes();
if (partition_types.empty())
partition_value_type = std::make_shared<DataTypeUInt8>();
else
partition_value_type = std::make_shared<DataTypeTuple>(std::move(partition_types));
return partition_value_type;
Block header;
auto virtuals_desc = getVirtualsPtr();
for (const auto & name : virtuals_useful_for_filter)
if (auto column = virtuals_desc->tryGet(name))
header.insert({column->type->createColumn(), column->type, name});
return header;
}
Block MergeTreeData::getSampleBlockWithVirtualColumns() const
Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const
{
DataTypePtr partition_value_type = getPartitionValueType();
return {
ColumnWithTypeAndName(
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_part"),
ColumnWithTypeAndName(
DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
"_partition_id"),
ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared<DataTypeUUID>(), "_part_uuid"),
ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")};
}
auto block = getHeaderWithVirtualsForFilter();
Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty) const
{
auto block = getSampleBlockWithVirtualColumns();
MutableColumns columns = block.mutateColumns();
auto & part_column = columns[0];
auto & partition_id_column = columns[1];
auto & part_uuid_column = columns[2];
auto & partition_value_column = columns[3];
bool has_partition_value = typeid_cast<const ColumnTuple *>(partition_value_column.get());
for (const auto & part_or_projection : parts)
{
if (ignore_empty && part_or_projection->isEmpty())
continue;
const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get();
part_column->insert(part->name);
partition_id_column->insert(part->info.partition_id);
part_uuid_column->insert(part->uuid);
Tuple tuple(part->partition.value.begin(), part->partition.value.end());
if (has_partition_value)
partition_value_column->insert(tuple);
if (one_part)
const auto * part = part_or_projection->isProjectionPart()
? part_or_projection->getParentPart()
: part_or_projection.get();
for (auto & column : block)
{
part_column = ColumnConst::create(std::move(part_column), 1);
partition_id_column = ColumnConst::create(std::move(partition_id_column), 1);
part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1);
if (has_partition_value)
partition_value_column = ColumnConst::create(std::move(partition_value_column), 1);
break;
auto field = getFieldForConstVirtualColumn(column.name, *part);
column.column->assumeMutableRef().insert(field);
}
}
block.setColumns(std::move(columns));
if (!has_partition_value)
block.erase("_partition_value");
return block;
}
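
The resulting block has one row per (non-empty) part and only the virtual columns this table actually provides; for example, _partition_value appears only when a partition key exists. Part pruning then becomes uniform, as in this sketch assembled from the calls used below and in filterPartsByVirtualColumns (assuming a query context and a predicate DAG extracted from the query):

Block sample = data.getHeaderWithVirtualsForFilter();
if (auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample))
{
    auto block = data.getBlockWithVirtualsForFilter(parts);
    VirtualColumnUtils::filterBlockWithDAG(dag, block, context);
    /// Names of the parts whose virtual values satisfy the predicate:
    auto part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_part");
}
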
@ -1077,13 +1066,14 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const
{
if (parts.empty())
return 0u;
return 0;
auto metadata_snapshot = getInMemoryMetadataPtr();
Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]});
auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);
// Generate valid expressions for filtering
/// Generate valid expressions for filtering
bool valid = true;
for (const auto * input : filter_dag->getInputs())
if (!virtual_columns_block.has(input->result_name))
@ -1096,7 +1086,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
std::unordered_set<String> part_values;
if (valid)
{
virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
virtual_columns_block = getBlockWithVirtualsForFilter(parts);
VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
if (part_values.empty())
@ -3658,6 +3648,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
{
auto metadata_snapshot = getInMemoryMetadataPtr();
const auto & columns = metadata_snapshot->getColumns();
auto virtuals = getVirtualsPtr();
if (!hasDynamicSubcolumns(columns))
return;
@ -3665,7 +3656,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
const auto & part_columns = part->getColumns();
for (const auto & part_column : part_columns)
{
if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name)
if (virtuals->has(part_column.name))
continue;
auto storage_column = columns.getPhysical(part_column.name);
@ -6669,14 +6660,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name;
NameSet required_columns_set(required_columns.begin(), required_columns.end());
if (required_columns_set.contains("_partition_value") && !typeid_cast<const DataTypeTuple *>(getPartitionValueType().get()))
{
throw Exception(
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
"Missing column `_partition_value` because there is no partition column in table {}",
getStorageID().getTableName());
}
if (!primary_key_max_column_name.empty())
need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name);
@ -6702,11 +6685,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
};
Block virtual_columns_block;
auto virtual_block = getSampleBlockWithVirtualColumns();
auto virtual_block = getHeaderWithVirtualsForFilter();
bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); });
if (has_virtual_column || filter_dag)
{
virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */);
virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true);
if (virtual_columns_block.rows() == 0)
return {};
}
@ -7952,21 +7935,6 @@ AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartP
return result;
}
NamesAndTypesList MergeTreeData::getVirtuals() const
{
return NamesAndTypesList{
NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
NameAndTypePair("_partition_value", getPartitionValueType()),
NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
LightweightDeleteDescription::FILTER_COLUMN,
NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type),
};
}
size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const
{
return getContext()->getMergeList().getMergesWithTTLCount();

View File

@ -444,8 +444,6 @@ public:
bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); }
NamesAndTypesList getVirtuals() const override;
/// Snapshot for MergeTree contains the current set of data parts
/// at the moment of the start of query.
struct SnapshotData : public StorageSnapshot::Data
@ -988,15 +986,13 @@ public:
void removeQueryId(const String & query_id) const;
void removeQueryIdNoLock(const String & query_id) const TSA_REQUIRES(query_id_set_mutex);
/// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty.
DataTypePtr getPartitionValueType() const;
static const Names virtuals_useful_for_filter;
/// Construct a sample block of virtual columns.
Block getSampleBlockWithVirtualColumns() const;
Block getHeaderWithVirtualsForFilter() const;
/// Construct a block consisting only of possible virtual columns for part pruning.
/// If one_part is true, fill in at most one part.
Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const;
Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const;
/// In merge tree we do inserts with several steps. One of them:
/// X. write part to temporary directory with some temp name
@ -1087,6 +1083,8 @@ public:
bool initializeDiskOnConfigChange(const std::set<String> & /*new_added_disks*/) override;
static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata);
protected:
friend class IMergeTreeDataPart;
friend class MergeTreeDataMergerMutator;

View File

@ -5,7 +5,6 @@
#include <Interpreters/Context.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Compression/CompressedReadBufferFromFile.h>
#include <Storages/BlockNumberColumn.h>
namespace DB
@ -33,6 +32,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader(
const NamesAndTypesList & columns_to_read,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,
@ -41,12 +41,21 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader(
const ReadBufferFromFileBase::ProfileCallback & profile_callback) const
{
auto read_info = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(shared_from_this(), alter_conversions);
auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr;
auto * load_marks_threadpool
= reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr;
return std::make_unique<MergeTreeReaderCompact>(
read_info, columns_to_read, storage_snapshot, uncompressed_cache,
mark_cache, mark_ranges, reader_settings, load_marks_threadpool,
avg_value_size_hints, profile_callback);
read_info,
columns_to_read,
virtual_fields,
storage_snapshot,
uncompressed_cache,
mark_cache,
mark_ranges,
reader_settings,
load_marks_threadpool,
avg_value_size_hints,
profile_callback);
}
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
@ -66,12 +75,6 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
ordered_columns_list.sort([this](const auto & lhs, const auto & rhs)
{ return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); });
/// _block_number column is not added by user, but is persisted in a part after merge
/// If _block_number is not present in the parts to be merged, then it won't have a position
/// So check if its not present and add it at the end
if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name))
ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type});
return std::make_unique<MergeTreeDataPartWriterCompact>(
shared_from_this(), ordered_columns_list, metadata_snapshot,
indices_to_recalc, stats_to_recalc_, getMarksFileExtension(),

View File

@ -32,6 +32,7 @@ public:
const NamesAndTypesList & columns,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,

View File

@ -33,6 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
const NamesAndTypesList & columns_to_read,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * /* uncompressed_cache */,
MarkCache * /* mark_cache */,
const AlterConversionsPtr & alter_conversions,
@ -44,7 +45,13 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());
return std::make_unique<MergeTreeReaderInMemory>(
read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings);
read_info,
ptr,
columns_to_read,
virtual_fields,
storage_snapshot,
mark_ranges,
reader_settings);
}
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(

View File

@ -21,6 +21,7 @@ public:
const NamesAndTypesList & columns,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,

View File

@ -31,6 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
const NamesAndTypesList & columns_to_read,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,
@ -40,10 +41,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
{
auto read_info = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(shared_from_this(), alter_conversions);
return std::make_unique<MergeTreeReaderWide>(
read_info, columns_to_read,
storage_snapshot, uncompressed_cache,
mark_cache, mark_ranges, reader_settings,
avg_value_size_hints, profile_callback);
read_info,
columns_to_read,
virtual_fields,
storage_snapshot,
uncompressed_cache,
mark_cache,
mark_ranges,
reader_settings,
avg_value_size_hints,
profile_callback);
}
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter(

View File

@ -27,6 +27,7 @@ public:
const NamesAndTypesList & columns,
const StorageSnapshotPtr & storage_snapshot,
const MarkRanges & mark_ranges,
const VirtualFields & virtual_fields,
UncompressedCache * uncompressed_cache,
MarkCache * mark_cache,
const AlterConversionsPtr & alter_conversions,

View File

@ -1,12 +1,9 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
#include <Storages/BlockNumberColumn.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
@ -55,14 +52,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
marks_source_hashing = std::make_unique<HashingWriteBuffer>(*marks_compressor);
}
const auto & storage_columns = metadata_snapshot->getColumns();
auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot);
for (const auto & column : columns_list)
{
ASTPtr compression;
if (column.name == BlockNumberColumn::name)
compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
else
compression = storage_columns.getCodecDescOrDefault(column.name, default_codec);
auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
addStreams(column, compression);
}
}

View File

@ -6,12 +6,10 @@
#include <Common/escapeForFileName.h>
#include <Columns/ColumnSparse.h>
#include <Common/logger_useful.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/ColumnsDescription.h>
namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
namespace ErrorCodes
{
@ -91,15 +89,11 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
indices_to_recalc_, stats_to_recalc_, marks_file_extension_,
default_codec_, settings_, index_granularity_)
{
const auto & columns = metadata_snapshot->getColumns();
for (const auto & it : columns_list)
auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot);
for (const auto & column : columns_list)
{
ASTPtr compression;
if (it.name == BlockNumberColumn::name)
compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
else
compression = columns.getCodecDescOrDefault(it.name, default_codec);
addStreams(it, compression);
auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
addStreams(column, compression);
}
}

View File

@ -46,7 +46,6 @@
#include <Functions/IFunction.h>
#include <IO/WriteBufferFromOStream.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h>
namespace CurrentMetrics
@ -69,7 +68,6 @@ namespace ErrorCodes
extern const int CANNOT_PARSE_TEXT;
extern const int TOO_MANY_PARTITIONS;
extern const int DUPLICATED_PART_UUIDS;
extern const int NO_SUCH_COLUMN_IN_TABLE;
}
@ -166,7 +164,6 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling(
const MergeTreeData & data,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr context,
bool sample_factor_column_queried,
LoggerPtr log)
{
const Settings & settings = context->getSettingsRef();
@ -296,7 +293,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling(
if (sampling.use_sampling)
{
if (sample_factor_column_queried && relative_sample_size != RelativeSize(0))
if (relative_sample_size != RelativeSize(0))
sampling.used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
RelativeSize size_of_universum = 0;
@ -483,12 +480,13 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
{
if (!filter_dag)
return {};
auto sample = data.getSampleBlockWithVirtualColumns();
auto sample = data.getHeaderWithVirtualsForFilter();
auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
if (!dag)
return {};
auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */);
auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts);
VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context);
return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
}
@ -868,69 +866,6 @@ std::shared_ptr<QueryIdHolder> MergeTreeDataSelectExecutor::checkLimits(
return nullptr;
}
static void selectColumnNames(
const Names & column_names_to_return,
const MergeTreeData & data,
Names & real_column_names,
Names & virt_column_names,
bool & sample_factor_column_queried)
{
sample_factor_column_queried = false;
for (const String & name : column_names_to_return)
{
if (name == "_part")
{
virt_column_names.push_back(name);
}
else if (name == "_part_index")
{
virt_column_names.push_back(name);
}
else if (name == "_partition_id")
{
virt_column_names.push_back(name);
}
else if (name == "_part_offset")
{
virt_column_names.push_back(name);
}
else if (name == LightweightDeleteDescription::FILTER_COLUMN.name)
{
virt_column_names.push_back(name);
}
else if (name == BlockNumberColumn::name)
{
virt_column_names.push_back(name);
}
else if (name == "_part_uuid")
{
virt_column_names.push_back(name);
}
else if (name == "_partition_value")
{
if (!typeid_cast<const DataTypeTuple *>(data.getPartitionValueType().get()))
{
throw Exception(
ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
"Missing column `_partition_value` because there is no partition column in table {}",
data.getStorageID().getTableName());
}
virt_column_names.push_back(name);
}
else if (name == "_sample_factor")
{
sample_factor_column_queried = true;
virt_column_names.push_back(name);
}
else
{
real_column_names.push_back(name);
}
}
}
ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead(
MergeTreeData::DataPartsVector parts,
const Names & column_names_to_return,
@ -944,14 +879,6 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
if (total_parts == 0)
return std::make_shared<ReadFromMergeTree::AnalysisResult>();
Names real_column_names;
Names virt_column_names;
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
bool sample_factor_column_queried = false;
selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried);
std::optional<ReadFromMergeTree::Indexes> indexes;
/// NOTE: We don't need alter_conversions because the returned analysis_result is only used for:
/// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions.
@ -964,8 +891,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
num_streams,
max_block_numbers_to_read,
data,
real_column_names,
sample_factor_column_queried,
column_names_to_return,
log,
indexes);
}
@ -992,27 +918,16 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts(
else if (parts.empty())
return {};
Names real_column_names;
Names virt_column_names;
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
bool sample_factor_column_queried = false;
selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried);
return std::make_unique<ReadFromMergeTree>(
std::move(parts),
std::move(alter_conversions),
column_names_to_return,
real_column_names,
virt_column_names,
data,
query_info,
storage_snapshot,
context,
max_block_size,
num_streams,
sample_factor_column_queried,
max_block_numbers_to_read,
log,
merge_tree_select_result_ptr,

View File

@ -213,7 +213,6 @@ public:
const MergeTreeData & data,
const StorageMetadataPtr & metadata_snapshot,
ContextPtr context,
bool sample_factor_column_queried,
LoggerPtr log);
/// Check query limits: max_partitions_to_read, max_concurrent_queries.

View File

@ -9,6 +9,7 @@
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
#include <Storages/MergeTree/MergeTreeRangeReader.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <base/getThreadId.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/logger_useful.h>
@ -108,22 +109,22 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get()
MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
actions_settings_,
reader_settings_,
column_names_,
virtual_column_names_,
settings_,
context_)
, WithContext(context_)
@ -375,7 +376,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics()
update_stat_for_column(column.name);
if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete())
update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name);
update_stat_for_column(RowExistsColumn::name);
for (const auto & pre_columns : read_info.task_columns.pre_columns)
for (const auto & column : pre_columns)

View File

@ -18,12 +18,12 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo
public:
MergeTreePrefetchedReadPool(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);
@ -67,7 +67,7 @@ private:
struct ThreadTask
{
using InfoPtr = MergeTreeReadTask::InfoPtr;
using InfoPtr = MergeTreeReadTaskInfoPtr;
ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_)
: read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_)

View File

@ -362,7 +362,7 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRo
}
}
/// The main invariant of the data in the read result is that he number of rows is
/// The main invariant of the data in the read result is that the number of rows is
/// either equal to total_rows_per_granule (if filter has not been applied) or to the number of
/// 1s in the filter (if filter has been applied).
void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const
@ -803,8 +803,7 @@ MergeTreeRangeReader::MergeTreeRangeReader(
IMergeTreeReader * merge_tree_reader_,
MergeTreeRangeReader * prev_reader_,
const PrewhereExprStep * prewhere_info_,
bool last_reader_in_chain_,
const Names & non_const_virtual_column_names_)
bool last_reader_in_chain_)
: merge_tree_reader(merge_tree_reader_)
, index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity()))
, prev_reader(prev_reader_)
@ -821,21 +820,6 @@ MergeTreeRangeReader::MergeTreeRangeReader(
result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
}
for (const auto & column_name : non_const_virtual_column_names_)
{
if (result_sample_block.has(column_name))
continue;
non_const_virtual_column_names.push_back(column_name);
if (column_name == "_part_offset" && !prev_reader)
{
/// _part_offset column is filled by the first reader.
read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), column_name));
result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), column_name));
}
}
if (prewhere_info)
{
const auto & step = *prewhere_info;
@ -1001,6 +985,8 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar
if (num_read_rows == 0)
num_read_rows = read_result.num_rows;
merge_tree_reader->fillVirtualColumns(columns, num_read_rows);
/// fillMissingColumns() must be called after reading but before any filtering because
/// some columns (e.g. arrays) might be only partially filled and thus not valid, and
/// fillMissingColumns() fixes this.
@ -1050,23 +1036,23 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar
return read_result;
{
/// Physical columns go first and then some virtual columns follow
size_t physical_columns_count = merge_tree_reader->getColumns().size();
Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count);
size_t columns_count = merge_tree_reader->getColumns().size();
Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count);
merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows);
bool should_evaluate_missing_defaults;
merge_tree_reader->fillMissingColumns(physical_columns, should_evaluate_missing_defaults, read_result.num_rows);
merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows);
/// If some columns absent in part, then evaluate default values
if (should_evaluate_missing_defaults)
merge_tree_reader->evaluateMissingDefaults({}, physical_columns);
merge_tree_reader->evaluateMissingDefaults({}, columns);
/// If result not empty, then apply on-fly alter conversions if any required
if (!prewhere_info || prewhere_info->perform_alter_conversions)
merge_tree_reader->performRequiredConversions(physical_columns);
merge_tree_reader->performRequiredConversions(columns);
for (size_t i = 0; i < physical_columns.size(); ++i)
read_result.columns[i] = std::move(physical_columns[i]);
for (size_t i = 0; i < columns.size(); ++i)
read_result.columns[i] = std::move(columns[i]);
}
size_t total_bytes = 0;
@ -1158,12 +1144,17 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t
result.adjustLastGranule();
if (read_sample_block.has("_part_offset"))
fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
{
size_t pos = read_sample_block.getPositionByName("_part_offset");
chassert(pos < result.columns.size());
chassert(result.columns[pos] == nullptr);
result.columns[pos] = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
}
return result;
}
void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset)
ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset)
{
size_t num_rows = result.numReadRows();
@ -1189,7 +1180,7 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead
*pos++ = start_part_offset++;
}
result.columns.emplace_back(std::move(column));
return column;
}
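A std-only sketch of what createPartOffsetColumn() computes (the names and the two-segment layout are illustrative assumptions): the column holds monotonically increasing row numbers within the part, first for rows carried over from the previous call and then for the newly started range.
#include <cstdint>
#include <vector>
std::vector<uint64_t> createPartOffsets(
    uint64_t leading_begin, uint64_t leading_end, // rows finished from the previous call
    uint64_t range_begin, uint64_t range_end)     // rows started by this call
{
    std::vector<uint64_t> offsets;
    offsets.reserve((leading_end - leading_begin) + (range_end - range_begin));
    for (uint64_t pos = leading_begin; pos < leading_end; ++pos)
        offsets.push_back(pos);
    for (uint64_t pos = range_begin; pos < range_end; ++pos)
        offsets.push_back(pos);
    return offsets;
}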
Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows)
@ -1203,7 +1194,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si
if (result.rows_per_granule.empty())
{
/// If zero rows were read on prev step, than there is no more rows to read.
/// If zero rows were read on the previous step, there are no more rows to read.
/// Last granule may have less rows than index_granularity, so finish reading manually.
stream.finish();
return columns;

View File

@ -101,8 +101,7 @@ public:
IMergeTreeReader * merge_tree_reader_,
MergeTreeRangeReader * prev_reader_,
const PrewhereExprStep * prewhere_info_,
bool last_reader_in_chain_,
const Names & non_const_virtual_column_names);
bool last_reader_in_chain_);
MergeTreeRangeReader() = default;
@ -309,7 +308,7 @@ private:
ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges);
Columns continueReadingChain(const ReadResult & result, size_t & num_rows);
void executePrewhereActionsAndFilterColumns(ReadResult & result) const;
void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset);
ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset);
IMergeTreeReader * merge_tree_reader = nullptr;
const MergeTreeIndexGranularity * index_granularity = nullptr;
@ -323,7 +322,6 @@ private:
bool last_reader_in_chain = false;
bool is_initialized = false;
Names non_const_virtual_column_names;
LoggerPtr log = getLogger("MergeTreeRangeReader");
};

View File

@ -35,22 +35,22 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column
MergeTreeReadPool::MergeTreeReadPool(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
actions_settings_,
reader_settings_,
column_names_,
virtual_column_names_,
settings_,
context_)
, min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read)

View File

@ -26,12 +26,12 @@ public:
MergeTreeReadPool(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);

View File

@ -8,21 +8,21 @@ namespace DB
MergeTreeReadPoolBase::MergeTreeReadPoolBase(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & pool_settings_,
const ContextPtr & context_)
: parts_ranges(std::move(parts_))
, shared_virtual_fields(std::move(shared_virtual_fields_))
, storage_snapshot(storage_snapshot_)
, prewhere_info(prewhere_info_)
, actions_settings(actions_settings_)
, reader_settings(reader_settings_)
, column_names(column_names_)
, virtual_column_names(virtual_column_names_)
, pool_settings(pool_settings_)
, owned_mark_cache(context_->getGlobalContext()->getMarkCache())
, owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr)
@ -45,7 +45,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
assertSortedAndNonIntersecting(part_with_ranges.ranges);
#endif
MergeTreeReadTask::Info read_task_info;
MergeTreeReadTaskInfo read_task_info;
read_task_info.data_part = part_with_ranges.data_part;
read_task_info.part_index_in_query = part_with_ranges.part_index_in_query;
@ -54,9 +54,16 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions);
read_task_info.task_columns = getReadTaskColumns(
part_info, storage_snapshot, column_names, virtual_column_names,
prewhere_info, actions_settings,
reader_settings, /*with_subcolumns=*/ true);
part_info,
storage_snapshot,
column_names,
prewhere_info,
actions_settings,
reader_settings,
/*with_subcolumns=*/true);
read_task_info.const_virtual_fields = shared_virtual_fields;
read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query);
if (pool_settings.preferred_block_size_bytes > 0)
{
@ -76,7 +83,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
}
is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk());
per_part_infos.push_back(std::make_shared<MergeTreeReadTask::Info>(std::move(read_task_info)));
per_part_infos.push_back(std::make_shared<MergeTreeReadTaskInfo>(std::move(read_task_info)));
}
}
@ -98,7 +105,7 @@ std::vector<size_t> MergeTreeReadPoolBase::getPerPartSumMarks() const
}
MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask(
MergeTreeReadTask::InfoPtr read_info,
MergeTreeReadTaskInfoPtr read_info,
MarkRanges ranges,
MergeTreeReadTask * previous_task) const
{

View File

@ -23,12 +23,12 @@ public:
MergeTreeReadPoolBase(
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);
@ -37,12 +37,12 @@ public:
protected:
/// Initialized in constructor
const RangesInDataParts parts_ranges;
const VirtualFields shared_virtual_fields;
const StorageSnapshotPtr storage_snapshot;
const PrewhereInfoPtr prewhere_info;
const ExpressionActionsSettings actions_settings;
const MergeTreeReaderSettings reader_settings;
const Names column_names;
const Names virtual_column_names;
const PoolSettings pool_settings;
const MarkCachePtr owned_mark_cache;
const UncompressedCachePtr owned_uncompressed_cache;
@ -52,13 +52,13 @@ protected:
std::vector<size_t> getPerPartSumMarks() const;
MergeTreeReadTaskPtr createTask(
MergeTreeReadTask::InfoPtr read_info,
MergeTreeReadTaskInfoPtr read_info,
MarkRanges ranges,
MergeTreeReadTask * previous_task) const;
MergeTreeReadTask::Extras getExtras() const;
std::vector<MergeTreeReadTask::InfoPtr> per_part_infos;
std::vector<MergeTreeReadTaskInfoPtr> per_part_infos;
std::vector<bool> is_part_on_remote_disk;
ReadBufferFromFileBase::ProfileCallback profile_callback;

View File

@ -12,22 +12,22 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder(
bool has_limit_below_one_block_,
MergeTreeReadType read_type_,
RangesInDataParts parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
actions_settings_,
reader_settings_,
column_names_,
virtual_column_names_,
settings_,
context_)
, has_limit_below_one_block(has_limit_below_one_block_)

View File

@ -11,12 +11,12 @@ public:
bool has_limit_below_one_block_,
MergeTreeReadType read_type_,
RangesInDataParts parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);

View File

@ -13,22 +13,22 @@ namespace ErrorCodes
MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
ParallelReadingExtension extension_,
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
actions_settings_,
reader_settings_,
column_names_,
virtual_column_names_,
settings_,
context_)
, extension(std::move(extension_))

View File

@ -11,12 +11,12 @@ public:
MergeTreeReadPoolParallelReplicas(
ParallelReadingExtension extension_,
RangesInDataParts && parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);

View File

@ -12,22 +12,22 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd
ParallelReadingExtension extension_,
CoordinationMode mode_,
RangesInDataParts parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_)
: MergeTreeReadPoolBase(
std::move(parts_),
std::move(shared_virtual_fields_),
storage_snapshot_,
prewhere_info_,
actions_settings_,
reader_settings_,
column_names_,
virtual_column_names_,
settings_,
context_)
, extension(std::move(extension_))

View File

@ -12,12 +12,12 @@ public:
ParallelReadingExtension extension_,
CoordinationMode mode_,
RangesInDataParts parts_,
VirtualFields shared_virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReaderSettings & reader_settings_,
const Names & column_names_,
const Names & virtual_column_names_,
const PoolSettings & settings_,
const ContextPtr & context_);

View File

@ -1,5 +1,6 @@
#include <Storages/MergeTree/MergeTreeReadTask.h>
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Common/Exception.h>
namespace DB
@ -10,7 +11,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
String MergeTreeReadTask::Columns::dump() const
String MergeTreeReadTaskColumns::dump() const
{
WriteBufferFromOwnString s;
for (size_t i = 0; i < pre_columns.size(); ++i)
@ -22,7 +23,7 @@ String MergeTreeReadTask::Columns::dump() const
}
MergeTreeReadTask::MergeTreeReadTask(
InfoPtr info_,
MergeTreeReadTaskInfoPtr info_,
Readers readers_,
MarkRanges mark_ranges_,
MergeTreeBlockSizePredictorPtr size_predictor_)
@ -34,23 +35,30 @@ MergeTreeReadTask::MergeTreeReadTask(
}
MergeTreeReadTask::Readers MergeTreeReadTask::createReaders(
const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges)
const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges)
{
Readers new_readers;
auto create_reader = [&](const NamesAndTypesList & columns_to_read)
{
return read_info->data_part->getReader(
columns_to_read, extras.storage_snapshot, ranges,
extras.uncompressed_cache, extras.mark_cache,
read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback);
columns_to_read,
extras.storage_snapshot,
ranges,
read_info->const_virtual_fields,
extras.uncompressed_cache,
extras.mark_cache,
read_info->alter_conversions,
extras.reader_settings,
extras.value_size_map,
extras.profile_callback);
};
new_readers.main = create_reader(read_info->task_columns.columns);
/// Add lightweight delete filtering step
if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete())
new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN}));
new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}}));
for (const auto & pre_columns_per_step : read_info->task_columns.pre_columns)
new_readers.prewhere.push_back(create_reader(pre_columns_per_step));
@ -58,10 +66,8 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders(
return new_readers;
}
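The `_row_exists` reader added above feeds a filtering step (see MergeTreeSelectProcessor below). A minimal std-only sketch of that filtering semantics, with illustrative names:
#include <cstddef>
#include <cstdint>
#include <vector>
std::vector<uint64_t> filterByRowExists(
    const std::vector<uint64_t> & rows, const std::vector<uint8_t> & row_exists)
{
    std::vector<uint64_t> result;
    result.reserve(rows.size());
    for (std::size_t i = 0; i < rows.size(); ++i)
        if (row_exists[i]) // 1 = row is visible, 0 = removed by a lightweight delete
            result.push_back(rows[i]);
    return result;
}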
MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
const Readers & task_readers,
const PrewhereExprInfo & prewhere_actions,
const Names & non_const_virtual_column_names)
MergeTreeReadTask::RangeReaders
MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const PrewhereExprInfo & prewhere_actions)
{
MergeTreeReadTask::RangeReaders new_range_readers;
if (prewhere_actions.steps.size() != task_readers.prewhere.size())
@ -77,10 +83,7 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
{
last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size());
MergeTreeRangeReader current_reader(
task_readers.prewhere[i].get(),
prev_reader, prewhere_actions.steps[i].get(),
last_reader, non_const_virtual_column_names);
MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader);
new_range_readers.prewhere.push_back(std::move(current_reader));
prev_reader = &new_range_readers.prewhere.back();
@ -88,11 +91,11 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
if (!last_reader)
{
new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, non_const_virtual_column_names);
new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true);
}
else
{
/// If all columns are read by prewhere range readers than move last prewhere range reader to main.
/// If all columns are read by prewhere range readers, move last prewhere range reader to main.
new_range_readers.main = std::move(new_range_readers.prewhere.back());
new_range_readers.prewhere.pop_back();
}
@ -100,14 +103,12 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
return new_range_readers;
}
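The chaining rule implemented above, reduced to a self-contained sketch (all names are stand-ins): each PREWHERE step's range reader points at the previous one, and when the main reader has no columns left to read, the last PREWHERE reader is promoted to main.
#include <cstddef>
#include <vector>
struct RangeReaderSketch { const RangeReaderSketch * prev = nullptr; };
struct RangeReadersSketch
{
    std::vector<RangeReaderSketch> prewhere;
    RangeReaderSketch main;
};
RangeReadersSketch chainReaders(std::size_t num_prewhere_steps, bool main_has_columns)
{
    RangeReadersSketch readers;
    readers.prewhere.reserve(num_prewhere_steps); // keep prev pointers stable
    const RangeReaderSketch * prev = nullptr;
    for (std::size_t i = 0; i < num_prewhere_steps; ++i)
    {
        readers.prewhere.push_back(RangeReaderSketch{prev});
        prev = &readers.prewhere.back();
    }
    if (main_has_columns || readers.prewhere.empty())
    {
        readers.main = RangeReaderSketch{prev};
    }
    else
    {
        // All columns are read by PREWHERE readers: promote the last one to main.
        readers.main = readers.prewhere.back();
        readers.prewhere.pop_back();
    }
    return readers;
}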
void MergeTreeReadTask::initializeRangeReaders(
const PrewhereExprInfo & prewhere_actions,
const Names & non_const_virtual_column_names)
void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere_actions)
{
if (range_readers.main.isInitialized())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized");
range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names);
range_readers = createRangeReaders(readers, prewhere_actions);
}
UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const

View File

@ -20,6 +20,8 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredict
class IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const IMergeTreeDataPart>;
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
using VirtualFields = std::unordered_map<String, Field>;
enum class MergeTreeReadType
{
@ -40,36 +42,38 @@ enum class MergeTreeReadType
ParallelReplicas,
};
struct MergeTreeReadTaskColumns
{
/// Column names to read during WHERE
NamesAndTypesList columns;
/// Column names to read during each PREWHERE step
std::vector<NamesAndTypesList> pre_columns;
String dump() const;
};
struct MergeTreeReadTaskInfo
{
/// Data part which should be read while performing this task
DataPartPtr data_part;
/// For the `_part_index` virtual column
size_t part_index_in_query;
/// Alter conversions that should be applied on the fly for the part.
AlterConversionsPtr alter_conversions;
/// Column names to read during PREWHERE and WHERE
MergeTreeReadTaskColumns task_columns;
/// Shared initialized size predictor. It is copied for each new task.
MergeTreeBlockSizePredictorPtr shared_size_predictor;
/// Values of virtual columns that are constant for the whole part (e.g. _part_index).
VirtualFields const_virtual_fields;
};
using MergeTreeReadTaskInfoPtr = std::shared_ptr<const MergeTreeReadTaskInfo>;
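A std-only sketch of the `const_virtual_fields` idea (the Field stand-in and the helper are assumptions for illustration): a map from virtual column name to the single value it takes for the whole part, which a reader can later expand into a full column.
#include <cstdint>
#include <string>
#include <unordered_map>
#include <variant>
using FieldSketch = std::variant<uint64_t, std::string>;
using VirtualFieldsSketch = std::unordered_map<std::string, FieldSketch>;
VirtualFieldsSketch makeConstVirtualFields(const std::string & part_name, uint64_t part_index)
{
    VirtualFieldsSketch fields;
    fields.emplace("_part", part_name);        // same value for every row of the part
    fields.emplace("_part_index", part_index); // filled once per read task
    return fields;
}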
/// A batch of work for MergeTreeSelectProcessor
struct MergeTreeReadTask : private boost::noncopyable
{
public:
struct Columns
{
/// Column names to read during WHERE
NamesAndTypesList columns;
/// Column names to read during each PREWHERE step
std::vector<NamesAndTypesList> pre_columns;
String dump() const;
};
struct Info
{
/// Data part which should be read while performing this task
DataPartPtr data_part;
/// For the `_part_index` virtual column
size_t part_index_in_query;
/// Alter conversions that should be applied on the fly for the part.
AlterConversionsPtr alter_conversions;
/// Column names to read during PREWHERE and WHERE
Columns task_columns;
/// Shared initialized size predictor. It is copied for each new task.
MergeTreeBlockSizePredictorPtr shared_size_predictor;
};
using InfoPtr = std::shared_ptr<const Info>;
/// Extra params that required for creation of reader.
struct Extras
{
@ -115,27 +119,32 @@ public:
size_t num_read_bytes = 0;
};
MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_);
MergeTreeReadTask(
MergeTreeReadTaskInfoPtr info_,
Readers readers_,
MarkRanges mark_ranges_,
void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names);
MergeTreeBlockSizePredictorPtr size_predictor_);
void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions);
BlockAndProgress read(const BlockSizeParams & params);
bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); }
const Info & getInfo() const { return *info; }
const MergeTreeReadTaskInfo & getInfo() const { return *info; }
const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; }
const IMergeTreeReader & getMainReader() const { return *readers.main; }
Readers releaseReaders() { return std::move(readers); }
static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges);
static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names);
static Readers createReaders(const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges);
static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions);
private:
UInt64 estimateNumRows(const BlockSizeParams & params) const;
/// Shared information required for reading.
InfoPtr info;
MergeTreeReadTaskInfoPtr info;
/// Readers for data_part of this task.
/// May be reused and released to the next task.

View File

@ -17,6 +17,7 @@ namespace ErrorCodes
MergeTreeReaderCompact::MergeTreeReaderCompact(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,
@ -29,6 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
: IMergeTreeReader(
data_part_info_for_read_,
columns_,
virtual_fields_,
storage_snapshot_,
uncompressed_cache_,
mark_cache_,

View File

@ -21,6 +21,7 @@ public:
MergeTreeReaderCompact(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,

View File

@ -19,12 +19,14 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
DataPartInMemoryPtr data_part_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
MarkRanges mark_ranges_,
MergeTreeReaderSettings settings_)
: IMergeTreeReader(
data_part_info_for_read_,
columns_,
virtual_fields_,
storage_snapshot_,
nullptr,
nullptr,

View File

@ -18,6 +18,7 @@ public:
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
DataPartInMemoryPtr data_part_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
MarkRanges mark_ranges_,
MergeTreeReaderSettings settings_);

View File

@ -24,6 +24,7 @@ namespace
MergeTreeReaderWide::MergeTreeReaderWide(
MergeTreeDataPartInfoForReaderPtr data_part_info_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,
@ -35,6 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide(
: IMergeTreeReader(
data_part_info_,
columns_,
virtual_fields_,
storage_snapshot_,
uncompressed_cache_,
mark_cache_,

View File

@ -17,6 +17,7 @@ public:
MergeTreeReaderWide(
MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
NamesAndTypesList columns_,
const VirtualFields & virtual_fields_,
const StorageSnapshotPtr & storage_snapshot_,
UncompressedCache * uncompressed_cache_,
MarkCache * mark_cache_,

View File

@ -12,7 +12,7 @@
#include <Processors/Chunk.h>
#include <Processors/QueryPlan/SourceStepWithFilter.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <city.h>
namespace DB
@ -20,41 +20,26 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int QUERY_WAS_CANCELLED;
}
static void injectNonConstVirtualColumns(
size_t rows,
Block & block,
const Names & virtual_columns,
MergeTreeReadTask * task = nullptr);
static void injectPartConstVirtualColumns(
size_t rows,
Block & block,
MergeTreeReadTask * task,
const DataTypePtr & partition_value_type,
const Names & virtual_columns);
MergeTreeSelectProcessor::MergeTreeSelectProcessor(
MergeTreeReadPoolPtr pool_,
MergeTreeSelectAlgorithmPtr algorithm_,
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReadTask::BlockSizeParams & block_size_params_,
const MergeTreeReaderSettings & reader_settings_,
const Names & virt_column_names_)
const MergeTreeReaderSettings & reader_settings_)
: pool(std::move(pool_))
, algorithm(std::move(algorithm_))
, storage_snapshot(storage_snapshot_)
, prewhere_info(prewhere_info_)
, actions_settings(actions_settings_)
, prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps))
, reader_settings(reader_settings_)
, block_size_params(block_size_params_)
, virt_column_names(virt_column_names_)
, partition_value_type(storage_.getPartitionValueType())
, result_header(transformHeader(pool->getHeader(), prewhere_info))
{
if (reader_settings.apply_deleted_mask)
{
@ -62,7 +47,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
{
.type = PrewhereExprStep::Filter,
.actions = nullptr,
.filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name,
.filter_column_name = RowExistsColumn::name,
.remove_filter_column = true,
.need_filter = true,
.perform_alter_conversions = true,
@ -71,16 +56,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
lightweight_delete_filter_step = std::make_shared<PrewhereExprStep>(std::move(step));
}
header_without_const_virtual_columns = SourceStepWithFilter::applyPrewhereActions(pool->getHeader(), prewhere_info);
size_t non_const_columns_offset = header_without_const_virtual_columns.columns();
injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names);
for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num)
non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name);
result_header = header_without_const_virtual_columns;
injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names);
if (!prewhere_actions.steps.empty())
LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions());
@ -163,8 +138,6 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
if (res.row_count)
{
injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names);
/// Reorder the columns according to result_header
Columns ordered_columns;
ordered_columns.reserve(result_header.columns());
@ -198,209 +171,12 @@ void MergeTreeSelectProcessor::initializeRangeReaders()
for (const auto & step : prewhere_actions.steps)
all_prewhere_actions.steps.push_back(step);
task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names);
task->initializeRangeReaders(all_prewhere_actions);
}
namespace
Block MergeTreeSelectProcessor::transformHeader(Block block, const PrewhereInfoPtr & prewhere_info)
{
struct VirtualColumnsInserter
{
explicit VirtualColumnsInserter(Block & block_) : block(block_) {}
bool columnExists(const String & name) const { return block.has(name); }
void insertUInt8Column(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeUInt8>(), name});
}
void insertUInt64Column(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeUInt64>(), name});
}
void insertUUIDColumn(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeUUID>(), name});
}
void insertLowCardinalityColumn(const ColumnPtr & column, const String & name)
{
block.insert({column, std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), name});
}
void insertPartitionValueColumn(
size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name)
{
ColumnPtr column;
if (rows)
column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end()))
->convertToFullColumnIfConst();
else
column = partition_value_type->createColumn();
block.insert({column, partition_value_type, name});
}
Block & block;
};
}
/// Adds virtual columns that are not const for all rows
static void injectNonConstVirtualColumns(
size_t rows,
Block & block,
const Names & virtual_columns,
MergeTreeReadTask * task)
{
VirtualColumnsInserter inserter(block);
for (const auto & virtual_column_name : virtual_columns)
{
if (virtual_column_name == "_part_offset")
{
if (!rows)
{
inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name);
}
else
{
if (!inserter.columnExists(virtual_column_name))
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Column {} must have been filled part reader",
virtual_column_name);
}
}
if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
{
/// If the _row_exists column isn't present in the part, fill it here with 1s
ColumnPtr column;
if (rows)
column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst();
else
column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn();
inserter.insertUInt8Column(column, virtual_column_name);
}
if (virtual_column_name == BlockNumberColumn::name)
{
ColumnPtr column;
if (rows)
{
size_t value = 0;
if (task)
{
value = task->getInfo().data_part ? task->getInfo().data_part->info.min_block : 0;
}
column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst();
}
else
column = BlockNumberColumn::type->createColumn();
inserter.insertUInt64Column(column, virtual_column_name);
}
}
}
/// Adds virtual columns that are const for the whole part
static void injectPartConstVirtualColumns(
size_t rows,
Block & block,
MergeTreeReadTask * task,
const DataTypePtr & partition_value_type,
const Names & virtual_columns)
{
VirtualColumnsInserter inserter(block);
/// Add virtual columns, except _sample_factor, which is added from the outside.
if (!virtual_columns.empty())
{
if (unlikely(rows && !task))
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task.");
const IMergeTreeDataPart * part = nullptr;
if (rows)
{
part = task->getInfo().data_part.get();
if (part->isProjectionPart())
part = part->getParentPart();
}
for (const auto & virtual_column_name : virtual_columns)
{
if (virtual_column_name == "_part")
{
ColumnPtr column;
if (rows)
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(rows, part->name)
->convertToFullColumnIfConst();
else
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
inserter.insertLowCardinalityColumn(column, virtual_column_name);
}
else if (virtual_column_name == "_part_index")
{
ColumnPtr column;
if (rows)
column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst();
else
column = DataTypeUInt64().createColumn();
inserter.insertUInt64Column(column, virtual_column_name);
}
else if (virtual_column_name == "_part_uuid")
{
ColumnPtr column;
if (rows)
column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst();
else
column = DataTypeUUID().createColumn();
inserter.insertUUIDColumn(column, virtual_column_name);
}
else if (virtual_column_name == "_partition_id")
{
ColumnPtr column;
if (rows)
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
.createColumnConst(rows, part->info.partition_id)
->convertToFullColumnIfConst();
else
column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();
inserter.insertLowCardinalityColumn(column, virtual_column_name);
}
else if (virtual_column_name == "_partition_value")
{
if (rows)
inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name);
else
inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name);
}
}
}
}
void MergeTreeSelectProcessor::injectVirtualColumns(
Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
/// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves.
/// Note that the order is important: virtual columns filled by the range reader must go first
injectNonConstVirtualColumns(row_count, block, virtual_columns, task);
injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns);
}
Block MergeTreeSelectProcessor::transformHeader(
Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns);
auto transformed = SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info);
return transformed;
return SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info);
}
}

View File

@ -41,21 +41,15 @@ public:
MergeTreeSelectProcessor(
MergeTreeReadPoolPtr pool_,
MergeTreeSelectAlgorithmPtr algorithm_,
const MergeTreeData & storage_,
const StorageSnapshotPtr & storage_snapshot_,
const PrewhereInfoPtr & prewhere_info_,
const ExpressionActionsSettings & actions_settings_,
const MergeTreeReadTask::BlockSizeParams & block_size_params_,
const MergeTreeReaderSettings & reader_settings_,
const Names & virt_column_names_);
const MergeTreeReaderSettings & reader_settings_);
String getName() const;
static Block transformHeader(
Block block,
const PrewhereInfoPtr & prewhere_info,
const DataTypePtr & partition_value_type,
const Names & virtual_columns);
static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info);
Block getHeader() const { return result_header; }
ChunkAndProgress read();
@ -81,14 +75,12 @@ private:
size_t num_read_bytes = 0;
};
/// Used for filling header with no rows as well as block with data
static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns);
/// Sets up range readers corresponding to data readers
void initializeRangeReaders();
const MergeTreeReadPoolPtr pool;
const MergeTreeSelectAlgorithmPtr algorithm;
const StorageSnapshotPtr storage_snapshot;
const PrewhereInfoPtr prewhere_info;
const ExpressionActionsSettings actions_settings;
@ -96,17 +88,11 @@ private:
const MergeTreeReaderSettings reader_settings;
const MergeTreeReadTask::BlockSizeParams block_size_params;
const Names virt_column_names;
const DataTypePtr partition_value_type;
/// Current task to read from.
MergeTreeReadTaskPtr task;
/// This step is added when the part has lightweight delete mask
PrewhereExprStepPtr lightweight_delete_filter_step;
/// These columns will be filled by the merge tree range reader
Names non_const_virtual_column_names;
/// This header is used for chunks from readFromPart().
Block header_without_const_virtual_columns;
/// A result of getHeader(). A chunk with this header is returned from read().
Block result_header;

View File

@ -2,6 +2,7 @@
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/QueryPlan/ISourceStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
@ -16,6 +17,7 @@
namespace DB
{
namespace ErrorCodes
{
extern const int MEMORY_LIMIT_EXCEEDED;
@ -55,7 +57,6 @@ protected:
Chunk generate() override;
private:
const MergeTreeData & storage;
StorageSnapshotPtr storage_snapshot;
@ -86,7 +87,6 @@ private:
void finish();
};
MergeTreeSequentialSource::MergeTreeSequentialSource(
MergeTreeSequentialSourceType type,
const MergeTreeData & storage_,
@ -136,10 +136,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
{
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical)
.withExtendedObjects()
.withSystemColumns();
if (storage.supportsSubcolumns())
options.withSubcolumns();
.withVirtuals()
.withSubcolumns(storage.supportsSubcolumns());
columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read);
}
@ -181,9 +179,37 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())});
reader = data_part->getReader(
columns_for_reader, storage_snapshot,
*mark_ranges, /* uncompressed_cache = */ nullptr,
mark_cache.get(), alter_conversions, reader_settings, {}, {});
columns_for_reader,
storage_snapshot,
*mark_ranges,
/*virtual_fields=*/ {},
/*uncompressed_cache=*/ {},
mark_cache.get(),
alter_conversions,
reader_settings,
{},
{});
}
static void fillBlockNumberColumns(
Columns & res_columns,
const NamesAndTypesList & columns_list,
UInt64 block_number,
UInt64 num_rows)
{
chassert(res_columns.size() == columns_list.size());
auto it = columns_list.begin();
for (size_t i = 0; i < res_columns.size(); ++i, ++it)
{
if (res_columns[i])
continue;
if (it->name == BlockNumberColumn::name)
{
res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
}
}
}
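The same idea as a self-contained, std-only sketch (illustrative names): a requested column that the reader left empty is materialized as num_rows copies of the part's minimum block number.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>
using OwnedColumn = std::optional<std::vector<uint64_t>>;
void fillBlockNumber(OwnedColumn & column, uint64_t min_block, std::size_t num_rows)
{
    if (!column)                                             // not filled by the reader
        column = std::vector<uint64_t>(num_rows, min_block); // const value -> full column
}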
Chunk MergeTreeSequentialSource::generate()
@ -204,16 +230,17 @@ try
if (rows_read)
{
fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read);
reader->fillVirtualColumns(columns, rows_read);
current_row += rows_read;
current_mark += (rows_to_read == rows_read);
bool should_evaluate_missing_defaults = false;
reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block);
reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read);
if (should_evaluate_missing_defaults)
{
reader->evaluateMissingDefaults({}, columns);
}
reader->performRequiredConversions(columns);
@ -278,14 +305,13 @@ Pipe createMergeTreeSequentialSource(
bool quiet,
std::shared_ptr<std::atomic<size_t>> filtered_rows_count)
{
const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN;
/// The part might have some rows masked by lightweight deletes
const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete();
const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end();
const bool has_filter_column = std::ranges::find(columns_to_read, RowExistsColumn::name) != columns_to_read.end();
if (need_to_filter_deleted_rows && !has_filter_column)
columns_to_read.emplace_back(filter_column.name);
columns_to_read.emplace_back(RowExistsColumn::name);
auto column_part_source = std::make_shared<MergeTreeSequentialSource>(type,
storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges),
@ -299,7 +325,7 @@ Pipe createMergeTreeSequentialSource(
pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header)
{
return std::make_shared<FilterTransform>(
header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count);
header, nullptr, RowExistsColumn::name, !has_filter_column, false, filtered_rows_count);
});
}

View File

@ -0,0 +1,52 @@
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <DataTypes/DataTypesNumber.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NO_SUCH_COLUMN_IN_TABLE;
}
static ASTPtr getCompressionCodecDeltaLZ4()
{
return makeASTFunction("CODEC",
std::make_shared<ASTIdentifier>("Delta"),
std::make_shared<ASTIdentifier>("LZ4"));
}
const String RowExistsColumn::name = "_row_exists";
const DataTypePtr RowExistsColumn::type = std::make_shared<DataTypeUInt8>();
const String BlockNumberColumn::name = "_block_number";
const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4();
Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part)
{
if (column_name == RowExistsColumn::name)
return 1ULL;
if (column_name == BlockNumberColumn::name)
return part.info.min_block;
if (column_name == "_part")
return part.name;
if (column_name == "_part_uuid")
return part.uuid;
if (column_name == "_partition_id")
return part.info.partition_id;
if (column_name == "_partition_value")
return Tuple(part.partition.value.begin(), part.partition.value.end());
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected const virtual column: {}", column_name);
}
}
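For context, a hedged fragment (not self-contained; it assumes a `part` and `num_rows` in scope, and is not a verbatim excerpt from the commit) showing how such a per-part constant is typically expanded into a full column, using the same createColumnConst/convertToFullColumnIfConst pattern seen in fillBlockNumberColumns above:
// Fragment for illustration only.
Field value = getFieldForConstVirtualColumn(BlockNumberColumn::name, part);
ColumnPtr column = BlockNumberColumn::type
    ->createColumnConst(num_rows, value)
    ->convertToFullColumnIfConst();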

View File

@ -0,0 +1,26 @@
#pragma once
#include <Core/Types.h>
#include <DataTypes/IDataType.h>
#include <Parsers/IAST_fwd.h>
namespace DB
{
class IMergeTreeDataPart;
struct RowExistsColumn
{
static const String name;
static const DataTypePtr type;
};
struct BlockNumberColumn
{
static const String name;
static const DataTypePtr type;
static const ASTPtr codec;
};
Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part);
}

View File

@ -23,7 +23,7 @@
#include <Storages/MutationCommands.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Storages/MergeTree/MergeTreeIndexInverted.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeVariant.h>
#include <boost/algorithm/string/replace.hpp>
@ -168,7 +168,7 @@ static void splitAndModifyMutationCommands(
{
if (!mutated_columns.contains(column.name))
{
if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtuals().contains(column.name))
if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtualsPtr()->has(column.name))
{
/// We cannot add the column because there's no such column in the table.
/// It's okay if the column was dropped. It may also be absent in dropped_columns
@ -283,7 +283,6 @@ getColumnsForNewDataPart(
ColumnsDescription part_columns(source_part->getColumns());
NamesAndTypesList system_columns;
const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN;
bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate();
bool deleted_mask_updated = false;
@ -299,9 +298,9 @@ getColumnsForNewDataPart(
{
for (const auto & [column_name, _] : command.column_to_update_expression)
{
if (column_name == deleted_mask_column.name
if (column_name == RowExistsColumn::name
&& supports_lightweight_deletes
&& !storage_columns_set.contains(deleted_mask_column.name))
&& !storage_columns_set.contains(RowExistsColumn::name))
deleted_mask_updated = true;
}
}
@ -323,12 +322,12 @@ getColumnsForNewDataPart(
}
}
if (!storage_columns_set.contains(deleted_mask_column.name))
if (!storage_columns_set.contains(RowExistsColumn::name))
{
if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command))
if (deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command))
{
storage_columns.push_back(deleted_mask_column);
storage_columns_set.insert(deleted_mask_column.name);
storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
storage_columns_set.insert(RowExistsColumn::name);
}
}

View File

@ -34,6 +34,7 @@ public:
, partition_id(part_->info.partition_id)
{
setInMemoryMetadata(storage.getInMemoryMetadata());
setVirtuals(*storage.getVirtualsPtr());
}
/// Used in queries with projection.
@ -90,11 +91,6 @@ public:
bool supportsSubcolumns() const override { return true; }
NamesAndTypesList getVirtuals() const override
{
return storage.getVirtuals();
}
String getPartitionId() const
{
return partition_id;

View File

@ -9,10 +9,10 @@
namespace DB
{
static std::pair<Block, Block> getHeaders(StorageNATS & storage, const StorageSnapshotPtr & storage_snapshot)
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot)
{
auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized();
auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames());
auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();
return {non_virtual_header, virtual_header};
}
@ -33,7 +33,7 @@ NATSSource::NATSSource(
const Names & columns,
size_t max_block_size_,
StreamingHandleErrorMode handle_error_mode_)
: NATSSource(storage_, storage_snapshot_, getHeaders(storage_, storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_)
: NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_)
{
}

View File

@ -88,6 +88,7 @@ StorageNATS::StorageNATS(
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
setInMemoryMetadata(storage_metadata);
setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode));
nats_context = addSettings(getContext());
nats_context->makeQueryContext();
@ -131,6 +132,19 @@ StorageNATS::StorageNATS(
connection_task->deactivate();
}
VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
VirtualColumnsDescription desc;
desc.addEphemeral("_subject", std::make_shared<DataTypeString>(), "");
if (handle_error_mode == StreamingHandleErrorMode::STREAM)
{
desc.addEphemeral("_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
}
return desc;
}
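The addEphemeral pattern above, reduced to a std-only stand-in (the struct, the helper, and the read-time-only semantics are assumptions for illustration, not the real VirtualColumnsDescription API): ephemeral virtuals are taken here to mean columns produced while reading rather than stored with the data.
#include <string>
#include <utility>
#include <vector>
struct VirtualsSketch
{
    // name -> type name; "ephemeral" = produced at read time, not stored on disk
    std::vector<std::pair<std::string, std::string>> ephemeral;
    void addEphemeral(std::string name, std::string type)
    {
        ephemeral.emplace_back(std::move(name), std::move(type));
    }
};
VirtualsSketch createNatsVirtualsSketch(bool stream_error_mode)
{
    VirtualsSketch desc;
    desc.addEphemeral("_subject", "String");
    if (stream_error_mode) // errors streamed as rows get extra columns
    {
        desc.addEphemeral("_raw_message", "Nullable(String)");
        desc.addEphemeral("_error", "Nullable(String)");
    }
    return desc;
}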
Names StorageNATS::parseList(const String & list, char delim)
{
@ -746,20 +760,4 @@ void registerStorageNATS(StorageFactory & factory)
factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}
NamesAndTypesList StorageNATS::getVirtuals() const
{
auto virtuals = NamesAndTypesList{
{"_subject", std::make_shared<DataTypeString>()}
};
if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::STREAM)
{
virtuals.push_back({"_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
}
return virtuals;
}
}

View File

@ -61,7 +61,6 @@ public:
NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout);
const String & getFormatName() const { return format_name; }
NamesAndTypesList getVirtuals() const override;
void incrementReader();
void decrementReader();
@ -137,6 +136,7 @@ private:
static Names parseList(const String & list, char delim);
static String getTableBasedName(String name, const StorageID & table_id);
static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
ContextMutablePtr addSettings(ContextPtr context) const;
size_t getMaxBlockSize() const;

View File

@ -72,6 +72,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL is allowed only for Atomic database");
setInMemoryMetadata(storage_metadata);
setVirtuals(createVirtuals());
replication_settings->materialized_postgresql_tables_list = remote_table_name_;
@ -127,8 +128,16 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
, nested_table_id(nested_storage_->getStorageID())
{
setInMemoryMetadata(nested_storage_->getInMemoryMetadata());
setVirtuals(*nested_storage_->getVirtualsPtr());
}
VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals()
{
VirtualColumnsDescription desc;
desc.addEphemeral("_sign", std::make_shared<DataTypeInt8>(), "");
desc.addEphemeral("_version", std::make_shared<DataTypeUInt64>(), "");
return desc;
}
/// A temporary clone table might be created for the current table in order to update its schema and reload
/// all data in the background while the current table still handles read requests.
@ -254,15 +263,6 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool sync, ContextPtr lo
}
NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const
{
return NamesAndTypesList{
{"_sign", std::make_shared<DataTypeInt8>()},
{"_version", std::make_shared<DataTypeUInt64>()}
};
}
bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const
{
return needRewriteQueryWithFinalForStorage(column_names, getNested());

View File

@ -89,8 +89,6 @@ public:
/// Used only for single MaterializedPostgreSQL storage.
void dropInnerTableIfAny(bool sync, ContextPtr local_context) override;
NamesAndTypesList getVirtuals() const override;
bool needRewriteQueryWithFinal(const Names & column_names) const override;
void read(
@ -138,6 +136,8 @@ private:
static std::shared_ptr<ASTColumnDeclaration> getMaterializedColumnsDeclaration(
String name, String type, UInt64 default_value);
static VirtualColumnsDescription createVirtuals();
ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;
String getNestedTableName() const;

View File

@ -11,10 +11,10 @@
namespace DB
{
static std::pair<Block, Block> getHeaders(StorageRabbitMQ & storage_, const StorageSnapshotPtr & storage_snapshot)
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot)
{
auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized();
auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage_.getVirtuals().getNames());
auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();
return {non_virtual_header, virtual_header};
}
@ -40,7 +40,7 @@ RabbitMQSource::RabbitMQSource(
: RabbitMQSource(
storage_,
storage_snapshot_,
getHeaders(storage_, storage_snapshot_),
getHeaders(storage_snapshot_),
context_,
columns,
max_block_size_,

View File

@ -136,6 +136,7 @@ StorageRabbitMQ::StorageRabbitMQ(
StorageInMemoryMetadata storage_metadata;
storage_metadata.setColumns(columns_);
setInMemoryMetadata(storage_metadata);
setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode));
rabbitmq_context = addSettings(getContext());
rabbitmq_context->makeQueryContext();
@ -191,6 +192,26 @@ StorageRabbitMQ::StorageRabbitMQ(
init_task->deactivate();
}
VirtualColumnsDescription StorageRabbitMQ::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
VirtualColumnsDescription desc;
desc.addEphemeral("_exchange_name", std::make_shared<DataTypeString>(), "");
desc.addEphemeral("_channel_id", std::make_shared<DataTypeString>(), "");
desc.addEphemeral("_delivery_tag", std::make_shared<DataTypeUInt64>(), "");
desc.addEphemeral("_redelivered", std::make_shared<DataTypeUInt8>(), "");
desc.addEphemeral("_message_id", std::make_shared<DataTypeString>(), "");
desc.addEphemeral("_timestamp", std::make_shared<DataTypeUInt64>(), "");
if (handle_error_mode == StreamingHandleErrorMode::STREAM)
{
desc.addEphemeral("_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
}
return desc;
}
Names StorageRabbitMQ::parseSettings(String settings_list)
{
@ -1213,25 +1234,4 @@ void registerStorageRabbitMQ(StorageFactory & factory)
factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}
NamesAndTypesList StorageRabbitMQ::getVirtuals() const
{
auto virtuals = NamesAndTypesList{
{"_exchange_name", std::make_shared<DataTypeString>()},
{"_channel_id", std::make_shared<DataTypeString>()},
{"_delivery_tag", std::make_shared<DataTypeUInt64>()},
{"_redelivered", std::make_shared<DataTypeUInt8>()},
{"_message_id", std::make_shared<DataTypeString>()},
{"_timestamp", std::make_shared<DataTypeUInt64>()}
};
if (rabbitmq_settings->rabbitmq_handle_error_mode == StreamingHandleErrorMode::STREAM)
{
virtuals.push_back({"_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
}
return virtuals;
}
}

View File

@ -68,7 +68,6 @@ public:
RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout);
const String & getFormatName() const { return format_name; }
NamesAndTypesList getVirtuals() const override;
String getExchange() const { return exchange_name; }
void unbindExchange();
@ -191,6 +190,8 @@ private:
bool tryStreamToViews();
bool hasDependencies(const StorageID & table_id);
static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
static String getRandomName()
{
std::uniform_int_distribution<int> distribution('a', 'z');

View File

@ -155,8 +155,7 @@ StorageS3Queue::StorageS3Queue(
storage_metadata.setConstraints(constraints_);
storage_metadata.setComment(comment);
setInMemoryMetadata(storage_metadata);
virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());
task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); });
@ -315,7 +314,7 @@ void StorageS3Queue::read(
}
auto this_ptr = std::static_pointer_cast<StorageS3Queue>(shared_from_this());
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));
auto reading = std::make_unique<ReadFromS3Queue>(
column_names,
@ -493,7 +492,7 @@ bool StorageS3Queue::streamToViews()
auto block_io = interpreter.execute();
auto file_iterator = createFileIterator(s3queue_context, nullptr);
auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals());
auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context));
Pipes pipes;
pipes.reserve(s3queue_settings->s3queue_processing_threads_num);
@ -602,8 +601,9 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
{
auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
*configuration.client, configuration.url, predicate, virtual_columns, local_context,
*configuration.client, configuration.url, predicate, getVirtualsList(), local_context,
/* read_keys */nullptr, configuration.request_settings);
return std::make_shared<FileIterator>(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called);
}

View File

@ -51,8 +51,6 @@ public:
size_t max_block_size,
size_t num_streams) override;
NamesAndTypesList getVirtuals() const override { return virtual_columns; }
const auto & getFormatName() const { return configuration.format; }
const fs::path & getZooKeeperPath() const { return zk_path; }
@ -71,7 +69,6 @@ private:
Configuration configuration;
const std::optional<FormatSettings> format_settings;
NamesAndTypesList virtual_columns;
BackgroundSchedulePool::TaskHolder task;
std::atomic<bool> stream_cancelled{false};

Some files were not shown because too many files have changed in this diff.