mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #60205 from CurtizJ/refactor-virtual-columns
Refactoring of virtual columns
This commit is contained in:
commit
c103b0084b
@ -38,7 +38,6 @@
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
#include <Storages/WindowView/StorageWindowView.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/executeDDLQueryOnCluster.h>
|
||||
@ -894,24 +893,6 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
|
||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Column {} already exists", backQuoteIfNeed(column.name));
|
||||
}
|
||||
|
||||
/// Check if _row_exists for lightweight delete column in column_lists for merge tree family.
|
||||
if (create.storage && create.storage->engine && endsWith(create.storage->engine->name, "MergeTree"))
|
||||
{
|
||||
auto search = all_columns.find(LightweightDeleteDescription::FILTER_COLUMN.name);
|
||||
if (search != all_columns.end())
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column '{}' for *MergeTree engines because it "
|
||||
"is reserved for lightweight delete feature",
|
||||
LightweightDeleteDescription::FILTER_COLUMN.name);
|
||||
|
||||
auto search_block_number = all_columns.find(BlockNumberColumn::name);
|
||||
if (search_block_number != all_columns.end())
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column '{}' for *MergeTree engines because it "
|
||||
"is reserved for storing block number",
|
||||
BlockNumberColumn::name);
|
||||
}
|
||||
|
||||
const auto & settings = getContext()->getSettingsRef();
|
||||
|
||||
/// If it's not attach and not materialized view to existing table,
|
||||
@ -924,9 +905,23 @@ void InterpreterCreateQuery::validateTableStructure(const ASTCreateQuery & creat
|
||||
}
|
||||
}
|
||||
|
||||
void validateVirtualColumns(const IStorage & storage)
|
||||
{
|
||||
auto virtual_columns = storage.getVirtualsPtr();
|
||||
for (const auto & storage_column : storage.getInMemoryMetadataPtr()->getColumns())
|
||||
{
|
||||
if (virtual_columns->tryGet(storage_column.name, VirtualsKind::Persistent))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column '{}' for {} engines because it is reserved for persistent virtual column",
|
||||
storage_column.name, storage.getName());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
void checkTemporaryTableEngineName(const String& name)
|
||||
void checkTemporaryTableEngineName(const String & name)
|
||||
{
|
||||
if (name.starts_with("Replicated") || name.starts_with("Shared") || name == "KeeperMap")
|
||||
throw Exception(ErrorCodes::INCORRECT_QUERY, "Temporary tables cannot be created with Replicated, Shared or KeeperMap table engines");
|
||||
@ -1509,6 +1504,16 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as<ASTCreateQuery &>(), res);
|
||||
}
|
||||
|
||||
validateVirtualColumns(*res);
|
||||
|
||||
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column of type Object, "
|
||||
"because storage {} doesn't support dynamic subcolumns",
|
||||
res->getName());
|
||||
}
|
||||
|
||||
if (!create.attach && getContext()->getSettingsRef().database_replicated_allow_only_replicated_engine)
|
||||
{
|
||||
bool is_replicated_storage = typeid_cast<const StorageReplicatedMergeTree *>(res.get()) != nullptr;
|
||||
@ -1558,14 +1563,6 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
|
||||
/// we can safely destroy the object without a call to "shutdown", because there is guarantee
|
||||
/// that no background threads/similar resources remain after exception from "startup".
|
||||
|
||||
if (!res->supportsDynamicSubcolumns() && hasDynamicSubcolumns(res->getInMemoryMetadataPtr()->getColumns()))
|
||||
{
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot create table with column of type Object, "
|
||||
"because storage {} doesn't support dynamic subcolumns",
|
||||
res->getName());
|
||||
}
|
||||
|
||||
res->startup();
|
||||
return true;
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
#include <Storages/AlterCommands.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/MutationCommands.h>
|
||||
#include <Storages/LightweightDeleteDescription.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -123,28 +123,29 @@ BlockIO InterpreterDescribeQuery::execute()
|
||||
|
||||
void InterpreterDescribeQuery::fillColumnsFromSubquery(const ASTTableExpression & table_expression)
|
||||
{
|
||||
NamesAndTypesList names_and_types;
|
||||
Block sample_block;
|
||||
auto select_query = table_expression.subquery->children.at(0);
|
||||
auto current_context = getContext();
|
||||
|
||||
if (settings.allow_experimental_analyzer)
|
||||
{
|
||||
SelectQueryOptions select_query_options;
|
||||
names_and_types = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock().getNamesAndTypesList();
|
||||
sample_block = InterpreterSelectQueryAnalyzer(select_query, current_context, select_query_options).getSampleBlock();
|
||||
}
|
||||
else
|
||||
{
|
||||
names_and_types = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context).getNamesAndTypesList();
|
||||
sample_block = InterpreterSelectWithUnionQuery::getSampleBlock(select_query, current_context);
|
||||
}
|
||||
|
||||
for (auto && [name, type] : names_and_types)
|
||||
columns.emplace_back(std::move(name), std::move(type));
|
||||
for (auto && column : sample_block)
|
||||
columns.emplace_back(std::move(column.name), std::move(column.type));
|
||||
}
|
||||
|
||||
void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpression & table_expression)
|
||||
{
|
||||
auto current_context = getContext();
|
||||
TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression.table_function, current_context);
|
||||
|
||||
auto column_descriptions = table_function_ptr->getActualTableStructure(getContext(), /*is_insert_query*/ true);
|
||||
for (const auto & column : column_descriptions)
|
||||
columns.emplace_back(column);
|
||||
@ -154,14 +155,16 @@ void InterpreterDescribeQuery::fillColumnsFromTableFunction(const ASTTableExpres
|
||||
auto table = table_function_ptr->execute(table_expression.table_function, getContext(), table_function_ptr->getName());
|
||||
if (table)
|
||||
{
|
||||
for (const auto & column : table->getVirtuals())
|
||||
auto virtuals = table->getVirtualsPtr();
|
||||
for (const auto & column : *virtuals)
|
||||
{
|
||||
if (!column_descriptions.has(column.name))
|
||||
virtual_columns.emplace_back(column.name, column.type);
|
||||
virtual_columns.push_back(column);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & table_expression)
|
||||
{
|
||||
auto table_id = getContext()->resolveStorageID(table_expression.database_and_table_name);
|
||||
@ -176,10 +179,11 @@ void InterpreterDescribeQuery::fillColumnsFromTable(const ASTTableExpression & t
|
||||
|
||||
if (settings.describe_include_virtual_columns)
|
||||
{
|
||||
for (const auto & column : table->getVirtuals())
|
||||
auto virtuals = table->getVirtualsPtr();
|
||||
for (const auto & column : *virtuals)
|
||||
{
|
||||
if (!column_descriptions.has(column.name))
|
||||
virtual_columns.emplace_back(column.name, column.type);
|
||||
virtual_columns.push_back(column);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -136,7 +136,7 @@ Block InterpreterInsertQuery::getSampleBlock(
|
||||
if (auto * window_view = dynamic_cast<StorageWindowView *>(table.get()))
|
||||
return window_view->getInputHeader();
|
||||
else if (no_destination)
|
||||
return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals());
|
||||
return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtualsList());
|
||||
else
|
||||
return metadata_snapshot->getSampleBlockNonMaterialized();
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ SELECT
|
||||
'' AS extra )";
|
||||
|
||||
// TODO Interpret query.extended. It is supposed to show internal/virtual columns. Need to fetch virtual column names, see
|
||||
// IStorage::getVirtuals(). We can't easily do that via SQL.
|
||||
// IStorage::getVirtualsList(). We can't easily do that via SQL.
|
||||
|
||||
if (query.full)
|
||||
{
|
||||
|
@ -272,7 +272,7 @@ void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr &
|
||||
auto & table = tables_with_columns.back();
|
||||
table.addHiddenColumns(storage_columns.getMaterialized());
|
||||
table.addHiddenColumns(storage_columns.getAliases());
|
||||
table.addHiddenColumns(storage->getVirtuals());
|
||||
table.addHiddenColumns(storage->getVirtualsList());
|
||||
}
|
||||
else
|
||||
tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, source_header.getNamesAndTypesList());
|
||||
|
@ -7,7 +7,7 @@
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||
#include <Storages/StorageMergeTree.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
|
||||
#include <Processors/Transforms/FilterTransform.h>
|
||||
#include <Processors/Transforms/ExpressionTransform.h>
|
||||
#include <Processors/Transforms/CreatingSetsTransform.h>
|
||||
@ -31,7 +31,6 @@
|
||||
#include <Processors/QueryPlan/CreatingSetsStep.h>
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <Interpreters/PreparedSets.h>
|
||||
#include <Storages/LightweightDeleteDescription.h>
|
||||
#include <Storages/MergeTree/MergeTreeSequentialSource.h>
|
||||
#include <Processors/Sources/ThrowingExceptionSource.h>
|
||||
#include <Analyzer/QueryTreeBuilder.h>
|
||||
@ -265,7 +264,7 @@ MutationCommand createCommandToApplyDeletedMask(const MutationCommand & command)
|
||||
alter_command->partition = alter_command->children.emplace_back(command.partition).get();
|
||||
|
||||
auto row_exists_predicate = makeASTFunction("equals",
|
||||
std::make_shared<ASTIdentifier>(LightweightDeleteDescription::FILTER_COLUMN.name),
|
||||
std::make_shared<ASTIdentifier>(RowExistsColumn::name),
|
||||
std::make_shared<ASTLiteral>(Field(0)));
|
||||
|
||||
if (command.predicate)
|
||||
@ -350,7 +349,8 @@ bool MutationsInterpreter::Source::isCompactPart() const
|
||||
static Names getAvailableColumnsWithVirtuals(StorageMetadataPtr metadata_snapshot, const IStorage & storage)
|
||||
{
|
||||
auto all_columns = metadata_snapshot->getColumns().getNamesOfPhysical();
|
||||
for (const auto & column : storage.getVirtuals())
|
||||
auto virtuals = storage.getVirtualsPtr();
|
||||
for (const auto & column : *virtuals)
|
||||
all_columns.push_back(column.name);
|
||||
return all_columns;
|
||||
}
|
||||
@ -435,60 +435,54 @@ static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const
|
||||
|
||||
static void validateUpdateColumns(
|
||||
const MutationsInterpreter::Source & source,
|
||||
const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns,
|
||||
const std::unordered_map<String, Names> & column_to_affected_materialized)
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
const NameSet & updated_columns,
|
||||
const std::unordered_map<String, Names> & column_to_affected_materialized,
|
||||
const ContextPtr & context)
|
||||
{
|
||||
auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context);
|
||||
NameSet key_columns = getKeyColumns(source, metadata_snapshot);
|
||||
|
||||
for (const String & column_name : updated_columns)
|
||||
const auto & storage_columns = storage_snapshot->metadata->getColumns();
|
||||
const auto & virtual_columns = *storage_snapshot->virtual_columns;
|
||||
|
||||
for (const auto & column_name : updated_columns)
|
||||
{
|
||||
auto found = false;
|
||||
for (const auto & col : metadata_snapshot->getColumns().getOrdinary())
|
||||
{
|
||||
if (col.name == column_name)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Allow to override value of lightweight delete filter virtual column
|
||||
if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
{
|
||||
if (!source.supportsLightweightDelete())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table");
|
||||
found = true;
|
||||
}
|
||||
|
||||
/// Dont allow to override value of block number virtual column
|
||||
if (!found && column_name == BlockNumberColumn::name)
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name));
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
for (const auto & col : metadata_snapshot->getColumns().getMaterialized())
|
||||
{
|
||||
if (col.name == column_name)
|
||||
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name));
|
||||
}
|
||||
|
||||
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name));
|
||||
}
|
||||
|
||||
if (key_columns.contains(column_name))
|
||||
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE key column {}", backQuote(column_name));
|
||||
|
||||
if (storage_columns.tryGetColumn(GetColumnsOptions::Materialized, column_name))
|
||||
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN, "Cannot UPDATE materialized column {}", backQuote(column_name));
|
||||
|
||||
auto materialized_it = column_to_affected_materialized.find(column_name);
|
||||
if (materialized_it != column_to_affected_materialized.end())
|
||||
{
|
||||
for (const String & materialized : materialized_it->second)
|
||||
for (const auto & materialized : materialized_it->second)
|
||||
{
|
||||
if (key_columns.contains(materialized))
|
||||
{
|
||||
throw Exception(ErrorCodes::CANNOT_UPDATE_COLUMN,
|
||||
"Updated column {} affects MATERIALIZED column {}, which is a key column. "
|
||||
"Cannot UPDATE it.", backQuote(column_name), backQuote(materialized));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!storage_columns.tryGetColumn(GetColumnsOptions::Ordinary, column_name))
|
||||
{
|
||||
/// Allow to override value of lightweight delete filter virtual column
|
||||
if (column_name == RowExistsColumn::name)
|
||||
{
|
||||
if (!source.supportsLightweightDelete())
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table");
|
||||
}
|
||||
else if (virtual_columns.tryGet(column_name))
|
||||
{
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Update is not supported for virtual column {} ", backQuote(column_name));
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column {} in table", backQuote(column_name));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -546,8 +540,8 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
/// Add _row_exists column if it is physically present in the part
|
||||
if (source.hasLightweightDeleteMask())
|
||||
{
|
||||
all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN});
|
||||
available_columns_set.insert(LightweightDeleteDescription::FILTER_COLUMN.name);
|
||||
all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
|
||||
available_columns_set.insert(RowExistsColumn::name);
|
||||
}
|
||||
|
||||
NameSet updated_columns;
|
||||
@ -563,9 +557,7 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
|
||||
for (const auto & [name, _] : command.column_to_update_expression)
|
||||
{
|
||||
if (!available_columns_set.contains(name)
|
||||
&& name != LightweightDeleteDescription::FILTER_COLUMN.name
|
||||
&& name != BlockNumberColumn::name)
|
||||
if (!available_columns_set.contains(name) && name != RowExistsColumn::name)
|
||||
throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
|
||||
"Column {} is updated but not requested to read", name);
|
||||
|
||||
@ -590,7 +582,7 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
}
|
||||
}
|
||||
|
||||
validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized);
|
||||
validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized, context);
|
||||
}
|
||||
|
||||
StorageInMemoryMetadata::HasDependencyCallback has_dependency =
|
||||
@ -666,15 +658,11 @@ void MutationsInterpreter::prepare(bool dry_run)
|
||||
{
|
||||
type = physical_column->type;
|
||||
}
|
||||
else if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
else if (column_name == RowExistsColumn::name)
|
||||
{
|
||||
type = LightweightDeleteDescription::FILTER_COLUMN.type;
|
||||
type = RowExistsColumn::type;
|
||||
deleted_mask_updated = true;
|
||||
}
|
||||
else if (column_name == BlockNumberColumn::name)
|
||||
{
|
||||
type = BlockNumberColumn::type;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown column {}", column_name);
|
||||
@ -1028,7 +1016,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
|
||||
|
||||
/// Add _row_exists column if it is present in the part
|
||||
if (source.hasLightweightDeleteMask() || deleted_mask_updated)
|
||||
all_columns.push_back(LightweightDeleteDescription::FILTER_COLUMN);
|
||||
all_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
|
||||
|
||||
bool has_filters = false;
|
||||
/// Next, for each stage calculate columns changed by this and previous stages.
|
||||
@ -1038,7 +1026,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
|
||||
{
|
||||
for (const auto & column : all_columns)
|
||||
{
|
||||
if (column.name == LightweightDeleteDescription::FILTER_COLUMN.name && !deleted_mask_updated)
|
||||
if (column.name == RowExistsColumn::name && !deleted_mask_updated)
|
||||
continue;
|
||||
|
||||
prepared_stages[i].output_columns.insert(column.name);
|
||||
@ -1057,7 +1045,7 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
|
||||
/// and so it is not in the list of AllPhysical columns.
|
||||
for (const auto & [column_name, _] : prepared_stages[i].column_to_updated)
|
||||
{
|
||||
if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && has_filters && !deleted_mask_updated)
|
||||
if (column_name == RowExistsColumn::name && has_filters && !deleted_mask_updated)
|
||||
continue;
|
||||
|
||||
prepared_stages[i].output_columns.insert(column_name);
|
||||
@ -1148,93 +1136,6 @@ void MutationsInterpreter::prepareMutationStages(std::vector<Stage> & prepared_s
|
||||
}
|
||||
}
|
||||
|
||||
/// This structure re-implements adding virtual columns while reading from MergeTree part.
|
||||
/// It would be good to unify it with IMergeTreeSelectAlgorithm.
|
||||
struct VirtualColumns
|
||||
{
|
||||
struct ColumnAndPosition
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
size_t position;
|
||||
};
|
||||
|
||||
using Columns = std::vector<ColumnAndPosition>;
|
||||
|
||||
Columns virtuals;
|
||||
Names columns_to_read;
|
||||
|
||||
VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns))
|
||||
{
|
||||
for (size_t i = 0; i < columns_to_read.size(); ++i)
|
||||
{
|
||||
if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
{
|
||||
if (!part->getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name))
|
||||
{
|
||||
ColumnWithTypeAndName mask_column;
|
||||
mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type;
|
||||
mask_column.column = mask_column.type->createColumnConst(0, 1);
|
||||
mask_column.name = std::move(columns_to_read[i]);
|
||||
|
||||
virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i});
|
||||
}
|
||||
}
|
||||
else if (columns_to_read[i] == "_partition_id")
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.type = std::make_shared<DataTypeString>();
|
||||
column.column = column.type->createColumnConst(0, part->info.partition_id);
|
||||
column.name = std::move(columns_to_read[i]);
|
||||
|
||||
virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i});
|
||||
}
|
||||
else if (columns_to_read[i] == BlockNumberColumn::name)
|
||||
{
|
||||
if (!part->getColumns().contains(BlockNumberColumn::name))
|
||||
{
|
||||
ColumnWithTypeAndName block_number_column;
|
||||
block_number_column.type = BlockNumberColumn::type;
|
||||
block_number_column.column = block_number_column.type->createColumnConst(0, part->info.min_block);
|
||||
block_number_column.name = std::move(columns_to_read[i]);
|
||||
|
||||
virtuals.emplace_back(ColumnAndPosition{.column = std::move(block_number_column), .position = i});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!virtuals.empty())
|
||||
{
|
||||
Names columns_no_virtuals;
|
||||
columns_no_virtuals.reserve(columns_to_read.size());
|
||||
size_t next_virtual = 0;
|
||||
for (size_t i = 0; i < columns_to_read.size(); ++i)
|
||||
{
|
||||
if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position)
|
||||
++next_virtual;
|
||||
else
|
||||
columns_no_virtuals.emplace_back(std::move(columns_to_read[i]));
|
||||
}
|
||||
|
||||
columns_to_read.swap(columns_no_virtuals);
|
||||
}
|
||||
}
|
||||
|
||||
void addVirtuals(QueryPlan & plan)
|
||||
{
|
||||
auto dag = std::make_unique<ActionsDAG>(plan.getCurrentDataStream().header.getColumnsWithTypeAndName());
|
||||
|
||||
for (auto & column : virtuals)
|
||||
{
|
||||
const auto & adding_const = dag->addColumn(std::move(column.column));
|
||||
auto & outputs = dag->getOutputs();
|
||||
outputs.insert(outputs.begin() + column.position, &adding_const);
|
||||
}
|
||||
|
||||
auto step = std::make_unique<ExpressionStep>(plan.getCurrentDataStream(), std::move(dag));
|
||||
plan.addStep(std::move(step));
|
||||
}
|
||||
};
|
||||
|
||||
void MutationsInterpreter::Source::read(
|
||||
Stage & first_stage,
|
||||
QueryPlan & plan,
|
||||
@ -1277,16 +1178,12 @@ void MutationsInterpreter::Source::read(
|
||||
filter = ActionsDAG::buildFilterActionsDAG(nodes);
|
||||
}
|
||||
|
||||
VirtualColumns virtual_columns(std::move(required_columns), part);
|
||||
|
||||
createReadFromPartStep(
|
||||
MergeTreeSequentialSourceType::Mutation,
|
||||
plan, *data, storage_snapshot, part,
|
||||
std::move(virtual_columns.columns_to_read),
|
||||
plan, *data, storage_snapshot,
|
||||
part, required_columns,
|
||||
apply_deleted_mask_, filter, context_,
|
||||
getLogger("MutationsInterpreter"));
|
||||
|
||||
virtual_columns.addVirtuals(plan);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -56,6 +56,7 @@
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/StorageJoin.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Storages/StorageView.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
@ -990,8 +991,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
|
||||
{
|
||||
auto options = GetColumnsOptions(add_special ? GetColumnsOptions::All : GetColumnsOptions::AllPhysical);
|
||||
options.withExtendedObjects();
|
||||
if (storage->supportsSubcolumns())
|
||||
options.withSubcolumns();
|
||||
options.withSubcolumns(storage->supportsSubcolumns());
|
||||
|
||||
auto columns_from_storage = storage_snapshot->getColumns(options);
|
||||
|
||||
@ -1001,8 +1001,7 @@ void TreeRewriterResult::collectSourceColumns(bool add_special)
|
||||
source_columns.insert(source_columns.end(), columns_from_storage.begin(), columns_from_storage.end());
|
||||
|
||||
auto metadata_snapshot = storage->getInMemoryMetadataPtr();
|
||||
auto metadata_column_descriptions = metadata_snapshot->getColumns();
|
||||
source_columns_ordinary = metadata_column_descriptions.getOrdinary();
|
||||
source_columns_ordinary = metadata_snapshot->getColumns().getOrdinary();
|
||||
}
|
||||
|
||||
source_columns_set = removeDuplicateColumns(source_columns);
|
||||
@ -1109,16 +1108,16 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
const auto & partition_desc = storage_snapshot->metadata->getPartitionKey();
|
||||
if (partition_desc.expression)
|
||||
{
|
||||
auto partition_source_columns = partition_desc.expression->getRequiredColumns();
|
||||
partition_source_columns.push_back("_part");
|
||||
partition_source_columns.push_back("_partition_id");
|
||||
partition_source_columns.push_back("_part_uuid");
|
||||
partition_source_columns.push_back("_partition_value");
|
||||
auto partition_columns = partition_desc.expression->getRequiredColumns();
|
||||
NameSet partition_columns_set(partition_columns.begin(), partition_columns.end());
|
||||
|
||||
const auto & parititon_virtuals = MergeTreeData::virtuals_useful_for_filter;
|
||||
partition_columns_set.insert(parititon_virtuals.begin(), parititon_virtuals.end());
|
||||
|
||||
optimize_trivial_count = true;
|
||||
for (const auto & required_column : required)
|
||||
{
|
||||
if (std::find(partition_source_columns.begin(), partition_source_columns.end(), required_column)
|
||||
== partition_source_columns.end())
|
||||
if (!partition_columns_set.contains(required_column))
|
||||
{
|
||||
optimize_trivial_count = false;
|
||||
break;
|
||||
@ -1129,7 +1128,7 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
|
||||
NameSet unknown_required_source_columns = required;
|
||||
|
||||
for (NamesAndTypesList::iterator it = source_columns.begin(); it != source_columns.end();)
|
||||
for (auto it = source_columns.begin(); it != source_columns.end();)
|
||||
{
|
||||
const String & column_name = it->name;
|
||||
unknown_required_source_columns.erase(column_name);
|
||||
@ -1143,32 +1142,23 @@ bool TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select
|
||||
has_virtual_shard_num = false;
|
||||
/// If there are virtual columns among the unknown columns. Remove them from the list of unknown and add
|
||||
/// in columns list, so that when further processing they are also considered.
|
||||
if (storage)
|
||||
if (storage_snapshot)
|
||||
{
|
||||
const auto storage_virtuals = storage->getVirtuals();
|
||||
const auto & virtuals = storage_snapshot->virtual_columns;
|
||||
for (auto it = unknown_required_source_columns.begin(); it != unknown_required_source_columns.end();)
|
||||
{
|
||||
auto column = storage_virtuals.tryGetByName(*it);
|
||||
if (column)
|
||||
if (auto column = virtuals->tryGet(*it))
|
||||
{
|
||||
source_columns.push_back(*column);
|
||||
it = unknown_required_source_columns.erase(it);
|
||||
}
|
||||
else
|
||||
++it;
|
||||
}
|
||||
|
||||
if (is_remote_storage)
|
||||
{
|
||||
for (const auto & name_type : storage_virtuals)
|
||||
{
|
||||
if (name_type.name == "_shard_num" && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()))
|
||||
{
|
||||
has_virtual_shard_num = true;
|
||||
break;
|
||||
}
|
||||
++it;
|
||||
}
|
||||
}
|
||||
|
||||
has_virtual_shard_num = is_remote_storage && storage->isVirtualColumn("_shard_num", storage_snapshot->getMetadataForQuery()) && virtuals->has("_shard_num");
|
||||
}
|
||||
|
||||
/// Collect missed object subcolumns
|
||||
|
@ -99,7 +99,7 @@ static NamesAndTypesList getColumnsFromTableExpression(
|
||||
names_and_type_list = columns.getOrdinary();
|
||||
materialized = columns.getMaterialized();
|
||||
aliases = columns.getAliases();
|
||||
virtuals = function_storage->getVirtuals();
|
||||
virtuals = function_storage->getVirtualsList();
|
||||
}
|
||||
else if (table_expression.database_and_table_name)
|
||||
{
|
||||
@ -110,7 +110,7 @@ static NamesAndTypesList getColumnsFromTableExpression(
|
||||
names_and_type_list = columns.getOrdinary();
|
||||
materialized = columns.getMaterialized();
|
||||
aliases = columns.getAliases();
|
||||
virtuals = table->getVirtuals();
|
||||
virtuals = table->getVirtualsList();
|
||||
}
|
||||
|
||||
return names_and_type_list;
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include <Columns/ColumnArray.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <Storages/StorageInMemoryMetadata.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -280,7 +279,7 @@ void fillMissingColumns(
|
||||
const NamesAndTypesList & requested_columns,
|
||||
const NamesAndTypesList & available_columns,
|
||||
const NameSet & partially_read_columns,
|
||||
StorageMetadataPtr metadata_snapshot, size_t block_number)
|
||||
StorageMetadataPtr metadata_snapshot)
|
||||
{
|
||||
size_t num_columns = requested_columns.size();
|
||||
if (num_columns != res_columns.size())
|
||||
@ -359,14 +358,9 @@ void fillMissingColumns(
|
||||
}
|
||||
else
|
||||
{
|
||||
if (requested_column->name == BlockNumberColumn::name)
|
||||
res_columns[i] = type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
|
||||
else
|
||||
/// We must turn a constant column into a full column because the interpreter could infer
|
||||
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
|
||||
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
|
||||
|
||||
|
||||
/// We must turn a constant column into a full column because the interpreter could infer
|
||||
/// that it is constant everywhere but in some blocks (from other parts) it can be a full column.
|
||||
res_columns[i] = type->createColumnConstWithDefaultValue(num_rows)->convertToFullColumnIfConst();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -46,6 +46,6 @@ void fillMissingColumns(
|
||||
const NamesAndTypesList & requested_columns,
|
||||
const NamesAndTypesList & available_columns,
|
||||
const NameSet & partially_read_columns,
|
||||
StorageMetadataPtr metadata_snapshot, size_t block_number = 0);
|
||||
StorageMetadataPtr metadata_snapshot);
|
||||
|
||||
}
|
||||
|
@ -32,7 +32,7 @@ ASTPtr processColumnTransformers(
|
||||
|
||||
tables_with_columns[0].addHiddenColumns(columns.getMaterialized());
|
||||
tables_with_columns[0].addHiddenColumns(columns.getAliases());
|
||||
tables_with_columns[0].addHiddenColumns(table->getVirtuals());
|
||||
tables_with_columns[0].addHiddenColumns(table->getVirtualsList());
|
||||
|
||||
NameSet source_columns_set;
|
||||
for (const auto & identifier : query_columns->children)
|
||||
|
@ -12,7 +12,7 @@
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
|
@ -431,7 +431,7 @@ AggregateProjectionCandidates getAggregateProjectionCandidates(
|
||||
{
|
||||
const auto & keys = aggregating.getParams().keys;
|
||||
const auto & aggregates = aggregating.getParams().aggregates;
|
||||
Block key_virtual_columns = reading.getMergeTreeData().getSampleBlockWithVirtualColumns();
|
||||
Block key_virtual_columns = reading.getMergeTreeData().getHeaderWithVirtualsForFilter();
|
||||
|
||||
AggregateProjectionCandidates candidates;
|
||||
|
||||
|
@ -135,7 +135,7 @@ bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes)
|
||||
std::list<NormalProjectionCandidate> candidates;
|
||||
NormalProjectionCandidate * best_candidate = nullptr;
|
||||
|
||||
const Names & required_columns = reading->getRealColumnNames();
|
||||
const Names & required_columns = reading->getAllColumnNames();
|
||||
const auto & parts = reading->getParts();
|
||||
const auto & alter_conversions = reading->getAlterConvertionsForParts();
|
||||
const auto & query_info = reading->getQueryInfo();
|
||||
|
@ -261,30 +261,24 @@ void ReadFromMergeTree::AnalysisResult::checkLimits(const Settings & settings, c
|
||||
ReadFromMergeTree::ReadFromMergeTree(
|
||||
MergeTreeData::DataPartsVector parts_,
|
||||
std::vector<AlterConversionsPtr> alter_conversions_,
|
||||
const Names & column_names_,
|
||||
Names real_column_names_,
|
||||
Names virt_column_names_,
|
||||
Names all_column_names_,
|
||||
const MergeTreeData & data_,
|
||||
const SelectQueryInfo & query_info_,
|
||||
const StorageSnapshotPtr & storage_snapshot_,
|
||||
const ContextPtr & context_,
|
||||
size_t max_block_size_,
|
||||
size_t num_streams_,
|
||||
bool sample_factor_column_queried_,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read_,
|
||||
LoggerPtr log_,
|
||||
AnalysisResultPtr analyzed_result_ptr_,
|
||||
bool enable_parallel_reading)
|
||||
: SourceStepWithFilter(DataStream{.header = MergeTreeSelectProcessor::transformHeader(
|
||||
storage_snapshot_->getSampleBlockForColumns(real_column_names_),
|
||||
query_info_.prewhere_info,
|
||||
data_.getPartitionValueType(),
|
||||
virt_column_names_)}, column_names_, query_info_, storage_snapshot_, context_)
|
||||
storage_snapshot_->getSampleBlockForColumns(all_column_names_),
|
||||
query_info_.prewhere_info)}, all_column_names_, query_info_, storage_snapshot_, context_)
|
||||
, reader_settings(getMergeTreeReaderSettings(context_, query_info_))
|
||||
, prepared_parts(std::move(parts_))
|
||||
, alter_conversions_for_parts(std::move(alter_conversions_))
|
||||
, real_column_names(std::move(real_column_names_))
|
||||
, virt_column_names(std::move(virt_column_names_))
|
||||
, all_column_names(std::move(all_column_names_))
|
||||
, data(data_)
|
||||
, actions_settings(ExpressionActionsSettings::fromContext(context_))
|
||||
, metadata_for_reading(storage_snapshot->getMetadataForQuery())
|
||||
@ -293,20 +287,11 @@ ReadFromMergeTree::ReadFromMergeTree(
|
||||
.preferred_block_size_bytes = context->getSettingsRef().preferred_block_size_bytes,
|
||||
.preferred_max_column_in_block_size_bytes = context->getSettingsRef().preferred_max_column_in_block_size_bytes}
|
||||
, requested_num_streams(num_streams_)
|
||||
, sample_factor_column_queried(sample_factor_column_queried_)
|
||||
, max_block_numbers_to_read(std::move(max_block_numbers_to_read_))
|
||||
, log(std::move(log_))
|
||||
, analyzed_result_ptr(analyzed_result_ptr_)
|
||||
, is_parallel_reading_from_replicas(enable_parallel_reading)
|
||||
{
|
||||
if (sample_factor_column_queried)
|
||||
{
|
||||
/// Only _sample_factor virtual column is added by ReadFromMergeTree
|
||||
/// Other virtual columns are added by MergeTreeSelectProcessor.
|
||||
auto type = std::make_shared<DataTypeFloat64>();
|
||||
output_stream->header.insert({type->createColumn(), type, "_sample_factor"});
|
||||
}
|
||||
|
||||
if (is_parallel_reading_from_replicas)
|
||||
{
|
||||
all_ranges_callback = context->getMergeTreeAllRangesCallback();
|
||||
@ -368,12 +353,12 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
auto pool = std::make_shared<MergeTreeReadPoolParallelReplicas>(
|
||||
std::move(extension),
|
||||
std::move(parts_with_range),
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
actions_settings,
|
||||
reader_settings,
|
||||
required_columns,
|
||||
virt_column_names,
|
||||
pool_settings,
|
||||
context);
|
||||
|
||||
@ -387,8 +372,8 @@ Pipe ReadFromMergeTree::readFromPoolParallelReplicas(
|
||||
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), data, prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings, virt_column_names);
|
||||
pool, std::move(algorithm), storage_snapshot, prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings);
|
||||
|
||||
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
|
||||
pipes.emplace_back(std::move(source));
|
||||
@ -449,12 +434,12 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
{
|
||||
pool = std::make_shared<MergeTreePrefetchedReadPool>(
|
||||
std::move(parts_with_range),
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
actions_settings,
|
||||
reader_settings,
|
||||
required_columns,
|
||||
virt_column_names,
|
||||
pool_settings,
|
||||
context);
|
||||
}
|
||||
@ -462,12 +447,12 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
{
|
||||
pool = std::make_shared<MergeTreeReadPool>(
|
||||
std::move(parts_with_range),
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
actions_settings,
|
||||
reader_settings,
|
||||
required_columns,
|
||||
virt_column_names,
|
||||
pool_settings,
|
||||
context);
|
||||
}
|
||||
@ -486,8 +471,8 @@ Pipe ReadFromMergeTree::readFromPool(
|
||||
auto algorithm = std::make_unique<MergeTreeThreadSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), data, prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings, virt_column_names);
|
||||
pool, std::move(algorithm), storage_snapshot, prewhere_info,
|
||||
actions_settings, block_size_copy, reader_settings);
|
||||
|
||||
auto source = std::make_shared<MergeTreeSource>(std::move(processor));
|
||||
|
||||
@ -538,12 +523,12 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
std::move(extension),
|
||||
mode,
|
||||
parts_with_ranges,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
actions_settings,
|
||||
reader_settings,
|
||||
required_columns,
|
||||
virt_column_names,
|
||||
pool_settings,
|
||||
context);
|
||||
}
|
||||
@ -553,12 +538,12 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
has_limit_below_one_block,
|
||||
read_type,
|
||||
parts_with_ranges,
|
||||
shared_virtual_fields,
|
||||
storage_snapshot,
|
||||
prewhere_info,
|
||||
actions_settings,
|
||||
reader_settings,
|
||||
required_columns,
|
||||
virt_column_names,
|
||||
pool_settings,
|
||||
context);
|
||||
}
|
||||
@ -592,8 +577,8 @@ Pipe ReadFromMergeTree::readInOrder(
|
||||
algorithm = std::make_unique<MergeTreeInOrderSelectAlgorithm>(i);
|
||||
|
||||
auto processor = std::make_unique<MergeTreeSelectProcessor>(
|
||||
pool, std::move(algorithm), data, prewhere_info,
|
||||
actions_settings, block_size, reader_settings, virt_column_names);
|
||||
pool, std::move(algorithm), storage_snapshot, prewhere_info,
|
||||
actions_settings, block_size, reader_settings);
|
||||
|
||||
processor->addPartLevelToChunk(isQueryWithFinal());
|
||||
|
||||
@ -1302,8 +1287,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
requested_num_streams,
|
||||
max_block_numbers_to_read,
|
||||
data,
|
||||
real_column_names,
|
||||
sample_factor_column_queried,
|
||||
all_column_names,
|
||||
log,
|
||||
indexes);
|
||||
}
|
||||
@ -1489,8 +1473,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
size_t num_streams,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
|
||||
const MergeTreeData & data,
|
||||
const Names & real_column_names,
|
||||
bool sample_factor_column_queried,
|
||||
const Names & all_column_names,
|
||||
LoggerPtr log,
|
||||
std::optional<Indexes> & indexes)
|
||||
{
|
||||
@ -1503,8 +1486,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
num_streams,
|
||||
max_block_numbers_to_read,
|
||||
data,
|
||||
real_column_names,
|
||||
sample_factor_column_queried,
|
||||
all_column_names,
|
||||
log,
|
||||
indexes);
|
||||
}
|
||||
@ -1518,8 +1500,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
||||
size_t num_streams,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
|
||||
const MergeTreeData & data,
|
||||
const Names & real_column_names,
|
||||
bool sample_factor_column_queried,
|
||||
const Names & all_column_names,
|
||||
LoggerPtr log,
|
||||
std::optional<Indexes> & indexes)
|
||||
{
|
||||
@ -1528,7 +1509,7 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
||||
|
||||
size_t total_parts = parts.size();
|
||||
|
||||
result.column_names_to_read = real_column_names;
|
||||
result.column_names_to_read = all_column_names;
|
||||
|
||||
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
|
||||
if (result.column_names_to_read.empty())
|
||||
@ -1587,7 +1568,6 @@ ReadFromMergeTree::AnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl(
|
||||
data,
|
||||
metadata_snapshot,
|
||||
context_,
|
||||
sample_factor_column_queried,
|
||||
log);
|
||||
|
||||
if (result.sampling.read_nothing)
|
||||
@ -1704,10 +1684,8 @@ void ReadFromMergeTree::updatePrewhereInfo(const PrewhereInfoPtr & prewhere_info
|
||||
prewhere_info = prewhere_info_value;
|
||||
|
||||
output_stream = DataStream{.header = MergeTreeSelectProcessor::transformHeader(
|
||||
storage_snapshot->getSampleBlockForColumns(real_column_names),
|
||||
prewhere_info_value,
|
||||
data.getPartitionValueType(),
|
||||
virt_column_names)};
|
||||
storage_snapshot->getSampleBlockForColumns(all_column_names),
|
||||
prewhere_info_value)};
|
||||
|
||||
updateSortDescriptionForOutputStream(
|
||||
*output_stream,
|
||||
@ -1901,6 +1879,7 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
|
||||
storage_snapshot->data = std::make_unique<MergeTreeData::SnapshotData>();
|
||||
|
||||
result.checkLimits(context->getSettingsRef(), query_info);
|
||||
shared_virtual_fields.emplace("_sample_factor", result.sampling.used_sample_factor);
|
||||
|
||||
LOG_DEBUG(
|
||||
log,
|
||||
@ -1985,18 +1964,6 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
|
||||
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
|
||||
};
|
||||
|
||||
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
|
||||
if (sample_factor_column_queried)
|
||||
{
|
||||
ColumnWithTypeAndName column;
|
||||
column.name = "_sample_factor";
|
||||
column.type = std::make_shared<DataTypeFloat64>();
|
||||
column.column = column.type->createColumnConst(0, Field(result.sampling.used_sample_factor));
|
||||
|
||||
auto adding_column = ActionsDAG::makeAddingColumnActions(std::move(column));
|
||||
append_actions(std::move(adding_column));
|
||||
}
|
||||
|
||||
if (result_projection)
|
||||
cur_header = result_projection->updateHeader(cur_header);
|
||||
|
||||
|
@ -110,16 +110,13 @@ public:
|
||||
ReadFromMergeTree(
|
||||
MergeTreeData::DataPartsVector parts_,
|
||||
std::vector<AlterConversionsPtr> alter_conversions_,
|
||||
const Names & column_names_,
|
||||
Names real_column_names_,
|
||||
Names virt_column_names_,
|
||||
Names all_column_names_,
|
||||
const MergeTreeData & data_,
|
||||
const SelectQueryInfo & query_info_,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const ContextPtr & context_,
|
||||
size_t max_block_size_,
|
||||
size_t num_streams_,
|
||||
bool sample_factor_column_queried_,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read_,
|
||||
LoggerPtr log_,
|
||||
AnalysisResultPtr analyzed_result_ptr_,
|
||||
@ -136,8 +133,7 @@ public:
|
||||
void describeActions(JSONBuilder::JSONMap & map) const override;
|
||||
void describeIndexes(JSONBuilder::JSONMap & map) const override;
|
||||
|
||||
const Names & getRealColumnNames() const { return real_column_names; }
|
||||
const Names & getVirtualColumnNames() const { return virt_column_names; }
|
||||
const Names & getAllColumnNames() const { return all_column_names; }
|
||||
|
||||
StorageID getStorageID() const { return data.getStorageID(); }
|
||||
UInt64 getSelectedParts() const { return selected_parts; }
|
||||
@ -164,8 +160,7 @@ public:
|
||||
size_t num_streams,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
|
||||
const MergeTreeData & data,
|
||||
const Names & real_column_names,
|
||||
bool sample_factor_column_queried,
|
||||
const Names & all_column_names,
|
||||
LoggerPtr log,
|
||||
std::optional<Indexes> & indexes);
|
||||
|
||||
@ -209,8 +204,7 @@ private:
|
||||
size_t num_streams,
|
||||
std::shared_ptr<PartitionIdToMaxBlock> max_block_numbers_to_read,
|
||||
const MergeTreeData & data,
|
||||
const Names & real_column_names,
|
||||
bool sample_factor_column_queried,
|
||||
const Names & all_column_names,
|
||||
LoggerPtr log,
|
||||
std::optional<Indexes> & indexes);
|
||||
|
||||
@ -227,8 +221,7 @@ private:
|
||||
MergeTreeData::DataPartsVector prepared_parts;
|
||||
std::vector<AlterConversionsPtr> alter_conversions_for_parts;
|
||||
|
||||
Names real_column_names;
|
||||
Names virt_column_names;
|
||||
Names all_column_names;
|
||||
|
||||
const MergeTreeData & data;
|
||||
ExpressionActionsSettings actions_settings;
|
||||
@ -239,7 +232,6 @@ private:
|
||||
|
||||
size_t requested_num_streams;
|
||||
size_t output_streams_limit = 0;
|
||||
const bool sample_factor_column_queried;
|
||||
|
||||
/// Used for aggregation optimization (see DB::QueryPlanOptimizations::tryAggregateEachPartitionIndependently).
|
||||
bool output_each_partition_through_separate_port = false;
|
||||
@ -280,7 +272,9 @@ private:
|
||||
RangesInDataParts && parts, size_t num_streams, const Names & origin_column_names, const Names & column_names, ActionsDAGPtr & out_projection);
|
||||
|
||||
ReadFromMergeTree::AnalysisResult getAnalysisResult() const;
|
||||
|
||||
AnalysisResultPtr analyzed_result_ptr;
|
||||
VirtualFields shared_virtual_fields;
|
||||
|
||||
bool is_parallel_reading_from_replicas;
|
||||
std::optional<MergeTreeAllRangesCallback> all_ranges_callback;
|
||||
|
@ -451,7 +451,7 @@ Chain buildPushingToViewsChain(
|
||||
|
||||
/// If we don't write directly to the destination
|
||||
/// then expect that we're inserting with precalculated virtual columns
|
||||
auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals())
|
||||
auto storage_header = no_destination ? metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtualsList())
|
||||
: metadata_snapshot->getSampleBlock();
|
||||
|
||||
/** TODO This is a very important line. At any insertion into the table one of chains should own lock.
|
||||
@ -597,7 +597,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat
|
||||
views_data.source_storage_id,
|
||||
views_data.source_metadata_snapshot->getColumns(),
|
||||
std::move(block),
|
||||
views_data.source_storage->getVirtuals()));
|
||||
*views_data.source_storage->getVirtualsPtr()));
|
||||
|
||||
QueryPipelineBuilder pipeline;
|
||||
|
||||
|
@ -31,8 +31,6 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Storages/AlterCommands.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/LightweightDeleteDescription.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Common/randomSeed.h>
|
||||
@ -965,8 +963,7 @@ bool AlterCommand::isRequireMutationStage(const StorageInMemoryMetadata & metada
|
||||
|
||||
/// Drop alias is metadata alter, in other case mutation is required.
|
||||
if (type == DROP_COLUMN)
|
||||
return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name) ||
|
||||
column_name == LightweightDeleteDescription::FILTER_COLUMN.name || column_name == BlockNumberColumn::name;
|
||||
return metadata.columns.hasColumnOrNested(GetColumnsOptions::AllPhysical, column_name);
|
||||
|
||||
if (type != MODIFY_COLUMN || data_type == nullptr)
|
||||
return false;
|
||||
@ -1256,7 +1253,9 @@ void AlterCommands::prepare(const StorageInMemoryMetadata & metadata)
|
||||
|
||||
void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
{
|
||||
const StorageInMemoryMetadata & metadata = table->getInMemoryMetadata();
|
||||
const auto & metadata = table->getInMemoryMetadata();
|
||||
auto virtuals = table->getVirtualsPtr();
|
||||
|
||||
auto all_columns = metadata.columns;
|
||||
/// Default expression for all added/modified columns
|
||||
ASTPtr default_expr_list = std::make_shared<ASTExpressionList>();
|
||||
@ -1292,16 +1291,20 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
if (command.data_type->hasDynamicSubcolumns())
|
||||
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Adding a new column of a type which has dynamic subcolumns to an existing table is not allowed. It has known bugs");
|
||||
|
||||
if (column_name == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast<MergeTreeData>(table))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
|
||||
"this column name is reserved for lightweight delete feature", backQuote(column_name));
|
||||
|
||||
if (column_name == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot add column {}: "
|
||||
"this column name is reserved for _block_number persisting feature", backQuote(column_name));
|
||||
if (virtuals->tryGet(column_name, VirtualsKind::Persistent))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot add column {}: this column name is reserved for persistent virtual column", backQuote(column_name));
|
||||
|
||||
if (command.codec)
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_deflate_qpl_codec, context->getSettingsRef().enable_zstd_qat_codec);
|
||||
{
|
||||
const auto & settings = context->getSettingsRef();
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
|
||||
command.codec, command.data_type,
|
||||
!settings.allow_suspicious_codecs,
|
||||
settings.allow_experimental_codecs,
|
||||
settings.enable_deflate_qpl_codec,
|
||||
settings.enable_zstd_qat_codec);
|
||||
}
|
||||
|
||||
all_columns.add(ColumnDescription(column_name, command.data_type));
|
||||
}
|
||||
@ -1415,9 +1418,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
}
|
||||
else if (command.type == AlterCommand::DROP_COLUMN)
|
||||
{
|
||||
if (all_columns.has(command.column_name) ||
|
||||
all_columns.hasNested(command.column_name) ||
|
||||
(command.clear && column_name == LightweightDeleteDescription::FILTER_COLUMN.name))
|
||||
if (all_columns.has(command.column_name) || all_columns.hasNested(command.column_name))
|
||||
{
|
||||
if (!command.clear) /// CLEAR column is Ok even if there are dependencies.
|
||||
{
|
||||
@ -1501,16 +1502,12 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
}
|
||||
|
||||
if (all_columns.has(command.rename_to))
|
||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN, "Cannot rename to {}: "
|
||||
"column with this name already exists", backQuote(command.rename_to));
|
||||
throw Exception(ErrorCodes::DUPLICATE_COLUMN,
|
||||
"Cannot rename to {}: column with this name already exists", backQuote(command.rename_to));
|
||||
|
||||
if (command.rename_to == LightweightDeleteDescription::FILTER_COLUMN.name && std::dynamic_pointer_cast<MergeTreeData>(table))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
|
||||
"this column name is reserved for lightweight delete feature", backQuote(command.rename_to));
|
||||
|
||||
if (command.rename_to == BlockNumberColumn::name && std::dynamic_pointer_cast<MergeTreeData>(table))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Cannot rename to {}: "
|
||||
"this column name is reserved for _block_number persisting feature", backQuote(command.rename_to));
|
||||
if (virtuals->tryGet(command.rename_to, VirtualsKind::Persistent))
|
||||
throw Exception(ErrorCodes::ILLEGAL_COLUMN,
|
||||
"Cannot rename to {}: this column name is reserved for persistent virtual column", backQuote(command.rename_to));
|
||||
|
||||
if (modified_columns.contains(column_name))
|
||||
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot rename and modify the same column {} "
|
||||
|
@ -1,23 +0,0 @@
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
#include <Compression/CompressionCodecMultiple.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);
|
||||
|
||||
CompressionCodecPtr getCompressionCodecForBlockNumberColumn()
|
||||
{
|
||||
std::vector <CompressionCodecPtr> codecs;
|
||||
codecs.reserve(2);
|
||||
auto data_bytes_size = BlockNumberColumn::type->getSizeOfValueInMemory();
|
||||
codecs.emplace_back(getCompressionCodecDelta(data_bytes_size));
|
||||
codecs.emplace_back(CompressionCodecFactory::instance().get("LZ4", {}));
|
||||
return std::make_shared<CompressionCodecMultiple>(codecs);
|
||||
}
|
||||
|
||||
const String BlockNumberColumn::name = "_block_number";
|
||||
const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
|
||||
const CompressionCodecPtr BlockNumberColumn::compression_codec = getCompressionCodecForBlockNumberColumn();
|
||||
|
||||
}
|
@ -1,16 +0,0 @@
|
||||
#pragma once
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <Compression/CompressionFactory.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
struct BlockNumberColumn
|
||||
{
|
||||
static const String name;
|
||||
static const DataTypePtr type;
|
||||
static const CompressionCodecPtr compression_codec;
|
||||
};
|
||||
|
||||
}
|
@@ -31,15 +31,11 @@
#include <Interpreters/TreeRewriter.h>
#include <Interpreters/ExpressionActions.h>
#include <Interpreters/FunctionNameNormalizer.h>
#include <Storages/BlockNumberColumn.h>


namespace DB
{

CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);


namespace ErrorCodes
{
    extern const int NO_SUCH_COLUMN_IN_TABLE;
@@ -482,6 +478,10 @@ NamesAndTypesList ColumnsDescription::get(const GetColumnsOptions & options) con
    NamesAndTypesList res;
    switch (options.kind)
    {
        case GetColumnsOptions::None:
        {
            break;
        }
        case GetColumnsOptions::All:
        {
            res = getAll();
@@ -559,6 +559,12 @@ const ColumnDescription & ColumnsDescription::get(const String & column_name) co
    return *it;
}

const ColumnDescription * ColumnsDescription::tryGet(const String & column_name) const
{
    auto it = columns.get<1>().find(column_name);
    return it == columns.get<1>().end() ? nullptr : &(*it);
}

static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind)
{
    switch (kind)
@@ -572,7 +578,8 @@ static GetColumnsOptions::Kind defaultKindToGetKind(ColumnDefaultKind kind)
        case ColumnDefaultKind::Ephemeral:
            return GetColumnsOptions::Ephemeral;
    }
    UNREACHABLE();

    return GetColumnsOptions::None;
}

NamesAndTypesList ColumnsDescription::getByNames(const GetColumnsOptions & options, const Names & names) const
@@ -784,33 +791,6 @@ bool ColumnsDescription::hasCompressionCodec(const String & column_name) const
    return it != columns.get<1>().end() && it->codec != nullptr;
}

CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
    const auto it = columns.get<1>().find(column_name);

    if (it == columns.get<1>().end() || !it->codec)
        return default_codec;

    return CompressionCodecFactory::instance().get(it->codec, it->type, default_codec);
}

CompressionCodecPtr ColumnsDescription::getCodecOrDefault(const String & column_name) const
{
    assert (column_name != BlockNumberColumn::name);
    return getCodecOrDefault(column_name, CompressionCodecFactory::instance().getDefaultCodec());
}

ASTPtr ColumnsDescription::getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const
{
    assert (column_name != BlockNumberColumn::name);
    const auto it = columns.get<1>().find(column_name);

    if (it == columns.get<1>().end() || !it->codec)
        return default_codec->getFullCodecDesc();

    return it->codec;
}

ColumnsDescription::ColumnTTLs ColumnsDescription::getColumnTTLs() const
{
    ColumnTTLs ret;
@@ -29,10 +29,19 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
}

enum class VirtualsKind : UInt8
{
    None = 0,
    Ephemeral = 1,
    Persistent = 2,
    All = Ephemeral | Persistent,
};

struct GetColumnsOptions
{
    enum Kind : UInt8
    {
        None = 0,
        Ordinary = 1,
        Materialized = 2,
        Aliases = 4,
@@ -51,9 +60,9 @@ struct GetColumnsOptions
        return *this;
    }

    GetColumnsOptions & withVirtuals(bool value = true)
    GetColumnsOptions & withVirtuals(VirtualsKind value = VirtualsKind::All)
    {
        with_virtuals = value;
        virtuals_kind = value;
        return *this;
    }

@@ -63,17 +72,11 @@ struct GetColumnsOptions
        return *this;
    }

    GetColumnsOptions & withSystemColumns(bool value = true)
    {
        with_system_columns = value;
        return *this;
    }

    Kind kind;
    VirtualsKind virtuals_kind = VirtualsKind::None;

    bool with_subcolumns = false;
    bool with_virtuals = false;
    bool with_extended_objects = false;
    bool with_system_columns = false;
};

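// Illustrative sketch, not part of the commit: how the new VirtualsKind flags
// compose with GetColumnsOptions. All names below appear in the hunks above;
// the surrounding storage snapshot is assumed.
auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical)
    .withVirtuals(VirtualsKind::Ephemeral)   // only query-time virtuals
    .withSubcolumns(true);
// VirtualsKind::All == (Ephemeral | Persistent), so the default withVirtuals()
// requests both kinds, while VirtualsKind::None (the member default) disables them.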
/// Description of a single table column (in CREATE TABLE for example).
@@ -160,6 +163,7 @@ public:
    bool hasNested(const String & column_name) const;
    bool hasSubcolumn(const String & column_name) const;
    const ColumnDescription & get(const String & column_name) const;
    const ColumnDescription * tryGet(const String & column_name) const;

    template <typename F>
    void modify(const String & column_name, F && f)
@@ -213,9 +217,6 @@ public:

    /// Does column has non default specified compression codec
    bool hasCompressionCodec(const String & column_name) const;
    CompressionCodecPtr getCodecOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;
    CompressionCodecPtr getCodecOrDefault(const String & column_name) const;
    ASTPtr getCodecDescOrDefault(const String & column_name, CompressionCodecPtr default_codec) const;

    String toString() const;
    static ColumnsDescription parse(const String & str);
@@ -269,4 +270,5 @@ private:
/// don't have strange constructions in default expression like SELECT query or
/// arrayJoin function.
Block validateColumnsDefaultsAndGetSampleBlock(ASTPtr default_expr_list, const NamesAndTypesList & all_columns, ContextPtr context);

}
@@ -31,7 +31,7 @@ FileLogSource::FileLogSource(
    , max_streams_number(max_streams_number_)
    , handle_error_mode(handle_error_mode_)
    , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized())
    , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames()))
    , virtual_header(storage_snapshot->virtual_columns->getSampleBlock())
{
    consumer = std::make_unique<FileLogConsumer>(storage, max_block_size, poll_time_out, context, stream_number_, max_streams_number_);

@@ -147,6 +147,7 @@ StorageFileLog::StorageFileLog(
    storage_metadata.setColumns(columns_);
    storage_metadata.setComment(comment);
    setInMemoryMetadata(storage_metadata);
    setVirtuals(createVirtuals(filelog_settings->handle_error_mode));

    if (!fileOrSymlinkPathStartsWith(path, getContext()->getUserFilesPath()))
    {
@@ -203,6 +204,22 @@ StorageFileLog::StorageFileLog(
    }
}

VirtualColumnsDescription StorageFileLog::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
    VirtualColumnsDescription desc;

    desc.addEphemeral("_filename", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
    desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), "");

    if (handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        desc.addEphemeral("_raw_record", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
        desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
    }

    return desc;
}
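// Illustrative sketch, not part of the commit: with the refactoring, a streaming
// storage declares its virtuals once and every consumer derives blocks from that
// single description. Assuming access to the (private) factory for illustration:
auto desc = StorageFileLog::createVirtuals(StreamingHandleErrorMode::STREAM);
Block virtual_header = desc.getSampleBlock();   // _filename, _offset, _raw_record, _error
// FileLogSource above now obtains exactly this header via
// storage_snapshot->virtual_columns->getSampleBlock() instead of collecting names by hand.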
void StorageFileLog::loadMetaFiles(bool attach)
{
    /// Attach table
@@ -1009,19 +1026,4 @@ bool StorageFileLog::updateFileInfos()
    return events.empty() || file_infos.file_names.empty();
}

NamesAndTypesList StorageFileLog::getVirtuals() const
{
    auto virtuals = NamesAndTypesList{
        {"_filename", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
        {"_offset", std::make_shared<DataTypeUInt64>()}};

    if (filelog_settings->handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        virtuals.push_back({"_raw_record", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
        virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
    }

    return virtuals;
}

}
@@ -102,8 +102,6 @@ public:
    String getFullMetaPath(const String & file_name) const { return std::filesystem::path(metadata_base_path) / file_name; }
    String getFullDataPath(const String & file_name) const { return std::filesystem::path(root_data_path) / file_name; }

    NamesAndTypesList getVirtuals() const override;

    static UInt64 getInode(const String & file_name);

    void openFilesAndSetPos();
@@ -212,6 +210,8 @@ private:
        UInt64 inode = 0;
    };
    ReadMetadataResult readMetadata(const String & filename) const;

    static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
};

}
@@ -241,8 +241,7 @@ StorageHDFS::StorageHDFS(
    storage_metadata.setConstraints(constraints_);
    storage_metadata.setComment(comment);
    setInMemoryMetadata(storage_metadata);

    virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
    setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}

namespace
@@ -975,7 +974,7 @@ void StorageHDFS::read(
    size_t max_block_size,
    size_t num_streams)
{
    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_), virtual_columns);
    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(context_));
    bool need_only_count = (query_info.optimize_trivial_count || read_from_format_info.requested_columns.empty())
        && context_->getSettingsRef().optimize_count_from_files;

@@ -1011,7 +1010,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
    else if (storage->is_path_with_globs)
    {
        /// Iterate through disclosed globs and make a source for each file
        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->virtual_columns, context);
        auto glob_iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(storage->uris[0], predicate, storage->getVirtualsList(), context);
        iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([glob_iterator]()
        {
            return glob_iterator->next();
@@ -1019,7 +1018,7 @@ void ReadFromHDFS::createIterator(const ActionsDAG::Node * predicate)
    }
    else
    {
        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->virtual_columns, context);
        auto uris_iterator = std::make_shared<HDFSSource::URISIterator>(storage->uris, predicate, storage->getVirtualsList(), context);
        iterator_wrapper = std::make_shared<HDFSSource::IteratorWrapper>([uris_iterator]()
        {
            return uris_iterator->next();
@@ -1179,16 +1178,6 @@ void registerStorageHDFS(StorageFactory & factory)
    });
}

NamesAndTypesList StorageHDFS::getVirtuals() const
{
    return virtual_columns;
}

Names StorageHDFS::getVirtualColumnNames()
{
    return VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage({}).getNames();
}

SchemaCache & StorageHDFS::getSchemaCache(const ContextPtr & ctx)
{
    static SchemaCache schema_cache(ctx->getConfigRef().getUInt("schema_inference_cache_max_elements_for_hdfs", DEFAULT_SCHEMA_CACHE_ELEMENTS));
@@ -69,9 +69,6 @@ public:
        ContextPtr local_context,
        TableExclusiveLockHolder &) override;

    NamesAndTypesList getVirtuals() const override;
    static Names getVirtualColumnNames();

    bool supportsPartitionBy() const override { return true; }

    /// Check if the format is column-oriented.
@@ -114,7 +111,6 @@ private:
    const bool distributed_processing;
    ASTPtr partition_by;
    bool is_path_with_globs;
    NamesAndTypesList virtual_columns;

    LoggerPtr log = getLogger("StorageHDFS");
};
@@ -72,8 +72,7 @@ StorageHDFSCluster::StorageHDFSCluster(

    storage_metadata.setConstraints(constraints_);
    setInMemoryMetadata(storage_metadata);

    virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
    setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}

void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB::StorageSnapshotPtr & storage_snapshot, const DB::ContextPtr & context)
@@ -89,18 +88,11 @@ void StorageHDFSCluster::updateQueryToSendIfNeeded(DB::ASTPtr & query, const DB:

RemoteQueryExecutor::Extension StorageHDFSCluster::getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const
{
    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, virtual_columns, context);
    auto iterator = std::make_shared<HDFSSource::DisclosedGlobIterator>(uri, predicate, getVirtualsList(), context);
    auto callback = std::make_shared<std::function<String()>>([iter = std::move(iterator)]() mutable -> String { return iter->next().path; });
    return RemoteQueryExecutor::Extension{.task_iterator = std::move(callback)};
}

NamesAndTypesList StorageHDFSCluster::getVirtuals() const
{
    return NamesAndTypesList{
        {"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
        {"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
}

}

#endif
@@ -32,8 +32,6 @@ public:

    std::string getName() const override { return "HDFSCluster"; }

    NamesAndTypesList getVirtuals() const override;

    RemoteQueryExecutor::Extension getTaskIteratorExtension(const ActionsDAG::Node * predicate, const ContextPtr & context) const override;

    bool supportsSubcolumns() const override { return true; }
@@ -45,7 +43,6 @@ private:

    String uri;
    String format_name;
    NamesAndTypesList virtual_columns;
};

@@ -45,6 +45,7 @@
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/StorageFactory.h>
#include <Storages/checkAndGetLiteralArgument.h>
#include <Storages/VirtualColumnUtils.h>

namespace CurrentMetrics
{
@@ -444,6 +445,7 @@ StorageHive::StorageHive(
    storage_metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, storage_metadata.columns, getContext());

    setInMemoryMetadata(storage_metadata);
    setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));
}

void StorageHive::lazyInitialize()
@@ -1020,13 +1022,6 @@ SinkToStoragePtr StorageHive::write(const ASTPtr & /*query*/, const StorageMetad
    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method write is not implemented for StorageHive");
}

NamesAndTypesList StorageHive::getVirtuals() const
{
    return NamesAndTypesList{
        {"_path", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
        {"_file", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())}};
}

std::optional<UInt64> StorageHive::totalRows(const Settings & settings) const
{
    /// query_info is not used when prune_level == PruneLevel::None
@@ -54,8 +54,6 @@ public:

    SinkToStoragePtr write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr /*context*/, bool async_insert) override;

    NamesAndTypesList getVirtuals() const override;

    bool supportsSubsetOfColumns() const;

    std::optional<UInt64> totalRows(const Settings & settings) const override;
@@ -27,10 +27,17 @@ namespace ErrorCodes
    extern const int CANNOT_RESTORE_TABLE;
}

IStorage::IStorage(StorageID storage_id_)
    : storage_id(std::move(storage_id_))
    , metadata(std::make_unique<StorageInMemoryMetadata>())
    , virtuals(std::make_unique<VirtualColumnsDescription>())
{
}

bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
{
    /// Virtual column maybe overridden by real column
    return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name);
    return !metadata_snapshot->getColumns().has(column_name) && virtuals.get()->has(column_name);
}

RWLockImpl::LockHolder IStorage::tryLockTimed(
@@ -237,11 +244,6 @@ void IStorage::renameInMemory(const StorageID & new_table_id)
    storage_id = new_table_id;
}

NamesAndTypesList IStorage::getVirtuals() const
{
    return {};
}

Names IStorage::getAllRegisteredNames() const
{
    Names result;
@@ -11,6 +11,7 @@
#include <Storages/IStorage_fwd.h>
#include <Storages/SelectQueryDescription.h>
#include <Storages/StorageInMemoryMetadata.h>
#include <Storages/VirtualColumnsDescription.h>
#include <Storages/TableLockHolder.h>
#include <Storages/StorageSnapshot.h>
#include <Common/ActionLock.h>
@@ -98,9 +99,7 @@ class IStorage : public std::enable_shared_from_this<IStorage>, public TypePromo
public:
    IStorage() = delete;
    /// Storage metadata can be set separately in setInMemoryMetadata method
    explicit IStorage(StorageID storage_id_)
        : storage_id(std::move(storage_id_))
        , metadata(std::make_unique<StorageInMemoryMetadata>()) {}
    explicit IStorage(StorageID storage_id_);

    IStorage(const IStorage &) = delete;
    IStorage & operator=(const IStorage &) = delete;
@@ -215,6 +214,10 @@ public:
        metadata.set(std::make_unique<StorageInMemoryMetadata>(metadata_));
    }

    void setVirtuals(VirtualColumnsDescription virtuals_)
    {
        virtuals.set(std::make_unique<VirtualColumnsDescription>(std::move(virtuals_)));
    }

    /// Return list of virtual columns (like _part, _table, etc). In the vast
    /// majority of cases virtual columns are static constant part of Storage
@@ -226,7 +229,9 @@ public:
    /// virtual column will be overridden and inaccessible.
    ///
    /// By default return empty list of columns.
    virtual NamesAndTypesList getVirtuals() const;
    VirtualsDescriptionPtr getVirtualsPtr() const { return virtuals.get(); }
    NamesAndTypesList getVirtualsList() const { return virtuals.get()->getNamesAndTypesList(); }
    Block getVirtualsHeader() const { return virtuals.get()->getSampleBlock(); }

    Names getAllRegisteredNames() const override;

@@ -263,15 +268,16 @@ public:
    virtual bool supportsTrivialCountOptimization() const { return false; }

private:

    StorageID storage_id;

    mutable std::mutex id_mutex;

    /// Multiversion storage metadata. Allows to read/write storage metadata
    /// without locks.
    /// Multiversion storage metadata. Allows to read/write storage metadata without locks.
    MultiVersionStorageMetadataPtr metadata;

    /// Description of virtual columns. Optional, may be set in constructor.
    MultiVersionVirtualsDescriptionPtr virtuals;

protected:
    RWLockImpl::LockHolder tryLockTimed(
        const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const;
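// Illustrative sketch, not part of the commit: `virtuals` mirrors the existing
// multiversion `metadata` member, so a query grabs one immutable snapshot and
// reads it lock-free while DDL may publish a new description concurrently.
// `table` is an assumed IStorage reference.
VirtualsDescriptionPtr virtuals_snapshot = table.getVirtualsPtr();   // one consistent version
if (virtuals_snapshot->has("_part"))
{
    Block header = table.getVirtualsHeader();        // derived via the same accessors
    NamesAndTypesList list = table.getVirtualsList();
}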
@@ -45,7 +45,7 @@ KafkaSource::KafkaSource(
    , max_block_size(max_block_size_)
    , commit_in_suffix(commit_in_suffix_)
    , non_virtual_header(storage_snapshot->metadata->getSampleBlockNonMaterialized())
    , virtual_header(storage_snapshot->getSampleBlockForColumns(storage.getVirtualColumnNames()))
    , virtual_header(storage.getVirtualsHeader())
    , handle_error_mode(storage.getStreamingHandleErrorMode())
{
}
@@ -363,6 +363,8 @@ StorageKafka::StorageKafka(
    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);
    setInMemoryMetadata(storage_metadata);
    setVirtuals(createVirtuals(kafka_settings->kafka_handle_error_mode));

    auto task_count = thread_per_consumer ? num_consumers : 1;
    for (size_t i = 0; i < task_count; ++i)
    {
@@ -384,6 +386,28 @@ StorageKafka::StorageKafka(
    });
}

VirtualColumnsDescription StorageKafka::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
    VirtualColumnsDescription desc;

    desc.addEphemeral("_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "");
    desc.addEphemeral("_key", std::make_shared<DataTypeString>(), "");
    desc.addEphemeral("_offset", std::make_shared<DataTypeUInt64>(), "");
    desc.addEphemeral("_partition", std::make_shared<DataTypeUInt64>(), "");
    desc.addEphemeral("_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>()), "");
    desc.addEphemeral("_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3)), "");
    desc.addEphemeral("_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "");
    desc.addEphemeral("_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>()), "");

    if (handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        desc.addEphemeral("_raw_message", std::make_shared<DataTypeString>(), "");
        desc.addEphemeral("_error", std::make_shared<DataTypeString>(), "");
    }

    return desc;
}

SettingsChanges StorageKafka::createSettingsAdjustments()
{
    SettingsChanges result;
@@ -1194,43 +1218,4 @@ void registerStorageKafka(StorageFactory & factory)
    factory.registerStorage("Kafka", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}

NamesAndTypesList StorageKafka::getVirtuals() const
{
    auto result = NamesAndTypesList{
        {"_topic", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
        {"_key", std::make_shared<DataTypeString>()},
        {"_offset", std::make_shared<DataTypeUInt64>()},
        {"_partition", std::make_shared<DataTypeUInt64>()},
        {"_timestamp", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime>())},
        {"_timestamp_ms", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeDateTime64>(3))},
        {"_headers.name", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())},
        {"_headers.value", std::make_shared<DataTypeArray>(std::make_shared<DataTypeString>())}};
    if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        result.push_back({"_raw_message", std::make_shared<DataTypeString>()});
        result.push_back({"_error", std::make_shared<DataTypeString>()});
    }
    return result;
}

Names StorageKafka::getVirtualColumnNames() const
{
    auto result = Names {
        "_topic",
        "_key",
        "_offset",
        "_partition",
        "_timestamp",
        "_timestamp_ms",
        "_headers.name",
        "_headers.value",
    };
    if (kafka_settings->kafka_handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        result.push_back({"_raw_message"});
        result.push_back({"_error"});
    }
    return result;
}

}
@@ -74,8 +74,6 @@ public:

    const auto & getFormatName() const { return format_name; }

    NamesAndTypesList getVirtuals() const override;
    Names getVirtualColumnNames() const;
    StreamingHandleErrorMode getStreamingHandleErrorMode() const { return kafka_settings->kafka_handle_error_mode; }

    struct SafeConsumers
@@ -158,6 +156,8 @@ private:
    bool checkDependencies(const StorageID & table_id);

    void cleanConsumers();

    static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);
};

}
@@ -1,9 +0,0 @@
#include <Storages/LightweightDeleteDescription.h>
#include <DataTypes/DataTypesNumber.h>

namespace DB
{

const NameAndTypePair LightweightDeleteDescription::FILTER_COLUMN {"_row_exists", std::make_shared<DataTypeUInt8>()};

}
@@ -1,13 +0,0 @@
#pragma once
#include <Core/NamesAndTypes.h>
#include "Storages/TTLDescription.h"

namespace DB
{

struct LightweightDeleteDescription
{
    static const NameAndTypePair FILTER_COLUMN;
};

}
@@ -218,6 +218,10 @@ StorageLiveView::StorageLiveView(

    setInMemoryMetadata(storage_metadata);

    VirtualColumnsDescription virtuals;
    virtuals.addEphemeral("_version", std::make_shared<DataTypeUInt64>(), "");
    setVirtuals(std::move(virtuals));

    if (!query.select)
        throw Exception(ErrorCodes::INCORRECT_QUERY, "SELECT query is not specified for {}", getName());

@@ -236,13 +240,6 @@ StorageLiveView::~StorageLiveView()
    shutdown(false);
}

NamesAndTypesList StorageLiveView::getVirtuals() const
{
    return NamesAndTypesList{
        NameAndTypePair("_version", std::make_shared<DataTypeUInt64>())
    };
}

void StorageLiveView::checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const
{
    auto table_id = getStorageID();

@@ -74,8 +74,6 @@ public:

    bool supportsFinal() const override { return true; }

    NamesAndTypesList getVirtuals() const override;

    void checkTableCanBeDropped([[ maybe_unused ]] ContextPtr query_context) const override;

    void drop() override;
@@ -1451,6 +1451,11 @@ bool IMergeTreeDataPart::supportLightweightDeleteMutate() const
        parent_part == nullptr && projection_parts.empty();
}

bool IMergeTreeDataPart::hasLightweightDelete() const
{
    return columns.contains(RowExistsColumn::name);
}

void IMergeTreeDataPart::assertHasVersionMetadata(MergeTreeTransaction * txn) const
{
    TransactionID expected_tid = txn ? txn->tid : Tx::PrehistoricTID;
@@ -1,12 +1,12 @@
#pragma once

#include <unordered_map>
#include <IO/WriteSettings.h>
#include <Core/Block.h>
#include <base/types.h>
#include <base/defines.h>
#include <Core/NamesAndTypes.h>
#include <Storages/IStorage.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/MergeTree/AlterConversions.h>
#include <Storages/MergeTree/IDataPartStorage.h>
#include <Storages/MergeTree/MergeTreeDataPartState.h>
@@ -48,6 +48,8 @@ class MarkCache;
class UncompressedCache;
class MergeTreeTransaction;

struct MergeTreeReadTaskInfo;
using MergeTreeReadTaskInfoPtr = std::shared_ptr<const MergeTreeReadTaskInfo>;

enum class DataPartRemovalState
{
@@ -69,6 +71,7 @@ public:
    using Checksums = MergeTreeDataPartChecksums;
    using Checksum = MergeTreeDataPartChecksums::Checksum;
    using ValueSizeMap = std::map<std::string, double>;
    using VirtualFields = std::unordered_map<String, Field>;

    using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
    using MergeTreeWriterPtr = std::unique_ptr<IMergeTreeDataPartWriter>;
@@ -95,6 +98,7 @@ public:
        const NamesAndTypesList & columns_,
        const StorageSnapshotPtr & storage_snapshot,
        const MarkRanges & mark_ranges,
        const VirtualFields & virtual_fields,
        UncompressedCache * uncompressed_cache,
        MarkCache * mark_cache,
        const AlterConversionsPtr & alter_conversions,
@@ -493,7 +497,7 @@ public:
    bool supportLightweightDeleteMutate() const;

    /// True if here is lightweight deleted mask file in part.
    bool hasLightweightDelete() const { return columns.contains(LightweightDeleteDescription::FILTER_COLUMN.name); }
    bool hasLightweightDelete() const;

    void writeChecksums(const MergeTreeDataPartChecksums & checksums_, const WriteSettings & settings);

@@ -1,7 +1,8 @@
#include <Storages/MergeTree/IMergeTreeReader.h>
#include <Storages/MergeTree/MergeTreeReadTask.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNested.h>
#include <Common/escapeForFileName.h>
#include <Compression/CachedCompressedReadBuffer.h>
#include <Columns/ColumnArray.h>
@@ -19,12 +20,13 @@ namespace
namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
    extern const int NOT_IMPLEMENTED;
}


IMergeTreeReader::IMergeTreeReader(
    MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
    const NamesAndTypesList & columns_,
    const VirtualFields & virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    UncompressedCache * uncompressed_cache_,
    MarkCache * mark_cache_,
@@ -47,6 +49,7 @@ IMergeTreeReader::IMergeTreeReader(
    , part_columns(data_part_info_for_read->isWidePart()
        ? data_part_info_for_read->getColumnsDescriptionWithCollectedNested()
        : data_part_info_for_read->getColumnsDescription())
    , virtual_fields(virtual_fields_)
{
    columns_to_read.reserve(requested_columns.size());
    serializations.reserve(requested_columns.size());
@@ -63,7 +66,49 @@ const IMergeTreeReader::ValueSizeMap & IMergeTreeReader::getAvgValueSizeHints()
    return avg_value_size_hints;
}

void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number) const
void IMergeTreeReader::fillVirtualColumns(Columns & columns, size_t rows) const
{
    chassert(columns.size() == requested_columns.size());

    const auto * loaded_part_info = typeid_cast<const LoadedMergeTreeDataPartInfoForReader *>(data_part_info_for_read.get());
    if (!loaded_part_info)
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Filling of virtual columns is supported only for LoadedMergeTreeDataPartInfoForReader");

    const auto & data_part = loaded_part_info->getDataPart();
    const auto & storage_columns = storage_snapshot->getMetadataForQuery()->getColumns();
    const auto & virtual_columns = storage_snapshot->virtual_columns;

    auto it = requested_columns.begin();
    for (size_t pos = 0; pos < columns.size(); ++pos, ++it)
    {
        if (columns[pos] || storage_columns.has(it->name))
            continue;

        auto virtual_column = virtual_columns->tryGet(it->name);
        if (!virtual_column)
            continue;

        if (!it->type->equals(*virtual_column->type))
        {
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Data type for virtual column {} mismatched. Requested type: {}, virtual column type: {}",
                it->name, it->type->getName(), virtual_column->type->getName());
        }

        if (it->name == "_part_offset")
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Virtual column {} must be filled by range reader", it->name);

        Field field;
        if (auto field_it = virtual_fields.find(it->name); field_it != virtual_fields.end())
            field = field_it->second;
        else
            field = getFieldForConstVirtualColumn(it->name, *data_part);

        columns[pos] = virtual_column->type->createColumnConst(rows, field)->convertToFullColumnIfConst();
    }
}

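// Illustrative sketch, not part of the commit: `virtual_fields` lets earlier
// pipeline stages pass precomputed per-part values into the reader, which
// fillVirtualColumns() above materializes as constant-backed full columns.
// The concrete value here is hypothetical.
IMergeTreeReader::VirtualFields virtual_fields{{"_part_index", Field(static_cast<UInt64>(3))}};
// On a hit, fillVirtualColumns() expands it for the whole read range:
//   columns[pos] = type->createColumnConst(rows, field)->convertToFullColumnIfConst();
// and only falls back to getFieldForConstVirtualColumn() when no field was supplied.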
void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const
{
    try
    {
@@ -72,7 +117,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e
            res_columns, num_rows,
            Nested::convertToSubcolumns(requested_columns),
            Nested::convertToSubcolumns(available_columns),
            partially_read_columns, storage_snapshot->metadata, block_number);
            partially_read_columns, storage_snapshot->metadata);

        should_evaluate_missing_defaults = std::any_of(
            res_columns.begin(), res_columns.end(), [](const auto & column) { return column == nullptr; });

@@ -9,8 +9,6 @@
namespace DB
{

class IDataType;

/// Reads the data between pairs of marks in the same part. When reading consecutive ranges, avoids unnecessary seeks.
/// When ranges are almost consecutive, seeks are fast because they are performed inside the buffer.
/// Avoids loading the marks file if it is not needed (e.g. when reading the whole part).
@@ -18,11 +16,13 @@ class IMergeTreeReader : private boost::noncopyable
{
public:
    using ValueSizeMap = std::map<std::string, double>;
    using VirtualFields = std::unordered_map<String, Field>;
    using DeserializeBinaryBulkStateMap = std::map<std::string, ISerialization::DeserializeBinaryBulkStatePtr>;

    IMergeTreeReader(
        MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
        const NamesAndTypesList & columns_,
        const VirtualFields & virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        UncompressedCache * uncompressed_cache_,
        MarkCache * mark_cache_,
@@ -42,10 +42,13 @@ public:

    const ValueSizeMap & getAvgValueSizeHints() const;

    /// Add virtual columns that are not present in the block.
    void fillVirtualColumns(Columns & columns, size_t rows) const;

    /// Add columns from ordered_names that are not present in the block.
    /// Missing columns are added in the order specified by ordered_names.
    /// num_rows is needed in case if all res_columns are nullptr.
    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows, size_t block_number = 0) const;
    void fillMissingColumns(Columns & res_columns, bool & should_evaluate_missing_defaults, size_t num_rows) const;
    /// Evaluate defaulted columns if necessary.
    void evaluateMissingDefaults(Block additional_columns, Columns & res_columns) const;

@@ -113,6 +116,9 @@ private:

    /// Actual columns description in part.
    const ColumnsDescription & part_columns;

    /// Fields of virtual columns that were filled in previous stages.
    VirtualFields virtual_fields;
};

}
@@ -8,7 +8,6 @@
#include <Common/logger_useful.h>
#include <Common/ActionBlocker.h>
#include <Processors/Transforms/CheckSortedTransform.h>
#include <Storages/LightweightDeleteDescription.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>

#include <DataTypes/ObjectUtils.h>
@@ -1075,14 +1074,18 @@ void MergeTask::ExecuteAndFinalizeHorizontalPart::createMergedStream()

    if (global_ctx->deduplicate)
    {
        /// We don't want to deduplicate by block number column
        /// so if deduplicate_by_columns is empty, add all columns except _block_number
        if (supportsBlockNumberColumn(global_ctx) && global_ctx->deduplicate_by_columns.empty())
        const auto & virtuals = *global_ctx->data->getVirtualsPtr();

        /// We don't want to deduplicate by virtual persistent column.
        /// If deduplicate_by_columns is empty, add all columns except virtuals.
        if (global_ctx->deduplicate_by_columns.empty())
        {
            for (const auto & col : global_ctx->merging_column_names)
            for (const auto & column_name : global_ctx->merging_column_names)
            {
                if (col != BlockNumberColumn::name)
                    global_ctx->deduplicate_by_columns.emplace_back(col);
                if (virtuals.tryGet(column_name, VirtualsKind::Persistent))
                    continue;

                global_ctx->deduplicate_by_columns.emplace_back(column_name);
            }
        }

@@ -15,7 +15,7 @@

#include <QueryPipeline/QueryPipeline.h>

#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/ColumnSizeEstimator.h>
#include <Storages/MergeTree/FutureMergedMutatedPart.h>
#include <Storages/MergeTree/IExecutableTask.h>
@@ -6,6 +6,7 @@
#include <Core/NamesAndTypes.h>
#include <Common/checkStackSize.h>
#include <Common/typeid_cast.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/MergeTreeSelectProcessor.h>
#include <Columns/ColumnConst.h>
#include <IO/WriteBufferFromString.h>
@@ -106,16 +107,14 @@ NameSet injectRequiredColumns(

    auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical)
        .withExtendedObjects()
        .withSystemColumns();

    if (with_subcolumns)
        options.withSubcolumns();
        .withVirtuals()
        .withSubcolumns(with_subcolumns);

    for (size_t i = 0; i < columns.size(); ++i)
    {
        /// We are going to fetch only physical columns and system columns
        /// We are going to fetch physical columns and system columns first
        if (!storage_snapshot->tryGetColumn(options, columns[i]))
            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no physical column or subcolumn {} in table", columns[i]);
            throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "There is no column or subcolumn {} in table", columns[i]);

        have_at_least_one_physical_column |= injectRequiredColumnsRecursively(
            columns[i], storage_snapshot, alter_conversions,
@@ -258,11 +257,10 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum
}


MergeTreeReadTask::Columns getReadTaskColumns(
MergeTreeReadTaskColumns getReadTaskColumns(
    const IMergeTreeDataPartInfoForReader & data_part_info_for_reader,
    const StorageSnapshotPtr & storage_snapshot,
    const Names & required_columns,
    const Names & system_columns,
    const PrewhereInfoPtr & prewhere_info,
    const ExpressionActionsSettings & actions_settings,
    const MergeTreeReaderSettings & reader_settings,
@@ -270,28 +268,30 @@ MergeTreeReadTask::Columns getReadTaskColumns(
{
    Names column_to_read_after_prewhere = required_columns;

    /// Read system columns such as lightweight delete mask "_row_exists" if it is persisted in the part
    for (const auto & name : system_columns)
        if (data_part_info_for_reader.getColumns().contains(name))
            column_to_read_after_prewhere.push_back(name);

    /// Inject columns required for defaults evaluation
    injectRequiredColumns(
        data_part_info_for_reader, storage_snapshot, with_subcolumns, column_to_read_after_prewhere);

    MergeTreeReadTask::Columns result;
    MergeTreeReadTaskColumns result;
    auto options = GetColumnsOptions(GetColumnsOptions::All)
        .withExtendedObjects()
        .withSystemColumns();
        .withVirtuals()
        .withSubcolumns(with_subcolumns);

    if (with_subcolumns)
        options.withSubcolumns();
    static const NameSet columns_to_read_at_first_step = {"_part_offset"};

    NameSet columns_from_previous_steps;
    auto add_step = [&](const PrewhereExprStep & step)
    {
        Names step_column_names;

        if (columns_from_previous_steps.empty())
        {
            for (const auto & required_column : required_columns)
                if (columns_to_read_at_first_step.contains(required_column))
                    step_column_names.push_back(required_column);
        }

        /// Computation results from previous steps might be used in the current step as well. In such a case these
        /// computed columns will be present in the current step inputs. They don't need to be read from the disk so
        /// exclude them from the list of columns to read. This filtering must be done before injecting required
@@ -1,6 +1,5 @@
#pragma once

#include <optional>
#include <Core/NamesAndTypes.h>
#include <Storages/MergeTree/MergeTreeReadTask.h>

@@ -22,11 +21,10 @@ NameSet injectRequiredColumns(
    bool with_subcolumns,
    Names & columns);

MergeTreeReadTask::Columns getReadTaskColumns(
MergeTreeReadTaskColumns getReadTaskColumns(
    const IMergeTreeDataPartInfoForReader & data_part_info_for_reader,
    const StorageSnapshotPtr & storage_snapshot,
    const Names & required_columns,
    const Names & system_columns,
    const PrewhereInfoPtr & prewhere_info,
    const ExpressionActionsSettings & actions_settings,
    const MergeTreeReaderSettings & reader_settings,
@@ -67,7 +67,7 @@
#include <Processors/QueryPlan/QueryIdHolder.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Storages/AlterCommands.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/Freeze.h>
#include <Storages/MergeTree/DataPartStorageOnDiskFull.h>
#include <Storages/MergeTree/MergeTreeDataPartBuilder.h>
@@ -430,6 +430,29 @@ MergeTreeData::MergeTreeData(
    };
}

VirtualColumnsDescription MergeTreeData::createVirtuals(const StorageInMemoryMetadata & metadata)
{
    VirtualColumnsDescription desc;

    desc.addEphemeral("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of part");
    desc.addEphemeral("_part_index", std::make_shared<DataTypeUInt64>(), "Sequential index of the part in the query result");
    desc.addEphemeral("_part_uuid", std::make_shared<DataTypeUUID>(), "Unique part identifier (if enabled MergeTree setting assign_part_uuids)");
    desc.addEphemeral("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of partition");
    desc.addEphemeral("_sample_factor", std::make_shared<DataTypeFloat64>(), "Sample factor (from the query)");
    desc.addEphemeral("_part_offset", std::make_shared<DataTypeUInt64>(), "Number of row in the part");

    if (metadata.hasPartitionKey())
    {
        auto partition_types = metadata.partition_key.sample_block.getDataTypes();
        desc.addEphemeral("_partition_value", std::make_shared<DataTypeTuple>(std::move(partition_types)), "Value (a tuple) of a PARTITION BY expression");
    }

    desc.addPersistent(RowExistsColumn::name, RowExistsColumn::type, nullptr, "Persisted mask created by lightweight delete that show whether row exists or is deleted");
    desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "Persisted original number of block that was assigned at insert");

    return desc;
}
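// Illustrative sketch, not part of the commit: the description above mixes both
// flavours. Ephemeral virtuals are synthesized per query; persistent ones are
// physically stored in parts, which is why addPersistent() also accepts a codec.
// Names and signatures are taken from createVirtuals() above.
VirtualColumnsDescription desc;
desc.addEphemeral("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), "Name of part");
desc.addPersistent(BlockNumberColumn::name, BlockNumberColumn::type, BlockNumberColumn::codec, "");
// Kind-aware lookup then separates them, e.g. only persistent columns:
auto persistent = desc.tryGet(BlockNumberColumn::name, VirtualsKind::Persistent);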
StoragePolicyPtr MergeTreeData::getStoragePolicy() const
{
    auto settings = getSettings();
@@ -677,6 +700,7 @@ void MergeTreeData::setProperties(
{
    checkProperties(new_metadata, old_metadata, attach, false, allow_nullable_key, local_context);
    setInMemoryMetadata(new_metadata);
    setVirtuals(createVirtuals(new_metadata));
}

namespace
@@ -1002,73 +1026,38 @@ void MergeTreeData::MergingParams::check(const StorageInMemoryMetadata & metadat
    /// TODO Checks for Graphite mode.
}

const Names MergeTreeData::virtuals_useful_for_filter = {"_part", "_partition_id", "_part_uuid", "_partition_value"};

DataTypePtr MergeTreeData::getPartitionValueType() const
Block MergeTreeData::getHeaderWithVirtualsForFilter() const
{
    DataTypePtr partition_value_type;
    auto partition_types = getInMemoryMetadataPtr()->partition_key.sample_block.getDataTypes();
    if (partition_types.empty())
        partition_value_type = std::make_shared<DataTypeUInt8>();
    else
        partition_value_type = std::make_shared<DataTypeTuple>(std::move(partition_types));
    return partition_value_type;
    Block header;
    auto virtuals_desc = getVirtualsPtr();
    for (const auto & name : virtuals_useful_for_filter)
        if (auto column = virtuals_desc->tryGet(name))
            header.insert({column->type->createColumn(), column->type, name});
    return header;
}


Block MergeTreeData::getSampleBlockWithVirtualColumns() const
Block MergeTreeData::getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty) const
{
    DataTypePtr partition_value_type = getPartitionValueType();
    return {
        ColumnWithTypeAndName(
            DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
            "_part"),
        ColumnWithTypeAndName(
            DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn(),
            std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()),
            "_partition_id"),
        ColumnWithTypeAndName(ColumnUUID::create(), std::make_shared<DataTypeUUID>(), "_part_uuid"),
        ColumnWithTypeAndName(partition_value_type->createColumn(), partition_value_type, "_partition_value")};
}
    auto block = getHeaderWithVirtualsForFilter();


Block MergeTreeData::getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty) const
{
    auto block = getSampleBlockWithVirtualColumns();
    MutableColumns columns = block.mutateColumns();

    auto & part_column = columns[0];
    auto & partition_id_column = columns[1];
    auto & part_uuid_column = columns[2];
    auto & partition_value_column = columns[3];

    bool has_partition_value = typeid_cast<const ColumnTuple *>(partition_value_column.get());
    for (const auto & part_or_projection : parts)
    {
        if (ignore_empty && part_or_projection->isEmpty())
            continue;
        const auto * part = part_or_projection->isProjectionPart() ? part_or_projection->getParentPart() : part_or_projection.get();
        part_column->insert(part->name);
        partition_id_column->insert(part->info.partition_id);
        part_uuid_column->insert(part->uuid);
        Tuple tuple(part->partition.value.begin(), part->partition.value.end());
        if (has_partition_value)
            partition_value_column->insert(tuple);

        if (one_part)
        const auto * part = part_or_projection->isProjectionPart()
            ? part_or_projection->getParentPart()
            : part_or_projection.get();

        for (auto & column : block)
        {
            part_column = ColumnConst::create(std::move(part_column), 1);
            partition_id_column = ColumnConst::create(std::move(partition_id_column), 1);
            part_uuid_column = ColumnConst::create(std::move(part_uuid_column), 1);
            if (has_partition_value)
                partition_value_column = ColumnConst::create(std::move(partition_value_column), 1);
            break;
            auto field = getFieldForConstVirtualColumn(column.name, *part);
            column.column->assumeMutableRef().insert(field);
        }
    }

    block.setColumns(std::move(columns));
    if (!has_partition_value)
        block.erase("_partition_value");
    return block;
}
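// Illustrative sketch, not part of the commit: part pruning now goes through the
// pair of helpers above. Assumed context: `data` is a MergeTreeData, and `parts`,
// `filter_dag`, `local_context` come from the calling query (as in
// totalRowsByPartitionPredicateImpl below).
Block block = data.getBlockWithVirtualsForFilter(parts);            // one row per part
VirtualColumnUtils::filterBlockWithDAG(filter_dag, block, local_context);
auto kept_parts = VirtualColumnUtils::extractSingleValueFromBlock<String>(block, "_part");
// Any part whose name is absent from `kept_parts` can be skipped entirely.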
@@ -1077,13 +1066,14 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
    const ActionsDAGPtr & filter_actions_dag, ContextPtr local_context, const DataPartsVector & parts) const
{
    if (parts.empty())
        return 0u;
        return 0;

    auto metadata_snapshot = getInMemoryMetadataPtr();
    Block virtual_columns_block = getBlockWithVirtualPartColumns(parts, true /* one_part */);
    auto virtual_columns_block = getBlockWithVirtualsForFilter({parts[0]});

    auto filter_dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_actions_dag->getOutputs().at(0), nullptr);

    // Generate valid expressions for filtering
    /// Generate valid expressions for filtering
    bool valid = true;
    for (const auto * input : filter_dag->getInputs())
        if (!virtual_columns_block.has(input->result_name))
@@ -1096,7 +1086,7 @@ std::optional<UInt64> MergeTreeData::totalRowsByPartitionPredicateImpl(
    std::unordered_set<String> part_values;
    if (valid)
    {
        virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */);
        virtual_columns_block = getBlockWithVirtualsForFilter(parts);
        VirtualColumnUtils::filterBlockWithDAG(filter_dag, virtual_columns_block, local_context);
        part_values = VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
        if (part_values.empty())
@@ -3658,6 +3648,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
{
    auto metadata_snapshot = getInMemoryMetadataPtr();
    const auto & columns = metadata_snapshot->getColumns();
    auto virtuals = getVirtualsPtr();

    if (!hasDynamicSubcolumns(columns))
        return;
@@ -3665,7 +3656,7 @@ void MergeTreeData::checkPartDynamicColumns(MutableDataPartPtr & part, DataParts
    const auto & part_columns = part->getColumns();
    for (const auto & part_column : part_columns)
    {
        if (part_column.name == LightweightDeleteDescription::FILTER_COLUMN.name || part_column.name == BlockNumberColumn::name)
        if (virtuals->has(part_column.name))
            continue;

        auto storage_column = columns.getPhysical(part_column.name);
@@ -6669,14 +6660,6 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
    const auto & primary_key_max_column_name = metadata_snapshot->minmax_count_projection->primary_key_max_column_name;
    NameSet required_columns_set(required_columns.begin(), required_columns.end());

    if (required_columns_set.contains("_partition_value") && !typeid_cast<const DataTypeTuple *>(getPartitionValueType().get()))
    {
        throw Exception(
            ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
            "Missing column `_partition_value` because there is no partition column in table {}",
            getStorageID().getTableName());
    }

    if (!primary_key_max_column_name.empty())
        need_primary_key_max_column = required_columns_set.contains(primary_key_max_column_name);

@@ -6702,11 +6685,11 @@ Block MergeTreeData::getMinMaxCountProjectionBlock(
    };

    Block virtual_columns_block;
    auto virtual_block = getSampleBlockWithVirtualColumns();
    auto virtual_block = getHeaderWithVirtualsForFilter();
    bool has_virtual_column = std::any_of(required_columns.begin(), required_columns.end(), [&](const auto & name) { return virtual_block.has(name); });
    if (has_virtual_column || filter_dag)
    {
        virtual_columns_block = getBlockWithVirtualPartColumns(parts, false /* one_part */, true /* ignore_empty */);
        virtual_columns_block = getBlockWithVirtualsForFilter(parts, /*ignore_empty=*/ true);
        if (virtual_columns_block.rows() == 0)
            return {};
    }
@@ -7952,21 +7935,6 @@ AlterConversionsPtr MergeTreeData::getAlterConversionsForPart(MergeTreeDataPartP
    return result;
}

NamesAndTypesList MergeTreeData::getVirtuals() const
{
    return NamesAndTypesList{
        NameAndTypePair("_part", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
        NameAndTypePair("_part_index", std::make_shared<DataTypeUInt64>()),
        NameAndTypePair("_part_uuid", std::make_shared<DataTypeUUID>()),
        NameAndTypePair("_partition_id", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())),
        NameAndTypePair("_partition_value", getPartitionValueType()),
        NameAndTypePair("_sample_factor", std::make_shared<DataTypeFloat64>()),
        NameAndTypePair("_part_offset", std::make_shared<DataTypeUInt64>()),
        LightweightDeleteDescription::FILTER_COLUMN,
        NameAndTypePair(BlockNumberColumn::name, BlockNumberColumn::type),
    };
}

size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const
{
    return getContext()->getMergeList().getMergesWithTTLCount();
@@ -444,8 +444,6 @@ public:

    bool supportsTrivialCountOptimization() const override { return !hasLightweightDeletedMask(); }

    NamesAndTypesList getVirtuals() const override;

    /// Snapshot for MergeTree contains the current set of data parts
    /// at the moment of the start of query.
    struct SnapshotData : public StorageSnapshot::Data
@@ -988,15 +986,13 @@ public:
    void removeQueryId(const String & query_id) const;
    void removeQueryIdNoLock(const String & query_id) const TSA_REQUIRES(query_id_set_mutex);

    /// Return the partition expression types as a Tuple type. Return DataTypeUInt8 if partition expression is empty.
    DataTypePtr getPartitionValueType() const;
    static const Names virtuals_useful_for_filter;

    /// Construct a sample block of virtual columns.
    Block getSampleBlockWithVirtualColumns() const;
    Block getHeaderWithVirtualsForFilter() const;

    /// Construct a block consisting only of possible virtual columns for part pruning.
    /// If one_part is true, fill in at most one part.
    Block getBlockWithVirtualPartColumns(const MergeTreeData::DataPartsVector & parts, bool one_part, bool ignore_empty = false) const;
    Block getBlockWithVirtualsForFilter(const MergeTreeData::DataPartsVector & parts, bool ignore_empty = false) const;

    /// In merge tree we do inserts with several steps. One of them:
    /// X. write part to temporary directory with some temp name
@@ -1087,6 +1083,8 @@ public:

    bool initializeDiskOnConfigChange(const std::set<String> & /*new_added_disks*/) override;

    static VirtualColumnsDescription createVirtuals(const StorageInMemoryMetadata & metadata);

protected:
    friend class IMergeTreeDataPart;
    friend class MergeTreeDataMergerMutator;
@@ -5,7 +5,6 @@
#include <Interpreters/Context.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Compression/CompressedReadBufferFromFile.h>
#include <Storages/BlockNumberColumn.h>


namespace DB
@@ -33,6 +32,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader(
    const NamesAndTypesList & columns_to_read,
    const StorageSnapshotPtr & storage_snapshot,
    const MarkRanges & mark_ranges,
    const VirtualFields & virtual_fields,
    UncompressedCache * uncompressed_cache,
    MarkCache * mark_cache,
    const AlterConversionsPtr & alter_conversions,
@@ -41,12 +41,21 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader(
    const ReadBufferFromFileBase::ProfileCallback & profile_callback) const
{
    auto read_info = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(shared_from_this(), alter_conversions);
    auto * load_marks_threadpool = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr;
    auto * load_marks_threadpool
        = reader_settings.read_settings.load_marks_asynchronously ? &read_info->getContext()->getLoadMarksThreadpool() : nullptr;

    return std::make_unique<MergeTreeReaderCompact>(
        read_info, columns_to_read, storage_snapshot, uncompressed_cache,
        mark_cache, mark_ranges, reader_settings, load_marks_threadpool,
        avg_value_size_hints, profile_callback);
        read_info,
        columns_to_read,
        virtual_fields,
        storage_snapshot,
        uncompressed_cache,
        mark_cache,
        mark_ranges,
        reader_settings,
        load_marks_threadpool,
        avg_value_size_hints,
        profile_callback);
}

IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
@@ -66,12 +75,6 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter(
    ordered_columns_list.sort([this](const auto & lhs, const auto & rhs)
        { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); });

    /// _block_number column is not added by user, but is persisted in a part after merge
    /// If _block_number is not present in the parts to be merged, then it won't have a position
    /// So check if its not present and add it at the end
    if (columns_list.contains(BlockNumberColumn::name) && !ordered_columns_list.contains(BlockNumberColumn::name))
        ordered_columns_list.emplace_back(NameAndTypePair{BlockNumberColumn::name, BlockNumberColumn::type});

    return std::make_unique<MergeTreeDataPartWriterCompact>(
        shared_from_this(), ordered_columns_list, metadata_snapshot,
        indices_to_recalc, stats_to_recalc_, getMarksFileExtension(),
@@ -32,6 +32,7 @@ public:
        const NamesAndTypesList & columns,
        const StorageSnapshotPtr & storage_snapshot,
        const MarkRanges & mark_ranges,
        const VirtualFields & virtual_fields,
        UncompressedCache * uncompressed_cache,
        MarkCache * mark_cache,
        const AlterConversionsPtr & alter_conversions,
@@ -33,6 +33,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
    const NamesAndTypesList & columns_to_read,
    const StorageSnapshotPtr & storage_snapshot,
    const MarkRanges & mark_ranges,
    const VirtualFields & virtual_fields,
    UncompressedCache * /* uncompressed_cache */,
    MarkCache * /* mark_cache */,
    const AlterConversionsPtr & alter_conversions,
@@ -44,7 +45,13 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartInMemory::getReader(
    auto ptr = std::static_pointer_cast<const MergeTreeDataPartInMemory>(shared_from_this());

    return std::make_unique<MergeTreeReaderInMemory>(
        read_info, ptr, columns_to_read, storage_snapshot, mark_ranges, reader_settings);
        read_info,
        ptr,
        columns_to_read,
        virtual_fields,
        storage_snapshot,
        mark_ranges,
        reader_settings);
}

IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartInMemory::getWriter(
@ -21,6 +21,7 @@ public:
|
||||
const NamesAndTypesList & columns,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const MarkRanges & mark_ranges,
|
||||
const VirtualFields & virtual_fields,
|
||||
UncompressedCache * uncompressed_cache,
|
||||
MarkCache * mark_cache,
|
||||
const AlterConversionsPtr & alter_conversions,
|
||||
|
@ -31,6 +31,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
|
||||
const NamesAndTypesList & columns_to_read,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const MarkRanges & mark_ranges,
|
||||
const VirtualFields & virtual_fields,
|
||||
UncompressedCache * uncompressed_cache,
|
||||
MarkCache * mark_cache,
|
||||
const AlterConversionsPtr & alter_conversions,
|
||||
@ -40,10 +41,16 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader(
|
||||
{
|
||||
auto read_info = std::make_shared<LoadedMergeTreeDataPartInfoForReader>(shared_from_this(), alter_conversions);
|
||||
return std::make_unique<MergeTreeReaderWide>(
|
||||
read_info, columns_to_read,
|
||||
storage_snapshot, uncompressed_cache,
|
||||
mark_cache, mark_ranges, reader_settings,
|
||||
avg_value_size_hints, profile_callback);
|
||||
read_info,
|
||||
columns_to_read,
|
||||
virtual_fields,
|
||||
storage_snapshot,
|
||||
uncompressed_cache,
|
||||
mark_cache,
|
||||
mark_ranges,
|
||||
reader_settings,
|
||||
avg_value_size_hints,
|
||||
profile_callback);
|
||||
}
|
||||
|
||||
IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter(
|
||||
|
@ -27,6 +27,7 @@ public:
|
||||
const NamesAndTypesList & columns,
|
||||
const StorageSnapshotPtr & storage_snapshot,
|
||||
const MarkRanges & mark_ranges,
|
||||
const VirtualFields & virtual_fields,
|
||||
UncompressedCache * uncompressed_cache,
|
||||
MarkCache * mark_cache,
|
||||
const AlterConversionsPtr & alter_conversions,
|
||||
|
@ -1,12 +1,9 @@
#include <Storages/MergeTree/MergeTreeDataPartWriterCompact.h>
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
#include <Storages/BlockNumberColumn.h>

namespace DB
{

CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);

namespace ErrorCodes
{
    extern const int LOGICAL_ERROR;
@ -55,14 +52,10 @@ MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact(
        marks_source_hashing = std::make_unique<HashingWriteBuffer>(*marks_compressor);
    }

    const auto & storage_columns = metadata_snapshot->getColumns();
    auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot);
    for (const auto & column : columns_list)
    {
        ASTPtr compression;
        if (column.name == BlockNumberColumn::name)
            compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
        else
            compression = storage_columns.getCodecDescOrDefault(column.name, default_codec);
        auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
        addStreams(column, compression);
    }
}

@ -6,12 +6,10 @@
#include <Common/escapeForFileName.h>
#include <Columns/ColumnSparse.h>
#include <Common/logger_useful.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/ColumnsDescription.h>

namespace DB
{
CompressionCodecPtr getCompressionCodecDelta(UInt8 delta_bytes_size);

namespace ErrorCodes
{
@ -91,15 +89,11 @@ MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide(
        indices_to_recalc_, stats_to_recalc_, marks_file_extension_,
        default_codec_, settings_, index_granularity_)
{
    const auto & columns = metadata_snapshot->getColumns();
    for (const auto & it : columns_list)
    auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot);
    for (const auto & column : columns_list)
    {
        ASTPtr compression;
        if (it.name == BlockNumberColumn::name)
            compression = BlockNumberColumn::compression_codec->getFullCodecDesc();
        else
            compression = columns.getCodecDescOrDefault(it.name, default_codec);
        addStreams(it, compression);
        auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
        addStreams(column, compression);
    }
}
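Both writers previously hard-coded the codec of `_block_number` and consulted the metadata columns for everything else. They now build a `StorageSnapshot` and ask it for the codec, so a persistent virtual column can declare its codec in one place. A condensed sketch of the new pattern, taken from the two constructors above (`default_codec` and `addStreams` are the writer's own members):

auto storage_snapshot = std::make_shared<StorageSnapshot>(data_part->storage, metadata_snapshot);
for (const auto & column : columns_list)
{
    /// One lookup covers ordinary columns and persistent virtual columns
    /// alike, so the per-name special case for _block_number is gone.
    auto compression = storage_snapshot->getCodecDescOrDefault(column.name, default_codec);
    addStreams(column, compression);
}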
@ -46,7 +46,6 @@
#include <Functions/IFunction.h>

#include <IO/WriteBufferFromOStream.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/ApproximateNearestNeighborIndexesCommon.h>

namespace CurrentMetrics
@ -69,7 +68,6 @@ namespace ErrorCodes
    extern const int CANNOT_PARSE_TEXT;
    extern const int TOO_MANY_PARTITIONS;
    extern const int DUPLICATED_PART_UUIDS;
    extern const int NO_SUCH_COLUMN_IN_TABLE;
}

@ -166,7 +164,6 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling(
    const MergeTreeData & data,
    const StorageMetadataPtr & metadata_snapshot,
    ContextPtr context,
    bool sample_factor_column_queried,
    LoggerPtr log)
{
    const Settings & settings = context->getSettingsRef();
@ -296,7 +293,7 @@ MergeTreeDataSelectSamplingData MergeTreeDataSelectExecutor::getSampling(

    if (sampling.use_sampling)
    {
        if (sample_factor_column_queried && relative_sample_size != RelativeSize(0))
        if (relative_sample_size != RelativeSize(0))
            sampling.used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);

        RelativeSize size_of_universum = 0;
@ -483,12 +480,13 @@ std::optional<std::unordered_set<String>> MergeTreeDataSelectExecutor::filterPar
{
    if (!filter_dag)
        return {};
    auto sample = data.getSampleBlockWithVirtualColumns();

    auto sample = data.getHeaderWithVirtualsForFilter();
    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
    if (!dag)
        return {};

    auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, false /* one_part */);
    auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts);
    VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context);
    return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
}
@ -868,69 +866,6 @@ std::shared_ptr<QueryIdHolder> MergeTreeDataSelectExecutor::checkLimits(
    return nullptr;
}

static void selectColumnNames(
    const Names & column_names_to_return,
    const MergeTreeData & data,
    Names & real_column_names,
    Names & virt_column_names,
    bool & sample_factor_column_queried)
{
    sample_factor_column_queried = false;

    for (const String & name : column_names_to_return)
    {
        if (name == "_part")
        {
            virt_column_names.push_back(name);
        }
        else if (name == "_part_index")
        {
            virt_column_names.push_back(name);
        }
        else if (name == "_partition_id")
        {
            virt_column_names.push_back(name);
        }
        else if (name == "_part_offset")
        {
            virt_column_names.push_back(name);
        }
        else if (name == LightweightDeleteDescription::FILTER_COLUMN.name)
        {
            virt_column_names.push_back(name);
        }
        else if (name == BlockNumberColumn::name)
        {
            virt_column_names.push_back(name);
        }
        else if (name == "_part_uuid")
        {
            virt_column_names.push_back(name);
        }
        else if (name == "_partition_value")
        {
            if (!typeid_cast<const DataTypeTuple *>(data.getPartitionValueType().get()))
            {
                throw Exception(
                    ErrorCodes::NO_SUCH_COLUMN_IN_TABLE,
                    "Missing column `_partition_value` because there is no partition column in table {}",
                    data.getStorageID().getTableName());
            }

            virt_column_names.push_back(name);
        }
        else if (name == "_sample_factor")
        {
            sample_factor_column_queried = true;
            virt_column_names.push_back(name);
        }
        else
        {
            real_column_names.push_back(name);
        }
    }
}

ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMarksToRead(
    MergeTreeData::DataPartsVector parts,
    const Names & column_names_to_return,
@ -944,14 +879,6 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
    if (total_parts == 0)
        return std::make_shared<ReadFromMergeTree::AnalysisResult>();

    Names real_column_names;
    Names virt_column_names;
    /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
    /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
    bool sample_factor_column_queried = false;

    selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried);

    std::optional<ReadFromMergeTree::Indexes> indexes;
    /// NOTE: We don't need alter_conversions because the returned analysis_result is only used for:
    /// 1. estimate the number of rows to read; 2. projection reading, which doesn't have alter_conversions.
@ -964,8 +891,7 @@ ReadFromMergeTree::AnalysisResultPtr MergeTreeDataSelectExecutor::estimateNumMar
        num_streams,
        max_block_numbers_to_read,
        data,
        real_column_names,
        sample_factor_column_queried,
        column_names_to_return,
        log,
        indexes);
}
@ -992,27 +918,16 @@ QueryPlanStepPtr MergeTreeDataSelectExecutor::readFromParts(
    else if (parts.empty())
        return {};

    Names real_column_names;
    Names virt_column_names;
    /// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
    /// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
    bool sample_factor_column_queried = false;

    selectColumnNames(column_names_to_return, data, real_column_names, virt_column_names, sample_factor_column_queried);

    return std::make_unique<ReadFromMergeTree>(
        std::move(parts),
        std::move(alter_conversions),
        column_names_to_return,
        real_column_names,
        virt_column_names,
        data,
        query_info,
        storage_snapshot,
        context,
        max_block_size,
        num_streams,
        sample_factor_column_queried,
        max_block_numbers_to_read,
        log,
        merge_tree_select_result_ptr,

@ -213,7 +213,6 @@ public:
        const MergeTreeData & data,
        const StorageMetadataPtr & metadata_snapshot,
        ContextPtr context,
        bool sample_factor_column_queried,
        LoggerPtr log);

    /// Check query limits: max_partitions_to_read, max_concurrent_queries.
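The part-pruning path in filterPartsByVirtualColumns now goes through the generic virtuals header instead of a hand-built sample block. The mechanism, condensed from the hunk above: build a tiny block with one row of virtual values per part, run the relevant slice of the query filter over it, and keep only the `_part` names that survive. A sketch with a hypothetical wrapper name; the called functions are the ones that appear in this diff:

std::optional<std::unordered_set<String>> prunePartsByVirtuals(
    const MergeTreeData & data,
    const MergeTreeData::DataPartsVector & parts,
    const ActionsDAGPtr & filter_dag,
    ContextPtr context)
{
    if (!filter_dag)
        return {};

    auto sample = data.getHeaderWithVirtualsForFilter();
    auto dag = VirtualColumnUtils::splitFilterDagForAllowedInputs(filter_dag->getOutputs().at(0), &sample);
    if (!dag)
        return {};

    /// One row per part, containing only virtual columns such as _part.
    auto virtual_columns_block = data.getBlockWithVirtualsForFilter(parts);
    VirtualColumnUtils::filterBlockWithDAG(dag, virtual_columns_block, context);
    return VirtualColumnUtils::extractSingleValueFromBlock<String>(virtual_columns_block, "_part");
}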
@ -9,6 +9,7 @@
#include <Storages/MergeTree/MergeTreePrefetchedReadPool.h>
#include <Storages/MergeTree/MergeTreeRangeReader.h>
#include <Storages/MergeTree/RangesInDataPart.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <base/getThreadId.h>
#include <Common/ElapsedTimeProfileEventIncrement.h>
#include <Common/logger_useful.h>
@ -108,22 +109,22 @@ MergeTreeReadTask::Readers MergeTreePrefetchedReadPool::PrefetchedReaders::get()

MergeTreePrefetchedReadPool::MergeTreePrefetchedReadPool(
    RangesInDataParts && parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_)
    : MergeTreeReadPoolBase(
        std::move(parts_),
        std::move(shared_virtual_fields_),
        storage_snapshot_,
        prewhere_info_,
        actions_settings_,
        reader_settings_,
        column_names_,
        virtual_column_names_,
        settings_,
        context_)
    , WithContext(context_)
@ -375,7 +376,7 @@ void MergeTreePrefetchedReadPool::fillPerPartStatistics()
        update_stat_for_column(column.name);

    if (reader_settings.apply_deleted_mask && read_info.data_part->hasLightweightDelete())
        update_stat_for_column(LightweightDeleteDescription::FILTER_COLUMN.name);
        update_stat_for_column(RowExistsColumn::name);

    for (const auto & pre_columns : read_info.task_columns.pre_columns)
        for (const auto & column : pre_columns)

@ -18,12 +18,12 @@ class MergeTreePrefetchedReadPool : public MergeTreeReadPoolBase, private WithCo
public:
    MergeTreePrefetchedReadPool(
        RangesInDataParts && parts_,
        VirtualFields shared_virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        const PrewhereInfoPtr & prewhere_info_,
        const ExpressionActionsSettings & actions_settings_,
        const MergeTreeReaderSettings & reader_settings_,
        const Names & column_names_,
        const Names & virtual_column_names_,
        const PoolSettings & settings_,
        const ContextPtr & context_);

@ -67,7 +67,7 @@ private:

    struct ThreadTask
    {
        using InfoPtr = MergeTreeReadTask::InfoPtr;
        using InfoPtr = MergeTreeReadTaskInfoPtr;

        ThreadTask(InfoPtr read_info_, MarkRanges ranges_, Priority priority_)
            : read_info(std::move(read_info_)), ranges(std::move(ranges_)), priority(priority_)
@ -362,7 +362,7 @@ void MergeTreeRangeReader::ReadResult::shrink(Columns & old_columns, const NumRo
    }
}

/// The main invariant of the data in the read result is that he number of rows is
/// The main invariant of the data in the read result is that the number of rows is
/// either equal to total_rows_per_granule (if filter has not been applied) or to the number of
/// 1s in the filter (if filter has been applied).
void MergeTreeRangeReader::ReadResult::checkInternalConsistency() const
@ -803,8 +803,7 @@ MergeTreeRangeReader::MergeTreeRangeReader(
    IMergeTreeReader * merge_tree_reader_,
    MergeTreeRangeReader * prev_reader_,
    const PrewhereExprStep * prewhere_info_,
    bool last_reader_in_chain_,
    const Names & non_const_virtual_column_names_)
    bool last_reader_in_chain_)
    : merge_tree_reader(merge_tree_reader_)
    , index_granularity(&(merge_tree_reader->data_part_info_for_read->getIndexGranularity()))
    , prev_reader(prev_reader_)
@ -821,21 +820,6 @@ MergeTreeRangeReader::MergeTreeRangeReader(
        result_sample_block.insert({name_and_type.type->createColumn(), name_and_type.type, name_and_type.name});
    }

    for (const auto & column_name : non_const_virtual_column_names_)
    {
        if (result_sample_block.has(column_name))
            continue;

        non_const_virtual_column_names.push_back(column_name);

        if (column_name == "_part_offset" && !prev_reader)
        {
            /// _part_offset column is filled by the first reader.
            read_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), column_name));
            result_sample_block.insert(ColumnWithTypeAndName(ColumnUInt64::create(), std::make_shared<DataTypeUInt64>(), column_name));
        }
    }

    if (prewhere_info)
    {
        const auto & step = *prewhere_info;
@ -1001,6 +985,8 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar
            if (num_read_rows == 0)
                num_read_rows = read_result.num_rows;

            merge_tree_reader->fillVirtualColumns(columns, num_read_rows);

            /// fillMissingColumns() must be called after reading but before any filtering because
            /// some columns (e.g. arrays) might be only partially filled and thus not be valid and
            /// fillMissingColumns() fixes this.
@ -1050,23 +1036,23 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::read(size_t max_rows, Mar
            return read_result;

        {
            /// Physical columns go first and then some virtual columns follow
            size_t physical_columns_count = merge_tree_reader->getColumns().size();
            Columns physical_columns(read_result.columns.begin(), read_result.columns.begin() + physical_columns_count);
            size_t columns_count = merge_tree_reader->getColumns().size();
            Columns columns(read_result.columns.begin(), read_result.columns.begin() + columns_count);
            merge_tree_reader->fillVirtualColumns(columns, read_result.num_rows);

            bool should_evaluate_missing_defaults;
            merge_tree_reader->fillMissingColumns(physical_columns, should_evaluate_missing_defaults, read_result.num_rows);
            merge_tree_reader->fillMissingColumns(columns, should_evaluate_missing_defaults, read_result.num_rows);

            /// If some columns absent in part, then evaluate default values
            if (should_evaluate_missing_defaults)
                merge_tree_reader->evaluateMissingDefaults({}, physical_columns);
                merge_tree_reader->evaluateMissingDefaults({}, columns);

            /// If result not empty, then apply on-fly alter conversions if any required
            if (!prewhere_info || prewhere_info->perform_alter_conversions)
                merge_tree_reader->performRequiredConversions(physical_columns);
                merge_tree_reader->performRequiredConversions(columns);

            for (size_t i = 0; i < physical_columns.size(); ++i)
                read_result.columns[i] = std::move(physical_columns[i]);
            for (size_t i = 0; i < columns.size(); ++i)
                read_result.columns[i] = std::move(columns[i]);
        }

        size_t total_bytes = 0;
@ -1158,12 +1144,17 @@ MergeTreeRangeReader::ReadResult MergeTreeRangeReader::startReadingChain(size_t
    result.adjustLastGranule();

    if (read_sample_block.has("_part_offset"))
        fillPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
    {
        size_t pos = read_sample_block.getPositionByName("_part_offset");
        chassert(pos < result.columns.size());
        chassert(result.columns[pos] == nullptr);
        result.columns[pos] = createPartOffsetColumn(result, leading_begin_part_offset, leading_end_part_offset);
    }

    return result;
}

void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset)
ColumnPtr MergeTreeRangeReader::createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset)
{
    size_t num_rows = result.numReadRows();

@ -1189,7 +1180,7 @@ void MergeTreeRangeReader::fillPartOffsetColumn(ReadResult & result, UInt64 lead
        *pos++ = start_part_offset++;
    }

    result.columns.emplace_back(std::move(column));
    return column;
}

Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, size_t & num_rows)
@ -1203,7 +1194,7 @@ Columns MergeTreeRangeReader::continueReadingChain(const ReadResult & result, si

    if (result.rows_per_granule.empty())
    {
        /// If zero rows were read on prev step, than there is no more rows to read.
        /// If zero rows were read on prev step, there is no more rows to read.
        /// Last granule may have less rows than index_granularity, so finish reading manually.
        stream.finish();
        return columns;

@ -101,8 +101,7 @@ public:
        IMergeTreeReader * merge_tree_reader_,
        MergeTreeRangeReader * prev_reader_,
        const PrewhereExprStep * prewhere_info_,
        bool last_reader_in_chain_,
        const Names & non_const_virtual_column_names);
        bool last_reader_in_chain_);

    MergeTreeRangeReader() = default;

@ -309,7 +308,7 @@ private:
    ReadResult startReadingChain(size_t max_rows, MarkRanges & ranges);
    Columns continueReadingChain(const ReadResult & result, size_t & num_rows);
    void executePrewhereActionsAndFilterColumns(ReadResult & result) const;
    void fillPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset);
    ColumnPtr createPartOffsetColumn(ReadResult & result, UInt64 leading_begin_part_offset, UInt64 leading_end_part_offset);

    IMergeTreeReader * merge_tree_reader = nullptr;
    const MergeTreeIndexGranularity * index_granularity = nullptr;
@ -323,7 +322,6 @@ private:

    bool last_reader_in_chain = false;
    bool is_initialized = false;
    Names non_const_virtual_column_names;

    LoggerPtr log = getLogger("MergeTreeRangeReader");
};
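With this change the range reader no longer receives `non_const_virtual_column_names`; the underlying `IMergeTreeReader` appends requested virtual columns itself through `fillVirtualColumns(columns, num_rows)` immediately after the physical read. A hedged sketch of what such a fill amounts to for constant-per-part virtuals; the loop shape and parameter names are assumptions about the general idea, not the exact implementation:

/// Sketch only: expand each constant virtual field into a full column of
/// num_rows values and append it after the physical columns. Assumes
/// `requested_virtuals` lists the virtual columns the query asked for and
/// `const_virtual_fields` is the VirtualFields map threaded through this PR.
void fillConstVirtualsSketch(
    Columns & columns,
    size_t num_rows,
    const NamesAndTypesList & requested_virtuals,
    const VirtualFields & const_virtual_fields)
{
    for (const auto & name_and_type : requested_virtuals)
    {
        auto it = const_virtual_fields.find(name_and_type.name);
        if (it == const_virtual_fields.end())
            continue; /// non-const virtuals (e.g. _part_offset) are filled elsewhere

        columns.push_back(
            name_and_type.type->createColumnConst(num_rows, it->second)->convertToFullColumnIfConst());
    }
}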
@ -35,22 +35,22 @@ size_t getApproxSizeOfPart(const IMergeTreeDataPart & part, const Names & column

MergeTreeReadPool::MergeTreeReadPool(
    RangesInDataParts && parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_)
    : MergeTreeReadPoolBase(
        std::move(parts_),
        std::move(shared_virtual_fields_),
        storage_snapshot_,
        prewhere_info_,
        actions_settings_,
        reader_settings_,
        column_names_,
        virtual_column_names_,
        settings_,
        context_)
    , min_marks_for_concurrent_read(pool_settings.min_marks_for_concurrent_read)

@ -26,12 +26,12 @@ public:

    MergeTreeReadPool(
        RangesInDataParts && parts_,
        VirtualFields shared_virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        const PrewhereInfoPtr & prewhere_info_,
        const ExpressionActionsSettings & actions_settings_,
        const MergeTreeReaderSettings & reader_settings_,
        const Names & column_names_,
        const Names & virtual_column_names_,
        const PoolSettings & settings_,
        const ContextPtr & context_);
@ -8,21 +8,21 @@ namespace DB

MergeTreeReadPoolBase::MergeTreeReadPoolBase(
    RangesInDataParts && parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & pool_settings_,
    const ContextPtr & context_)
    : parts_ranges(std::move(parts_))
    , shared_virtual_fields(std::move(shared_virtual_fields_))
    , storage_snapshot(storage_snapshot_)
    , prewhere_info(prewhere_info_)
    , actions_settings(actions_settings_)
    , reader_settings(reader_settings_)
    , column_names(column_names_)
    , virtual_column_names(virtual_column_names_)
    , pool_settings(pool_settings_)
    , owned_mark_cache(context_->getGlobalContext()->getMarkCache())
    , owned_uncompressed_cache(pool_settings_.use_uncompressed_cache ? context_->getGlobalContext()->getUncompressedCache() : nullptr)
@ -45,7 +45,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
        assertSortedAndNonIntersecting(part_with_ranges.ranges);
#endif

        MergeTreeReadTask::Info read_task_info;
        MergeTreeReadTaskInfo read_task_info;

        read_task_info.data_part = part_with_ranges.data_part;
        read_task_info.part_index_in_query = part_with_ranges.part_index_in_query;
@ -54,9 +54,16 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
        LoadedMergeTreeDataPartInfoForReader part_info(part_with_ranges.data_part, part_with_ranges.alter_conversions);

        read_task_info.task_columns = getReadTaskColumns(
            part_info, storage_snapshot, column_names, virtual_column_names,
            prewhere_info, actions_settings,
            reader_settings, /*with_subcolumns=*/ true);
            part_info,
            storage_snapshot,
            column_names,
            prewhere_info,
            actions_settings,
            reader_settings,
            /*with_subcolumns=*/true);

        read_task_info.const_virtual_fields = shared_virtual_fields;
        read_task_info.const_virtual_fields.emplace("_part_index", read_task_info.part_index_in_query);

        if (pool_settings.preferred_block_size_bytes > 0)
        {
@ -76,7 +83,7 @@ void MergeTreeReadPoolBase::fillPerPartInfos()
        }

        is_part_on_remote_disk.push_back(part_with_ranges.data_part->isStoredOnRemoteDisk());
        per_part_infos.push_back(std::make_shared<MergeTreeReadTask::Info>(std::move(read_task_info)));
        per_part_infos.push_back(std::make_shared<MergeTreeReadTaskInfo>(std::move(read_task_info)));
    }
}

@ -98,7 +105,7 @@ std::vector<size_t> MergeTreeReadPoolBase::getPerPartSumMarks() const
}

MergeTreeReadTaskPtr MergeTreeReadPoolBase::createTask(
    MergeTreeReadTask::InfoPtr read_info,
    MergeTreeReadTaskInfoPtr read_info,
    MarkRanges ranges,
    MergeTreeReadTask * previous_task) const
{

@ -23,12 +23,12 @@ public:

    MergeTreeReadPoolBase(
        RangesInDataParts && parts_,
        VirtualFields shared_virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        const PrewhereInfoPtr & prewhere_info_,
        const ExpressionActionsSettings & actions_settings_,
        const MergeTreeReaderSettings & reader_settings_,
        const Names & column_names_,
        const Names & virtual_column_names_,
        const PoolSettings & settings_,
        const ContextPtr & context_);

@ -37,12 +37,12 @@ public:
protected:
    /// Initialized in constructor
    const RangesInDataParts parts_ranges;
    const VirtualFields shared_virtual_fields;
    const StorageSnapshotPtr storage_snapshot;
    const PrewhereInfoPtr prewhere_info;
    const ExpressionActionsSettings actions_settings;
    const MergeTreeReaderSettings reader_settings;
    const Names column_names;
    const Names virtual_column_names;
    const PoolSettings pool_settings;
    const MarkCachePtr owned_mark_cache;
    const UncompressedCachePtr owned_uncompressed_cache;
@ -52,13 +52,13 @@ protected:
    std::vector<size_t> getPerPartSumMarks() const;

    MergeTreeReadTaskPtr createTask(
        MergeTreeReadTask::InfoPtr read_info,
        MergeTreeReadTaskInfoPtr read_info,
        MarkRanges ranges,
        MergeTreeReadTask * previous_task) const;

    MergeTreeReadTask::Extras getExtras() const;

    std::vector<MergeTreeReadTask::InfoPtr> per_part_infos;
    std::vector<MergeTreeReadTaskInfoPtr> per_part_infos;
    std::vector<bool> is_part_on_remote_disk;

    ReadBufferFromFileBase::ProfileCallback profile_callback;
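fillPerPartInfos() above layers the virtual fields in two steps: the pool-wide `shared_virtual_fields` are copied into every task info, then per-part values are overlaid. A short illustration of that layering (the concrete keys and values here are examples, not mandated by the PR):

/// Illustrative layering, mirroring fillPerPartInfos():
VirtualFields shared_virtual_fields;
shared_virtual_fields.emplace("_sample_factor", Float64(1.0)); /// example: identical for all parts

MergeTreeReadTaskInfo read_task_info;
read_task_info.const_virtual_fields = shared_virtual_fields;              /// copy the shared values first
read_task_info.const_virtual_fields.emplace("_part_index", UInt64(7));    /// then overlay per-part values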
@ -12,22 +12,22 @@ MergeTreeReadPoolInOrder::MergeTreeReadPoolInOrder(
    bool has_limit_below_one_block_,
    MergeTreeReadType read_type_,
    RangesInDataParts parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_)
    : MergeTreeReadPoolBase(
        std::move(parts_),
        std::move(shared_virtual_fields_),
        storage_snapshot_,
        prewhere_info_,
        actions_settings_,
        reader_settings_,
        column_names_,
        virtual_column_names_,
        settings_,
        context_)
    , has_limit_below_one_block(has_limit_below_one_block_)

@ -11,12 +11,12 @@ public:
    bool has_limit_below_one_block_,
    MergeTreeReadType read_type_,
    RangesInDataParts parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_);

@ -13,22 +13,22 @@ namespace ErrorCodes
MergeTreeReadPoolParallelReplicas::MergeTreeReadPoolParallelReplicas(
    ParallelReadingExtension extension_,
    RangesInDataParts && parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_)
    : MergeTreeReadPoolBase(
        std::move(parts_),
        std::move(shared_virtual_fields_),
        storage_snapshot_,
        prewhere_info_,
        actions_settings_,
        reader_settings_,
        column_names_,
        virtual_column_names_,
        settings_,
        context_)
    , extension(std::move(extension_))

@ -11,12 +11,12 @@ public:
    MergeTreeReadPoolParallelReplicas(
        ParallelReadingExtension extension_,
        RangesInDataParts && parts_,
        VirtualFields shared_virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        const PrewhereInfoPtr & prewhere_info_,
        const ExpressionActionsSettings & actions_settings_,
        const MergeTreeReaderSettings & reader_settings_,
        const Names & column_names_,
        const Names & virtual_column_names_,
        const PoolSettings & settings_,
        const ContextPtr & context_);

@ -12,22 +12,22 @@ MergeTreeReadPoolParallelReplicasInOrder::MergeTreeReadPoolParallelReplicasInOrd
    ParallelReadingExtension extension_,
    CoordinationMode mode_,
    RangesInDataParts parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_)
    : MergeTreeReadPoolBase(
        std::move(parts_),
        std::move(shared_virtual_fields_),
        storage_snapshot_,
        prewhere_info_,
        actions_settings_,
        reader_settings_,
        column_names_,
        virtual_column_names_,
        settings_,
        context_)
    , extension(std::move(extension_))

@ -12,12 +12,12 @@ public:
    ParallelReadingExtension extension_,
    CoordinationMode mode_,
    RangesInDataParts parts_,
    VirtualFields shared_virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    const PrewhereInfoPtr & prewhere_info_,
    const ExpressionActionsSettings & actions_settings_,
    const MergeTreeReaderSettings & reader_settings_,
    const Names & column_names_,
    const Names & virtual_column_names_,
    const PoolSettings & settings_,
    const ContextPtr & context_);
@ -1,5 +1,6 @@
#include <Storages/MergeTree/MergeTreeReadTask.h>
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Common/Exception.h>

namespace DB
@ -10,7 +11,7 @@ namespace ErrorCodes
    extern const int LOGICAL_ERROR;
}

String MergeTreeReadTask::Columns::dump() const
String MergeTreeReadTaskColumns::dump() const
{
    WriteBufferFromOwnString s;
    for (size_t i = 0; i < pre_columns.size(); ++i)
@ -22,7 +23,7 @@ String MergeTreeReadTask::Columns::dump() const
}

MergeTreeReadTask::MergeTreeReadTask(
    InfoPtr info_,
    MergeTreeReadTaskInfoPtr info_,
    Readers readers_,
    MarkRanges mark_ranges_,
    MergeTreeBlockSizePredictorPtr size_predictor_)
@ -34,23 +35,30 @@ MergeTreeReadTask::MergeTreeReadTask(
}

MergeTreeReadTask::Readers MergeTreeReadTask::createReaders(
    const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges)
    const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges)
{
    Readers new_readers;

    auto create_reader = [&](const NamesAndTypesList & columns_to_read)
    {
        return read_info->data_part->getReader(
            columns_to_read, extras.storage_snapshot, ranges,
            extras.uncompressed_cache, extras.mark_cache,
            read_info->alter_conversions, extras.reader_settings, extras.value_size_map, extras.profile_callback);
            columns_to_read,
            extras.storage_snapshot,
            ranges,
            read_info->const_virtual_fields,
            extras.uncompressed_cache,
            extras.mark_cache,
            read_info->alter_conversions,
            extras.reader_settings,
            extras.value_size_map,
            extras.profile_callback);
    };

    new_readers.main = create_reader(read_info->task_columns.columns);

    /// Add lightweight delete filtering step
    if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete())
        new_readers.prewhere.push_back(create_reader({LightweightDeleteDescription::FILTER_COLUMN}));
        new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}}));

    for (const auto & pre_columns_per_step : read_info->task_columns.pre_columns)
        new_readers.prewhere.push_back(create_reader(pre_columns_per_step));
@ -58,10 +66,8 @@ MergeTreeReadTask::Readers MergeTreeReadTask::createReaders(
    return new_readers;
}

MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
    const Readers & task_readers,
    const PrewhereExprInfo & prewhere_actions,
    const Names & non_const_virtual_column_names)
MergeTreeReadTask::RangeReaders
MergeTreeReadTask::createRangeReaders(const Readers & task_readers, const PrewhereExprInfo & prewhere_actions)
{
    MergeTreeReadTask::RangeReaders new_range_readers;
    if (prewhere_actions.steps.size() != task_readers.prewhere.size())
@ -77,10 +83,7 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
    {
        last_reader = task_readers.main->getColumns().empty() && (i + 1 == prewhere_actions.steps.size());

        MergeTreeRangeReader current_reader(
            task_readers.prewhere[i].get(),
            prev_reader, prewhere_actions.steps[i].get(),
            last_reader, non_const_virtual_column_names);
        MergeTreeRangeReader current_reader(task_readers.prewhere[i].get(), prev_reader, prewhere_actions.steps[i].get(), last_reader);

        new_range_readers.prewhere.push_back(std::move(current_reader));
        prev_reader = &new_range_readers.prewhere.back();
@ -88,11 +91,11 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(

    if (!last_reader)
    {
        new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true, non_const_virtual_column_names);
        new_range_readers.main = MergeTreeRangeReader(task_readers.main.get(), prev_reader, nullptr, true);
    }
    else
    {
        /// If all columns are read by prewhere range readers than move last prewhere range reader to main.
        /// If all columns are read by prewhere range readers, move last prewhere range reader to main.
        new_range_readers.main = std::move(new_range_readers.prewhere.back());
        new_range_readers.prewhere.pop_back();
    }
@ -100,14 +103,12 @@ MergeTreeReadTask::RangeReaders MergeTreeReadTask::createRangeReaders(
    return new_range_readers;
}

void MergeTreeReadTask::initializeRangeReaders(
    const PrewhereExprInfo & prewhere_actions,
    const Names & non_const_virtual_column_names)
void MergeTreeReadTask::initializeRangeReaders(const PrewhereExprInfo & prewhere_actions)
{
    if (range_readers.main.isInitialized())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Range reader is already initialized");

    range_readers = createRangeReaders(readers, prewhere_actions, non_const_virtual_column_names);
    range_readers = createRangeReaders(readers, prewhere_actions);
}

UInt64 MergeTreeReadTask::estimateNumRows(const BlockSizeParams & params) const

@ -20,6 +20,8 @@ using MergeTreeBlockSizePredictorPtr = std::shared_ptr<MergeTreeBlockSizePredict
class IMergeTreeDataPart;
using DataPartPtr = std::shared_ptr<const IMergeTreeDataPart>;
using MergeTreeReaderPtr = std::unique_ptr<IMergeTreeReader>;
using VirtualFields = std::unordered_map<String, Field>;

enum class MergeTreeReadType
{
@ -40,36 +42,38 @@ enum class MergeTreeReadType
    ParallelReplicas,
};

struct MergeTreeReadTaskColumns
{
    /// Column names to read during WHERE
    NamesAndTypesList columns;
    /// Column names to read during each PREWHERE step
    std::vector<NamesAndTypesList> pre_columns;

    String dump() const;
};

struct MergeTreeReadTaskInfo
{
    /// Data part which should be read while performing this task
    DataPartPtr data_part;
    /// For `part_index` virtual column
    size_t part_index_in_query;
    /// Alter conversions that should be applied on the fly for part.
    AlterConversionsPtr alter_conversions;
    /// Column names to read during PREWHERE and WHERE
    MergeTreeReadTaskColumns task_columns;
    /// Shared initialized size predictor. It is copied for each new task.
    MergeTreeBlockSizePredictorPtr shared_size_predictor;
    /// TODO: comment
    VirtualFields const_virtual_fields;
};

using MergeTreeReadTaskInfoPtr = std::shared_ptr<const MergeTreeReadTaskInfo>;

/// A batch of work for MergeTreeSelectProcessor
struct MergeTreeReadTask : private boost::noncopyable
{
public:
    struct Columns
    {
        /// Column names to read during WHERE
        NamesAndTypesList columns;
        /// Column names to read during each PREWHERE step
        std::vector<NamesAndTypesList> pre_columns;

        String dump() const;
    };

    struct Info
    {
        /// Data part which should be read while performing this task
        DataPartPtr data_part;
        /// For `part_index` virtual column
        size_t part_index_in_query;
        /// Alter conversions that should be applied on the fly for part.
        AlterConversionsPtr alter_conversions;
        /// Column names to read during PREWHERE and WHERE
        Columns task_columns;
        /// Shared initialized size predictor. It is copied for each new task.
        MergeTreeBlockSizePredictorPtr shared_size_predictor;
    };

    using InfoPtr = std::shared_ptr<const Info>;

    /// Extra params that are required for creation of reader.
    struct Extras
    {
@ -115,27 +119,32 @@ public:
        size_t num_read_bytes = 0;
    };

    MergeTreeReadTask(InfoPtr info_, Readers readers_, MarkRanges mark_ranges_, MergeTreeBlockSizePredictorPtr size_predictor_);
    void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names);
    MergeTreeReadTask(
        MergeTreeReadTaskInfoPtr info_,
        Readers readers_,
        MarkRanges mark_ranges_,
        MergeTreeBlockSizePredictorPtr size_predictor_);

    void initializeRangeReaders(const PrewhereExprInfo & prewhere_actions);

    BlockAndProgress read(const BlockSizeParams & params);
    bool isFinished() const { return mark_ranges.empty() && range_readers.main.isCurrentRangeFinished(); }

    const Info & getInfo() const { return *info; }
    const MergeTreeReadTaskInfo & getInfo() const { return *info; }
    const MergeTreeRangeReader & getMainRangeReader() const { return range_readers.main; }
    const IMergeTreeReader & getMainReader() const { return *readers.main; }

    Readers releaseReaders() { return std::move(readers); }

    static Readers createReaders(const InfoPtr & read_info, const Extras & extras, const MarkRanges & ranges);
    static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions, const Names & non_const_virtual_column_names);
    static Readers createReaders(const MergeTreeReadTaskInfoPtr & read_info, const Extras & extras, const MarkRanges & ranges);
    static RangeReaders createRangeReaders(const Readers & readers, const PrewhereExprInfo & prewhere_actions);

private:
    UInt64 estimateNumRows(const BlockSizeParams & params) const;

    /// Shared information required for reading.
    InfoPtr info;
    MergeTreeReadTaskInfoPtr info;

    /// Readers for data_part of this task.
    /// May be reused and released to the next task.
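The lightweight-delete path is now expressed entirely through `RowExistsColumn`: createReaders() adds a dedicated prewhere reader for the `_row_exists` mask, and (as the select-processor hunk further below shows) it is paired with a filter-only PREWHERE step. Condensed from the two places in this diff:

/// Reader side: a separate prewhere reader that yields only the deletion mask.
if (extras.reader_settings.apply_deleted_mask && read_info->data_part->hasLightweightDelete())
    new_readers.prewhere.push_back(create_reader({{RowExistsColumn::name, RowExistsColumn::type}}));

/// Filter side: a step with no expression actions that keeps rows where
/// _row_exists == 1 and removes the mask column from the result.
PrewhereExprStep step
{
    .type = PrewhereExprStep::Filter,
    .actions = nullptr,
    .filter_column_name = RowExistsColumn::name,
    .remove_filter_column = true,
    .need_filter = true,
    .perform_alter_conversions = true,
};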
@ -17,6 +17,7 @@ namespace ErrorCodes
MergeTreeReaderCompact::MergeTreeReaderCompact(
    MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
    NamesAndTypesList columns_,
    const VirtualFields & virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    UncompressedCache * uncompressed_cache_,
    MarkCache * mark_cache_,
@ -29,6 +30,7 @@ MergeTreeReaderCompact::MergeTreeReaderCompact(
    : IMergeTreeReader(
        data_part_info_for_read_,
        columns_,
        virtual_fields_,
        storage_snapshot_,
        uncompressed_cache_,
        mark_cache_,

@ -21,6 +21,7 @@ public:
    MergeTreeReaderCompact(
        MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
        NamesAndTypesList columns_,
        const VirtualFields & virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        UncompressedCache * uncompressed_cache_,
        MarkCache * mark_cache_,

@ -19,12 +19,14 @@ MergeTreeReaderInMemory::MergeTreeReaderInMemory(
    MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
    DataPartInMemoryPtr data_part_,
    NamesAndTypesList columns_,
    const VirtualFields & virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    MarkRanges mark_ranges_,
    MergeTreeReaderSettings settings_)
    : IMergeTreeReader(
        data_part_info_for_read_,
        columns_,
        virtual_fields_,
        storage_snapshot_,
        nullptr,
        nullptr,

@ -18,6 +18,7 @@ public:
    MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
    DataPartInMemoryPtr data_part_,
    NamesAndTypesList columns_,
    const VirtualFields & virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    MarkRanges mark_ranges_,
    MergeTreeReaderSettings settings_);

@ -24,6 +24,7 @@ namespace
MergeTreeReaderWide::MergeTreeReaderWide(
    MergeTreeDataPartInfoForReaderPtr data_part_info_,
    NamesAndTypesList columns_,
    const VirtualFields & virtual_fields_,
    const StorageSnapshotPtr & storage_snapshot_,
    UncompressedCache * uncompressed_cache_,
    MarkCache * mark_cache_,
@ -35,6 +36,7 @@ MergeTreeReaderWide::MergeTreeReaderWide(
    : IMergeTreeReader(
        data_part_info_,
        columns_,
        virtual_fields_,
        storage_snapshot_,
        uncompressed_cache_,
        mark_cache_,

@ -17,6 +17,7 @@ public:
    MergeTreeReaderWide(
        MergeTreeDataPartInfoForReaderPtr data_part_info_for_read_,
        NamesAndTypesList columns_,
        const VirtualFields & virtual_fields_,
        const StorageSnapshotPtr & storage_snapshot_,
        UncompressedCache * uncompressed_cache_,
        MarkCache * mark_cache_,
@ -12,7 +12,7 @@
|
||||
#include <Processors/Chunk.h>
|
||||
#include <Processors/QueryPlan/SourceStepWithFilter.h>
|
||||
#include <Processors/Transforms/AggregatingTransform.h>
|
||||
#include <Storages/BlockNumberColumn.h>
|
||||
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
|
||||
#include <city.h>
|
||||
|
||||
namespace DB
|
||||
@ -20,41 +20,26 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int QUERY_WAS_CANCELLED;
|
||||
}
|
||||
|
||||
static void injectNonConstVirtualColumns(
|
||||
size_t rows,
|
||||
Block & block,
|
||||
const Names & virtual_columns,
|
||||
MergeTreeReadTask * task = nullptr);
|
||||
|
||||
static void injectPartConstVirtualColumns(
|
||||
size_t rows,
|
||||
Block & block,
|
||||
MergeTreeReadTask * task,
|
||||
const DataTypePtr & partition_value_type,
|
||||
const Names & virtual_columns);
|
||||
|
||||
MergeTreeSelectProcessor::MergeTreeSelectProcessor(
|
||||
MergeTreeReadPoolPtr pool_,
|
||||
MergeTreeSelectAlgorithmPtr algorithm_,
|
||||
const MergeTreeData & storage_,
|
||||
const StorageSnapshotPtr & storage_snapshot_,
|
||||
const PrewhereInfoPtr & prewhere_info_,
|
||||
const ExpressionActionsSettings & actions_settings_,
|
||||
const MergeTreeReadTask::BlockSizeParams & block_size_params_,
|
||||
const MergeTreeReaderSettings & reader_settings_,
|
||||
const Names & virt_column_names_)
|
||||
const MergeTreeReaderSettings & reader_settings_)
|
||||
: pool(std::move(pool_))
|
||||
, algorithm(std::move(algorithm_))
|
||||
, storage_snapshot(storage_snapshot_)
|
||||
, prewhere_info(prewhere_info_)
|
||||
, actions_settings(actions_settings_)
|
||||
, prewhere_actions(getPrewhereActions(prewhere_info, actions_settings, reader_settings_.enable_multiple_prewhere_read_steps))
|
||||
, reader_settings(reader_settings_)
|
||||
, block_size_params(block_size_params_)
|
||||
, virt_column_names(virt_column_names_)
|
||||
, partition_value_type(storage_.getPartitionValueType())
|
||||
, result_header(transformHeader(pool->getHeader(), prewhere_info))
|
||||
{
|
||||
if (reader_settings.apply_deleted_mask)
|
||||
{
|
||||
@ -62,7 +47,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
|
||||
{
|
||||
.type = PrewhereExprStep::Filter,
|
||||
.actions = nullptr,
|
||||
.filter_column_name = LightweightDeleteDescription::FILTER_COLUMN.name,
|
||||
.filter_column_name = RowExistsColumn::name,
|
||||
.remove_filter_column = true,
|
||||
.need_filter = true,
|
||||
.perform_alter_conversions = true,
|
||||
@ -71,16 +56,6 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor(
|
||||
lightweight_delete_filter_step = std::make_shared<PrewhereExprStep>(std::move(step));
|
||||
}
|
||||
|
||||
header_without_const_virtual_columns = SourceStepWithFilter::applyPrewhereActions(pool->getHeader(), prewhere_info);
|
||||
size_t non_const_columns_offset = header_without_const_virtual_columns.columns();
|
||||
injectNonConstVirtualColumns(0, header_without_const_virtual_columns, virt_column_names);
|
||||
|
||||
for (size_t col_num = non_const_columns_offset; col_num < header_without_const_virtual_columns.columns(); ++col_num)
|
||||
non_const_virtual_column_names.emplace_back(header_without_const_virtual_columns.getByPosition(col_num).name);
|
||||
|
||||
result_header = header_without_const_virtual_columns;
|
||||
injectPartConstVirtualColumns(0, result_header, nullptr, partition_value_type, virt_column_names);
|
||||
|
||||
if (!prewhere_actions.steps.empty())
|
||||
LOG_TRACE(log, "PREWHERE condition was split into {} steps: {}", prewhere_actions.steps.size(), prewhere_actions.dumpConditions());
|
||||
|
||||
@ -163,8 +138,6 @@ ChunkAndProgress MergeTreeSelectProcessor::read()
|
||||
|
||||
if (res.row_count)
|
||||
{
|
||||
injectVirtualColumns(res.block, res.row_count, task.get(), partition_value_type, virt_column_names);
|
||||
|
||||
/// Reorder the columns according to result_header
|
||||
Columns ordered_columns;
|
||||
ordered_columns.reserve(result_header.columns());
|
||||
@ -198,209 +171,12 @@ void MergeTreeSelectProcessor::initializeRangeReaders()
|
||||
for (const auto & step : prewhere_actions.steps)
|
||||
all_prewhere_actions.steps.push_back(step);
|
||||
|
||||
task->initializeRangeReaders(all_prewhere_actions, non_const_virtual_column_names);
|
||||
task->initializeRangeReaders(all_prewhere_actions);
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
Block MergeTreeSelectProcessor::transformHeader(Block block, const PrewhereInfoPtr & prewhere_info)
|
||||
{
|
||||
struct VirtualColumnsInserter
|
||||
{
|
||||
explicit VirtualColumnsInserter(Block & block_) : block(block_) {}
|
||||
|
||||
bool columnExists(const String & name) const { return block.has(name); }
|
||||
|
||||
void insertUInt8Column(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeUInt8>(), name});
|
||||
}
|
||||
|
||||
void insertUInt64Column(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeUInt64>(), name});
|
||||
}
|
||||
|
||||
void insertUUIDColumn(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeUUID>(), name});
|
||||
}
|
||||
|
||||
void insertLowCardinalityColumn(const ColumnPtr & column, const String & name)
|
||||
{
|
||||
block.insert({column, std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()), name});
|
||||
}
|
||||
|
||||
void insertPartitionValueColumn(
|
||||
size_t rows, const Row & partition_value, const DataTypePtr & partition_value_type, const String & name)
|
||||
{
|
||||
ColumnPtr column;
|
||||
if (rows)
|
||||
column = partition_value_type->createColumnConst(rows, Tuple(partition_value.begin(), partition_value.end()))
|
||||
->convertToFullColumnIfConst();
|
||||
else
|
||||
column = partition_value_type->createColumn();
|
||||
|
||||
block.insert({column, partition_value_type, name});
|
||||
}
|
||||
|
||||
Block & block;
|
||||
};
|
||||
}
|
||||
|
||||
/// Adds virtual columns that are not const for all rows
|
||||
static void injectNonConstVirtualColumns(
|
||||
size_t rows,
|
||||
Block & block,
|
||||
const Names & virtual_columns,
|
||||
MergeTreeReadTask * task)
|
||||
{
|
||||
VirtualColumnsInserter inserter(block);
|
||||
for (const auto & virtual_column_name : virtual_columns)
|
||||
{
|
||||
if (virtual_column_name == "_part_offset")
|
||||
{
|
||||
if (!rows)
|
||||
{
|
||||
inserter.insertUInt64Column(DataTypeUInt64().createColumn(), virtual_column_name);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!inserter.columnExists(virtual_column_name))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Column {} must have been filled part reader",
|
||||
virtual_column_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (virtual_column_name == LightweightDeleteDescription::FILTER_COLUMN.name)
|
||||
{
|
||||
/// If _row_exists column isn't present in the part then fill it here with 1s
|
||||
ColumnPtr column;
|
||||
if (rows)
|
||||
column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumnConst(rows, 1)->convertToFullColumnIfConst();
|
||||
else
|
||||
column = LightweightDeleteDescription::FILTER_COLUMN.type->createColumn();
|
||||
|
||||
inserter.insertUInt8Column(column, virtual_column_name);
|
||||
}
|
||||
|
||||
if (virtual_column_name == BlockNumberColumn::name)
|
||||
{
|
||||
ColumnPtr column;
|
||||
if (rows)
|
||||
{
|
||||
size_t value = 0;
|
||||
if (task)
|
||||
{
|
||||
value = task->getInfo().data_part ? task->getInfo().data_part->info.min_block : 0;
|
||||
}
|
||||
column = BlockNumberColumn::type->createColumnConst(rows, value)->convertToFullColumnIfConst();
|
||||
}
|
||||
else
|
||||
column = BlockNumberColumn::type->createColumn();
|
||||
|
||||
inserter.insertUInt64Column(column, virtual_column_name);
|
||||
}
|
||||
}
|
||||
}

/// Adds virtual columns that are const for the whole part
static void injectPartConstVirtualColumns(
    size_t rows,
    Block & block,
    MergeTreeReadTask * task,
    const DataTypePtr & partition_value_type,
    const Names & virtual_columns)
{
    VirtualColumnsInserter inserter(block);
    /// add virtual columns
    /// Except _sample_factor, which is added from the outside.
    if (!virtual_columns.empty())
    {
        if (unlikely(rows && !task))
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot insert virtual columns to non-empty chunk without specified task.");

        const IMergeTreeDataPart * part = nullptr;

        if (rows)
        {
            part = task->getInfo().data_part.get();
            if (part->isProjectionPart())
                part = part->getParentPart();
        }

        for (const auto & virtual_column_name : virtual_columns)
        {
            if (virtual_column_name == "_part")
            {
                ColumnPtr column;
                if (rows)
                    column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
                                 .createColumnConst(rows, part->name)
                                 ->convertToFullColumnIfConst();
                else
                    column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();

                inserter.insertLowCardinalityColumn(column, virtual_column_name);
            }
            else if (virtual_column_name == "_part_index")
            {
                ColumnPtr column;
                if (rows)
                    column = DataTypeUInt64().createColumnConst(rows, task->getInfo().part_index_in_query)->convertToFullColumnIfConst();
                else
                    column = DataTypeUInt64().createColumn();

                inserter.insertUInt64Column(column, virtual_column_name);
            }
            else if (virtual_column_name == "_part_uuid")
            {
                ColumnPtr column;
                if (rows)
                    column = DataTypeUUID().createColumnConst(rows, part->uuid)->convertToFullColumnIfConst();
                else
                    column = DataTypeUUID().createColumn();

                inserter.insertUUIDColumn(column, virtual_column_name);
            }
            else if (virtual_column_name == "_partition_id")
            {
                ColumnPtr column;
                if (rows)
                    column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}
                                 .createColumnConst(rows, part->info.partition_id)
                                 ->convertToFullColumnIfConst();
                else
                    column = DataTypeLowCardinality{std::make_shared<DataTypeString>()}.createColumn();

                inserter.insertLowCardinalityColumn(column, virtual_column_name);
            }
            else if (virtual_column_name == "_partition_value")
            {
                if (rows)
                    inserter.insertPartitionValueColumn(rows, part->partition.value, partition_value_type, virtual_column_name);
                else
                    inserter.insertPartitionValueColumn(rows, {}, partition_value_type, virtual_column_name);
            }
        }
    }
}

void MergeTreeSelectProcessor::injectVirtualColumns(
    Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
    /// First add non-const columns that are filled by the range reader and then const columns that we will fill ourselves.
    /// Note that the order is important: virtual columns filled by the range reader must go first
    injectNonConstVirtualColumns(row_count, block, virtual_columns, task);
    injectPartConstVirtualColumns(row_count, block, task, partition_value_type, virtual_columns);
}

Block MergeTreeSelectProcessor::transformHeader(
    Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
    injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns);
    auto transformed = SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info);
    return transformed;
    return SourceStepWithFilter::applyPrewhereActions(std::move(block), prewhere_info);
}

}
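
To see how the two injection passes compose, here is a minimal sketch (hypothetical and simplified; `partition_value_type` and the column set are assumed, and the call would have to happen inside MergeTreeSelectProcessor) of building a header the way transformHeader() does. With row_count = 0 and no task, every branch above takes the createColumn() path, so the block gains empty but correctly typed virtual columns:

    /// Hypothetical illustration only: an empty header gains typed, zero-row
    /// virtual columns; with non-zero rows and a task, the same call would
    /// instead produce constant columns materialized from the part's metadata.
    Block header;
    Names virtuals{"_part", "_part_index", "_partition_id"};
    injectVirtualColumns(header, /*row_count=*/ 0, /*task=*/ nullptr, partition_value_type, virtuals);
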
@ -41,21 +41,15 @@ public:
    MergeTreeSelectProcessor(
        MergeTreeReadPoolPtr pool_,
        MergeTreeSelectAlgorithmPtr algorithm_,
        const MergeTreeData & storage_,
        const StorageSnapshotPtr & storage_snapshot_,
        const PrewhereInfoPtr & prewhere_info_,
        const ExpressionActionsSettings & actions_settings_,
        const MergeTreeReadTask::BlockSizeParams & block_size_params_,
        const MergeTreeReaderSettings & reader_settings_,
        const Names & virt_column_names_);
        const MergeTreeReaderSettings & reader_settings_);

    String getName() const;

    static Block transformHeader(
        Block block,
        const PrewhereInfoPtr & prewhere_info,
        const DataTypePtr & partition_value_type,
        const Names & virtual_columns);

    static Block transformHeader(Block block, const PrewhereInfoPtr & prewhere_info);
    Block getHeader() const { return result_header; }

    ChunkAndProgress read();
@ -81,14 +75,12 @@ private:
        size_t num_read_bytes = 0;
    };

    /// Used for filling header with no rows as well as block with data
    static void injectVirtualColumns(Block & block, size_t row_count, MergeTreeReadTask * task, const DataTypePtr & partition_value_type, const Names & virtual_columns);

    /// Sets up range readers corresponding to data readers
    void initializeRangeReaders();

    const MergeTreeReadPoolPtr pool;
    const MergeTreeSelectAlgorithmPtr algorithm;
    const StorageSnapshotPtr storage_snapshot;

    const PrewhereInfoPtr prewhere_info;
    const ExpressionActionsSettings actions_settings;
@ -96,17 +88,11 @@ private:

    const MergeTreeReaderSettings reader_settings;
    const MergeTreeReadTask::BlockSizeParams block_size_params;
    const Names virt_column_names;
    const DataTypePtr partition_value_type;

    /// Current task to read from.
    MergeTreeReadTaskPtr task;
    /// This step is added when the part has lightweight delete mask
    PrewhereExprStepPtr lightweight_delete_filter_step;
    /// These columns will be filled by the merge tree range reader
    Names non_const_virtual_column_names;
    /// This header is used for chunks from readFromPart().
    Block header_without_const_virtual_columns;
    /// The result of getHeader(); chunks with this header are returned from read().
    Block result_header;

@ -2,6 +2,7 @@
#include <Storages/MergeTree/MergeTreeBlockReadUtils.h>
#include <Storages/MergeTree/LoadedMergeTreeDataPartInfoForReader.h>
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Processors/Transforms/FilterTransform.h>
#include <Processors/QueryPlan/ISourceStep.h>
#include <QueryPipeline/QueryPipelineBuilder.h>
@ -16,6 +17,7 @@

namespace DB
{

namespace ErrorCodes
{
    extern const int MEMORY_LIMIT_EXCEEDED;
@ -55,7 +57,6 @@ protected:
    Chunk generate() override;

private:

    const MergeTreeData & storage;
    StorageSnapshotPtr storage_snapshot;

@ -86,7 +87,6 @@ private:
    void finish();
};


MergeTreeSequentialSource::MergeTreeSequentialSource(
    MergeTreeSequentialSourceType type,
    const MergeTreeData & storage_,
@ -136,10 +136,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
    {
        auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical)
            .withExtendedObjects()
            .withSystemColumns();

        if (storage.supportsSubcolumns())
            options.withSubcolumns();
            .withVirtuals()
            .withSubcolumns(storage.supportsSubcolumns());

        columns_for_reader = storage_snapshot->getColumnsByNames(options, columns_to_read);
    }
@ -181,9 +179,37 @@ MergeTreeSequentialSource::MergeTreeSequentialSource(
        mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())});

    reader = data_part->getReader(
        columns_for_reader, storage_snapshot,
        *mark_ranges, /* uncompressed_cache = */ nullptr,
        mark_cache.get(), alter_conversions, reader_settings, {}, {});
        columns_for_reader,
        storage_snapshot,
        *mark_ranges,
        /*virtual_fields=*/ {},
        /*uncompressed_cache=*/{},
        mark_cache.get(),
        alter_conversions,
        reader_settings,
        {},
        {});
}

static void fillBlockNumberColumns(
    Columns & res_columns,
    const NamesAndTypesList & columns_list,
    UInt64 block_number,
    UInt64 num_rows)
{
    chassert(res_columns.size() == columns_list.size());

    auto it = columns_list.begin();
    for (size_t i = 0; i < res_columns.size(); ++i, ++it)
    {
        if (res_columns[i])
            continue;

        if (it->name == BlockNumberColumn::name)
        {
            res_columns[i] = BlockNumberColumn::type->createColumnConst(num_rows, block_number)->convertToFullColumnIfConst();
        }
    }
}
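
A small usage sketch (hypothetical values) of the contract above: slots that were already read from disk are non-null and are skipped, and only a missing `_block_number` slot is synthesized from the part's minimum block number:

    /// Hypothetical illustration only.
    Columns res_columns(columns_list.size());   /// all slots start as nullptr
    fillBlockNumberColumns(res_columns, columns_list, /*block_number=*/ 42, /*num_rows=*/ 8192);
    /// Any slot whose name is _block_number now holds a full column of 42s;
    /// every other slot is left exactly as it was.
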

Chunk MergeTreeSequentialSource::generate()
@ -204,16 +230,17 @@ try

    if (rows_read)
    {
        fillBlockNumberColumns(columns, sample, data_part->info.min_block, rows_read);
        reader->fillVirtualColumns(columns, rows_read);

        current_row += rows_read;
        current_mark += (rows_to_read == rows_read);

        bool should_evaluate_missing_defaults = false;
        reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read, data_part->info.min_block);
        reader->fillMissingColumns(columns, should_evaluate_missing_defaults, rows_read);

        if (should_evaluate_missing_defaults)
        {
            reader->evaluateMissingDefaults({}, columns);
        }

        reader->performRequiredConversions(columns);

@ -278,14 +305,13 @@ Pipe createMergeTreeSequentialSource(
    bool quiet,
    std::shared_ptr<std::atomic<size_t>> filtered_rows_count)
{
    const auto & filter_column = LightweightDeleteDescription::FILTER_COLUMN;

    /// The part might have some rows masked by lightweight deletes
    const bool need_to_filter_deleted_rows = apply_deleted_mask && data_part->hasLightweightDelete();
    const bool has_filter_column = std::ranges::find(columns_to_read, filter_column.name) != columns_to_read.end();
    const bool has_filter_column = std::ranges::find(columns_to_read, RowExistsColumn::name) != columns_to_read.end();

    if (need_to_filter_deleted_rows && !has_filter_column)
        columns_to_read.emplace_back(filter_column.name);
        columns_to_read.emplace_back(RowExistsColumn::name);

    auto column_part_source = std::make_shared<MergeTreeSequentialSource>(type,
        storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges),
@ -299,7 +325,7 @@ Pipe createMergeTreeSequentialSource(
        pipe.addSimpleTransform([filtered_rows_count, has_filter_column](const Block & header)
        {
            return std::make_shared<FilterTransform>(
                header, nullptr, filter_column.name, !has_filter_column, false, filtered_rows_count);
                header, nullptr, RowExistsColumn::name, !has_filter_column, false, filtered_rows_count);
        });
    }
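
For intuition, a self-contained sketch (hypothetical, deliberately simplified to plain vectors) of the row-selection rule applied above: a row survives exactly when its `_row_exists` mask value is non-zero, which is how lightweight deletes hide rows without rewriting the part on disk.

    #include <cstdint>
    #include <vector>

    /// Hypothetical illustration only: FilterTransform operates on real columns,
    /// but the per-row decision it makes is exactly this one.
    std::vector<int64_t> filterByRowExists(const std::vector<int64_t> & values,
                                           const std::vector<uint8_t> & row_exists)
    {
        std::vector<int64_t> result;
        result.reserve(values.size());
        for (size_t i = 0; i < values.size(); ++i)
            if (row_exists[i] != 0)    /// 0 marks a row deleted by lightweight DELETE
                result.push_back(values[i]);
        return result;
    }
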

52
src/Storages/MergeTree/MergeTreeVirtualColumns.cpp
Normal file
@ -0,0 +1,52 @@
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <DataTypes/DataTypesNumber.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>

namespace DB
{

namespace ErrorCodes
{
    extern const int NO_SUCH_COLUMN_IN_TABLE;
}

static ASTPtr getCompressionCodecDeltaLZ4()
{
    return makeASTFunction("CODEC",
        std::make_shared<ASTIdentifier>("Delta"),
        std::make_shared<ASTIdentifier>("LZ4"));
}

const String RowExistsColumn::name = "_row_exists";
const DataTypePtr RowExistsColumn::type = std::make_shared<DataTypeUInt8>();

const String BlockNumberColumn::name = "_block_number";
const DataTypePtr BlockNumberColumn::type = std::make_shared<DataTypeUInt64>();
const ASTPtr BlockNumberColumn::codec = getCompressionCodecDeltaLZ4();

Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part)
{
    if (column_name == RowExistsColumn::name)
        return 1ULL;

    if (column_name == BlockNumberColumn::name)
        return part.info.min_block;

    if (column_name == "_part")
        return part.name;

    if (column_name == "_part_uuid")
        return part.uuid;

    if (column_name == "_partition_id")
        return part.info.partition_id;

    if (column_name == "_partition_value")
        return Tuple(part.partition.value.begin(), part.partition.value.end());

    throw Exception(ErrorCodes::NO_SUCH_COLUMN_IN_TABLE, "Unexpected const virtual column: {}", column_name);
}

}
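
A brief usage sketch (hypothetical caller; `part` and `num_rows` are assumed to be in scope): because these virtual columns are constant within a part, a single Field is enough to materialize a whole chunk.

    /// Hypothetical illustration only.
    Field value = getFieldForConstVirtualColumn(BlockNumberColumn::name, part);
    auto column = BlockNumberColumn::type->createColumnConst(num_rows, value)
                      ->convertToFullColumnIfConst();
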

26
src/Storages/MergeTree/MergeTreeVirtualColumns.h
Normal file
@ -0,0 +1,26 @@
#pragma once
#include <Core/Types.h>
#include <DataTypes/IDataType.h>
#include <Parsers/IAST_fwd.h>

namespace DB
{

class IMergeTreeDataPart;

struct RowExistsColumn
{
    static const String name;
    static const DataTypePtr type;
};

struct BlockNumberColumn
{
    static const String name;
    static const DataTypePtr type;
    static const ASTPtr codec;
};

Field getFieldForConstVirtualColumn(const String & column_name, const IMergeTreeDataPart & part);

}
@ -23,7 +23,7 @@
#include <Storages/MutationCommands.h>
#include <Storages/MergeTree/MergeTreeDataMergerMutator.h>
#include <Storages/MergeTree/MergeTreeIndexInverted.h>
#include <Storages/BlockNumberColumn.h>
#include <Storages/MergeTree/MergeTreeVirtualColumns.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeVariant.h>
#include <boost/algorithm/string/replace.hpp>
@ -168,7 +168,7 @@ static void splitAndModifyMutationCommands(
    {
        if (!mutated_columns.contains(column.name))
        {
            if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtuals().contains(column.name))
            if (!metadata_snapshot->getColumns().has(column.name) && !part->storage.getVirtualsPtr()->has(column.name))
            {
                /// We cannot add the column because there's no such column in table.
                /// It's okay if the column was dropped. It may also be absent in dropped_columns
@ -283,7 +283,6 @@ getColumnsForNewDataPart(
    ColumnsDescription part_columns(source_part->getColumns());
    NamesAndTypesList system_columns;

    const auto & deleted_mask_column = LightweightDeleteDescription::FILTER_COLUMN;
    bool supports_lightweight_deletes = source_part->supportLightweightDeleteMutate();

    bool deleted_mask_updated = false;
@ -299,9 +298,9 @@ getColumnsForNewDataPart(
    {
        for (const auto & [column_name, _] : command.column_to_update_expression)
        {
            if (column_name == deleted_mask_column.name
            if (column_name == RowExistsColumn::name
                && supports_lightweight_deletes
                && !storage_columns_set.contains(deleted_mask_column.name))
                && !storage_columns_set.contains(RowExistsColumn::name))
                deleted_mask_updated = true;
        }
    }
@ -323,12 +322,12 @@ getColumnsForNewDataPart(
        }
    }

    if (!storage_columns_set.contains(deleted_mask_column.name))
    if (!storage_columns_set.contains(RowExistsColumn::name))
    {
        if (deleted_mask_updated || (part_columns.has(deleted_mask_column.name) && !has_delete_command))
        if (deleted_mask_updated || (part_columns.has(RowExistsColumn::name) && !has_delete_command))
        {
            storage_columns.push_back(deleted_mask_column);
            storage_columns_set.insert(deleted_mask_column.name);
            storage_columns.emplace_back(RowExistsColumn::name, RowExistsColumn::type);
            storage_columns_set.insert(RowExistsColumn::name);
        }
    }

@ -34,6 +34,7 @@ public:
        , partition_id(part_->info.partition_id)
    {
        setInMemoryMetadata(storage.getInMemoryMetadata());
        setVirtuals(*storage.getVirtualsPtr());
    }

    /// Used in queries with projection.
@ -90,11 +91,6 @@ public:

    bool supportsSubcolumns() const override { return true; }

    NamesAndTypesList getVirtuals() const override
    {
        return storage.getVirtuals();
    }

    String getPartitionId() const
    {
        return partition_id;
@ -9,10 +9,10 @@
namespace DB
{

static std::pair<Block, Block> getHeaders(StorageNATS & storage, const StorageSnapshotPtr & storage_snapshot)
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot)
{
    auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized();
    auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage.getVirtuals().getNames());
    auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();

    return {non_virtual_header, virtual_header};
}
@ -33,7 +33,7 @@ NATSSource::NATSSource(
    const Names & columns,
    size_t max_block_size_,
    StreamingHandleErrorMode handle_error_mode_)
    : NATSSource(storage_, storage_snapshot_, getHeaders(storage_, storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_)
    : NATSSource(storage_, storage_snapshot_, getHeaders(storage_snapshot_), context_, columns, max_block_size_, handle_error_mode_)
{
}

@ -88,6 +88,7 @@ StorageNATS::StorageNATS(
    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);
    setInMemoryMetadata(storage_metadata);
    setVirtuals(createVirtuals(nats_settings->nats_handle_error_mode));

    nats_context = addSettings(getContext());
    nats_context->makeQueryContext();
@ -131,6 +132,19 @@ StorageNATS::StorageNATS(
    connection_task->deactivate();
}

VirtualColumnsDescription StorageNATS::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
    VirtualColumnsDescription desc;
    desc.addEphemeral("_subject", std::make_shared<DataTypeString>(), "");

    if (handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        desc.addEphemeral("_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
        desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
    }

    return desc;
}
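
The description above only declares names and types; a sketch (hypothetical, simplified; `subject` is an assumed variable holding the current message's subject) of how a consumer-side source can then build the matching virtual header and append one value per consumed message:

    /// Hypothetical illustration only.
    auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();
    MutableColumns virtual_columns = virtual_header.cloneEmptyColumns();
    virtual_columns[0]->insert(subject);   /// the NATS subject for this message
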

Names StorageNATS::parseList(const String & list, char delim)
{
@ -746,20 +760,4 @@ void registerStorageNATS(StorageFactory & factory)
    factory.registerStorage("NATS", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}


NamesAndTypesList StorageNATS::getVirtuals() const
{
    auto virtuals = NamesAndTypesList{
        {"_subject", std::make_shared<DataTypeString>()}
    };

    if (nats_settings->nats_handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        virtuals.push_back({"_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
        virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
    }

    return virtuals;
}

}

@ -61,7 +61,6 @@ public:
    NATSConsumerPtr popConsumer(std::chrono::milliseconds timeout);

    const String & getFormatName() const { return format_name; }
    NamesAndTypesList getVirtuals() const override;

    void incrementReader();
    void decrementReader();
@ -137,6 +136,7 @@ private:

    static Names parseList(const String & list, char delim);
    static String getTableBasedName(String name, const StorageID & table_id);
    static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);

    ContextMutablePtr addSettings(ContextPtr context) const;
    size_t getMaxBlockSize() const;
@ -72,6 +72,7 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
    throw Exception(ErrorCodes::BAD_ARGUMENTS, "Storage MaterializedPostgreSQL is allowed only for Atomic database");

    setInMemoryMetadata(storage_metadata);
    setVirtuals(createVirtuals());

    replication_settings->materialized_postgresql_tables_list = remote_table_name_;

@ -127,8 +128,16 @@ StorageMaterializedPostgreSQL::StorageMaterializedPostgreSQL(
    , nested_table_id(nested_storage_->getStorageID())
{
    setInMemoryMetadata(nested_storage_->getInMemoryMetadata());
    setVirtuals(*nested_storage_->getVirtualsPtr());
}

VirtualColumnsDescription StorageMaterializedPostgreSQL::createVirtuals()
{
    VirtualColumnsDescription desc;
    desc.addEphemeral("_sign", std::make_shared<DataTypeInt8>(), "");
    desc.addEphemeral("_version", std::make_shared<DataTypeUInt64>(), "");
    return desc;
}

/// A temporary clone table might be created for current table in order to update its schema and reload
/// all data in the background while current table will still handle read requests.
@ -254,15 +263,6 @@ void StorageMaterializedPostgreSQL::dropInnerTableIfAny(bool sync, ContextPtr lo
}


NamesAndTypesList StorageMaterializedPostgreSQL::getVirtuals() const
{
    return NamesAndTypesList{
        {"_sign", std::make_shared<DataTypeInt8>()},
        {"_version", std::make_shared<DataTypeUInt64>()}
    };
}


bool StorageMaterializedPostgreSQL::needRewriteQueryWithFinal(const Names & column_names) const
{
    return needRewriteQueryWithFinalForStorage(column_names, getNested());

@ -89,8 +89,6 @@ public:
    /// Used only for single MaterializedPostgreSQL storage.
    void dropInnerTableIfAny(bool sync, ContextPtr local_context) override;

    NamesAndTypesList getVirtuals() const override;

    bool needRewriteQueryWithFinal(const Names & column_names) const override;

    void read(
@ -138,6 +136,8 @@ private:
    static std::shared_ptr<ASTColumnDeclaration> getMaterializedColumnsDeclaration(
        String name, String type, UInt64 default_value);

    static VirtualColumnsDescription createVirtuals();

    ASTPtr getColumnDeclaration(const DataTypePtr & data_type) const;

    String getNestedTableName() const;
@ -11,10 +11,10 @@
namespace DB
{

static std::pair<Block, Block> getHeaders(StorageRabbitMQ & storage_, const StorageSnapshotPtr & storage_snapshot)
static std::pair<Block, Block> getHeaders(const StorageSnapshotPtr & storage_snapshot)
{
    auto non_virtual_header = storage_snapshot->metadata->getSampleBlockNonMaterialized();
    auto virtual_header = storage_snapshot->getSampleBlockForColumns(storage_.getVirtuals().getNames());
    auto virtual_header = storage_snapshot->virtual_columns->getSampleBlock();

    return {non_virtual_header, virtual_header};
}
@ -40,7 +40,7 @@ RabbitMQSource::RabbitMQSource(
    : RabbitMQSource(
        storage_,
        storage_snapshot_,
        getHeaders(storage_, storage_snapshot_),
        getHeaders(storage_snapshot_),
        context_,
        columns,
        max_block_size_,

@ -136,6 +136,7 @@ StorageRabbitMQ::StorageRabbitMQ(
    StorageInMemoryMetadata storage_metadata;
    storage_metadata.setColumns(columns_);
    setInMemoryMetadata(storage_metadata);
    setVirtuals(createVirtuals(rabbitmq_settings->rabbitmq_handle_error_mode));

    rabbitmq_context = addSettings(getContext());
    rabbitmq_context->makeQueryContext();
@ -191,6 +192,26 @@ StorageRabbitMQ::StorageRabbitMQ(
    init_task->deactivate();
}

VirtualColumnsDescription StorageRabbitMQ::createVirtuals(StreamingHandleErrorMode handle_error_mode)
{
    VirtualColumnsDescription desc;

    desc.addEphemeral("_exchange_name", std::make_shared<DataTypeString>(), "");
    desc.addEphemeral("_channel_id", std::make_shared<DataTypeString>(), "");
    desc.addEphemeral("_delivery_tag", std::make_shared<DataTypeUInt64>(), "");
    desc.addEphemeral("_redelivered", std::make_shared<DataTypeUInt8>(), "");
    desc.addEphemeral("_message_id", std::make_shared<DataTypeString>(), "");
    desc.addEphemeral("_timestamp", std::make_shared<DataTypeUInt64>(), "");

    if (handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        desc.addEphemeral("_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
        desc.addEphemeral("_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>()), "");
    }

    return desc;
}
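
In STREAM mode the two extra virtuals carry failure details; a sketch (hypothetical, simplified; `parsed_ok`, `payload`, and `exception_message` are assumed names, and the two columns are assumed to be the mutable columns backing `_raw_message` and `_error`) of how a source can populate them so malformed messages are preserved rather than dropped:

    /// Hypothetical illustration only.
    if (parsed_ok)
    {
        raw_message_column->insertDefault();   /// NULL: nothing to report
        error_column->insertDefault();
    }
    else
    {
        raw_message_column->insert(payload);        /// keep the raw message
        error_column->insert(exception_message);    /// and the parse error
    }
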

Names StorageRabbitMQ::parseSettings(String settings_list)
{
@ -1213,25 +1234,4 @@ void registerStorageRabbitMQ(StorageFactory & factory)
    factory.registerStorage("RabbitMQ", creator_fn, StorageFactory::StorageFeatures{ .supports_settings = true, });
}


NamesAndTypesList StorageRabbitMQ::getVirtuals() const
{
    auto virtuals = NamesAndTypesList{
        {"_exchange_name", std::make_shared<DataTypeString>()},
        {"_channel_id", std::make_shared<DataTypeString>()},
        {"_delivery_tag", std::make_shared<DataTypeUInt64>()},
        {"_redelivered", std::make_shared<DataTypeUInt8>()},
        {"_message_id", std::make_shared<DataTypeString>()},
        {"_timestamp", std::make_shared<DataTypeUInt64>()}
    };

    if (rabbitmq_settings->rabbitmq_handle_error_mode == StreamingHandleErrorMode::STREAM)
    {
        virtuals.push_back({"_raw_message", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
        virtuals.push_back({"_error", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())});
    }

    return virtuals;
}

}

@ -68,7 +68,6 @@ public:
    RabbitMQConsumerPtr popConsumer(std::chrono::milliseconds timeout);

    const String & getFormatName() const { return format_name; }
    NamesAndTypesList getVirtuals() const override;

    String getExchange() const { return exchange_name; }
    void unbindExchange();
@ -191,6 +190,8 @@ private:
    bool tryStreamToViews();
    bool hasDependencies(const StorageID & table_id);

    static VirtualColumnsDescription createVirtuals(StreamingHandleErrorMode handle_error_mode);

    static String getRandomName()
    {
        std::uniform_int_distribution<int> distribution('a', 'z');
@ -155,8 +155,7 @@ StorageS3Queue::StorageS3Queue(
    storage_metadata.setConstraints(constraints_);
    storage_metadata.setComment(comment);
    setInMemoryMetadata(storage_metadata);

    virtual_columns = VirtualColumnUtils::getPathFileAndSizeVirtualsForStorage(storage_metadata.getSampleBlock().getNamesAndTypesList());
    setVirtuals(VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_metadata.getColumns()));

    LOG_INFO(log, "Using zookeeper path: {}", zk_path.string());
    task = getContext()->getSchedulePool().createTask("S3QueueStreamingTask", [this] { threadFunc(); });
@ -315,7 +314,7 @@ void StorageS3Queue::read(
    }

    auto this_ptr = std::static_pointer_cast<StorageS3Queue>(shared_from_this());
    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context), getVirtuals());
    auto read_from_format_info = prepareReadingFromFormat(column_names, storage_snapshot, supportsSubsetOfColumns(local_context));

    auto reading = std::make_unique<ReadFromS3Queue>(
        column_names,
@ -493,7 +492,7 @@ bool StorageS3Queue::streamToViews()
    auto block_io = interpreter.execute();
    auto file_iterator = createFileIterator(s3queue_context, nullptr);

    auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context), getVirtuals());
    auto read_from_format_info = prepareReadingFromFormat(block_io.pipeline.getHeader().getNames(), storage_snapshot, supportsSubsetOfColumns(s3queue_context));

    Pipes pipes;
    pipes.reserve(s3queue_settings->s3queue_processing_threads_num);
@ -602,8 +601,9 @@ void StorageS3Queue::checkTableStructure(const String & zookeeper_prefix, const
std::shared_ptr<StorageS3Queue::FileIterator> StorageS3Queue::createFileIterator(ContextPtr local_context, const ActionsDAG::Node * predicate)
{
    auto glob_iterator = std::make_unique<StorageS3QueueSource::GlobIterator>(
        *configuration.client, configuration.url, predicate, virtual_columns, local_context,
        *configuration.client, configuration.url, predicate, getVirtualsList(), local_context,
        /* read_keys */nullptr, configuration.request_settings);

    return std::make_shared<FileIterator>(files_metadata, std::move(glob_iterator), s3queue_settings->s3queue_current_shard_num, shutdown_called);
}

@ -51,8 +51,6 @@ public:
        size_t max_block_size,
        size_t num_streams) override;

    NamesAndTypesList getVirtuals() const override { return virtual_columns; }

    const auto & getFormatName() const { return configuration.format; }

    const fs::path & getZooKeeperPath() const { return zk_path; }
@ -71,7 +69,6 @@ private:
    Configuration configuration;

    const std::optional<FormatSettings> format_settings;
    NamesAndTypesList virtual_columns;

    BackgroundSchedulePool::TaskHolder task;
    std::atomic<bool> stream_cancelled{false};