mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-03 04:52:10 +00:00
combine FINAL selection and WHERE filter
Co-authored-by: Joris Giovannangeli <joris.giovannangeli@ahrefs.com> Signed-off-by: Duc Canh Le <duccanh.le@ahrefs.com>
This commit is contained in:
parent
dfc8793b5c
commit
93a4d69c05
@ -14,8 +14,12 @@ namespace DB
|
|||||||
|
|
||||||
struct ChunkSelectFinalIndices : public ChunkInfo
|
struct ChunkSelectFinalIndices : public ChunkInfo
|
||||||
{
|
{
|
||||||
ColumnPtr select_final_indices;
|
ColumnPtr column_holder;
|
||||||
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_) : select_final_indices(std::move(select_final_indices_)) {}
|
const ColumnUInt32 * select_final_indices;
|
||||||
|
explicit ChunkSelectFinalIndices(MutableColumnPtr select_final_indices_) : column_holder(std::move(select_final_indices_))
|
||||||
|
{
|
||||||
|
select_final_indices = typeid_cast<const ColumnUInt32 *>(column_holder.get());
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Merges several sorted inputs into one.
|
/** Merges several sorted inputs into one.
|
||||||
|
@ -986,7 +986,8 @@ static void addMergingFinal(
|
|||||||
MergeTreeData::MergingParams merging_params,
|
MergeTreeData::MergingParams merging_params,
|
||||||
Names partition_key_columns,
|
Names partition_key_columns,
|
||||||
size_t max_block_size_rows,
|
size_t max_block_size_rows,
|
||||||
bool use_skipping_final)
|
bool use_skipping_final,
|
||||||
|
bool query_has_where)
|
||||||
{
|
{
|
||||||
const auto & header = pipe.getHeader();
|
const auto & header = pipe.getHeader();
|
||||||
size_t num_outputs = pipe.numOutputPorts();
|
size_t num_outputs = pipe.numOutputPorts();
|
||||||
@ -1041,9 +1042,14 @@ static void addMergingFinal(
|
|||||||
};
|
};
|
||||||
|
|
||||||
pipe.addTransform(get_merging_processor());
|
pipe.addTransform(get_merging_processor());
|
||||||
if (use_skipping_final && merging_params.mode == MergeTreeData::MergingParams::Replacing)
|
if (use_skipping_final && !query_has_where)
|
||||||
|
{
|
||||||
|
/// Skipping FINAL algorithm will output the original chunk and a column indices of selected rows
|
||||||
|
/// If query has WHERE, we will merge the indices with the filter in FilterTransform later
|
||||||
|
/// Otherwise, use SelectByIndicesTransform to select rows
|
||||||
pipe.addSimpleTransform([](const Block & header_)
|
pipe.addSimpleTransform([](const Block & header_)
|
||||||
{ return std::make_shared<SelectByIndicesTransform>(header_); });
|
{ return std::make_shared<SelectByIndicesTransform>(header_); });
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -1052,6 +1058,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
|||||||
{
|
{
|
||||||
const auto & settings = context->getSettingsRef();
|
const auto & settings = context->getSettingsRef();
|
||||||
const auto & data_settings = data.getSettings();
|
const auto & data_settings = data.getSettings();
|
||||||
|
const auto & select_query = query_info.query->as<ASTSelectQuery>();
|
||||||
|
bool query_has_where = select_query && select_query->where();
|
||||||
|
|
||||||
PartRangesReadInfo info(parts_with_ranges, settings, *data_settings);
|
PartRangesReadInfo info(parts_with_ranges, settings, *data_settings);
|
||||||
|
|
||||||
@ -1184,7 +1192,8 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
|
|||||||
data.merging_params,
|
data.merging_params,
|
||||||
partition_key_columns,
|
partition_key_columns,
|
||||||
block_size.max_block_size_rows,
|
block_size.max_block_size_rows,
|
||||||
settings.use_skipping_final);
|
use_skipping_final,
|
||||||
|
query_has_where);
|
||||||
|
|
||||||
merging_pipes.emplace_back(Pipe::unitePipes(std::move(pipes)));
|
merging_pipes.emplace_back(Pipe::unitePipes(std::move(pipes)));
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@
|
|||||||
#include <Core/Field.h>
|
#include <Core/Field.h>
|
||||||
#include <DataTypes/DataTypeNullable.h>
|
#include <DataTypes/DataTypeNullable.h>
|
||||||
#include <DataTypes/DataTypeLowCardinality.h>
|
#include <DataTypes/DataTypeLowCardinality.h>
|
||||||
|
#include <Processors/Merges/Algorithms/ReplacingSortedAlgorithm.h>
|
||||||
|
|
||||||
namespace DB
|
namespace DB
|
||||||
{
|
{
|
||||||
@ -34,6 +35,87 @@ static void replaceFilterToConstant(Block & block, const String & filter_column_
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static std::shared_ptr<const ChunkSelectFinalIndices> getSelectByFinalIndices(Chunk & chunk)
|
||||||
|
{
|
||||||
|
if (auto select_final_indices_info = std::dynamic_pointer_cast<const ChunkSelectFinalIndices>(chunk.getChunkInfo()))
|
||||||
|
{
|
||||||
|
chunk.setChunkInfo(nullptr);
|
||||||
|
return select_final_indices_info;
|
||||||
|
}
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
executeSelectByIndices(Columns & columns, std::shared_ptr<const ChunkSelectFinalIndices> & select_final_indices_info, size_t & num_rows)
|
||||||
|
{
|
||||||
|
if (select_final_indices_info)
|
||||||
|
{
|
||||||
|
const auto & index_column = select_final_indices_info->select_final_indices;
|
||||||
|
|
||||||
|
if (index_column && index_column->size() != num_rows)
|
||||||
|
for (auto & column : columns)
|
||||||
|
column = column->index(*index_column, 0);
|
||||||
|
|
||||||
|
num_rows = index_column->size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unique_ptr<IFilterDescription> combineFilterAndIndices(
|
||||||
|
std::unique_ptr<FilterDescription> description,
|
||||||
|
std::shared_ptr<const ChunkSelectFinalIndices> & select_final_indices_info,
|
||||||
|
size_t num_rows)
|
||||||
|
{
|
||||||
|
if (select_final_indices_info)
|
||||||
|
{
|
||||||
|
const auto * index_column = select_final_indices_info->select_final_indices;
|
||||||
|
|
||||||
|
if (description->data && index_column && index_column->size() != num_rows)
|
||||||
|
{
|
||||||
|
const auto & selected_by_indices = index_column->getData();
|
||||||
|
const auto & selected_by_filter = *description->data;
|
||||||
|
/// At this point we know that the filter is not constant, just create a new filter
|
||||||
|
auto mutable_holder = ColumnUInt8::create(num_rows, 0);
|
||||||
|
ColumnUInt8 * concrete_column = typeid_cast<ColumnUInt8 *>(mutable_holder.get());
|
||||||
|
auto & data = concrete_column->getData();
|
||||||
|
for (auto idx : selected_by_indices)
|
||||||
|
data[idx] |= 1;
|
||||||
|
for (size_t i = 0; i < num_rows; ++i)
|
||||||
|
data[i] &= selected_by_filter[i];
|
||||||
|
description->data_holder = std::move(mutable_holder);
|
||||||
|
description->data = &data;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return std::move(description);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::unique_ptr<IFilterDescription> combineFilterAndIndices(
|
||||||
|
std::unique_ptr<SparseFilterDescription> description,
|
||||||
|
std::shared_ptr<const ChunkSelectFinalIndices> & select_final_indices_info,
|
||||||
|
size_t num_rows)
|
||||||
|
{
|
||||||
|
if (select_final_indices_info)
|
||||||
|
{
|
||||||
|
const auto * index_column = select_final_indices_info->select_final_indices;
|
||||||
|
|
||||||
|
if (description->filter_indices && index_column && index_column->size() != num_rows)
|
||||||
|
{
|
||||||
|
std::unique_ptr<FilterDescription> res;
|
||||||
|
const auto & selected_by_indices = index_column->getData();
|
||||||
|
const auto & selected_by_filter = typeid_cast<const ColumnUInt64 *>(description->filter_indices)->getData();
|
||||||
|
auto mutable_holder = ColumnUInt8::create(num_rows, 0);
|
||||||
|
auto & data = mutable_holder->getData();
|
||||||
|
for (auto idx : selected_by_indices)
|
||||||
|
data[idx] += 1;
|
||||||
|
for (auto idx : selected_by_filter)
|
||||||
|
data[idx] = (data[idx] + 1) << 1;
|
||||||
|
res->data_holder = std::move(mutable_holder);
|
||||||
|
res->data = &data;
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return description;
|
||||||
|
}
|
||||||
|
|
||||||
Block FilterTransform::transformHeader(
|
Block FilterTransform::transformHeader(
|
||||||
Block header,
|
Block header,
|
||||||
const ActionsDAG * expression,
|
const ActionsDAG * expression,
|
||||||
@ -126,6 +208,7 @@ void FilterTransform::doTransform(Chunk & chunk)
|
|||||||
{
|
{
|
||||||
size_t num_rows_before_filtration = chunk.getNumRows();
|
size_t num_rows_before_filtration = chunk.getNumRows();
|
||||||
auto columns = chunk.detachColumns();
|
auto columns = chunk.detachColumns();
|
||||||
|
auto select_final_indices_info = getSelectByFinalIndices(chunk);
|
||||||
|
|
||||||
{
|
{
|
||||||
Block block = getInputPort().getHeader().cloneWithColumns(columns);
|
Block block = getInputPort().getHeader().cloneWithColumns(columns);
|
||||||
@ -139,6 +222,7 @@ void FilterTransform::doTransform(Chunk & chunk)
|
|||||||
|
|
||||||
if (constant_filter_description.always_true || on_totals)
|
if (constant_filter_description.always_true || on_totals)
|
||||||
{
|
{
|
||||||
|
executeSelectByIndices(columns, select_final_indices_info, num_rows_before_filtration);
|
||||||
chunk.setColumns(std::move(columns), num_rows_before_filtration);
|
chunk.setColumns(std::move(columns), num_rows_before_filtration);
|
||||||
removeFilterIfNeed(chunk);
|
removeFilterIfNeed(chunk);
|
||||||
return;
|
return;
|
||||||
@ -159,11 +243,20 @@ void FilterTransform::doTransform(Chunk & chunk)
|
|||||||
|
|
||||||
if (constant_filter_description.always_true)
|
if (constant_filter_description.always_true)
|
||||||
{
|
{
|
||||||
|
executeSelectByIndices(columns, select_final_indices_info, num_rows_before_filtration);
|
||||||
chunk.setColumns(std::move(columns), num_rows_before_filtration);
|
chunk.setColumns(std::move(columns), num_rows_before_filtration);
|
||||||
removeFilterIfNeed(chunk);
|
removeFilterIfNeed(chunk);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::unique_ptr<IFilterDescription> filter_description;
|
||||||
|
if (filter_column->isSparse())
|
||||||
|
filter_description = combineFilterAndIndices(
|
||||||
|
std::make_unique<SparseFilterDescription>(*filter_column), select_final_indices_info, num_rows_before_filtration);
|
||||||
|
else
|
||||||
|
filter_description = combineFilterAndIndices(
|
||||||
|
std::make_unique<FilterDescription>(*filter_column), select_final_indices_info, num_rows_before_filtration);
|
||||||
|
|
||||||
/** Let's find out how many rows will be in result.
|
/** Let's find out how many rows will be in result.
|
||||||
* To do this, we filter out the first non-constant column
|
* To do this, we filter out the first non-constant column
|
||||||
* or calculate number of set bytes in the filter.
|
* or calculate number of set bytes in the filter.
|
||||||
@ -178,12 +271,6 @@ void FilterTransform::doTransform(Chunk & chunk)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<IFilterDescription> filter_description;
|
|
||||||
if (filter_column->isSparse())
|
|
||||||
filter_description = std::make_unique<SparseFilterDescription>(*filter_column);
|
|
||||||
else
|
|
||||||
filter_description = std::make_unique<FilterDescription>(*filter_column);
|
|
||||||
|
|
||||||
size_t num_filtered_rows = 0;
|
size_t num_filtered_rows = 0;
|
||||||
if (first_non_constant_column != num_columns)
|
if (first_non_constant_column != num_columns)
|
||||||
{
|
{
|
||||||
|
Loading…
Reference in New Issue
Block a user