mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Merge pull request #9886 from ClickHouse/add_sort_check_to_mutations
Remove order by from mutations interpreter and add check
This commit is contained in:
commit
655ce5b6a7
93
dbms/src/DataStreams/CheckSortedBlockInputStream.cpp
Normal file
93
dbms/src/DataStreams/CheckSortedBlockInputStream.cpp
Normal file
@ -0,0 +1,93 @@
|
||||
#include <DataStreams/CheckSortedBlockInputStream.h>
|
||||
#include <Core/SortDescription.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
CheckSortedBlockInputStream::CheckSortedBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const SortDescription & sort_description_)
|
||||
: header(input_->getHeader())
|
||||
, sort_description_map(addPositionsToSortDescriptions(sort_description_))
|
||||
{
|
||||
children.push_back(input_);
|
||||
}
|
||||
|
||||
SortDescriptionsWithPositions
|
||||
CheckSortedBlockInputStream::addPositionsToSortDescriptions(const SortDescription & sort_description)
|
||||
{
|
||||
SortDescriptionsWithPositions result;
|
||||
result.reserve(sort_description.size());
|
||||
|
||||
for (SortColumnDescription description_copy : sort_description)
|
||||
{
|
||||
if (!description_copy.column_name.empty())
|
||||
description_copy.column_number = header.getPositionByName(description_copy.column_name);
|
||||
|
||||
result.push_back(description_copy);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Compares values in columns. Columns must have equal types.
|
||||
struct SortingLessOrEqualComparator
|
||||
{
|
||||
const SortDescriptionsWithPositions & sort_description;
|
||||
|
||||
explicit SortingLessOrEqualComparator(const SortDescriptionsWithPositions & sort_description_)
|
||||
: sort_description(sort_description_) {}
|
||||
|
||||
bool operator()(const Columns & left, size_t left_index, const Columns & right, size_t right_index) const
|
||||
{
|
||||
for (const auto & elem : sort_description)
|
||||
{
|
||||
size_t column_number = elem.column_number;
|
||||
|
||||
const IColumn * left_col = left[column_number].get();
|
||||
const IColumn * right_col = right[column_number].get();
|
||||
|
||||
int res = elem.direction * left_col->compareAt(left_index, right_index, *right_col, elem.nulls_direction);
|
||||
if (res < 0)
|
||||
return true;
|
||||
else if (res > 0)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
Block CheckSortedBlockInputStream::readImpl()
|
||||
{
|
||||
Block block = children.back()->read();
|
||||
if (!block || block.rows() == 0)
|
||||
return block;
|
||||
|
||||
SortingLessOrEqualComparator less(sort_description_map);
|
||||
|
||||
auto block_columns = block.getColumns();
|
||||
if (!last_row.empty() && !less(last_row, 0, block_columns, 0))
|
||||
throw Exception("Sort order of blocks violated", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
size_t rows = block.rows();
|
||||
for (size_t i = 1; i < rows; ++i)
|
||||
if (!less(block_columns, i - 1, block_columns, i))
|
||||
throw Exception("Sort order of blocks violated", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
last_row.clear();
|
||||
for (size_t i = 0; i < block.columns(); ++i)
|
||||
{
|
||||
auto column = block_columns[i]->cloneEmpty();
|
||||
column->insertFrom(*block_columns[i], rows - 1);
|
||||
last_row.emplace_back(std::move(column));
|
||||
}
|
||||
|
||||
return block;
|
||||
}
|
||||
|
||||
}
|
35
dbms/src/DataStreams/CheckSortedBlockInputStream.h
Normal file
35
dbms/src/DataStreams/CheckSortedBlockInputStream.h
Normal file
@ -0,0 +1,35 @@
|
||||
#pragma once
|
||||
#include <DataStreams/IBlockInputStream.h>
|
||||
#include <Core/SortDescription.h>
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
using SortDescriptionsWithPositions = std::vector<SortColumnDescription>;
|
||||
|
||||
/// Streams checks that flow of blocks is sorted in the sort_description order
|
||||
/// Othrewise throws exception in readImpl function.
|
||||
class CheckSortedBlockInputStream : public IBlockInputStream
|
||||
{
|
||||
public:
|
||||
CheckSortedBlockInputStream(
|
||||
const BlockInputStreamPtr & input_,
|
||||
const SortDescription & sort_description_);
|
||||
|
||||
String getName() const override { return "CheckingSorted"; }
|
||||
|
||||
Block getHeader() const override { return header; }
|
||||
|
||||
protected:
|
||||
Block readImpl() override;
|
||||
|
||||
private:
|
||||
Block header;
|
||||
SortDescriptionsWithPositions sort_description_map;
|
||||
Columns last_row;
|
||||
|
||||
private:
|
||||
/// Just checks, that all sort_descriptions has column_number
|
||||
SortDescriptionsWithPositions addPositionsToSortDescriptions(const SortDescription & sort_description);
|
||||
};
|
||||
}
|
181
dbms/src/DataStreams/tests/gtest_check_sorted_stream.cpp
Normal file
181
dbms/src/DataStreams/tests/gtest_check_sorted_stream.cpp
Normal file
@ -0,0 +1,181 @@
|
||||
#include <Core/Block.h>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataStreams/BlocksListBlockInputStream.h>
|
||||
#include <DataStreams/CheckSortedBlockInputStream.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
|
||||
using namespace DB;
|
||||
|
||||
|
||||
static SortDescription getSortDescription(const std::vector<std::string> & column_names)
|
||||
{
|
||||
SortDescription descr;
|
||||
for (const auto & column : column_names)
|
||||
{
|
||||
descr.emplace_back(column, 1, 1);
|
||||
}
|
||||
return descr;
|
||||
}
|
||||
|
||||
static Block getSortedBlockWithSize(
|
||||
const std::vector<std::string> & columns,
|
||||
size_t rows, size_t stride, size_t start)
|
||||
{
|
||||
ColumnsWithTypeAndName cols;
|
||||
size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64);
|
||||
for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++)
|
||||
{
|
||||
auto column = ColumnUInt64::create(rows, 0);
|
||||
for (size_t j = 0; j < rows; ++j)
|
||||
{
|
||||
column->getElement(j) = start;
|
||||
start += stride;
|
||||
}
|
||||
cols.emplace_back(std::move(column), std::make_shared<DataTypeUInt64>(), columns[i]);
|
||||
}
|
||||
return Block(cols);
|
||||
}
|
||||
|
||||
|
||||
static Block getUnSortedBlockWithSize(const std::vector<std::string> & columns, size_t rows, size_t stride, size_t start, size_t bad_row, size_t bad_column, size_t bad_value)
|
||||
{
|
||||
ColumnsWithTypeAndName cols;
|
||||
size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64);
|
||||
for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++)
|
||||
{
|
||||
auto column = ColumnUInt64::create(rows, 0);
|
||||
for (size_t j = 0; j < rows; ++j)
|
||||
{
|
||||
if (bad_row == j && bad_column == i)
|
||||
column->getElement(j) = bad_value;
|
||||
else if (i < bad_column)
|
||||
column->getElement(j) = 0;
|
||||
else
|
||||
column->getElement(j) = start;
|
||||
|
||||
start += stride;
|
||||
}
|
||||
cols.emplace_back(std::move(column), std::make_shared<DataTypeUInt64>(), columns[i]);
|
||||
}
|
||||
return Block(cols);
|
||||
}
|
||||
|
||||
static Block getEqualValuesBlockWithSize(
|
||||
const std::vector<std::string> & columns, size_t rows)
|
||||
{
|
||||
ColumnsWithTypeAndName cols;
|
||||
size_t size_of_row_in_bytes = columns.size() * sizeof(UInt64);
|
||||
for (size_t i = 0; i * sizeof(UInt64) < size_of_row_in_bytes; i++)
|
||||
{
|
||||
auto column = ColumnUInt64::create(rows, 0);
|
||||
for (size_t j = 0; j < rows; ++j)
|
||||
column->getElement(j) = 0;
|
||||
|
||||
cols.emplace_back(std::move(column), std::make_shared<DataTypeUInt64>(), columns[i]);
|
||||
}
|
||||
return Block(cols);
|
||||
}
|
||||
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckGoodCase)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
|
||||
BlocksList blocks;
|
||||
for (size_t i = 0; i < 3; ++i)
|
||||
blocks.push_back(getSortedBlockWithSize(key_columns, 10, 1, i * 10));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_EQ(sorted.read(), Block());
|
||||
}
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckBadLastRow)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
BlocksList blocks;
|
||||
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 100));
|
||||
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 200));
|
||||
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 0));
|
||||
blocks.push_back(getSortedBlockWithSize(key_columns, 100, 1, 300));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_THROW(sorted.read(), DB::Exception);
|
||||
}
|
||||
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckUnsortedBlock1)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
BlocksList blocks;
|
||||
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 5, 1, 77));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
EXPECT_THROW(sorted.read(), DB::Exception);
|
||||
}
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckUnsortedBlock2)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
BlocksList blocks;
|
||||
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 99, 2, 77));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
EXPECT_THROW(sorted.read(), DB::Exception);
|
||||
}
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckUnsortedBlock3)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
BlocksList blocks;
|
||||
blocks.push_back(getUnSortedBlockWithSize(key_columns, 100, 1, 0, 50, 0, 77));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
EXPECT_THROW(sorted.read(), DB::Exception);
|
||||
}
|
||||
|
||||
TEST(CheckSortedBlockInputStream, CheckEqualBlock)
|
||||
{
|
||||
std::vector<std::string> key_columns{"K1", "K2", "K3"};
|
||||
auto sort_description = getSortDescription(key_columns);
|
||||
BlocksList blocks;
|
||||
blocks.push_back(getEqualValuesBlockWithSize(key_columns, 100));
|
||||
blocks.push_back(getEqualValuesBlockWithSize(key_columns, 10));
|
||||
blocks.push_back(getEqualValuesBlockWithSize(key_columns, 1));
|
||||
|
||||
BlockInputStreamPtr stream = std::make_shared<BlocksListBlockInputStream>(std::move(blocks));
|
||||
|
||||
CheckSortedBlockInputStream sorted(stream, sort_description);
|
||||
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
EXPECT_NO_THROW(sorted.read());
|
||||
}
|
@ -12,13 +12,13 @@
|
||||
#include <DataStreams/CreatingSetsBlockInputStream.h>
|
||||
#include <DataStreams/MaterializingBlockInputStream.h>
|
||||
#include <DataStreams/NullBlockInputStream.h>
|
||||
#include <DataStreams/CheckSortedBlockInputStream.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTFunction.h>
|
||||
#include <Parsers/ASTLiteral.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/formatAST.h>
|
||||
#include <Parsers/ASTOrderByElement.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
|
||||
@ -620,24 +620,6 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector<Stage> &
|
||||
}
|
||||
select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression));
|
||||
}
|
||||
/// We have to execute select in order of primary key
|
||||
/// because we don't sort results additionaly and don't have
|
||||
/// any guarantees on data order without ORDER BY. It's almost free, because we
|
||||
/// have optimization for data read in primary key order.
|
||||
if (ASTPtr key_expr = storage->getSortingKeyAST(); key_expr && !key_expr->children.empty())
|
||||
{
|
||||
ASTPtr dummy;
|
||||
auto res = std::make_shared<ASTExpressionList>();
|
||||
for (const auto & key_part : key_expr->children)
|
||||
{
|
||||
auto order_by_expr = std::make_shared<ASTOrderByElement>(1, 1, false, dummy, false, dummy, dummy, dummy);
|
||||
order_by_expr->children.push_back(key_part);
|
||||
|
||||
res->children.push_back(order_by_expr);
|
||||
}
|
||||
|
||||
select->setExpression(ASTSelectQuery::Expression::ORDER_BY, std::move(res));
|
||||
}
|
||||
|
||||
return select;
|
||||
}
|
||||
@ -702,9 +684,17 @@ BlockInputStreamPtr MutationsInterpreter::execute(TableStructureReadLockHolder &
|
||||
throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
BlockInputStreamPtr in = select_interpreter->execute().in;
|
||||
|
||||
auto result_stream = addStreamsForLaterStages(stages, in);
|
||||
|
||||
/// Sometimes we update just part of columns (for example UPDATE mutation)
|
||||
/// in this case we don't read sorting key, so just we don't check anything.
|
||||
if (auto sort_desc = getStorageSortDescriptionIfPossible(result_stream->getHeader()))
|
||||
result_stream = std::make_shared<CheckSortedBlockInputStream>(result_stream, *sort_desc);
|
||||
|
||||
if (!updated_header)
|
||||
updated_header = std::make_unique<Block>(result_stream->getHeader());
|
||||
|
||||
return result_stream;
|
||||
}
|
||||
|
||||
@ -723,4 +713,22 @@ size_t MutationsInterpreter::evaluateCommandsSize()
|
||||
return std::max(prepareQueryAffectedAST(commands)->size(), mutation_ast->size());
|
||||
}
|
||||
|
||||
std::optional<SortDescription> MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const
|
||||
{
|
||||
Names sort_columns = storage->getSortingKeyColumns();
|
||||
SortDescription sort_description;
|
||||
size_t sort_columns_size = sort_columns.size();
|
||||
sort_description.reserve(sort_columns_size);
|
||||
|
||||
for (size_t i = 0; i < sort_columns_size; ++i)
|
||||
{
|
||||
if (header.has(sort_columns[i]))
|
||||
sort_description.emplace_back(header.getPositionByName(sort_columns[i]), 1, 1);
|
||||
else
|
||||
return {};
|
||||
}
|
||||
|
||||
return sort_description;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -43,6 +43,8 @@ private:
|
||||
ASTPtr prepareInterpreterSelectQuery(std::vector<Stage> &prepared_stages, bool dry_run);
|
||||
BlockInputStreamPtr addStreamsForLaterStages(const std::vector<Stage> & prepared_stages, BlockInputStreamPtr in) const;
|
||||
|
||||
std::optional<SortDescription> getStorageSortDescriptionIfPossible(const Block & header) const;
|
||||
|
||||
StoragePtr storage;
|
||||
MutationCommands commands;
|
||||
const Context & context;
|
||||
|
Loading…
Reference in New Issue
Block a user