Merge pull request #26668 from ClickHouse/aku/window-materialize

materialize all columns in window transform
This commit is contained in:
Alexander Kuzmenkov 2021-07-22 13:42:36 +03:00 committed by GitHub
commit 60ca9990e5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 109 additions and 10 deletions

View File

@ -4,6 +4,7 @@
#include <Common/Arena.h>
#include <Common/FieldVisitorsAccurateComparison.h>
#include <common/arithmeticOverflow.h>
#include <Columns/ColumnConst.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Interpreters/ExpressionActions.h>
@ -965,10 +966,37 @@ void WindowTransform::writeOutCurrentRow()
}
}
// Sanity check (debugging aid): verify that two sets of columns have the
// same C++ types, and that any constant columns also hold equal values.
// The window transform has to compare values across blocks (e.g. when
// searching for peer group boundaries), which is only correct when
// consecutive blocks use exactly the same column representation.
static void assertSameColumns(const Columns & left_all,
    const Columns & right_all)
{
#ifndef NDEBUG
    assert(left_all.size() == right_all.size());

    for (size_t i = 0; i < left_all.size(); ++i)
    {
        const auto * left_column = left_all[i].get();
        const auto * right_column = right_all[i].get();

        assert(left_column);
        assert(right_column);

        // Same dynamic type on both sides. Comparing hash_code() is enough
        // here because both typeids come from the same binary.
        assert(typeid(*left_column).hash_code()
            == typeid(*right_column).hash_code());

        if (isColumnConst(*left_column))
        {
            // For constant columns a matching type is not enough -- the
            // constant values themselves must also be equal.
            Field left_value = assert_cast<const ColumnConst &>(*left_column).getField();
            Field right_value = assert_cast<const ColumnConst &>(*right_column).getField();

            assert(left_value == right_value);
        }
    }
#else
    // In release builds every assert() above is a no-op, but the Field
    // materialization for constant columns would still run on each chunk.
    // Compile the whole check out instead of paying that cost.
    (void) left_all;
    (void) right_all;
#endif
}
void WindowTransform::appendChunk(Chunk & chunk)
{
// fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
// input_is_finished);
// fmt::print(stderr, "chunk structure '{}'\n", chunk.dumpStructure());
// First, prepare the new input block and add it to the queue. We might not
// have it if it's end of data, though.
@ -984,28 +1012,45 @@ void WindowTransform::appendChunk(Chunk & chunk)
blocks.push_back({});
auto & block = blocks.back();
// Use the number of rows from the Chunk, because it is correct even in
// the case where the Chunk has no columns. Not sure if this actually
// happens, because even in the case of `count() over ()` we have a dummy
// input column.
block.rows = chunk.getNumRows();
block.input_columns = chunk.detachColumns();
// If we have a (logically) constant column, some Chunks will have a
// Const column for it, and some -- materialized. Such difference is
// generated by e.g. MergingSortedAlgorithm, which mostly materializes
// the constant ORDER BY columns, but in some obscure cases passes them
// through, unmaterialized. This mix is a pain to work with in Window
// Transform, because we have to compare columns across blocks, when e.g.
// searching for peer group boundaries, and each of the four combinations
// of const and materialized requires different code.
// Another problem with Const columns is that the aggregate functions
// can't work with them, so we have to materialize them like the
// Aggregator does.
// Just materialize everything.
auto columns = chunk.detachColumns();
for (auto & column : columns)
column = std::move(column)->convertToFullColumnIfConst();
block.input_columns = std::move(columns);
// Initialize output columns.
for (auto & ws : workspaces)
{
// Aggregate functions can't work with constant columns, so we have to
// materialize them like the Aggregator does.
for (const auto column_index : ws.argument_column_indices)
{
block.input_columns[column_index]
= std::move(block.input_columns[column_index])
->convertToFullColumnIfConst();
}
block.output_columns.push_back(ws.aggregate_function->getReturnType()
->createColumn());
block.output_columns.back()->reserve(block.rows);
}
// As a debugging aid, assert that chunk have the same C++ type of
// columns, because we often have to work across chunks.
if (blocks.size() > 1)
{
assertSameColumns(blocks.front().input_columns,
blocks.back().input_columns);
}
}
// Start the calculations. First, advance the partition end.

View File

@ -1,6 +1,37 @@
-- { echo }
-- Another test for window functions because the other one is too long.
set allow_experimental_window_functions = 1;
-- some craziness with a mix of materialized and unmaterialized const columns
-- after merging sorted transform, that used to break the peer group detection in
-- the window transform.
CREATE TABLE order_by_const
(
`a` UInt64,
`b` UInt64,
`c` UInt64,
`d` UInt64
)
ENGINE = MergeTree
ORDER BY (a, b)
SETTINGS index_granularity = 8192;
truncate table order_by_const;
system stop merges order_by_const;
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 1, 101, 1), (1, 2, 102, 1), (1, 3, 103, 1), (1, 4, 104, 1);
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 5, 104, 1), (1, 6, 105, 1), (2, 1, 106, 2), (2, 1, 107, 2);
INSERT INTO order_by_const(a, b, c, d) VALUES (2, 2, 107, 2), (2, 3, 108, 2), (2, 4, 109, 2);
SELECT row_number() OVER (order by 1, a) FROM order_by_const;
1
2
3
4
5
6
7
8
9
10
11
drop table order_by_const;
-- expressions in window frame
select count() over (rows between 1 + 1 preceding and 1 + 1 following) from numbers(10);
3

View File

@ -2,6 +2,29 @@
-- Another test for window functions because the other one is too long.
set allow_experimental_window_functions = 1;
-- some craziness with a mix of materialized and unmaterialized const columns
-- after merging sorted transform, that used to break the peer group detection in
-- the window transform.
CREATE TABLE order_by_const
(
`a` UInt64,
`b` UInt64,
`c` UInt64,
`d` UInt64
)
ENGINE = MergeTree
ORDER BY (a, b)
SETTINGS index_granularity = 8192;
truncate table order_by_const;
system stop merges order_by_const;
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 1, 101, 1), (1, 2, 102, 1), (1, 3, 103, 1), (1, 4, 104, 1);
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 5, 104, 1), (1, 6, 105, 1), (2, 1, 106, 2), (2, 1, 107, 2);
INSERT INTO order_by_const(a, b, c, d) VALUES (2, 2, 107, 2), (2, 3, 108, 2), (2, 4, 109, 2);
SELECT row_number() OVER (order by 1, a) FROM order_by_const;
drop table order_by_const;
-- expressions in window frame
select count() over (rows between 1 + 1 preceding and 1 + 1 following) from numbers(10);