mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #26668 from ClickHouse/aku/window-materialize
materialize all columns in window transform
This commit is contained in:
commit
60ca9990e5
@ -4,6 +4,7 @@
|
||||
#include <Common/Arena.h>
|
||||
#include <Common/FieldVisitorsAccurateComparison.h>
|
||||
#include <common/arithmeticOverflow.h>
|
||||
#include <Columns/ColumnConst.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/getLeastSupertype.h>
|
||||
#include <Interpreters/ExpressionActions.h>
|
||||
@ -965,10 +966,37 @@ void WindowTransform::writeOutCurrentRow()
|
||||
}
|
||||
}
|
||||
|
||||
static void assertSameColumns(const Columns & left_all,
|
||||
const Columns & right_all)
|
||||
{
|
||||
assert(left_all.size() == right_all.size());
|
||||
|
||||
for (size_t i = 0; i < left_all.size(); ++i)
|
||||
{
|
||||
const auto * left_column = left_all[i].get();
|
||||
const auto * right_column = right_all[i].get();
|
||||
|
||||
assert(left_column);
|
||||
assert(right_column);
|
||||
|
||||
assert(typeid(*left_column).hash_code()
|
||||
== typeid(*right_column).hash_code());
|
||||
|
||||
if (isColumnConst(*left_column))
|
||||
{
|
||||
Field left_value = assert_cast<const ColumnConst &>(*left_column).getField();
|
||||
Field right_value = assert_cast<const ColumnConst &>(*right_column).getField();
|
||||
|
||||
assert(left_value == right_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void WindowTransform::appendChunk(Chunk & chunk)
|
||||
{
|
||||
// fmt::print(stderr, "new chunk, {} rows, finished={}\n", chunk.getNumRows(),
|
||||
// input_is_finished);
|
||||
// fmt::print(stderr, "chunk structure '{}'\n", chunk.dumpStructure());
|
||||
|
||||
// First, prepare the new input block and add it to the queue. We might not
|
||||
// have it if it's end of data, though.
|
||||
@ -984,28 +1012,45 @@ void WindowTransform::appendChunk(Chunk & chunk)
|
||||
|
||||
blocks.push_back({});
|
||||
auto & block = blocks.back();
|
||||
|
||||
// Use the number of rows from the Chunk, because it is correct even in
|
||||
// the case where the Chunk has no columns. Not sure if this actually
|
||||
// happens, because even in the case of `count() over ()` we have a dummy
|
||||
// input column.
|
||||
block.rows = chunk.getNumRows();
|
||||
block.input_columns = chunk.detachColumns();
|
||||
|
||||
// If we have a (logically) constant column, some Chunks will have a
|
||||
// Const column for it, and some -- materialized. Such difference is
|
||||
// generated by e.g. MergingSortedAlgorithm, which mostly materializes
|
||||
// the constant ORDER BY columns, but in some obscure cases passes them
|
||||
// through, unmaterialized. This mix is a pain to work with in Window
|
||||
// Transform, because we have to compare columns across blocks, when e.g.
|
||||
// searching for peer group boundaries, and each of the four combinations
|
||||
// of const and materialized requires different code.
|
||||
// Another problem with Const columns is that the aggregate functions
|
||||
// can't work with them, so we have to materialize them like the
|
||||
// Aggregator does.
|
||||
// Just materialize everything.
|
||||
auto columns = chunk.detachColumns();
|
||||
for (auto & column : columns)
|
||||
column = std::move(column)->convertToFullColumnIfConst();
|
||||
block.input_columns = std::move(columns);
|
||||
|
||||
// Initialize output columns.
|
||||
for (auto & ws : workspaces)
|
||||
{
|
||||
// Aggregate functions can't work with constant columns, so we have to
|
||||
// materialize them like the Aggregator does.
|
||||
for (const auto column_index : ws.argument_column_indices)
|
||||
{
|
||||
block.input_columns[column_index]
|
||||
= std::move(block.input_columns[column_index])
|
||||
->convertToFullColumnIfConst();
|
||||
}
|
||||
|
||||
block.output_columns.push_back(ws.aggregate_function->getReturnType()
|
||||
->createColumn());
|
||||
block.output_columns.back()->reserve(block.rows);
|
||||
}
|
||||
|
||||
// As a debugging aid, assert that chunks have the same C++ types of
|
||||
// columns, because we often have to work across chunks.
|
||||
if (blocks.size() > 1)
|
||||
{
|
||||
assertSameColumns(blocks.front().input_columns,
|
||||
blocks.back().input_columns);
|
||||
}
|
||||
}
|
||||
|
||||
// Start the calculations. First, advance the partition end.
|
||||
|
@ -1,6 +1,37 @@
|
||||
-- { echo }
|
||||
-- Another test for window functions because the other one is too long.
|
||||
set allow_experimental_window_functions = 1;
|
||||
-- some craziness with a mix of materialized and unmaterialized const columns
|
||||
-- after merging sorted transform, that used to break the peer group detection in
|
||||
-- the window transform.
|
||||
CREATE TABLE order_by_const
|
||||
(
|
||||
`a` UInt64,
|
||||
`b` UInt64,
|
||||
`c` UInt64,
|
||||
`d` UInt64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (a, b)
|
||||
SETTINGS index_granularity = 8192;
|
||||
truncate table order_by_const;
|
||||
system stop merges order_by_const;
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 1, 101, 1), (1, 2, 102, 1), (1, 3, 103, 1), (1, 4, 104, 1);
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 5, 104, 1), (1, 6, 105, 1), (2, 1, 106, 2), (2, 1, 107, 2);
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (2, 2, 107, 2), (2, 3, 108, 2), (2, 4, 109, 2);
|
||||
SELECT row_number() OVER (order by 1, a) FROM order_by_const;
|
||||
1
|
||||
2
|
||||
3
|
||||
4
|
||||
5
|
||||
6
|
||||
7
|
||||
8
|
||||
9
|
||||
10
|
||||
11
|
||||
drop table order_by_const;
|
||||
-- expressions in window frame
|
||||
select count() over (rows between 1 + 1 preceding and 1 + 1 following) from numbers(10);
|
||||
3
|
||||
|
@ -2,6 +2,29 @@
|
||||
-- Another test for window functions because the other one is too long.
|
||||
set allow_experimental_window_functions = 1;
|
||||
|
||||
-- some craziness with a mix of materialized and unmaterialized const columns
|
||||
-- after merging sorted transform, that used to break the peer group detection in
|
||||
-- the window transform.
|
||||
CREATE TABLE order_by_const
|
||||
(
|
||||
`a` UInt64,
|
||||
`b` UInt64,
|
||||
`c` UInt64,
|
||||
`d` UInt64
|
||||
)
|
||||
ENGINE = MergeTree
|
||||
ORDER BY (a, b)
|
||||
SETTINGS index_granularity = 8192;
|
||||
|
||||
truncate table order_by_const;
|
||||
system stop merges order_by_const;
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 1, 101, 1), (1, 2, 102, 1), (1, 3, 103, 1), (1, 4, 104, 1);
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (1, 5, 104, 1), (1, 6, 105, 1), (2, 1, 106, 2), (2, 1, 107, 2);
|
||||
INSERT INTO order_by_const(a, b, c, d) VALUES (2, 2, 107, 2), (2, 3, 108, 2), (2, 4, 109, 2);
|
||||
SELECT row_number() OVER (order by 1, a) FROM order_by_const;
|
||||
|
||||
drop table order_by_const;
|
||||
|
||||
-- expressions in window frame
|
||||
select count() over (rows between 1 + 1 preceding and 1 + 1 following) from numbers(10);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user