Merge pull request #67661 from bigo-sg/win_bug

Fix crash on `percent_rank`
This commit is contained in:
Dmitry Novik 2024-08-08 08:28:48 +00:00 committed by GitHub
commit 486d717e88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
17 changed files with 363 additions and 230 deletions

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/window-functions/lagInFrame
sidebar_label: lagInFrame
sidebar_position: 8
sidebar_position: 9
---
# lagInFrame

View File

@ -1,7 +1,7 @@
---
slug: /en/sql-reference/window-functions/leadInFrame
sidebar_label: leadInFrame
sidebar_position: 9
sidebar_position: 10
---
# leadInFrame

View File

@ -0,0 +1,72 @@
---
slug: /en/sql-reference/window-functions/percent_rank
sidebar_label: percent_rank
sidebar_position: 8
---
# percent_rank
Returns the relative rank (i.e. percentile) of each row within its window partition, computed as `(rank - 1) / (number of rows in the partition - 1)`.
**Syntax**
Alias: `percentRank` (case-sensitive)
```sql
percent_rank ()
OVER ([[PARTITION BY grouping_column] [ORDER BY sorting_column]
[RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING]] | [window_name])
FROM table_name
WINDOW window_name as ([PARTITION BY grouping_column] [ORDER BY sorting_column] RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING)
```
The default and required window frame definition is `RANGE BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING`.
For more detail on window function syntax see: [Window Functions - Syntax](./index.md/#syntax).
**Example**
Query:
```sql
CREATE TABLE salaries
(
`team` String,
`player` String,
`salary` UInt32,
`position` String
)
Engine = Memory;
INSERT INTO salaries FORMAT Values
('Port Elizabeth Barbarians', 'Gary Chen', 195000, 'F'),
('New Coreystad Archdukes', 'Charles Juarez', 190000, 'F'),
('Port Elizabeth Barbarians', 'Michael Stanley', 150000, 'D'),
('New Coreystad Archdukes', 'Scott Harrison', 150000, 'D'),
('Port Elizabeth Barbarians', 'Robert George', 195000, 'M'),
('South Hampton Seagulls', 'Douglas Benson', 150000, 'M'),
('South Hampton Seagulls', 'James Henderson', 140000, 'M');
```
```sql
SELECT player, salary,
percent_rank() OVER (ORDER BY salary DESC) AS percent_rank
FROM salaries;
```
Result:
```response
┌─player──────────┬─salary─┬───────percent_rank─┐
1. │ Gary Chen │ 195000 │ 0 │
2. │ Robert George │ 195000 │ 0 │
3. │ Charles Juarez │ 190000 │ 0.3333333333333333 │
4. │ Michael Stanley │ 150000 │ 0.5 │
5. │ Scott Harrison │ 150000 │ 0.5 │
6. │ Douglas Benson │ 150000 │ 0.5 │
7. │ James Henderson │ 140000 │ 1 │
└─────────────────┴────────┴────────────────────┘
```

View File

@ -0,0 +1,117 @@
#pragma once
#include <AggregateFunctions/IAggregateFunction.h>
#include <Interpreters/WindowDescription.h>
#include <Common/AlignedBuffer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class WindowTransform;
// Interface for true window functions. It's not much of an interface, they just
// accept the guts of WindowTransform and do 'something'. Given a small number of
// true window functions, and the fact that the WindowTransform internals are
// pretty much well-defined in domain terms (e.g. frame boundaries), this is
// somewhat acceptable.
//
// Only windowInsertResultInto() is mandatory; every other method has a
// permissive default that implementations may override.
class IWindowFunction
{
public:
virtual ~IWindowFunction() = default;
// Must insert the result for current_row.
// `function_index` identifies which function of the transform is being
// computed (presumably an index into the transform's workspaces -- confirm
// against WindowTransform).
virtual void windowInsertResultInto(const WindowTransform * transform, size_t function_index) const = 0;
// Frame the function wants when the query does not specify one explicitly.
// The base implementation returns an empty optional, i.e. the function does
// not impose a frame of its own.
virtual std::optional<WindowFrame> getDefaultFrame() const { return {}; }
// Optional hook receiving a set of columns and a list of indices; the base
// implementation returns nullptr.
// NOTE(review): the exact contract (what is cast, and when nullptr is
// acceptable) is defined by the callers in WindowTransform -- confirm there.
virtual ColumnPtr castColumn(const Columns &, const std::vector<size_t> &) { return nullptr; }
/// Is the frame type supported by this function.
/// The base implementation accepts every frame.
virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; }
};
// Runtime data for computing one window function.
// Bundles the function object, the location of its argument columns and the
// buffer that backs its state.
struct WindowFunctionWorkspace
{
// The function being computed, seen through the aggregate function interface.
AggregateFunctionPtr aggregate_function;
// Cached value of aggregate function isState virtual method
bool is_aggregate_function_state = false;
// This field is set for pure window functions. When set, we ignore the
// window_function.aggregate_function, and work through this interface
// instead.
IWindowFunction * window_function_impl = nullptr;
// Indices of the argument columns.
// NOTE(review): presumably positions within the input block -- confirm
// against WindowTransform.
std::vector<size_t> argument_column_indices;
// Will not be initialized for a pure window function.
// Declared mutable, so the state can be reached through a const workspace
// (StatefulWindowFunction::getState takes the workspace by const reference).
mutable AlignedBuffer aggregate_function_state;
// Argument columns. Be careful, this is a per-block cache.
std::vector<const IColumn *> argument_columns;
// Number of the block the cache above refers to. Starts at the maximum
// UInt64 value -- presumably a sentinel for "nothing cached yet"; confirm.
UInt64 cached_block_number = std::numeric_limits<UInt64>::max();
};
// Common base for true window functions. It is registered through the aggregate
// function machinery (IAggregateFunctionHelper) so it can travel the same code
// paths, but it keeps no aggregation state and throws from every method that
// would evaluate it as a regular aggregate. The actual work is done through
// the IWindowFunction interface.
struct WindowFunction : public IAggregateFunctionHelper<WindowFunction>, public IWindowFunction
{
    // Name reported by getName() and used in the error message of fail().
    std::string name;

    WindowFunction(
        const std::string & name_,
        const DataTypes & argument_types_,
        const Array & parameters_,
        const DataTypePtr & result_type_)
        : IAggregateFunctionHelper<WindowFunction>(argument_types_, parameters_, result_type_)
        , name(name_)
    {
    }

    // --- Identity ---------------------------------------------------------
    String getName() const override { return name; }
    bool isOnlyWindowFunction() const override { return true; }

    // Always throws BAD_ARGUMENTS: reached only when the function is used as
    // an aggregate function instead of a window function.
    [[noreturn]] void fail() const
    {
        throw Exception(
            ErrorCodes::BAD_ARGUMENTS,
            "The function '{}' can only be used as a window function, not as an aggregate function",
            getName());
    }

    // --- State management: there is no state, so these are no-ops ----------
    size_t sizeOfData() const override { return 0; }
    size_t alignOfData() const override { return 1; }
    bool hasTrivialDestructor() const override { return true; }
    void create(AggregateDataPtr __restrict) const override { }
    void destroy(AggregateDataPtr __restrict) const noexcept override { }

    // --- Aggregate evaluation: not supported, every entry point fails ------
    void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
    void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
    void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
    void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t>, Arena *) const override { fail(); }
    void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
};
// A true window function that carries a state object of type State.
// The state is constructed and destroyed in place through the aggregate
// function interface (create()/destroy()) and is read back from the
// workspace's aggregate_function_state buffer via getState().
template <typename State>
struct StatefulWindowFunction : public WindowFunction
{
    StatefulWindowFunction(
        const std::string & name_,
        const DataTypes & argument_types_,
        const Array & parameters_,
        const DataTypePtr & result_type_)
        : WindowFunction(name_, argument_types_, parameters_, result_type_)
    {
    }

    size_t sizeOfData() const override { return sizeof(State); }

    // Report the real alignment requirement of State. create() placement-
    // constructs a State at `place`, which is only well-defined when that
    // storage is suitably aligned; advertising an alignment of 1 would allow
    // the owner of the storage to hand out a misaligned address.
    size_t alignOfData() const override { return alignof(State); }

    // Constructs a value-initialized State in the caller-provided storage.
    void create(AggregateDataPtr __restrict place) const override { new (place) State(); }

    // Runs the State destructor in place; the storage itself is owned by the caller.
    void destroy(AggregateDataPtr __restrict place) const noexcept override { reinterpret_cast<State *>(place)->~State(); }

    bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v<State>; }

    // Returns the State stored in the workspace buffer. The workspace is taken
    // by const reference; the buffer member is declared mutable, so the state
    // can still be modified through the returned reference.
    State & getState(const WindowFunctionWorkspace & workspace) const
    {
        return *reinterpret_cast<State *>(workspace.aggregate_function_state.data());
    }
};
}

View File

@ -38,6 +38,7 @@
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/parseAggregateFunctionParameters.h>
#include <AggregateFunctions/WindowFunction.h>
#include <Storages/StorageDistributed.h>
#include <Storages/StorageDictionary.h>
@ -590,6 +591,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAG & actions, Aggrega
void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_,
const WindowDescriptions & existing_descriptions,
AggregateFunctionPtr aggregate_function,
WindowDescription & desc, const IAST * ast)
{
const auto & definition = ast->as<const ASTWindowDefinition &>();
@ -698,7 +700,21 @@ void ExpressionAnalyzer::makeWindowDescriptionFromAST(const Context & context_,
ast->formatForErrorMessage());
}
const auto * window_function = aggregate_function ? dynamic_cast<const IWindowFunction *>(aggregate_function.get()) : nullptr;
desc.frame.is_default = definition.frame_is_default;
if (desc.frame.is_default && window_function)
{
auto default_window_frame_opt = window_function->getDefaultFrame();
if (default_window_frame_opt)
{
desc.frame = *default_window_frame_opt;
/// Append the default frame description to window_name, make sure it will be put into
/// a proper window description.
desc.window_name += " " + desc.frame.toString();
return;
}
}
desc.frame.type = definition.frame_type;
desc.frame.begin_type = definition.frame_begin_type;
desc.frame.begin_preceding = definition.frame_begin_preceding;
@ -734,7 +750,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions)
WindowDescription desc;
desc.window_name = elem.name;
makeWindowDescriptionFromAST(*current_context, window_descriptions,
desc, elem.definition.get());
nullptr, desc, elem.definition.get());
auto [it, inserted] = window_descriptions.insert(
{elem.name, std::move(desc)});
@ -821,12 +837,12 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAG & actions)
WindowDescription desc;
desc.window_name = default_window_name;
makeWindowDescriptionFromAST(*current_context, window_descriptions,
desc, &definition);
window_function.aggregate_function, desc, &definition);
auto full_sort_description = desc.full_sort_description;
auto [it, inserted] = window_descriptions.insert(
{default_window_name, std::move(desc)});
{desc.window_name, std::move(desc)});
if (!inserted)
{

View File

@ -135,7 +135,12 @@ public:
/// A list of windows for window functions.
const WindowDescriptions & windowDescriptions() const { return window_descriptions; }
void makeWindowDescriptionFromAST(const Context & context, const WindowDescriptions & existing_descriptions, WindowDescription & desc, const IAST * ast);
void makeWindowDescriptionFromAST(
const Context & context,
const WindowDescriptions & existing_descriptions,
AggregateFunctionPtr aggregate_function,
WindowDescription & desc,
const IAST * ast);
void makeWindowDescriptions(ActionsDAG & actions);
/** Checks if subquery is not a plain StorageSet.

View File

@ -1,5 +1,6 @@
#include <Planner/PlannerActionsVisitor.h>
#include <AggregateFunctions/WindowFunction.h>
#include <Analyzer/Utils.h>
#include <Analyzer/SetUtils.h>
#include <Analyzer/ConstantNode.h>
@ -237,7 +238,7 @@ public:
if (function_node.isWindowFunction())
{
buffer << " OVER (";
buffer << calculateWindowNodeActionName(function_node.getWindowNode());
buffer << calculateWindowNodeActionName(node, function_node.getWindowNode());
buffer << ')';
}
@ -298,21 +299,22 @@ public:
return calculateConstantActionNodeName(constant_literal, applyVisitor(FieldToDataType(), constant_literal));
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node)
String calculateWindowNodeActionName(const QueryTreeNodePtr & function_nodew_node_, const QueryTreeNodePtr & window_node_)
{
auto & window_node = node->as<WindowNode &>();
const auto & function_node = function_nodew_node_->as<const FunctionNode&>();
const auto & window_node = window_node_->as<const WindowNode &>();
WriteBufferFromOwnString buffer;
if (window_node.hasPartitionBy())
{
buffer << "PARTITION BY ";
auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
const auto & partition_by_nodes = window_node.getPartitionBy().getNodes();
size_t partition_by_nodes_size = partition_by_nodes.size();
for (size_t i = 0; i < partition_by_nodes_size; ++i)
{
auto & partition_by_node = partition_by_nodes[i];
const auto & partition_by_node = partition_by_nodes[i];
buffer << calculateActionNodeName(partition_by_node);
if (i + 1 != partition_by_nodes_size)
buffer << ", ";
@ -326,7 +328,7 @@ public:
buffer << "ORDER BY ";
auto & order_by_nodes = window_node.getOrderBy().getNodes();
const auto & order_by_nodes = window_node.getOrderBy().getNodes();
size_t order_by_nodes_size = order_by_nodes.size();
for (size_t i = 0; i < order_by_nodes_size; ++i)
@ -364,44 +366,14 @@ public:
}
}
auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
auto window_frame_opt = extractWindowFrame(function_node);
if (window_frame_opt)
{
auto & window_frame = *window_frame_opt;
if (window_node.hasPartitionBy() || window_node.hasOrderBy())
buffer << ' ';
buffer << window_frame.type << " BETWEEN ";
if (window_frame.begin_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.begin_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameBeginOffsetNode());
buffer << " " << (window_frame.begin_preceding ? "PRECEDING" : "FOLLOWING");
}
buffer << " AND ";
if (window_frame.end_type == WindowFrame::BoundaryType::Current)
{
buffer << "CURRENT ROW";
}
else if (window_frame.end_type == WindowFrame::BoundaryType::Unbounded)
{
buffer << "UNBOUNDED";
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
else
{
buffer << calculateActionNodeName(window_node.getFrameEndOffsetNode());
buffer << " " << (window_frame.end_preceding ? "PRECEDING" : "FOLLOWING");
}
window_frame.toString(buffer);
}
return buffer.str();
@ -1056,20 +1028,11 @@ String calculateConstantActionNodeName(const Field & constant_literal)
return ActionNodeNameHelper::calculateConstantActionNodeName(constant_literal);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name)
{
ActionNodeNameHelper helper(node_to_name, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
}
String calculateWindowNodeActionName(const QueryTreeNodePtr & node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
String calculateWindowNodeActionName(const QueryTreeNodePtr & function_node, const QueryTreeNodePtr & window_node, const PlannerContext & planner_context, bool use_column_identifier_as_action_node_name)
{
QueryTreeNodeToName empty_map;
ActionNodeNameHelper helper(empty_map, planner_context, use_column_identifier_as_action_node_name);
return helper.calculateWindowNodeActionName(node);
return helper.calculateWindowNodeActionName(function_node, window_node);
}
}

View File

@ -1,5 +1,6 @@
#pragma once
#include <optional>
#include <Core/Names.h>
#include <Core/NamesAndTypes.h>
@ -8,6 +9,7 @@
#include <Analyzer/IQueryTreeNode.h>
#include <Interpreters/ActionsDAG.h>
#include <Interpreters/WindowDescription.h>
namespace DB
{
@ -73,16 +75,8 @@ String calculateConstantActionNodeName(const Field & constant_literal);
* Window node action name can only be part of window function action name.
* For column node column node identifier from planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
const PlannerContext & planner_context,
QueryTreeNodeToName & node_to_name,
bool use_column_identifier_as_action_node_name = true);
/** Calculate action node name for window node.
* Window node action name can only be part of window function action name.
* For column node column node identifier from planner context is used, if use_column_identifier_as_action_node_name = true.
*/
String calculateWindowNodeActionName(const QueryTreeNodePtr & node,
String calculateWindowNodeActionName(const QueryTreeNodePtr & function_node,
const QueryTreeNodePtr & window_node,
const PlannerContext & planner_context,
bool use_column_identifier_as_action_node_name = true);

View File

@ -1,5 +1,7 @@
#include <optional>
#include <Planner/PlannerWindowFunctions.h>
#include <AggregateFunctions/WindowFunction.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/WindowNode.h>
@ -8,8 +10,9 @@
#include <Interpreters/Context.h>
#include <Planner/PlannerSorting.h>
#include <Planner/PlannerActionsVisitor.h>
#include <Planner/PlannerSorting.h>
#include <Planner/Utils.h>
namespace DB
{
@ -22,27 +25,33 @@ namespace ErrorCodes
namespace
{
WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & node, const PlannerContext & planner_context)
WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr & func_node_, const PlannerContext & planner_context)
{
const auto & func_node = func_node_->as<FunctionNode &>();
auto node = func_node.getWindowNode();
auto & window_node = node->as<WindowNode &>();
WindowDescription window_description;
window_description.window_name = calculateWindowNodeActionName(node, planner_context);
window_description.window_name = calculateWindowNodeActionName(func_node_, node, planner_context);
for (const auto & partition_by_node : window_node.getPartitionBy().getNodes())
{
auto partition_by_node_action_name = calculateActionNodeName(partition_by_node, planner_context);
auto partition_by_sort_column_description = SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */);
auto partition_by_sort_column_description
= SortColumnDescription(partition_by_node_action_name, 1 /* direction */, 1 /* nulls_direction */);
window_description.partition_by.push_back(std::move(partition_by_sort_column_description));
}
window_description.order_by = extractSortDescription(window_node.getOrderByNode(), planner_context);
window_description.full_sort_description = window_description.partition_by;
window_description.full_sort_description.insert(window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end());
window_description.full_sort_description.insert(
window_description.full_sort_description.end(), window_description.order_by.begin(), window_description.order_by.end());
/// WINDOW frame is validated during query analysis stage
window_description.frame = window_node.getWindowFrame();
auto window_frame = extractWindowFrame(func_node);
window_description.frame = window_frame ? *window_frame : window_node.getWindowFrame();
auto node_frame = window_node.getWindowFrame();
const auto & query_context = planner_context.getQueryContext();
const auto & query_context_settings = query_context->getSettingsRef();
@ -64,7 +73,8 @@ WindowDescription extractWindowDescriptionFromWindowNode(const QueryTreeNodePtr
}
std::vector<WindowDescription> extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context)
std::vector<WindowDescription>
extractWindowDescriptions(const QueryTreeNodes & window_function_nodes, const PlannerContext & planner_context)
{
std::unordered_map<std::string, WindowDescription> window_name_to_description;
@ -72,7 +82,7 @@ std::vector<WindowDescription> extractWindowDescriptions(const QueryTreeNodes &
{
auto & window_function_node_typed = window_function_node->as<FunctionNode &>();
auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node_typed.getWindowNode(), planner_context);
auto function_window_description = extractWindowDescriptionFromWindowNode(window_function_node, planner_context);
auto frame_type = function_window_description.frame.type;
if (frame_type != WindowFrame::FrameType::ROWS && frame_type != WindowFrame::FrameType::RANGE)

View File

@ -22,6 +22,8 @@
#include <Interpreters/Context.h>
#include <AggregateFunctions/WindowFunction.h>
#include <Analyzer/Utils.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/ColumnNode.h>
@ -34,6 +36,7 @@
#include <Analyzer/JoinNode.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Analyzer/Passes/QueryAnalysisPass.h>
#include <Analyzer/WindowNode.h>
#include <Core/Settings.h>
@ -507,4 +510,20 @@ void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets)
}
}
std::optional<WindowFrame> extractWindowFrame(const FunctionNode & node)
{
if (!node.isWindowFunction())
return {};
auto & window_node = node.getWindowNode()->as<WindowNode &>();
const auto & window_frame = window_node.getWindowFrame();
if (!window_frame.is_default)
return window_frame;
auto aggregate_function = node.getAggregateFunction();
if (const auto * win_func = dynamic_cast<const IWindowFunction *>(aggregate_function.get()))
{
return win_func->getDefaultFrame();
}
return {};
}
}

View File

@ -19,6 +19,8 @@
#include <Storages/SelectQueryInfo.h>
#include <Interpreters/WindowDescription.h>
namespace DB
{
@ -91,4 +93,9 @@ ASTPtr parseAdditionalResultFilter(const Settings & settings);
using UsefulSets = std::unordered_set<FutureSetPtr>;
void appendSetsFromActionsDAG(const ActionsDAG & dag, UsefulSets & useful_sets);
/// If the window frame is set explicitly in the SQL, it is always used.
/// Otherwise, the default frame of the window function is used, if the function defines one.
/// Returns an empty optional when neither applies.
std::optional<WindowFrame> extractWindowFrame(const FunctionNode & node);
}

View File

@ -65,28 +65,6 @@ namespace ErrorCodes
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
}
// Interface for true window functions. It's not much of an interface, they just
// accept the guts of WindowTransform and do 'something'. Given a small number of
// true window functions, and the fact that the WindowTransform internals are
// pretty much well-defined in domain terms (e.g. frame boundaries), this is
// somewhat acceptable.
class IWindowFunction
{
public:
virtual ~IWindowFunction() = default;
// Must insert the result for current_row.
virtual void windowInsertResultInto(const WindowTransform * transform,
size_t function_index) const = 0;
virtual std::optional<WindowFrame> getDefaultFrame() const { return {}; }
virtual ColumnPtr castColumn(const Columns &, const std::vector<size_t> &) { return nullptr; }
/// Is the frame type supported by this function.
virtual bool checkWindowFrameType(const WindowTransform * /*transform*/) const { return true; }
};
// Compares ORDER BY column values at given rows to find the boundaries of frame:
// [compared] with [reference] +/- offset. Return value is -1/0/+1, like in
// sorting predicates -- -1 means [compared] is less than [reference] +/- offset.
@ -1523,41 +1501,6 @@ void WindowTransform::work()
}
}
// A basic implementation for a true window function. It pretends to be an
// aggregate function, but refuses to work as such.
struct WindowFunction
: public IAggregateFunctionHelper<WindowFunction>
, public IWindowFunction
{
std::string name;
WindowFunction(const std::string & name_, const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
: IAggregateFunctionHelper<WindowFunction>(argument_types_, parameters_, result_type_)
, name(name_)
{}
bool isOnlyWindowFunction() const override { return true; }
[[noreturn]] void fail() const
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The function '{}' can only be used as a window function, not as an aggregate function",
getName());
}
String getName() const override { return name; }
void create(AggregateDataPtr __restrict) const override {}
void destroy(AggregateDataPtr __restrict) const noexcept override {}
bool hasTrivialDestructor() const override { return true; }
size_t sizeOfData() const override { return 0; }
size_t alignOfData() const override { return 1; }
void add(AggregateDataPtr __restrict, const IColumn **, size_t, Arena *) const override { fail(); }
void merge(AggregateDataPtr __restrict, ConstAggregateDataPtr, Arena *) const override { fail(); }
void serialize(ConstAggregateDataPtr __restrict, WriteBuffer &, std::optional<size_t>) const override { fail(); }
void deserialize(AggregateDataPtr __restrict, ReadBuffer &, std::optional<size_t>, Arena *) const override { fail(); }
void insertResultInto(AggregateDataPtr __restrict, IColumn &, Arena *) const override { fail(); }
};
struct WindowFunctionRank final : public WindowFunction
{
WindowFunctionRank(const std::string & name_,
@ -1669,36 +1612,6 @@ struct WindowFunctionHelpers
}
};
template<typename State>
struct StatefulWindowFunction : public WindowFunction
{
StatefulWindowFunction(const std::string & name_,
const DataTypes & argument_types_, const Array & parameters_, const DataTypePtr & result_type_)
: WindowFunction(name_, argument_types_, parameters_, result_type_)
{
}
size_t sizeOfData() const override { return sizeof(State); }
size_t alignOfData() const override { return 1; }
void create(AggregateDataPtr __restrict place) const override
{
new (place) State();
}
void destroy(AggregateDataPtr __restrict place) const noexcept override
{
reinterpret_cast<State *>(place)->~State();
}
bool hasTrivialDestructor() const override { return std::is_trivially_destructible_v<State>; }
State & getState(const WindowFunctionWorkspace & workspace) const
{
return *reinterpret_cast<State *>(workspace.aggregate_function_state.data());
}
};
struct ExponentialTimeDecayedSumState
{
Float64 previous_time;
@ -2278,14 +2191,13 @@ public:
bool checkWindowFrameType(const WindowTransform * transform) const override
{
if (transform->window_description.frame.type != WindowFrame::FrameType::RANGE
|| transform->window_description.frame.begin_type != WindowFrame::BoundaryType::Unbounded
|| transform->window_description.frame.end_type != WindowFrame::BoundaryType::Current)
{
LOG_ERROR(
getLogger("WindowFunctionPercentRank"),
"Window frame for function 'percent_rank' should be 'RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT'");
return false;
auto default_window_frame = getDefaultFrame();
if (transform->window_description.frame != default_window_frame)
{
LOG_ERROR(
getLogger("WindowFunctionPercentRank"),
"Window frame for function 'percent_rank' should be '{}'", default_window_frame->toString());
return false;
}
return true;
}
@ -2295,7 +2207,7 @@ public:
WindowFrame frame;
frame.type = WindowFrame::FrameType::RANGE;
frame.begin_type = WindowFrame::BoundaryType::Unbounded;
frame.end_type = WindowFrame::BoundaryType::Current;
frame.end_type = WindowFrame::BoundaryType::Unbounded;
return frame;
}
@ -2860,5 +2772,4 @@ void registerWindowFunctions(AggregateFunctionFactory & factory)
name, argument_types, parameters);
}, properties});
}
}

View File

@ -1,6 +1,7 @@
#pragma once
#include <Interpreters/WindowDescription.h>
#include <AggregateFunctions/WindowFunction.h>
#include <Processors/IProcessor.h>
@ -21,30 +22,6 @@ using ExpressionActionsPtr = std::shared_ptr<ExpressionActions>;
class Arena;
class IWindowFunction;
// Runtime data for computing one window function.
struct WindowFunctionWorkspace
{
AggregateFunctionPtr aggregate_function;
// Cached value of aggregate function isState virtual method
bool is_aggregate_function_state = false;
// This field is set for pure window functions. When set, we ignore the
// window_function.aggregate_function, and work through this interface
// instead.
IWindowFunction * window_function_impl = nullptr;
std::vector<size_t> argument_column_indices;
// Will not be initialized for a pure window function.
mutable AlignedBuffer aggregate_function_state;
// Argument columns. Be careful, this is a per-block cache.
std::vector<const IColumn *> argument_columns;
UInt64 cached_block_number = std::numeric_limits<UInt64>::max();
};
struct WindowTransformBlock
{

View File

@ -79,16 +79,3 @@ iPhone 900 Smartphone 500 500
Kindle Fire 150 Tablet 150 350
Samsung Galaxy Tab 200 Tablet 175 350
iPad 700 Tablet 350 350
---- Q8 ----
Lenovo Thinkpad Laptop 700 1 0
Sony VAIO Laptop 700 1 0
Dell Vostro Laptop 800 3 0.6666666666666666
HP Elite Laptop 1200 4 1
Microsoft Lumia Smartphone 200 1 0
HTC One Smartphone 400 2 0.3333333333333333
Nexus Smartphone 500 3 0.6666666666666666
iPhone Smartphone 900 4 1
Kindle Fire Tablet 150 1 0
Samsung Galaxy Tab Tablet 200 2 0.5
iPad Tablet 700 3 1
Others Unknow 200 1 0

View File

@ -101,26 +101,7 @@ SELECT
FROM products INNER JOIN product_groups USING (group_id)) t
order by group_name, product_name, price;
select '---- Q8 ----';
INSERT INTO product_groups VALUES (4, 'Unknow');
INSERT INTO products (product_id,product_name, group_id,price) VALUES (12, 'Others', 4, 200);
SELECT *
FROM
(
SELECT
product_name,
group_name,
price,
rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank,
percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent
FROM products
INNER JOIN product_groups USING (group_id)
) AS t
ORDER BY
group_name ASC,
price ASC,
product_name ASC;
drop table product_groups;
drop table products;

View File

@ -0,0 +1,22 @@
Lenovo Thinkpad Laptop 700 1 0
Sony VAIO Laptop 700 1 0
Dell Vostro Laptop 800 3 0.6666666666666666
HP Elite Laptop 1200 4 1
Microsoft Lumia Smartphone 200 1 0
HTC One Smartphone 400 2 0.3333333333333333
Nexus Smartphone 500 3 0.6666666666666666
iPhone Smartphone 900 4 1
Kindle Fire Tablet 150 1 0
Samsung Galaxy Tab Tablet 200 2 0.5
iPad Tablet 700 3 1
Others Unknow 200 1 0
0 1 0
1 2 1
2 3 2
3 4 3
4 5 4
5 6 5
6 7 6
7 8 7
8 9 8
9 10 9

View File

@ -0,0 +1,52 @@
-- Fixture: products and the groups they belong to.
-- Every INSERT names its target columns, so the statements do not depend on the
-- declaration order of the table columns.
-- NOTE: each INSERT ... VALUES is kept on a single line on purpose; inline data
-- split across lines may not be read as one statement by the test client.
DROP TABLE IF EXISTS product_groups;
DROP TABLE IF EXISTS products;

CREATE TABLE product_groups
(
    group_id Int64,
    group_name String
)
ENGINE = Memory;

CREATE TABLE products
(
    product_id Int64,
    product_name String,
    price DECIMAL(11, 2),
    group_id Int64
)
ENGINE = Memory;

INSERT INTO product_groups (group_id, group_name) VALUES (1, 'Smartphone'), (2, 'Laptop'), (3, 'Tablet');

INSERT INTO products (product_id, product_name, group_id, price) VALUES (1, 'Microsoft Lumia', 1, 200), (2, 'HTC One', 1, 400), (3, 'Nexus', 1, 500), (4, 'iPhone', 1, 900), (5, 'HP Elite', 2, 1200), (6, 'Lenovo Thinkpad', 2, 700), (7, 'Sony VAIO', 2, 700), (8, 'Dell Vostro', 2, 800), (9, 'iPad', 3, 700), (10, 'Kindle Fire', 3, 150), (11, 'Samsung Galaxy Tab', 3, 200);

-- A group holding exactly one product, i.e. a single-row window partition.
INSERT INTO product_groups (group_id, group_name) VALUES (4, 'Unknow');
INSERT INTO products (product_id, product_name, group_id, price) VALUES (12, 'Others', 4, 200);
-- rank() and percent_rank() per product group over the joined fixture tables.
-- The outer ORDER BY ends with product_name so that rows sharing the same
-- price inside a group are emitted in a fixed order.
SELECT
    product_name,
    group_name,
    price,
    rank,
    percent
FROM
(
    SELECT
        product_name,
        group_name,
        price,
        rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS rank,
        percent_rank() OVER (PARTITION BY group_name ORDER BY price ASC) AS percent
    FROM products
    INNER JOIN product_groups USING (group_id)
) AS t
ORDER BY
    group_name ASC,
    price ASC,
    product_name ASC;

-- The fixture tables are not used by anything below.
DROP TABLE product_groups;
DROP TABLE products;
-- percent_rank() next to row_number() over 10000 rows, with max_block_size
-- set to 100. The ratio is scaled by 10000 and cast to Int32 so the reference
-- output contains integers only.
SELECT
    number,
    row_number,
    CAST(percent_rank * 10000 AS Int32) AS percent_rank
FROM
(
    SELECT
        number,
        row_number() OVER () AS row_number,
        percent_rank() OVER (ORDER BY number) AS percent_rank
    FROM numbers(10000)
    ORDER BY number
    LIMIT 10
)
SETTINGS max_block_size = 100;