Merge pull request #40762 from ClickHouse/grouping-comp

Fix GROUPING function SQL compatibility
This commit is contained in:
Dmitry Novik 2022-09-13 17:03:06 +02:00 committed by GitHub
commit 7e3fb0a681
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 140 additions and 53 deletions

View File

@ -618,6 +618,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
M(Bool, allow_deprecated_database_ordinary, false, "Allow to create databases with deprecated Ordinary engine", 0) \
M(Bool, allow_deprecated_syntax_for_merge_tree, false, "Allow to create *MergeTree tables with deprecated engine definition syntax", 0) \
\
M(Bool, force_grouping_standard_compatibility, true, "Make GROUPING function to return 1 when argument is not used as an aggregation key", 0) \
\
M(Bool, schema_inference_use_cache_for_file, true, "Use cache in schema inference while using file table function", 0) \
M(Bool, schema_inference_use_cache_for_s3, true, "Use cache in schema inference while using s3 table function", 0) \
M(Bool, schema_inference_use_cache_for_hdfs, true, "Use cache in schema inference while using hdfs table function", 0) \

View File

@ -78,6 +78,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"22.9", {{"force_grouping_standard_compatibility", false, true, "Make GROUPING function output the same as in SQL standard and other DBMS"}}},
{"22.7", {{"cross_to_inner_join_rewrite", 1, 2, "Force rewrite comma join to inner"},
{"enable_positional_arguments", false, true, "Enable positional arguments feature by default"},
{"format_csv_allow_single_quotes", true, false, "Most tools don't treat single quote in CSV specially, don't do it by default too"}}},

View File

@ -1,9 +1,9 @@
#pragma once
#include <base/defines.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnFixedString.h>
#include <Core/ColumnNumbers.h>
#include <DataTypes/DataTypesNumber.h>
#include <Functions/FunctionFactory.h>
@ -19,10 +19,17 @@ protected:
static constexpr UInt64 ONE = 1;
const ColumnNumbers arguments_indexes;
// The initial implementation of the GROUPING function returned 1 if the argument is used as an aggregation key.
// This differs from the behavior described in the SQL standard and in other DBMS, where GROUPING returns 1
// when the argument is NOT used as an aggregation key.
// When force_compatibility is true the standard-compatible result is produced, otherwise the initial one.
const bool force_compatibility;
// Lookup tables that translate the result of the aggregation-key check (index 0 = false, index 1 = true)
// into the bit emitted for an argument: COMPATIBLE_MODE inverts the check, INCOMPATIBLE_MODE keeps it as is.
static constexpr UInt64 COMPATIBLE_MODE[] = {1, 0};
static constexpr UInt64 INCOMPATIBLE_MODE[] = {0, 1};
public:
FunctionGroupingBase(ColumnNumbers arguments_indexes_)
FunctionGroupingBase(ColumnNumbers arguments_indexes_, bool force_compatibility_)
: arguments_indexes(std::move(arguments_indexes_))
, force_compatibility(force_compatibility_)
{}
bool isVariadic() const override { return true; }
@ -48,13 +55,15 @@ public:
auto result = ColumnUInt64::create();
auto & result_data = result->getData();
result_data.reserve(input_rows_count);
const auto * result_table = likely(force_compatibility) ? COMPATIBLE_MODE : INCOMPATIBLE_MODE;
for (size_t i = 0; i < input_rows_count; ++i)
{
UInt64 set_index = grouping_set_column->getElement(i);
UInt64 value = 0;
for (auto index : arguments_indexes)
value = (value << 1) + (checker(set_index, index) ? 1 : 0);
value = (value << 1) + result_table[checker(set_index, index) ? 1 : 0];
result_data.push_back(value);
}
@ -65,14 +74,16 @@ public:
class FunctionGroupingOrdinary : public FunctionGroupingBase
{
public:
explicit FunctionGroupingOrdinary(ColumnNumbers arguments_indexes_)
: FunctionGroupingBase(std::move(arguments_indexes_))
FunctionGroupingOrdinary(ColumnNumbers arguments_indexes_, bool force_compatibility_)
: FunctionGroupingBase(std::move(arguments_indexes_), force_compatibility_)
{}
String getName() const override { return "groupingOrdinary"; }
ColumnPtr executeImpl(const ColumnsWithTypeAndName &, const DataTypePtr &, size_t input_rows_count) const override
{
    /// The result is the same constant for every row:
    /// 0 in standard-compatible mode, otherwise a mask with one bit set per GROUPING argument.
    const UInt64 fill_value = likely(force_compatibility)
        ? UInt64{0}
        : (ONE << arguments_indexes.size()) - 1;

    return ColumnUInt64::create(input_rows_count, fill_value);
}
@ -83,8 +94,8 @@ class FunctionGroupingForRollup : public FunctionGroupingBase
const UInt64 aggregation_keys_number;
public:
FunctionGroupingForRollup(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_)
: FunctionGroupingBase(std::move(arguments_indexes_))
FunctionGroupingForRollup(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_, bool force_compatibility_)
: FunctionGroupingBase(std::move(arguments_indexes_), force_compatibility_)
, aggregation_keys_number(aggregation_keys_number_)
{}
@ -113,8 +124,8 @@ class FunctionGroupingForCube : public FunctionGroupingBase
public:
FunctionGroupingForCube(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_)
: FunctionGroupingBase(arguments_indexes_)
FunctionGroupingForCube(ColumnNumbers arguments_indexes_, UInt64 aggregation_keys_number_, bool force_compatibility_)
: FunctionGroupingBase(arguments_indexes_, force_compatibility_)
, aggregation_keys_number(aggregation_keys_number_)
{}
@ -142,8 +153,8 @@ class FunctionGroupingForGroupingSets : public FunctionGroupingBase
{
ColumnNumbersSetList grouping_sets;
public:
FunctionGroupingForGroupingSets(ColumnNumbers arguments_indexes_, ColumnNumbersList const & grouping_sets_)
: FunctionGroupingBase(std::move(arguments_indexes_))
FunctionGroupingForGroupingSets(ColumnNumbers arguments_indexes_, ColumnNumbersList const & grouping_sets_, bool force_compatibility_)
: FunctionGroupingBase(std::move(arguments_indexes_), force_compatibility_)
{
for (auto const & set : grouping_sets_)
grouping_sets.emplace_back(set.begin(), set.end());

View File

@ -880,20 +880,20 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
{
case GroupByKind::GROUPING_SETS:
{
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForGroupingSets>(std::move(arguments_indexes), keys_info.grouping_set_keys)), { "__grouping_set" }, column_name);
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForGroupingSets>(std::move(arguments_indexes), keys_info.grouping_set_keys, data.getContext()->getSettingsRef().force_grouping_standard_compatibility)), { "__grouping_set" }, column_name);
break;
}
case GroupByKind::ROLLUP:
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForRollup>(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name);
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForRollup>(std::move(arguments_indexes), aggregation_keys_number, data.getContext()->getSettingsRef().force_grouping_standard_compatibility)), { "__grouping_set" }, column_name);
break;
case GroupByKind::CUBE:
{
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForCube>(std::move(arguments_indexes), aggregation_keys_number)), { "__grouping_set" }, column_name);
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingForCube>(std::move(arguments_indexes), aggregation_keys_number, data.getContext()->getSettingsRef().force_grouping_standard_compatibility)), { "__grouping_set" }, column_name);
break;
}
case GroupByKind::ORDINARY:
{
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingOrdinary>(std::move(arguments_indexes))), {}, column_name);
data.addFunction(std::make_shared<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionGroupingOrdinary>(std::move(arguments_indexes), data.getContext()->getSettingsRef().force_grouping_standard_compatibility)), {}, column_name);
break;
}
default:

View File

@ -8,7 +8,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 1
0 1
0 2
@ -30,7 +31,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 1
0 2
0 2
@ -52,7 +54,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 1
0 1
@ -73,7 +76,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, grouping(number, number % 2) = 1;
ORDER BY number, grouping(number, number % 2) = 1
SETTINGS force_grouping_standard_compatibility=0;
0
0
0
@ -97,7 +101,8 @@ GROUP BY
(number, number % 2),
()
)
ORDER BY (gr, number);
ORDER BY (gr, number)
SETTINGS force_grouping_standard_compatibility=0;
0 10 0
0 1 2
1 1 2
@ -129,7 +134,7 @@ GROUP BY
)
HAVING grouping(number, number % 2) = 2
ORDER BY number
SETTINGS enable_optimize_predicate_expression = 0;
SETTINGS enable_optimize_predicate_expression = 0, force_grouping_standard_compatibility=0;
0
1
2
@ -150,7 +155,7 @@ GROUP BY
)
HAVING grouping(number, number % 2) = 1
ORDER BY number
SETTINGS enable_optimize_predicate_expression = 0;
SETTINGS enable_optimize_predicate_expression = 0, force_grouping_standard_compatibility=0;
0
0
SELECT
@ -161,7 +166,8 @@ GROUP BY
GROUPING SETS (
(number),
(number % 2))
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 1
0 1

View File

@ -19,7 +19,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -30,7 +31,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -41,7 +43,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number
@ -51,7 +54,8 @@ GROUP BY
(number),
(number % 2)
)
ORDER BY number, grouping(number, number % 2) = 1;
ORDER BY number, grouping(number, number % 2) = 1
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -64,7 +68,8 @@ GROUP BY
(number, number % 2),
()
)
ORDER BY (gr, number);
ORDER BY (gr, number)
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number
@ -76,7 +81,7 @@ GROUP BY
)
HAVING grouping(number, number % 2) = 2
ORDER BY number
SETTINGS enable_optimize_predicate_expression = 0;
SETTINGS enable_optimize_predicate_expression = 0, force_grouping_standard_compatibility=0;
SELECT
number
@ -88,7 +93,7 @@ GROUP BY
)
HAVING grouping(number, number % 2) = 1
ORDER BY number
SETTINGS enable_optimize_predicate_expression = 0;
SETTINGS enable_optimize_predicate_expression = 0, force_grouping_standard_compatibility=0;
SELECT
number,
@ -98,4 +103,5 @@ GROUP BY
GROUPING SETS (
(number),
(number % 2))
ORDER BY number, gr;
ORDER BY number, gr
SETTINGS force_grouping_standard_compatibility=0;

View File

@ -6,7 +6,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;
ORDER BY number
SETTINGS force_grouping_standard_compatibility=0;
0 1
1 1
2 1
@ -25,7 +26,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;
ORDER BY number
SETTINGS force_grouping_standard_compatibility=0;
0 1 1
1 1 1
2 1 1
@ -45,7 +47,8 @@ GROUP BY
number % 2
WITH ROLLUP
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 2
0 3
@ -74,7 +77,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
ROLLUP(number, number % 2)
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 2
0 3
@ -105,7 +109,8 @@ GROUP BY
number % 2
WITH CUBE
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 1
0 1
@ -136,7 +141,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
CUBE(number, number % 2)
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 1
0 1
@ -168,7 +174,8 @@ GROUP BY
CUBE(number, number % 2)
HAVING grouping(number) != 0
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 5
0 6
1 5
@ -205,7 +212,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
CUBE(number, number % 2) WITH TOTALS
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 1
0 1
@ -247,7 +255,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
ROLLUP(number, number % 2) WITH TOTALS
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
0 0
0 2
0 3

View File

@ -15,7 +15,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;
ORDER BY number
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -25,7 +26,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
number,
number % 2
ORDER BY number;
ORDER BY number
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -36,7 +38,8 @@ GROUP BY
number % 2
WITH ROLLUP
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -45,7 +48,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
ROLLUP(number, number % 2)
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -56,7 +60,8 @@ GROUP BY
number % 2
WITH CUBE
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -65,7 +70,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
CUBE(number, number % 2)
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -75,7 +81,8 @@ GROUP BY
CUBE(number, number % 2)
HAVING grouping(number) != 0
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -94,7 +101,8 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
CUBE(number, number % 2) WITH TOTALS
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;
SELECT
number,
@ -113,4 +121,5 @@ FROM remote('127.0.0.{2,3}', numbers(10))
GROUP BY
ROLLUP(number, number % 2) WITH TOTALS
ORDER BY
number, gr;
number, gr
SETTINGS force_grouping_standard_compatibility=0;

View File

@ -1,5 +1,5 @@
-- { echoOn }
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b);
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0;
1 0 0 3
1 0 2 3
1 0 4 3
@ -13,7 +13,7 @@ SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING
5 0 0 2
5 1 0 2
10 0 0 0
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b);
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0;
1 0 0 3
1 0 2 3
1 0 4 3

View File

@ -5,9 +5,9 @@ CREATE TABLE test02315(a UInt64, b UInt64) ENGINE=MergeTree() ORDER BY (a, b);
INSERT INTO test02315 SELECT number % 2 as a, number as b FROM numbers(10);
-- { echoOn }
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b);
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0;
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b);
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02315 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b) SETTINGS force_grouping_standard_compatibility=0;
-- { echoOff }
DROP TABLE test02315;

View File

@ -0,0 +1,29 @@
-- { echoOn }
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02416 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b);
1 0 0 0
1 0 2 0
1 0 4 0
1 0 6 0
1 0 8 0
1 1 1 0
1 1 3 0
1 1 5 0
1 1 7 0
1 1 9 0
5 0 0 1
5 1 0 1
10 0 0 3
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02416 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b);
1 0 0 0
1 0 2 0
1 0 4 0
1 0 6 0
1 0 8 0
1 1 1 0
1 1 3 0
1 1 5 0
1 1 7 0
1 1 9 0
5 0 0 1
5 1 0 1
10 0 0 3

View File

@ -0,0 +1,14 @@
DROP TABLE IF EXISTS test02416;
CREATE TABLE test02416(a UInt64, b UInt64) ENGINE=MergeTree() ORDER BY (a, b);
INSERT INTO test02416 SELECT number % 2 as a, number as b FROM numbers(10);
-- { echoOn }
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02416 GROUP BY GROUPING SETS ((a, b), (a), ()) ORDER BY (amount, a, b);
SELECT count() AS amount, a, b, GROUPING(a, b) FROM test02416 GROUP BY ROLLUP(a, b) ORDER BY (amount, a, b);
-- { echoOff }
DROP TABLE test02416;