Merge pull request #69429 from ClickHouse/materialize_sparse

Make the `materialize()` function return a full column when its argument is a sparse column
This commit is contained in:
Alexander Gololobov 2024-09-10 15:49:29 +00:00 committed by GitHub
commit e4761d40ba
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 112 additions and 7 deletions

View File

@ -2,7 +2,7 @@
#include <Functions/IFunction.h> #include <Functions/IFunction.h>
#include <Functions/FunctionFactory.h> #include <Functions/FunctionFactory.h>
#include <Columns/ColumnLowCardinality.h> #include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h> #include <Columns/ColumnSparse.h>
namespace DB namespace DB
{ {
@ -18,11 +18,6 @@ public:
return std::make_shared<FunctionMaterialize>(); return std::make_shared<FunctionMaterialize>();
} }
bool useDefaultImplementationForNulls() const override
{
return false;
}
/// Get the function name. /// Get the function name.
String getName() const override String getName() const override
{ {
@ -34,8 +29,16 @@ public:
return true; return true;
} }
bool useDefaultImplementationForNulls() const override { return false; }
bool useDefaultImplementationForNothing() const override { return false; }
bool useDefaultImplementationForConstants() const override { return false; }
bool useDefaultImplementationForLowCardinalityColumns() const override { return false; } bool useDefaultImplementationForLowCardinalityColumns() const override { return false; }
bool useDefaultImplementationForSparseColumns() const override { return false; }
bool isSuitableForConstantFolding() const override { return false; } bool isSuitableForConstantFolding() const override { return false; }
bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; } bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return false; }
@ -52,7 +55,7 @@ public:
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /*input_rows_count*/) const override
{ {
return arguments[0].column->convertToFullColumnIfConst(); return recursiveRemoveSparse(arguments[0].column->convertToFullColumnIfConst());
} }
bool hasInformationAboutMonotonicity() const override { return true; } bool hasInformationAboutMonotonicity() const override { return true; }

View File

@ -0,0 +1,50 @@
-- { echoOn }
SELECT dumpColumnStructure(id) FROM sparse_t;
UInt64, Sparse(size = 2, UInt64(size = 2), UInt64(size = 1))
UInt64, Sparse(size = 2, UInt64(size = 2), UInt64(size = 1))
SELECT dumpColumnStructure(materialize(id)) FROM sparse_t;
UInt64, UInt64(size = 2)
UInt64, UInt64(size = 2)
SELECT dumpColumnStructure(u) FROM sparse_t;
UInt64, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0))
UInt64, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0))
SELECT dumpColumnStructure(materialize(u)) FROM sparse_t;
UInt64, UInt64(size = 2)
UInt64, UInt64(size = 2)
SELECT dumpColumnStructure(s) FROM sparse_t;
String, Sparse(size = 2, String(size = 2), UInt64(size = 1))
String, Sparse(size = 2, String(size = 2), UInt64(size = 1))
SELECT dumpColumnStructure(materialize(s)) FROM sparse_t;
String, String(size = 2)
String, String(size = 2)
SELECT dumpColumnStructure(arr1) FROM sparse_t;
Array(String), Array(size = 2, UInt64(size = 2), String(size = 1))
Array(String), Array(size = 2, UInt64(size = 2), String(size = 1))
SELECT dumpColumnStructure(materialize(arr1)) FROM sparse_t;
Array(String), Array(size = 2, UInt64(size = 2), String(size = 1))
Array(String), Array(size = 2, UInt64(size = 2), String(size = 1))
SELECT dumpColumnStructure(arr2) FROM sparse_t;
Array(UInt64), Array(size = 2, UInt64(size = 2), UInt64(size = 1))
Array(UInt64), Array(size = 2, UInt64(size = 2), UInt64(size = 1))
SELECT dumpColumnStructure(materialize(arr2)) FROM sparse_t;
Array(UInt64), Array(size = 2, UInt64(size = 2), UInt64(size = 1))
Array(UInt64), Array(size = 2, UInt64(size = 2), UInt64(size = 1))
SELECT dumpColumnStructure(t) FROM sparse_t;
Tuple(a UInt64, s String), Tuple(size = 2, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0)), Sparse(size = 2, String(size = 1), UInt64(size = 0)))
Tuple(a UInt64, s String), Tuple(size = 2, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0)), Sparse(size = 2, String(size = 1), UInt64(size = 0)))
SELECT dumpColumnStructure(materialize(t)) FROM sparse_t;
Tuple(a UInt64, s String), Tuple(size = 2, UInt64(size = 2), String(size = 2))
Tuple(a UInt64, s String), Tuple(size = 2, UInt64(size = 2), String(size = 2))
SELECT dumpColumnStructure(t.a) FROM sparse_t;
UInt64, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0))
UInt64, Sparse(size = 2, UInt64(size = 1), UInt64(size = 0))
SELECT dumpColumnStructure(materialize(t.a)) FROM sparse_t;
UInt64, UInt64(size = 2)
UInt64, UInt64(size = 2)
SELECT dumpColumnStructure(t.s) FROM sparse_t;
String, Sparse(size = 2, String(size = 1), UInt64(size = 0))
String, Sparse(size = 2, String(size = 1), UInt64(size = 0))
SELECT dumpColumnStructure(materialize(t.s)) FROM sparse_t;
String, String(size = 2)
String, String(size = 2)

View File

@ -0,0 +1,52 @@
-- Test for materialize() on sparse columns.
-- For every column of sparse_t (and for the tuple elements t.a / t.s) the test prints
-- dumpColumnStructure() of the column as read from the table and then of the same
-- column wrapped in materialize(), so the two structures can be compared in the
-- expected output.
-- NOTE(review): the expected output starts with the `-- { echoOn }` line, so comment
-- text around the echoed queries appears to be echoed too; avoid adding comments
-- between echoOn and echoOff without regenerating the reference file.
DROP TABLE IF EXISTS sparse_t;
-- One column per kind under test: plain integers, a string, two arrays and a named tuple.
-- NOTE(review): ratio_of_defaults_for_sparse_serialization = 0.1 is presumably what makes
-- these columns use sparse serialization with so few rows; confirm against the MergeTree
-- settings documentation.
CREATE TABLE sparse_t (
id UInt64,
u UInt64,
s String,
arr1 Array(String),
arr2 Array(UInt64),
t Tuple(a UInt64, s String))
ENGINE = MergeTree ORDER BY tuple()
SETTINGS ratio_of_defaults_for_sparse_serialization = 0.1;
-- Inserts two rows (number = 0 and number = 1).
-- For the odd row every expression takes its else-branch: 0, '', [] or [].
-- For number = 0 the numeric expressions evaluate to 0 as well, so u and t.a contain
-- only zeros, while s gets '0' and the arrays get one element each.
INSERT INTO sparse_t SELECT
number,
if (number % 2 = 0, number, 0),
if (number % 2 = 0, toString(number), ''),
if (number % 2 = 0, [''], []),
if (number % 2 = 0, [0], []),
(if (number % 2 = 0, number, 0), '')
FROM numbers(2);
-- { echoOn }
SELECT dumpColumnStructure(id) FROM sparse_t;
SELECT dumpColumnStructure(materialize(id)) FROM sparse_t;
SELECT dumpColumnStructure(u) FROM sparse_t;
SELECT dumpColumnStructure(materialize(u)) FROM sparse_t;
SELECT dumpColumnStructure(s) FROM sparse_t;
SELECT dumpColumnStructure(materialize(s)) FROM sparse_t;
SELECT dumpColumnStructure(arr1) FROM sparse_t;
SELECT dumpColumnStructure(materialize(arr1)) FROM sparse_t;
SELECT dumpColumnStructure(arr2) FROM sparse_t;
SELECT dumpColumnStructure(materialize(arr2)) FROM sparse_t;
SELECT dumpColumnStructure(t) FROM sparse_t;
SELECT dumpColumnStructure(materialize(t)) FROM sparse_t;
SELECT dumpColumnStructure(t.a) FROM sparse_t;
SELECT dumpColumnStructure(materialize(t.a)) FROM sparse_t;
SELECT dumpColumnStructure(t.s) FROM sparse_t;
SELECT dumpColumnStructure(materialize(t.s)) FROM sparse_t;
-- { echoOff }
DROP TABLE IF EXISTS sparse_t
;