Merge pull request #45661 from ClickHouse/generate-random-lowcardinality

Allow LowCardinality in GenerateRandom
This commit is contained in:
Alexey Milovidov 2023-01-27 02:14:38 +03:00 committed by GitHub
commit f401449bd9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 3 deletions

View File

@ -25,7 +25,7 @@ public:
/// Returns the hash value for the data accumulated so far.
/// When block_pos is non-zero, the contents of the internal buffer are passed to
/// CityHash128WithSeed together with block_pos (presumably the number of buffered bytes — confirm)
/// and `state` as the seed; when block_pos is zero, `state` is returned unchanged.
/// NOTE(review): this is a diff hunk, so both the old (`&memory[0]`) and the new (`memory.data()`)
/// form of the call appear below; only one of them exists in the actual file.
uint128 getHash()
{
if (block_pos)
return CityHash_v1_0_2::CityHash128WithSeed(&BufferWithOwnMemory<Buffer>::memory[0], block_pos, state);
return CityHash_v1_0_2::CityHash128WithSeed(BufferWithOwnMemory<Buffer>::memory.data(), block_pos, state);
else
return state;
}

View File

@ -14,6 +14,7 @@
#include <DataTypes/DataTypeDecimalBase.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/NestedUtils.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnFixedString.h>
@ -21,6 +22,7 @@
#include <Columns/ColumnVector.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnLowCardinality.h>
#include <Common/SipHash.h>
#include <Common/randomSeed.h>
@ -157,7 +159,7 @@ ColumnPtr fillColumnWithRandomData(
case TypeIndex::Array:
{
auto nested_type = typeid_cast<const DataTypeArray *>(type.get())->getNestedType();
auto nested_type = typeid_cast<const DataTypeArray &>(*type).getNestedType();
auto offsets_column = ColumnVector<ColumnArray::Offset>::create();
auto & offsets = offsets_column->getData();
@ -189,7 +191,7 @@ ColumnPtr fillColumnWithRandomData(
case TypeIndex::Nullable:
{
auto nested_type = typeid_cast<const DataTypeNullable *>(type.get())->getNestedType();
auto nested_type = typeid_cast<const DataTypeNullable &>(*type).getNestedType();
auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context);
auto null_map_column = ColumnUInt8::create();
@ -372,6 +374,20 @@ ColumnPtr fillColumnWithRandomData(
return column;
}
case TypeIndex::LowCardinality:
{
/// We are generating the values using the same random distribution as for full columns
/// so it's not in fact "low cardinality",
/// but it's ok for testing purposes, because the LowCardinality data type supports high cardinality data as well.
auto nested_type = typeid_cast<const DataTypeLowCardinality &>(*type).getDictionaryType();
auto nested_column = fillColumnWithRandomData(nested_type, limit, max_array_length, max_string_length, rng, context);
auto column = type->createColumn();
typeid_cast<ColumnLowCardinality &>(*column).insertRangeFromFullColumn(*nested_column, 0, limit);
return column;
}
default:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "The 'GenerateRandom' is not implemented for type {}", type->getName());

View File

@ -0,0 +1,2 @@
-- Check that the function works for LowCardinality and gives at least something plausible:
SELECT uniq(x) > 1000 FROM (SELECT * FROM generateRandom('x Array(LowCardinality(Nullable(String)))') LIMIT 100000);