Make check work with Nullable / Array / Tuple

This commit is contained in:
Robert Schulze 2023-01-27 12:55:51 +00:00
parent 574cab5d7e
commit 83593810ff
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
8 changed files with 69 additions and 15 deletions

View File

@ -39,7 +39,7 @@ protected:
bool isCompression() const override { return true; }
bool isGenericCompression() const override { return false; }
bool isFloatingPointTimeSeries() const override { return true; }
bool isFloatingPointTimeSeriesCodec() const override { return true; }
private:
static constexpr UInt32 HEADER_SIZE = 2;

View File

@ -123,7 +123,7 @@ protected:
bool isCompression() const override { return true; }
bool isGenericCompression() const override { return false; }
bool isFloatingPointTimeSeries() const override { return true; }
bool isFloatingPointTimeSeriesCodec() const override { return true; }
private:
UInt8 data_bytes_size;

View File

@ -15,7 +15,9 @@
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypeMap.h>
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeNullable.h>
#include <Common/Exception.h>
@ -51,6 +53,28 @@ void CompressionCodecFactory::validateCodec(
}
}
namespace
{
bool innerDataTypeIsFloat(const DataTypePtr & type)
{
if (isFloat(type))
return true;
if (const DataTypeNullable * type_nullable = typeid_cast<const DataTypeNullable *>(type.get()))
return innerDataTypeIsFloat(type_nullable->getNestedType());
if (const DataTypeArray * type_array = typeid_cast<const DataTypeArray *>(type.get()))
return innerDataTypeIsFloat(type_array->getNestedType());
if (const DataTypeTuple * type_tuple = typeid_cast<const DataTypeTuple *>(type.get()))
{
for (const auto & subtype : type_tuple->getElements())
if (innerDataTypeIsFloat(subtype))
return true;
return false;
}
return false;
}
}
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const
@ -139,7 +163,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
with_compression_codec |= result_codec->isCompression();
with_none_codec |= result_codec->isNone();
with_floating_point_timeseries_codec |= result_codec->isFloatingPointTimeSeries();
with_floating_point_timeseries_codec |= result_codec->isFloatingPointTimeSeriesCodec();
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
generic_compression_codec_pos = i;
@ -176,12 +200,13 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
"post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs' "
"to skip this check).", codec_description);
if (column_type)
if (with_floating_point_timeseries_codec && !WhichDataType(*column_type).isFloat())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless,"
" because it does not make sense to apply a floating-point time series codec to non-floating-point columns"
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
/// Floating-point time series codecs are not supposed to compress non-floating-point data
if (with_floating_point_timeseries_codec &&
column_type && !innerDataTypeIsFloat(column_type))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless,"
" because it does not make sense to apply a floating-point time series codec to non-floating-point columns"
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
/// It does not make sense to apply any transformations after generic compression algorithm
/// So, generic compression can be only one and only at the end.

View File

@ -107,7 +107,7 @@ public:
virtual bool isEncryption() const { return false; }
/// Whether this is a specialized codec for floating-point time series. Applying it to non-floating-point data is suspicious.
virtual bool isFloatingPointTimeSeries() const { return false; }
virtual bool isFloatingPointTimeSeriesCodec() const { return false; }
/// It is a codec available only for evaluation purposes and not meant to be used in production.
/// It will not be allowed to use unless the user will turn off the safety switch.

View File

@ -534,7 +534,7 @@ TEST_P(CodecTest, TranscodingWithDataType)
WhichDataType which(std::get<1>(GetParam()).data_type.get());
bool data_is_float = which.isFloat();
if (codec_is_gorilla && !data_is_float)
GTEST_SKIP() << "Skipping Gorilla-compressed integer column";
GTEST_SKIP() << "Skipping Gorilla-compressed non-float column";
const auto codec = makeCodec(CODEC_WITH_DATA_TYPE);
testTranscoding(*codec);

View File

@ -1078,8 +1078,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
"this column name is reserved for lightweight delete feature", backQuote(column_name));
if (command.codec)
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type,
!context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
all_columns.add(ColumnDescription(column_name, command.data_type));
}
@ -1103,8 +1102,7 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
"in a single ALTER query", backQuote(column_name));
if (command.codec)
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type,
!context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
auto column_default = all_columns.getDefault(column_name);
if (column_default)
{

View File

@ -0,0 +1,31 @@
-- Make sure no table with this name exists before the test creates it.
DROP TABLE IF EXISTS tbl;
-- Checks that the (floating-point) time series codecs Gorilla and FPC can be combined
-- with Nullable and
-- with the composite types Array and Tuple,
-- including combinations of these wrappers nested inside each other.
-- Every column below carries a Float64 somewhere inside its type.
-- NOTE(review): the script contains no SELECT, so it presumably passes when the
-- CREATE TABLE statement is accepted without an error — confirm against the reference file.
CREATE TABLE tbl (
-- Nullable
v1_gor Nullable(Float64) CODEC(Gorilla),
v1_fpc Nullable(Float64) CODEC(FPC),
-- Array (of Float64, of Array, of Nullable, of Tuple)
v2_gor Array(Float64) CODEC(Gorilla),
v2_fpc Array(Float64) CODEC(FPC),
v3_gor Array(Array(Float64)) CODEC(Gorilla),
v3_fpc Array(Array(Float64)) CODEC(FPC),
v4_gor Array(Nullable(Float64)) CODEC(Gorilla),
v4_fpc Array(Nullable(Float64)) CODEC(FPC),
v5_gor Array(Tuple(Float64)) CODEC(Gorilla),
v5_fpc Array(Tuple(Float64)) CODEC(FPC),
-- Tuple (of Float64, of Tuple, of Nullable, of Array)
v6_gor Tuple(Float64) CODEC(Gorilla),
v6_fpc Tuple(Float64) CODEC(FPC),
v7_gor Tuple(Tuple(Float64)) CODEC(Gorilla),
v7_fpc Tuple(Tuple(Float64)) CODEC(FPC),
v8_gor Tuple(Nullable(Float64)) CODEC(Gorilla),
v8_fpc Tuple(Nullable(Float64)) CODEC(FPC),
v9_gor Tuple(Array(Float64)) CODEC(Gorilla),
v9_fpc Tuple(Array(Float64)) CODEC(FPC),
) Engine = MergeTree ORDER BY tuple();
-- Clean up so the table does not leak into other tests.
DROP TABLE IF EXISTS tbl;