mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 08:32:02 +00:00
Deprecate Gorilla compression of non-float columns
Reasons: 1. The original Gorilla paper proposed a compression schema for pairs of time stamps and double-precision FP values. ClickHouse's Gorilla codec only implements compression of the latter and it does not impose any data type restrictions. - Data types != Float* or (U)Int* (e.g. Decimal, Point etc.) are definitely not supposed to be used with Gorilla. - (U)Int* types are debatable. The paper only considers integers-stored-as-FP-values, a practical use case for which Gorilla works well. Standalone integers are not considered which makes them at least suspicious. 2. Achieve consistency with FPC, another specialized floating-point timeseries codec, which rejects non-float data. 3. On practical datasets, ZSTD is often "good enough" (**) so it should be okay to disincentive non-ZSTD codecs a little bit. If needed, Delta and DoubleDelta codecs are viable alternative for slowly changing (time-series-like) integer sequences. Since on-prem and hosted users may still have Gorilla-compressed non-float data, this combination is only deprecated for now. No warning or error will be emitted. Users are encouraged to migrate Gorilla-compressed non-float data to an alternative codec. It is planned to treat Gorilla-compressed non-float columns as "suspicious" six months after this commit (i.e. in v23.6). Even then, it will still be possible to set "allow_suspicious_codecs = true" and read and write Gorilla-compressed non-float data. (*) Sec. 4.1.2, "Gorilla restricts the value element in its tuple to a double floating point type.", https://doi.org/10.14778/2824032.2824078 (**) https://clickhouse.com/blog/optimize-clickhouse-codecs-compression-schema
This commit is contained in:
parent
4ac17d71fa
commit
e6167d6b36
@ -293,7 +293,9 @@ These codecs are designed to make compression more effective by using specific f
|
||||
|
||||
#### Gorilla
|
||||
|
||||
`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see Compressing Values in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](http://www.vldb.org/pvldb/vol8/p1816-teller.pdf).
|
||||
`Gorilla` — Calculates XOR between current and previous value and writes it in compact binary form. Efficient when storing a series of floating point values that change slowly, because the best compression rate is achieved when neighboring values are binary equal. Implements the algorithm used in Gorilla TSDB, extending it to support 64-bit types. For additional information, see section 4.1 in [Gorilla: A Fast, Scalable, In-Memory Time Series Database](https://doi.org/10.14778/2824032.2824078).
|
||||
|
||||
It is currently possible to apply Gorilla compression to columns of non-floating-point type but this practice is strongly discouraged and will be removed in future.
|
||||
|
||||
#### FPC
|
||||
|
||||
|
@ -541,7 +541,7 @@ void Connection::sendQuery(
|
||||
if (method == "ZSTD")
|
||||
level = settings->network_zstd_compression_level;
|
||||
|
||||
CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs);
|
||||
CompressionCodecFactory::instance().validateCodec(method, level, !settings->allow_suspicious_codecs, settings->allow_experimental_codecs, settings->enable_gorilla_codec_for_non_float_data);
|
||||
compression_codec = CompressionCodecFactory::instance().get(method, level);
|
||||
}
|
||||
else
|
||||
|
@ -39,6 +39,7 @@ protected:
|
||||
|
||||
bool isCompression() const override { return true; }
|
||||
bool isGenericCompression() const override { return false; }
|
||||
bool isFloatingPointTimeSeries() const override { return true; }
|
||||
|
||||
private:
|
||||
static constexpr UInt32 HEADER_SIZE = 2;
|
||||
|
@ -123,6 +123,7 @@ protected:
|
||||
|
||||
bool isCompression() const override { return true; }
|
||||
bool isGenericCompression() const override { return false; }
|
||||
bool isFloatingPointTimeSeries() const override { return true; }
|
||||
|
||||
private:
|
||||
UInt8 data_bytes_size;
|
||||
@ -444,14 +445,14 @@ void CompressionCodecGorilla::doDecompressData(const char * source, UInt32 sourc
|
||||
void registerCodecGorilla(CompressionCodecFactory & factory)
|
||||
{
|
||||
UInt8 method_code = static_cast<UInt8>(CompressionMethodByte::Gorilla);
|
||||
factory.registerCompressionCodecWithType("Gorilla", method_code,
|
||||
[&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
|
||||
auto codec_builder = [&](const ASTPtr & arguments, const IDataType * column_type) -> CompressionCodecPtr
|
||||
{
|
||||
if (arguments)
|
||||
throw Exception("Codec Gorilla does not accept any arguments", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
UInt8 data_bytes_size = column_type ? getDataBytesSize(column_type) : 0;
|
||||
return std::make_shared<CompressionCodecGorilla>(data_bytes_size);
|
||||
});
|
||||
};
|
||||
factory.registerCompressionCodecWithType("Gorilla", method_code, codec_builder);
|
||||
}
|
||||
}
|
||||
|
@ -40,10 +40,12 @@ public:
|
||||
CompressionCodecPtr getDefaultCodec() const;
|
||||
|
||||
/// Validate codecs AST specified by user and parses codecs description (substitute default parameters)
|
||||
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const;
|
||||
///
|
||||
/// Note: enable_gorilla_coded_for_non_float_data is a transitory parameter and can be removed after v23.6 (being implicitly false then), see comments in Core/Settings.h.
|
||||
ASTPtr validateCodecAndGetPreprocessedAST(const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_gorilla_coded_for_non_float_data) const;
|
||||
|
||||
/// Validate codecs AST specified by user
|
||||
void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const;
|
||||
void validateCodec(const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_gorilla_coded_for_non_float_data) const;
|
||||
|
||||
/// Get codec by AST and possible column_type. Some codecs can use
|
||||
/// information about type to improve inner settings, but every codec should
|
||||
|
@ -32,7 +32,7 @@ namespace ErrorCodes
|
||||
|
||||
|
||||
void CompressionCodecFactory::validateCodec(
|
||||
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs) const
|
||||
const String & family_name, std::optional<int> level, bool sanity_check, bool allow_experimental_codecs, bool enable_gorilla_codec_for_non_float_data) const
|
||||
{
|
||||
if (family_name.empty())
|
||||
throw Exception("Compression codec name cannot be empty", ErrorCodes::BAD_ARGUMENTS);
|
||||
@ -41,33 +41,34 @@ void CompressionCodecFactory::validateCodec(
|
||||
{
|
||||
auto literal = std::make_shared<ASTLiteral>(static_cast<UInt64>(*level));
|
||||
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", makeASTFunction(Poco::toUpper(family_name), literal)),
|
||||
{}, sanity_check, allow_experimental_codecs);
|
||||
{}, sanity_check, allow_experimental_codecs, enable_gorilla_codec_for_non_float_data);
|
||||
}
|
||||
else
|
||||
{
|
||||
auto identifier = std::make_shared<ASTIdentifier>(Poco::toUpper(family_name));
|
||||
validateCodecAndGetPreprocessedAST(makeASTFunction("CODEC", identifier),
|
||||
{}, sanity_check, allow_experimental_codecs);
|
||||
{}, sanity_check, allow_experimental_codecs, enable_gorilla_codec_for_non_float_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs) const
|
||||
const ASTPtr & ast, const DataTypePtr & column_type, bool sanity_check, bool allow_experimental_codecs, bool enable_gorilla_codec_for_non_float_data) const
|
||||
{
|
||||
if (const auto * func = ast->as<ASTFunction>())
|
||||
{
|
||||
ASTPtr codecs_descriptions = std::make_shared<ASTExpressionList>();
|
||||
|
||||
bool is_compression = false;
|
||||
bool has_none = false;
|
||||
bool with_compression_codec = false;
|
||||
bool with_none_codec = false;
|
||||
bool with_floating_point_timeseries_codec = false;
|
||||
std::optional<size_t> generic_compression_codec_pos;
|
||||
std::set<size_t> encryption_codecs;
|
||||
std::set<size_t> encryption_codecs_pos;
|
||||
|
||||
bool can_substitute_codec_arguments = true;
|
||||
for (size_t i = 0, size = func->arguments->children.size(); i < size; ++i)
|
||||
{
|
||||
const auto & inner_codec_ast = func->arguments->children[i];
|
||||
const ASTPtr & inner_codec_ast = func->arguments->children[i];
|
||||
String codec_family_name;
|
||||
ASTPtr codec_arguments;
|
||||
if (const auto * family_name = inner_codec_ast->as<ASTIdentifier>())
|
||||
@ -136,21 +137,22 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
codecs_descriptions->children.emplace_back(result_codec->getCodecDesc());
|
||||
}
|
||||
|
||||
is_compression |= result_codec->isCompression();
|
||||
has_none |= result_codec->isNone();
|
||||
with_compression_codec |= result_codec->isCompression();
|
||||
with_none_codec |= result_codec->isNone();
|
||||
with_floating_point_timeseries_codec |= result_codec->isFloatingPointTimeSeries();
|
||||
|
||||
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
|
||||
generic_compression_codec_pos = i;
|
||||
|
||||
if (result_codec->isEncryption())
|
||||
encryption_codecs.insert(i);
|
||||
encryption_codecs_pos.insert(i);
|
||||
}
|
||||
|
||||
String codec_description = queryToString(codecs_descriptions);
|
||||
|
||||
if (sanity_check)
|
||||
{
|
||||
if (codecs_descriptions->children.size() > 1 && has_none)
|
||||
if (codecs_descriptions->children.size() > 1 && with_none_codec)
|
||||
throw Exception(
|
||||
"It does not make sense to have codec NONE along with other compression codecs: " + codec_description
|
||||
+ ". (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||
@ -159,7 +161,7 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
/// Allow to explicitly specify single NONE codec if user don't want any compression.
|
||||
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
|
||||
/// It's okay to apply encryption codecs solely without anything else.
|
||||
if (!is_compression && !has_none && encryption_codecs.size() != codecs_descriptions->children.size())
|
||||
if (!with_compression_codec && !with_none_codec && encryption_codecs_pos.size() != codecs_descriptions->children.size())
|
||||
throw Exception(
|
||||
"Compression codec " + codec_description
|
||||
+ " does not compress anything."
|
||||
@ -171,17 +173,26 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
|
||||
/// It does not make sense to apply any non-encryption codecs
|
||||
/// after encryption one.
|
||||
if (!encryption_codecs.empty() &&
|
||||
*encryption_codecs.begin() != codecs_descriptions->children.size() - encryption_codecs.size())
|
||||
if (!encryption_codecs_pos.empty() &&
|
||||
*encryption_codecs_pos.begin() != codecs_descriptions->children.size() - encryption_codecs_pos.size())
|
||||
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
||||
" because it does not make sense to apply any non-post-processing codecs after"
|
||||
" post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs'"
|
||||
" to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
if (column_type)
|
||||
if (with_floating_point_timeseries_codec &&
|
||||
!WhichDataType(*column_type).isFloat() &&
|
||||
!enable_gorilla_codec_for_non_float_data)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The combination of compression codecs {} is meaningless,"
|
||||
" because it does not make sense to apply a floating-point time series codec to non-floating-point columns"
|
||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
|
||||
|
||||
/// It does not make sense to apply any transformations after generic compression algorithm
|
||||
/// So, generic compression can be only one and only at the end.
|
||||
if (generic_compression_codec_pos &&
|
||||
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs.size())
|
||||
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs_pos.size())
|
||||
throw Exception("The combination of compression codecs " + codec_description + " is meaningless,"
|
||||
" because it does not make sense to apply any transformations after generic compression algorithm."
|
||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
@ -113,6 +113,8 @@ public:
|
||||
/// If it does nothing.
|
||||
virtual bool isNone() const { return false; }
|
||||
|
||||
virtual bool isFloatingPointTimeSeries() const { return false; }
|
||||
|
||||
protected:
|
||||
/// This is used for fuzz testing
|
||||
friend int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size);
|
||||
|
@ -529,6 +529,13 @@ public:
|
||||
|
||||
TEST_P(CodecTest, TranscodingWithDataType)
|
||||
{
|
||||
/// Gorilla can only be applied to floating point columns
|
||||
bool codec_is_gorilla = std::get<0>(GetParam()).codec_statement.find("Gorilla") != std::string::npos;
|
||||
WhichDataType which(std::get<1>(GetParam()).data_type.get());
|
||||
bool data_is_float = which.isFloat();
|
||||
if (codec_is_gorilla && !data_is_float)
|
||||
GTEST_SKIP() << "Skipping Gorilla-compressed integer column";
|
||||
|
||||
const auto codec = makeCodec(CODEC_WITH_DATA_TYPE);
|
||||
testTranscoding(*codec);
|
||||
}
|
||||
@ -1204,68 +1211,20 @@ auto DDperformanceTestSequence()
|
||||
}
|
||||
|
||||
// prime numbers in ascending order with some random repetitions hit all the cases of Gorilla.
|
||||
auto PrimesWithMultiplierGenerator = [](int multiplier = 1)
|
||||
{
|
||||
return [multiplier](auto i)
|
||||
{
|
||||
static const int vals[] = {
|
||||
2, 3, 5, 7, 11, 11, 13, 17, 19, 23, 29, 29, 31, 37, 41, 43,
|
||||
47, 47, 53, 59, 61, 61, 67, 71, 73, 79, 83, 89, 89, 97, 101, 103,
|
||||
107, 107, 109, 113, 113, 127, 127, 127
|
||||
};
|
||||
static const size_t count = sizeof(vals)/sizeof(vals[0]);
|
||||
|
||||
return static_cast<UInt64>(vals[i % count]) * multiplier;
|
||||
};
|
||||
};
|
||||
|
||||
template <typename ValueType>
|
||||
auto GCompatibilityTestSequence()
|
||||
{
|
||||
// Also multiply result by some factor to test large values on types that can hold those.
|
||||
return generateSeq<ValueType>(G(PrimesWithMultiplierGenerator(intExp10(sizeof(ValueType)))), 0, 42);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_SUITE_P(Gorilla,
|
||||
CodecTestCompatibility,
|
||||
::testing::Combine(
|
||||
::testing::Values(Codec("Gorilla")),
|
||||
::testing::ValuesIn(std::initializer_list<std::tuple<CodecTestSequence, std::string>>{
|
||||
{
|
||||
GCompatibilityTestSequence<Int8>(),
|
||||
BIN_STR("\x95\x35\x00\x00\x00\x2a\x00\x00\x00\x01\x00\x2a\x00\x00\x00\x14\xe1\xdd\x25\xe5\x7b\x29\x86\xee\x2a\x16\x5a\xc5\x0b\x23\x75\x1b\x3c\xb1\x97\x8b\x5f\xcb\x43\xd9\xc5\x48\xab\x23\xaf\x62\x93\x71\x4a\x73\x0f\xc6\x0a")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<UInt8>(),
|
||||
BIN_STR("\x95\x35\x00\x00\x00\x2a\x00\x00\x00\x01\x00\x2a\x00\x00\x00\x14\xe1\xdd\x25\xe5\x7b\x29\x86\xee\x2a\x16\x5a\xc5\x0b\x23\x75\x1b\x3c\xb1\x97\x8b\x5f\xcb\x43\xd9\xc5\x48\xab\x23\xaf\x62\x93\x71\x4a\x73\x0f\xc6\x0a")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<Int16>(),
|
||||
BIN_STR("\x95\x52\x00\x00\x00\x54\x00\x00\x00\x02\x00\x2a\x00\x00\x00\xc8\x00\xdc\xfe\x66\xdb\x1f\x4e\xa7\xde\xdc\xd5\xec\x6e\xf7\x37\x3a\x23\xe7\x63\xf5\x6a\x8e\x99\x37\x34\xf9\xf8\x2e\x76\x35\x2d\x51\xbb\x3b\xc3\x6d\x13\xbf\x86\x53\x9e\x25\xe4\xaf\xaf\x63\xd5\x6a\x6e\x76\x35\x3a\x27\xd3\x0f\x91\xae\x6b\x33\x57\x6e\x64\xcc\x55\x81\xe4")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<UInt16>(),
|
||||
BIN_STR("\x95\x52\x00\x00\x00\x54\x00\x00\x00\x02\x00\x2a\x00\x00\x00\xc8\x00\xdc\xfe\x66\xdb\x1f\x4e\xa7\xde\xdc\xd5\xec\x6e\xf7\x37\x3a\x23\xe7\x63\xf5\x6a\x8e\x99\x37\x34\xf9\xf8\x2e\x76\x35\x2d\x51\xbb\x3b\xc3\x6d\x13\xbf\x86\x53\x9e\x25\xe4\xaf\xaf\x63\xd5\x6a\x6e\x76\x35\x3a\x27\xd3\x0f\x91\xae\x6b\x33\x57\x6e\x64\xcc\x55\x81\xe4")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<Int32>(),
|
||||
BIN_STR("\x95\x65\x00\x00\x00\xa8\x00\x00\x00\x04\x00\x2a\x00\x00\x00\x20\x4e\x00\x00\xe4\x57\x63\xc0\xbb\x67\xbc\xce\x91\x97\x99\x15\x9e\xe3\x36\x3f\x89\x5f\x8e\xf2\xec\x8e\xd3\xbf\x75\x43\x58\xc4\x7e\xcf\x93\x43\x38\xc6\x91\x36\x1f\xe7\xb6\x11\x6f\x02\x73\x46\xef\xe0\xec\x50\xfb\x79\xcb\x9c\x14\xfa\x13\xea\x8d\x66\x43\x48\xa0\xde\x3a\xcf\xff\x26\xe0\x5f\x93\xde\x5e\x7f\x6e\x36\x5e\xe6\xb4\x66\x5d\xb0\x0e\xc4")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<UInt32>(),
|
||||
BIN_STR("\x95\x65\x00\x00\x00\xa8\x00\x00\x00\x04\x00\x2a\x00\x00\x00\x20\x4e\x00\x00\xe4\x57\x63\xc0\xbb\x67\xbc\xce\x91\x97\x99\x15\x9e\xe3\x36\x3f\x89\x5f\x8e\xf2\xec\x8e\xd3\xbf\x75\x43\x58\xc4\x7e\xcf\x93\x43\x38\xc6\x91\x36\x1f\xe7\xb6\x11\x6f\x02\x73\x46\xef\xe0\xec\x50\xfb\x79\xcb\x9c\x14\xfa\x13\xea\x8d\x66\x43\x48\xa0\xde\x3a\xcf\xff\x26\xe0\x5f\x93\xde\x5e\x7f\x6e\x36\x5e\xe6\xb4\x66\x5d\xb0\x0e\xc4")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<Int64>(),
|
||||
BIN_STR("\x95\x91\x00\x00\x00\x50\x01\x00\x00\x08\x00\x2a\x00\x00\x00\x00\xc2\xeb\x0b\x00\x00\x00\x00\xe3\x2b\xa0\xa6\x19\x85\x98\xdc\x45\x74\x74\x43\xc2\x57\x41\x4c\x6e\x42\x79\xd9\x8f\x88\xa5\x05\xf3\xf1\x94\xa3\x62\x1e\x02\xdf\x05\x10\xf1\x15\x97\x35\x2a\x50\x71\x0f\x09\x6c\x89\xf7\x65\x1d\x11\xb7\xcc\x7d\x0b\x70\xc1\x86\x88\x48\x47\x87\xb6\x32\x26\xa7\x86\x87\x88\xd3\x93\x3d\xfc\x28\x68\x85\x05\x0b\x13\xc6\x5f\xd4\x70\xe1\x5e\x76\xf1\x9f\xf3\x33\x2a\x14\x14\x5e\x40\xc1\x5c\x28\x3f\xec\x43\x03\x05\x11\x91\xe8\xeb\x8e\x0a\x0e\x27\x21\x55\xcb\x39\xbc\x6a\xff\x11\x5d\x81\xa0\xa6\x10")
|
||||
},
|
||||
{
|
||||
GCompatibilityTestSequence<UInt64>(),
|
||||
BIN_STR("\x95\x91\x00\x00\x00\x50\x01\x00\x00\x08\x00\x2a\x00\x00\x00\x00\xc2\xeb\x0b\x00\x00\x00\x00\xe3\x2b\xa0\xa6\x19\x85\x98\xdc\x45\x74\x74\x43\xc2\x57\x41\x4c\x6e\x42\x79\xd9\x8f\x88\xa5\x05\xf3\xf1\x94\xa3\x62\x1e\x02\xdf\x05\x10\xf1\x15\x97\x35\x2a\x50\x71\x0f\x09\x6c\x89\xf7\x65\x1d\x11\xb7\xcc\x7d\x0b\x70\xc1\x86\x88\x48\x47\x87\xb6\x32\x26\xa7\x86\x87\x88\xd3\x93\x3d\xfc\x28\x68\x85\x05\x0b\x13\xc6\x5f\xd4\x70\xe1\x5e\x76\xf1\x9f\xf3\x33\x2a\x14\x14\x5e\x40\xc1\x5c\x28\x3f\xec\x43\x03\x05\x11\x91\xe8\xeb\x8e\x0a\x0e\x27\x21\x55\xcb\x39\xbc\x6a\xff\x11\x5d\x81\xa0\xa6\x10")
|
||||
},
|
||||
})
|
||||
)
|
||||
);
|
||||
// auto PrimesWithMultiplierGenerator = [](int multiplier = 1)
|
||||
// {
|
||||
// return [multiplier](auto i)
|
||||
// {
|
||||
// static const int vals[] = {
|
||||
// 2, 3, 5, 7, 11, 11, 13, 17, 19, 23, 29, 29, 31, 37, 41, 43,
|
||||
// 47, 47, 53, 59, 61, 61, 67, 71, 73, 79, 83, 89, 89, 97, 101, 103,
|
||||
// 107, 107, 109, 113, 113, 127, 127, 127
|
||||
// };
|
||||
// static const size_t count = sizeof(vals)/sizeof(vals[0]);
|
||||
//
|
||||
// return static_cast<UInt64>(vals[i % count]) * multiplier;
|
||||
// };
|
||||
// };
|
||||
|
||||
// These 'tests' try to measure performance of encoding and decoding and hence only make sense to be run locally,
|
||||
// also they require pretty big data to run against and generating this data slows down startup of unit test process.
|
||||
|
@ -627,6 +627,10 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
\
|
||||
M(Bool, load_marks_asynchronously, false, "Load MergeTree marks asynchronously", 0) \
|
||||
\
|
||||
/* Transitory setting needed for ca. six months. After v23.6, the setting can be moved into the obsolete section below and assumed as false. */ \
|
||||
/* Gorilla is a float-codec which happend to be enabled for non-float datatypes. Users (self-hosted or on-premise) might have non-float Gorilla-compressed data so allow some time for migration. */ \
|
||||
M(Bool, enable_gorilla_codec_for_non_float_data, true, "Enable Gorilla compression of columns with non-float data type", 0) \
|
||||
\
|
||||
M(UInt64, use_structure_from_insertion_table_in_table_functions, 2, "Use structure from insertion table instead of schema inference from data. Possible values: 0 - disabled, 1 - enabled, 2 - auto", 0) \
|
||||
\
|
||||
M(UInt64, http_max_tries, 10, "Max attempts to read via http.", 0) \
|
||||
|
@ -556,6 +556,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
|
||||
|
||||
bool sanity_check_compression_codecs = !attach && !context_->getSettingsRef().allow_suspicious_codecs;
|
||||
bool allow_experimental_codecs = attach || context_->getSettingsRef().allow_experimental_codecs;
|
||||
bool enable_gorilla_codec_for_non_float_data = context_->getSettingsRef().enable_gorilla_codec_for_non_float_data;
|
||||
|
||||
ColumnsDescription res;
|
||||
auto name_type_it = column_names_and_types.begin();
|
||||
@ -617,7 +618,7 @@ ColumnsDescription InterpreterCreateQuery::getColumnsDescription(
|
||||
if (col_decl.default_specifier == "ALIAS")
|
||||
throw Exception{"Cannot specify codec for column type ALIAS", ErrorCodes::BAD_ARGUMENTS};
|
||||
column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
|
||||
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs);
|
||||
col_decl.codec, column.type, sanity_check_compression_codecs, allow_experimental_codecs, enable_gorilla_codec_for_non_float_data);
|
||||
}
|
||||
|
||||
if (col_decl.ttl)
|
||||
|
@ -1692,7 +1692,7 @@ void TCPHandler::initBlockOutput(const Block & block)
|
||||
|
||||
if (state.compression == Protocol::Compression::Enable)
|
||||
{
|
||||
CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs);
|
||||
CompressionCodecFactory::instance().validateCodec(method, level, !query_settings.allow_suspicious_codecs, query_settings.allow_experimental_codecs, query_settings.enable_gorilla_codec_for_non_float_data);
|
||||
|
||||
state.maybe_compressed_out = std::make_shared<CompressedWriteBuffer>(
|
||||
*out, CompressionCodecFactory::instance().get(method, level));
|
||||
|
@ -389,7 +389,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
column.comment = *comment;
|
||||
|
||||
if (codec)
|
||||
column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true);
|
||||
column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type, false, true, true);
|
||||
|
||||
column.ttl = ttl;
|
||||
|
||||
@ -430,7 +430,7 @@ void AlterCommand::apply(StorageInMemoryMetadata & metadata, ContextPtr context)
|
||||
else
|
||||
{
|
||||
if (codec)
|
||||
column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true);
|
||||
column.codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(codec, data_type ? data_type : column.type, false, true, true);
|
||||
|
||||
if (comment)
|
||||
column.comment = *comment;
|
||||
@ -1080,7 +1080,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
ErrorCodes::ILLEGAL_COLUMN};
|
||||
|
||||
if (command.codec)
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type,
|
||||
!context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_gorilla_codec_for_non_float_data);
|
||||
|
||||
all_columns.add(ColumnDescription(column_name, command.data_type));
|
||||
}
|
||||
@ -1104,7 +1105,8 @@ void AlterCommands::validate(const StoragePtr & table, ContextPtr context) const
|
||||
ErrorCodes::NOT_IMPLEMENTED};
|
||||
|
||||
if (command.codec)
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(command.codec, command.data_type,
|
||||
!context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_gorilla_codec_for_non_float_data);
|
||||
auto column_default = all_columns.getDefault(column_name);
|
||||
if (column_default)
|
||||
{
|
||||
|
@ -130,7 +130,7 @@ void ColumnDescription::readText(ReadBuffer & buf)
|
||||
comment = col_ast->comment->as<ASTLiteral &>().value.get<String>();
|
||||
|
||||
if (col_ast->codec)
|
||||
codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true);
|
||||
codec = CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(col_ast->codec, type, false, true, true);
|
||||
|
||||
if (col_ast->ttl)
|
||||
ttl = col_ast->ttl;
|
||||
|
@ -701,7 +701,7 @@ void DistributedSink::writeToShard(const Cluster::ShardInfo & shard_info, const
|
||||
if (compression_method == "ZSTD")
|
||||
compression_level = settings.network_zstd_compression_level;
|
||||
|
||||
CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs);
|
||||
CompressionCodecFactory::instance().validateCodec(compression_method, compression_level, !settings.allow_suspicious_codecs, settings.allow_experimental_codecs, settings.enable_gorilla_codec_for_non_float_data);
|
||||
CompressionCodecPtr compression_codec = CompressionCodecFactory::instance().get(compression_method, compression_level);
|
||||
|
||||
/// tmp directory is used to ensure atomicity of transactions
|
||||
|
@ -291,7 +291,7 @@ TTLDescription TTLDescription::getTTLFromAST(
|
||||
{
|
||||
result.recompression_codec =
|
||||
CompressionCodecFactory::instance().validateCodecAndGetPreprocessedAST(
|
||||
ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs);
|
||||
ttl_element->recompression_codec, {}, !context->getSettingsRef().allow_suspicious_codecs, context->getSettingsRef().allow_experimental_codecs, context->getSettingsRef().enable_gorilla_codec_for_non_float_data);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -13,7 +13,6 @@
|
||||
<value>Delta</value>
|
||||
<value>T64</value>
|
||||
<value>DoubleDelta</value>
|
||||
<value>Gorilla</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
|
@ -13,7 +13,6 @@
|
||||
<value>Delta</value>
|
||||
<value>T64</value>
|
||||
<value>DoubleDelta</value>
|
||||
<value>Gorilla</value>
|
||||
</values>
|
||||
</substitution>
|
||||
<substitution>
|
||||
|
@ -11,7 +11,7 @@ CREATE TABLE codecs
|
||||
c Float32 CODEC(Gorilla),
|
||||
d UInt8 CODEC(Delta, LZ4),
|
||||
e Float64 CODEC(Gorilla, ZSTD),
|
||||
f UInt32 CODEC(Delta, Delta, Gorilla),
|
||||
f UInt32 CODEC(Delta, Delta, T64),
|
||||
g DateTime CODEC(DoubleDelta),
|
||||
h DateTime64 CODEC(DoubleDelta, LZ4),
|
||||
i String CODEC(NONE)
|
||||
@ -21,14 +21,14 @@ DROP TABLE codecs;
|
||||
|
||||
-- test what should not work
|
||||
|
||||
CREATE TABLE codecs (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError 36 }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(NONE, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(NONE, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, NONE)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, LZ4)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, ZSTD)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(Delta, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
-- test that sanity check is not performed in ATTACH query
|
||||
|
||||
|
34
tests/queries/0_stateless/02533_gorilla_on_nonfloat.sql
Normal file
34
tests/queries/0_stateless/02533_gorilla_on_nonfloat.sql
Normal file
@ -0,0 +1,34 @@
|
||||
-- Welcome visitor from the future! If it is >= July 2023 and your intention is to adjust the test because "enable_gorilla_codec_for_non_float_data"
|
||||
-- is now obsolete, then please also extend 01272_suspicious_codecs.sql with new tests cases for Gorilla on non-float data.
|
||||
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
-- current default behavior is to enable non-float Gorilla compressed data
|
||||
|
||||
CREATE TABLE test (id UInt64, val Decimal(15,5) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test (id UInt64, val FixedString(2) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test (id UInt64, val UInt64 CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
-- this can be changed (and it is planned to be changed by default in future) with a setting
|
||||
SET enable_gorilla_codec_for_non_float_data = false;
|
||||
|
||||
CREATE TABLE test (id UInt64, val Decimal(15,5) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id; -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE test (id UInt64, val FixedString(2) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id; -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE test (id UInt64, val UInt64 CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id; -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
-- even with above setting, it will still be possible to create non-float Gorilla-compressed data using allow_suspicious_codecs
|
||||
SET allow_suspicious_codecs = true;
|
||||
|
||||
CREATE TABLE test (id UInt64, val Decimal(15,5) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test (id UInt64, val FixedString(2) CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
||||
|
||||
CREATE TABLE test (id UInt64, val UInt64 CODEC (Gorilla)) ENGINE = MergeTree() ORDER BY id;
|
||||
DROP TABLE IF EXISTS test;
|
Loading…
Reference in New Issue
Block a user