mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Mark delta/doubledelta codec followed by a time series codec as suspicious
This commit is contained in:
parent
5f4726eb2a
commit
856eba0a4b
@ -28,6 +28,7 @@ protected:
|
||||
|
||||
bool isCompression() const override { return false; }
|
||||
bool isGenericCompression() const override { return false; }
|
||||
bool isDeltaCompression() const override { return true; }
|
||||
|
||||
private:
|
||||
const UInt8 delta_bytes_size;
|
||||
|
@ -133,6 +133,7 @@ protected:
|
||||
|
||||
bool isCompression() const override { return true; }
|
||||
bool isGenericCompression() const override { return false; }
|
||||
bool isDeltaCompression() const override { return true; }
|
||||
|
||||
private:
|
||||
UInt8 data_bytes_size;
|
||||
|
@ -85,8 +85,9 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
|
||||
bool with_compression_codec = false;
|
||||
bool with_none_codec = false;
|
||||
bool with_floating_point_timeseries_codec = false;
|
||||
std::optional<size_t> generic_compression_codec_pos;
|
||||
std::optional<size_t> first_generic_compression_codec_pos;
|
||||
std::optional<size_t> first_delta_codec_pos;
|
||||
std::optional<size_t> last_floating_point_time_series_codec_pos;
|
||||
std::set<size_t> encryption_codecs_pos;
|
||||
|
||||
bool can_substitute_codec_arguments = true;
|
||||
@ -163,10 +164,15 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
|
||||
with_compression_codec |= result_codec->isCompression();
|
||||
with_none_codec |= result_codec->isNone();
|
||||
with_floating_point_timeseries_codec |= result_codec->isFloatingPointTimeSeriesCodec();
|
||||
|
||||
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
|
||||
generic_compression_codec_pos = i;
|
||||
if (result_codec->isGenericCompression() && !first_generic_compression_codec_pos.has_value())
|
||||
first_generic_compression_codec_pos = i;
|
||||
|
||||
if (result_codec->isDeltaCompression() && !first_delta_codec_pos.has_value())
|
||||
first_delta_codec_pos = i;
|
||||
|
||||
if (result_codec->isFloatingPointTimeSeriesCodec())
|
||||
last_floating_point_time_series_codec_pos = i;
|
||||
|
||||
if (result_codec->isEncryption())
|
||||
encryption_codecs_pos.insert(i);
|
||||
@ -178,44 +184,55 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
|
||||
{
|
||||
if (codecs_descriptions->children.size() > 1 && with_none_codec)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"It does not make sense to have codec NONE along with other compression codecs: {}. "
|
||||
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||
codec_description);
|
||||
"It does not make sense to have codec NONE along with other compression codecs: {}. "
|
||||
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||
codec_description);
|
||||
|
||||
/// Allow to explicitly specify single NONE codec if user don't want any compression.
|
||||
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
|
||||
/// It's okay to apply encryption codecs solely without anything else.
|
||||
if (!with_compression_codec && !with_none_codec && encryption_codecs_pos.size() != codecs_descriptions->children.size())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec {} does not compress anything. "
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Compression codec {} does not compress anything. "
|
||||
"You may want to add generic compression algorithm after other transformations, like: {}, LZ4. "
|
||||
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
|
||||
codec_description, codec_description);
|
||||
|
||||
/// It does not make sense to apply any non-encryption codecs
|
||||
/// after encryption one.
|
||||
/// It does not make sense to apply any non-encryption codecs after encryption one.
|
||||
if (!encryption_codecs_pos.empty() &&
|
||||
*encryption_codecs_pos.begin() != codecs_descriptions->children.size() - encryption_codecs_pos.size())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The combination of compression codecs {} is meaningless, "
|
||||
"because it does not make sense to apply any non-post-processing codecs after "
|
||||
"post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs' "
|
||||
"to skip this check).", codec_description);
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The combination of compression codecs {} is meaningless, "
|
||||
"because it does not make sense to apply any non-post-processing codecs after "
|
||||
"post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs' "
|
||||
"to skip this check).", codec_description);
|
||||
|
||||
/// Floating-point time series codecs are not supposed to compress non-floating-point data
|
||||
if (with_floating_point_timeseries_codec &&
|
||||
column_type && !innerDataTypeIsFloat(column_type))
|
||||
if (last_floating_point_time_series_codec_pos.has_value()
|
||||
&& column_type && !innerDataTypeIsFloat(column_type))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The combination of compression codecs {} is meaningless,"
|
||||
" because it does not make sense to apply a floating-point time series codec to non-floating-point columns"
|
||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
|
||||
|
||||
/// Floating-point time series codecs usually do implicit delta compression (or something equivalent), and makes no sense to run
|
||||
/// delta compression manually.
|
||||
if (first_delta_codec_pos.has_value() && last_floating_point_time_series_codec_pos.has_value()
|
||||
&& (*first_delta_codec_pos < *last_floating_point_time_series_codec_pos))
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The combination of compression codecs {} is meaningless,"
|
||||
" because floating point time series codecs do delta compression implicitly by themselves."
|
||||
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
|
||||
|
||||
/// It does not make sense to apply any transformations after generic compression algorithm
|
||||
/// So, generic compression can be only one and only at the end.
|
||||
if (generic_compression_codec_pos &&
|
||||
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs_pos.size())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The combination of compression codecs {} is meaningless, "
|
||||
"because it does not make sense to apply any transformations after generic "
|
||||
"compression algorithm. (Note: you can enable setting 'allow_suspicious_codecs' "
|
||||
"to skip this check).", codec_description);
|
||||
if (first_generic_compression_codec_pos &&
|
||||
*first_generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs_pos.size())
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"The combination of compression codecs {} is meaningless, "
|
||||
"because it does not make sense to apply any transformations after generic "
|
||||
"compression algorithm. (Note: you can enable setting 'allow_suspicious_codecs' "
|
||||
"to skip this check).", codec_description);
|
||||
|
||||
}
|
||||
|
||||
|
@ -102,6 +102,9 @@ public:
|
||||
/// If it is a specialized codec for floating-point time series. Applying it to non-floating point data is suspicious.
|
||||
virtual bool isFloatingPointTimeSeriesCodec() const { return false; }
|
||||
|
||||
/// If the codec's purpose is to calculate deltas betwen consecutive values.
|
||||
virtual bool isDeltaCompression() const { return false; }
|
||||
|
||||
/// It is a codec available only for evaluation purposes and not meant to be used in production.
|
||||
/// It will not be allowed to use unless the user will turn off the safety switch.
|
||||
virtual bool isExperimental() const { return false; }
|
||||
|
@ -32,6 +32,8 @@ CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tupl
|
||||
CREATE TABLE codecs (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a Float64 CODEC(Delta, Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
CREATE TABLE codecs (a Float32 CODEC(DoubleDelta, FPC)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
|
||||
|
||||
-- test that sanity check is not performed in ATTACH query
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user