Mark delta/doubledelta codec followed by a time series codec as suspicious

This commit is contained in:
Robert Schulze 2023-01-26 11:43:16 +00:00
parent 5f4726eb2a
commit 856eba0a4b
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
5 changed files with 47 additions and 23 deletions

View File

@ -28,6 +28,7 @@ protected:
bool isCompression() const override { return false; }
bool isGenericCompression() const override { return false; }
bool isDeltaCompression() const override { return true; }
private:
const UInt8 delta_bytes_size;

View File

@ -133,6 +133,7 @@ protected:
bool isCompression() const override { return true; }
bool isGenericCompression() const override { return false; }
bool isDeltaCompression() const override { return true; }
private:
UInt8 data_bytes_size;

View File

@ -85,8 +85,9 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
bool with_compression_codec = false;
bool with_none_codec = false;
bool with_floating_point_timeseries_codec = false;
std::optional<size_t> generic_compression_codec_pos;
std::optional<size_t> first_generic_compression_codec_pos;
std::optional<size_t> first_delta_codec_pos;
std::optional<size_t> last_floating_point_time_series_codec_pos;
std::set<size_t> encryption_codecs_pos;
bool can_substitute_codec_arguments = true;
@ -163,10 +164,15 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
with_compression_codec |= result_codec->isCompression();
with_none_codec |= result_codec->isNone();
with_floating_point_timeseries_codec |= result_codec->isFloatingPointTimeSeriesCodec();
if (!generic_compression_codec_pos && result_codec->isGenericCompression())
generic_compression_codec_pos = i;
if (result_codec->isGenericCompression() && !first_generic_compression_codec_pos.has_value())
first_generic_compression_codec_pos = i;
if (result_codec->isDeltaCompression() && !first_delta_codec_pos.has_value())
first_delta_codec_pos = i;
if (result_codec->isFloatingPointTimeSeriesCodec())
last_floating_point_time_series_codec_pos = i;
if (result_codec->isEncryption())
encryption_codecs_pos.insert(i);
@ -178,44 +184,55 @@ ASTPtr CompressionCodecFactory::validateCodecAndGetPreprocessedAST(
{
if (codecs_descriptions->children.size() > 1 && with_none_codec)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"It does not make sense to have codec NONE along with other compression codecs: {}. "
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
codec_description);
"It does not make sense to have codec NONE along with other compression codecs: {}. "
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
codec_description);
/// Allow to explicitly specify single NONE codec if user don't want any compression.
/// But applying other transformations solely without compression (e.g. Delta) does not make sense.
/// It's okay to apply encryption codecs solely without anything else.
if (!with_compression_codec && !with_none_codec && encryption_codecs_pos.size() != codecs_descriptions->children.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Compression codec {} does not compress anything. "
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Compression codec {} does not compress anything. "
"You may want to add generic compression algorithm after other transformations, like: {}, LZ4. "
"(Note: you can enable setting 'allow_suspicious_codecs' to skip this check).",
codec_description, codec_description);
/// It does not make sense to apply any non-encryption codecs
/// after encryption one.
/// It does not make sense to apply any non-encryption codecs after encryption one.
if (!encryption_codecs_pos.empty() &&
*encryption_codecs_pos.begin() != codecs_descriptions->children.size() - encryption_codecs_pos.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The combination of compression codecs {} is meaningless, "
"because it does not make sense to apply any non-post-processing codecs after "
"post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs' "
"to skip this check).", codec_description);
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless, "
"because it does not make sense to apply any non-post-processing codecs after "
"post-processing ones. (Note: you can enable setting 'allow_suspicious_codecs' "
"to skip this check).", codec_description);
/// Floating-point time series codecs are not supposed to compress non-floating-point data
if (with_floating_point_timeseries_codec &&
column_type && !innerDataTypeIsFloat(column_type))
if (last_floating_point_time_series_codec_pos.has_value()
&& column_type && !innerDataTypeIsFloat(column_type))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless,"
" because it does not make sense to apply a floating-point time series codec to non-floating-point columns"
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
/// Floating-point time series codecs usually do implicit delta compression (or something equivalent), and makes no sense to run
/// delta compression manually.
if (first_delta_codec_pos.has_value() && last_floating_point_time_series_codec_pos.has_value()
&& (*first_delta_codec_pos < *last_floating_point_time_series_codec_pos))
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless,"
" because floating point time series codecs do delta compression implicitly by themselves."
" (Note: you can enable setting 'allow_suspicious_codecs' to skip this check).", codec_description);
/// It does not make sense to apply any transformations after generic compression algorithm
/// So, generic compression can be only one and only at the end.
if (generic_compression_codec_pos &&
*generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs_pos.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS, "The combination of compression codecs {} is meaningless, "
"because it does not make sense to apply any transformations after generic "
"compression algorithm. (Note: you can enable setting 'allow_suspicious_codecs' "
"to skip this check).", codec_description);
if (first_generic_compression_codec_pos &&
*first_generic_compression_codec_pos != codecs_descriptions->children.size() - 1 - encryption_codecs_pos.size())
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"The combination of compression codecs {} is meaningless, "
"because it does not make sense to apply any transformations after generic "
"compression algorithm. (Note: you can enable setting 'allow_suspicious_codecs' "
"to skip this check).", codec_description);
}

View File

@ -102,6 +102,9 @@ public:
/// If it is a specialized codec for floating-point time series. Applying it to non-floating point data is suspicious.
virtual bool isFloatingPointTimeSeriesCodec() const { return false; }
/// If the codec's purpose is to calculate deltas betwen consecutive values.
virtual bool isDeltaCompression() const { return false; }
/// It is a codec available only for evaluation purposes and not meant to be used in production.
/// It will not be allowed to use unless the user will turn off the safety switch.
virtual bool isExperimental() const { return false; }

View File

@ -32,6 +32,8 @@ CREATE TABLE codecs (a UInt8 CODEC(LZ4, Delta)) ENGINE = MergeTree ORDER BY tupl
CREATE TABLE codecs (a UInt8 CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a FixedString(2) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a Decimal(15,5) CODEC(Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a Float64 CODEC(Delta, Gorilla)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
CREATE TABLE codecs (a Float32 CODEC(DoubleDelta, FPC)) ENGINE = MergeTree ORDER BY tuple(); -- { serverError BAD_ARGUMENTS }
-- test that sanity check is not performed in ATTACH query