2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/CompressedStream.h>
|
2017-10-26 11:46:57 +00:00
|
|
|
#include <IO/CompressionSettings.h>
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <IO/ReadHelpers.h>
|
|
|
|
#include <Common/Exception.h>
|
|
|
|
#include <Common/StringUtils.h>
|
2015-04-17 05:35:53 +00:00
|
|
|
#include <Poco/Util/AbstractConfiguration.h>
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
/// Error codes used in this file. They are only declared here (extern); the definitions live elsewhere.
namespace ErrorCodes
{
    /// Thrown when the <method> value of a case is not a recognised compression method name.
    extern const int UNKNOWN_COMPRESSION_METHOD;

    /// Thrown when the compression section contains a child element other than a case.
    extern const int UNKNOWN_ELEMENT_IN_CONFIG;
}
|
|
|
|
|
|
|
|
|
2017-10-13 01:02:16 +00:00
|
|
|
/** Allows you to select the compression settings for the conditions specified in the configuration file.
|
2017-04-16 15:00:33 +00:00
|
|
|
* The config looks like this
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
<compression>
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
<!-- Set of cases. Cases are checked in order; the last case whose conditions are satisfied wins. If no case matches, lz4 is used. -->
|
2017-04-01 07:20:54 +00:00
|
|
|
<case>
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
<!-- Conditions. All must be satisfied simultaneously. Some conditions may not be specified. -->
|
|
|
|
<min_part_size>10000000000</min_part_size> <!-- The minimum size of a part in bytes. -->
|
|
|
|
<min_part_size_ratio>0.01</min_part_size_ratio> <!-- The minimum size of the part relative to all the data in the table. -->
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-16 15:00:33 +00:00
|
|
|
<!-- Which compression method to choose. -->
|
2017-04-01 07:20:54 +00:00
|
|
|
<method>zstd</method>
|
2017-10-13 01:02:16 +00:00
|
|
|
<level>2</level>
|
2017-04-01 07:20:54 +00:00
|
|
|
</case>
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
<case>
|
|
|
|
...
|
|
|
|
</case>
|
|
|
|
</compression>
|
2015-04-17 05:35:53 +00:00
|
|
|
*/
|
2017-10-13 01:02:16 +00:00
|
|
|
class CompressionSettingsSelector
|
2015-04-17 05:35:53 +00:00
|
|
|
{
|
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
struct Element
|
|
|
|
{
|
|
|
|
size_t min_part_size = 0;
|
|
|
|
double min_part_size_ratio = 0;
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionSettings settings = CompressionSettings(CompressionMethod::LZ4);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-10-13 01:02:16 +00:00
|
|
|
static CompressionMethod compressionMethodFromString(const std::string & name)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
if (name == "lz4")
|
2017-10-13 01:02:16 +00:00
|
|
|
return CompressionMethod::LZ4;
|
2017-04-01 07:20:54 +00:00
|
|
|
else if (name == "zstd")
|
2017-10-13 01:02:16 +00:00
|
|
|
return CompressionMethod::ZSTD;
|
2017-07-28 14:14:07 +00:00
|
|
|
else if (name == "none")
|
2017-10-13 01:02:16 +00:00
|
|
|
return CompressionMethod::NONE;
|
2017-04-01 07:20:54 +00:00
|
|
|
else
|
|
|
|
throw Exception("Unknown compression method " + name, ErrorCodes::UNKNOWN_COMPRESSION_METHOD);
|
|
|
|
}
|
|
|
|
|
|
|
|
Element(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
|
|
|
|
{
|
2017-04-12 16:37:19 +00:00
|
|
|
min_part_size = config.getUInt64(config_prefix + ".min_part_size", 0);
|
2017-04-01 07:20:54 +00:00
|
|
|
min_part_size_ratio = config.getDouble(config_prefix + ".min_part_size_ratio", 0);
|
|
|
|
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionMethod method = compressionMethodFromString(config.getString(config_prefix + ".method"));
|
|
|
|
int level = config.getInt64(config_prefix + ".level", CompressionSettings::getDefaultLevel(method));
|
|
|
|
settings = CompressionSettings(method, level);
|
2017-04-01 07:20:54 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
bool check(size_t part_size, double part_size_ratio) const
|
|
|
|
{
|
|
|
|
return part_size >= min_part_size
|
|
|
|
&& part_size_ratio >= min_part_size_ratio;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
std::vector<Element> elements;
|
2015-04-17 05:35:53 +00:00
|
|
|
|
|
|
|
public:
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionSettingsSelector() {} /// Always returns the default method.
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionSettingsSelector(Poco::Util::AbstractConfiguration & config, const std::string & config_prefix)
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
|
|
|
Poco::Util::AbstractConfiguration::Keys keys;
|
|
|
|
config.keys(config_prefix, keys);
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & name : keys)
|
|
|
|
{
|
|
|
|
if (!startsWith(name.data(), "case"))
|
|
|
|
throw Exception("Unknown element in config: " + config_prefix + "." + name + ", must be 'case'", ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG);
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
elements.emplace_back(config, config_prefix + "." + name);
|
|
|
|
}
|
|
|
|
}
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionSettings choose(size_t part_size, double part_size_ratio) const
|
2017-04-01 07:20:54 +00:00
|
|
|
{
|
2017-10-13 01:02:16 +00:00
|
|
|
CompressionSettings res = CompressionSettings(CompressionMethod::LZ4);
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
for (const auto & element : elements)
|
|
|
|
if (element.check(part_size, part_size_ratio))
|
2017-10-13 01:02:16 +00:00
|
|
|
res = element.settings;
|
2015-04-17 05:35:53 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
return res;
|
|
|
|
}
|
2015-04-17 05:35:53 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|