make a separate settings collection + some cleanup

This commit is contained in:
Alexander Kuzmenkov 2020-11-07 11:53:39 +03:00
parent 2efbcbaa14
commit 3c60f6cec2
10 changed files with 124 additions and 68 deletions

View File

@ -130,4 +130,6 @@ void Settings::checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfigura
} }
} }
IMPLEMENT_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
} }

View File

@ -518,4 +518,13 @@ struct Settings : public BaseSettings<SettingsTraits>
static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path); static void checkNoSettingNamesAtTopLevel(const Poco::Util::AbstractConfiguration & config, const String & config_path);
}; };
/*
 * User-specified file format settings for File and URL engines.
 * The traits type is declared here from the FORMAT_FACTORY_SETTINGS list;
 * the struct itself adds no members on top of BaseSettings.
 */
DECLARE_SETTINGS_TRAITS(FormatFactorySettingsTraits, FORMAT_FACTORY_SETTINGS)
struct FormatFactorySettings : public BaseSettings<FormatFactorySettingsTraits>
{
};
} }

View File

@ -40,11 +40,17 @@ const FormatFactory::Creators & FormatFactory::getCreators(const String & name)
throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT); throw Exception("Unknown format " + name, ErrorCodes::UNKNOWN_FORMAT);
} }
FormatSettings getFormatSettings(const Context & context) FormatSettings getFormatSettings(const Context & context)
{ {
const auto & settings = context.getSettingsRef(); const auto & settings = context.getSettingsRef();
return getFormatSettings(context, settings);
}
template <typename Settings>
FormatSettings getFormatSettings(const Context & context,
const Settings & settings)
{
FormatSettings format_settings; FormatSettings format_settings;
format_settings.avro.allow_missing_fields = settings.input_format_avro_allow_missing_fields; format_settings.avro.allow_missing_fields = settings.input_format_avro_allow_missing_fields;
@ -114,6 +120,14 @@ FormatSettings getFormatSettings(const Context & context)
return format_settings; return format_settings;
} }
/// Explicit instantiations of the getFormatSettings template for the two
/// settings collections it is used with (FormatFactorySettings and Settings).
/// The template body is defined in this file, so these are spelled out here.
template
FormatSettings getFormatSettings<FormatFactorySettings>(const Context & context,
const FormatFactorySettings & settings);
template
FormatSettings getFormatSettings<Settings>(const Context & context,
const Settings & settings);
BlockInputStreamPtr FormatFactory::getInput( BlockInputStreamPtr FormatFactory::getInput(
const String & name, const String & name,
@ -121,23 +135,22 @@ BlockInputStreamPtr FormatFactory::getInput(
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
std::optional<FormatSettings> format_settings) const const std::optional<FormatSettings> & _format_settings) const
{ {
if (name == "Native") if (name == "Native")
return std::make_shared<NativeBlockInputStream>(buf, sample, 0); return std::make_shared<NativeBlockInputStream>(buf, sample, 0);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
if (!getCreators(name).input_processor_creator) if (!getCreators(name).input_processor_creator)
{ {
const auto & input_getter = getCreators(name).input_creator; const auto & input_getter = getCreators(name).input_creator;
if (!input_getter) if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
if (!format_settings)
{
format_settings = getFormatSettings(context);
}
return input_getter(buf, sample, max_block_size, {}, *format_settings); return input_getter(buf, sample, max_block_size, {}, format_settings);
} }
const Settings & settings = context.getSettingsRef(); const Settings & settings = context.getSettingsRef();
@ -163,21 +176,16 @@ BlockInputStreamPtr FormatFactory::getInput(
if (!input_getter) if (!input_getter)
throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT); throw Exception("Format " + name + " is not suitable for input", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_INPUT);
if (!format_settings)
{
format_settings = getFormatSettings(context);
}
RowInputFormatParams row_input_format_params; RowInputFormatParams row_input_format_params;
row_input_format_params.max_block_size = max_block_size; row_input_format_params.max_block_size = max_block_size;
row_input_format_params.allow_errors_num = format_settings->input_allow_errors_num; row_input_format_params.allow_errors_num = format_settings.input_allow_errors_num;
row_input_format_params.allow_errors_ratio = format_settings->input_allow_errors_ratio; row_input_format_params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
row_input_format_params.max_execution_time = settings.max_execution_time; row_input_format_params.max_execution_time = settings.max_execution_time;
row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode; row_input_format_params.timeout_overflow_mode = settings.timeout_overflow_mode;
auto input_creator_params = auto input_creator_params =
ParallelParsingBlockInputStream::InputCreatorParams{sample, ParallelParsingBlockInputStream::InputCreatorParams{sample,
row_input_format_params, *format_settings}; row_input_format_params, format_settings};
ParallelParsingBlockInputStream::Params params{buf, input_getter, ParallelParsingBlockInputStream::Params params{buf, input_getter,
input_creator_params, file_segmentation_engine, input_creator_params, file_segmentation_engine,
static_cast<int>(settings.max_threads), static_cast<int>(settings.max_threads),
@ -193,24 +201,22 @@ BlockInputStreamPtr FormatFactory::getInput(
BlockOutputStreamPtr FormatFactory::getOutput(const String & name, BlockOutputStreamPtr FormatFactory::getOutput(const String & name,
WriteBuffer & buf, const Block & sample, const Context & context, WriteBuffer & buf, const Block & sample, const Context & context,
WriteCallback callback, std::optional<FormatSettings> format_settings) const WriteCallback callback, const std::optional<FormatSettings> & _format_settings) const
{ {
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
if (!getCreators(name).output_processor_creator) if (!getCreators(name).output_processor_creator)
{ {
const auto & output_getter = getCreators(name).output_creator; const auto & output_getter = getCreators(name).output_creator;
if (!output_getter) if (!output_getter)
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
if (!format_settings)
{
format_settings = getFormatSettings(context);
}
/** Materialization is needed, because formats can use the functions `IDataType`, /** Materialization is needed, because formats can use the functions `IDataType`,
* which only work with full columns. * which only work with full columns.
*/ */
return std::make_shared<MaterializingBlockOutputStream>( return std::make_shared<MaterializingBlockOutputStream>(
output_getter(buf, sample, std::move(callback), *format_settings), output_getter(buf, sample, std::move(callback), format_settings),
sample); sample);
} }
@ -227,7 +233,7 @@ InputFormatPtr FormatFactory::getInputFormat(
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
std::optional<FormatSettings> format_settings) const const std::optional<FormatSettings> & _format_settings) const
{ {
const auto & input_getter = getCreators(name).input_processor_creator; const auto & input_getter = getCreators(name).input_processor_creator;
if (!input_getter) if (!input_getter)
@ -235,19 +241,18 @@ InputFormatPtr FormatFactory::getInputFormat(
const Settings & settings = context.getSettingsRef(); const Settings & settings = context.getSettingsRef();
if (!format_settings) auto format_settings = _format_settings
{ ? *_format_settings : getFormatSettings(context);
format_settings = getFormatSettings(context);
}
RowInputFormatParams params; RowInputFormatParams params;
params.max_block_size = max_block_size; params.max_block_size = max_block_size;
params.allow_errors_num = format_settings->input_allow_errors_num; params.allow_errors_num = format_settings.input_allow_errors_num;
params.allow_errors_ratio = format_settings->input_allow_errors_ratio; params.allow_errors_ratio = format_settings.input_allow_errors_ratio;
params.max_execution_time = settings.max_execution_time; params.max_execution_time = settings.max_execution_time;
params.timeout_overflow_mode = settings.timeout_overflow_mode; params.timeout_overflow_mode = settings.timeout_overflow_mode;
auto format = input_getter(buf, sample, params, *format_settings); auto format = input_getter(buf, sample, params, format_settings);
/// It's a kludge. Because I cannot remove context from values format. /// It's a kludge. Because I cannot remove context from values format.
if (auto * values = typeid_cast<ValuesBlockInputFormat *>(format.get())) if (auto * values = typeid_cast<ValuesBlockInputFormat *>(format.get()))
@ -260,27 +265,25 @@ InputFormatPtr FormatFactory::getInputFormat(
OutputFormatPtr FormatFactory::getOutputFormat( OutputFormatPtr FormatFactory::getOutputFormat(
const String & name, WriteBuffer & buf, const Block & sample, const String & name, WriteBuffer & buf, const Block & sample,
const Context & context, WriteCallback callback, const Context & context, WriteCallback callback,
std::optional<FormatSettings> format_settings) const const std::optional<FormatSettings> & _format_settings) const
{ {
const auto & output_getter = getCreators(name).output_processor_creator; const auto & output_getter = getCreators(name).output_processor_creator;
if (!output_getter) if (!output_getter)
throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT); throw Exception("Format " + name + " is not suitable for output", ErrorCodes::FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT);
if (!format_settings)
{
format_settings = getFormatSettings(context);
}
RowOutputFormatParams params; RowOutputFormatParams params;
params.callback = std::move(callback); params.callback = std::move(callback);
auto format_settings = _format_settings
? *_format_settings : getFormatSettings(context);
/** TODO: Materialization is needed, because formats can use the functions `IDataType`, /** TODO: Materialization is needed, because formats can use the functions `IDataType`,
* which only work with full columns. * which only work with full columns.
*/ */
auto format = output_getter(buf, sample, params, *format_settings); auto format = output_getter(buf, sample, params, format_settings);
/// Enable auto-flush for streaming mode. Currently it is needed by INSERT WATCH query. /// Enable auto-flush for streaming mode. Currently it is needed by INSERT WATCH query.
if (format_settings->enable_streaming) if (format_settings.enable_streaming)
format->setAutoFlush(); format->setAutoFlush();
/// It's a kludge. Because I cannot remove context from MySQL format. /// It's a kludge. Because I cannot remove context from MySQL format.

View File

@ -18,6 +18,7 @@ class Block;
class Context; class Context;
struct FormatSettings; struct FormatSettings;
struct Settings; struct Settings;
struct FormatFactorySettings;
class ReadBuffer; class ReadBuffer;
class WriteBuffer; class WriteBuffer;
@ -36,6 +37,10 @@ using OutputFormatPtr = std::shared_ptr<IOutputFormat>;
FormatSettings getFormatSettings(const Context & context); FormatSettings getFormatSettings(const Context & context);
template <typename T>
FormatSettings getFormatSettings(const Context & context,
const T & settings);
/** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format. /** Allows to create an IBlockInputStream or IBlockOutputStream by the name of the format.
* Note: format and compression are independent things. * Note: format and compression are independent things.
*/ */
@ -108,11 +113,11 @@ public:
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
std::optional<FormatSettings> format_settings = std::nullopt) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf, BlockOutputStreamPtr getOutput(const String & name, WriteBuffer & buf,
const Block & sample, const Context & context, WriteCallback callback = {}, const Block & sample, const Context & context, WriteCallback callback = {},
std::optional<FormatSettings> format_settings = std::nullopt) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
InputFormatPtr getInputFormat( InputFormatPtr getInputFormat(
const String & name, const String & name,
@ -120,12 +125,12 @@ public:
const Block & sample, const Block & sample,
const Context & context, const Context & context,
UInt64 max_block_size, UInt64 max_block_size,
std::optional<FormatSettings> format_settings = std::nullopt) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
OutputFormatPtr getOutputFormat( OutputFormatPtr getOutputFormat(
const String & name, WriteBuffer & buf, const Block & sample, const String & name, WriteBuffer & buf, const Block & sample,
const Context & context, WriteCallback callback = {}, const Context & context, WriteCallback callback = {},
std::optional<FormatSettings> format_settings = std::nullopt) const; const std::optional<FormatSettings> & format_settings = std::nullopt) const;
/// Register format by its name. /// Register format by its name.
void registerInputFormat(const String & name, InputCreator input_creator); void registerInputFormat(const String & name, InputCreator input_creator);

View File

@ -6,10 +6,16 @@
namespace DB namespace DB
{ {
/** Various tweaks for input/output formats. /**
* Text serialization/deserialization of data types also depend on some of these settings. * Various tweaks for input/output formats. Text serialization/deserialization
* NOTE Parameters for unrelated formats and unrelated data types * of data types also depend on some of these settings. It is different from
* are collected in this struct - it prevents modularity, but they are difficult to separate. * FormatFactorySettings in that it has all necessary user-provided settings
* combined with information from context etc, that we can use directly during
* serialization. In contrast, FormatFactorySettings' job is to reflect the
* changes made to user-visible format settings, such as when tweaking the
* format for File engine.
* NOTE Parameters for unrelated formats and unrelated data types are collected
* in this struct - it prevents modularity, but they are difficult to separate.
*/ */
struct FormatSettings struct FormatSettings
{ {
@ -113,13 +119,11 @@ struct FormatSettings
{ {
bool write_row_delimiters = true; bool write_row_delimiters = true;
/** /**
* Some buffers (kafka / rabbit) split the rows internally using callback * Some buffers (kafka / rabbit) split the rows internally using callback,
* so we can push there formats without framing / delimiters (like * and always send one row per message, so we can push there formats
* ProtobufSingle). In other cases you can't write more than single row * without framing / delimiters (like ProtobufSingle). In other cases,
* in unframed format. * we have to enforce exporting at most one row in the format output,
* Not sure we need this parameter at all, it only serves as an additional * because Protobuf without delimiters is not generally useful.
* safety check in ProtobufSingle format, but exporting constant-size
* records w/o delimiters might be generally useful, not only for Kafka.
*/ */
bool allow_many_rows_no_delimiters = false; bool allow_many_rows_no_delimiters = false;
} protobuf; } protobuf;

View File

@ -452,7 +452,7 @@ public:
const StorageMetadataPtr & metadata_snapshot_, const StorageMetadataPtr & metadata_snapshot_,
const CompressionMethod compression_method, const CompressionMethod compression_method,
const Context & context, const Context & context,
std::optional<FormatSettings> format_settings) const std::optional<FormatSettings> & format_settings)
: storage(storage_) : storage(storage_)
, metadata_snapshot(metadata_snapshot_) , metadata_snapshot(metadata_snapshot_)
, lock(storage.rwlock) , lock(storage.rwlock)
@ -628,11 +628,25 @@ void registerStorageFile(StorageFactory & factory)
// session and user are ignored. // session and user are ignored.
if (factory_args.storage_def->settings) if (factory_args.storage_def->settings)
{ {
Context global_context_copy = factory_args.context; FormatFactorySettings user_format_settings;
global_context_copy.applySettingsChanges(
// Apply changed settings from global context, but ignore the
// unknown ones, because we only have the format settings here.
const auto & changes = factory_args.context.getSettingsRef().changes();
for (const auto & change : changes)
{
if (user_format_settings.has(change.name))
{
user_format_settings.set(change.name, change.value);
}
}
// Apply changes from SETTINGS clause, with validation.
user_format_settings.applyChanges(
factory_args.storage_def->settings->changes); factory_args.storage_def->settings->changes);
storage_args.format_settings = getFormatSettings( storage_args.format_settings = getFormatSettings(
global_context_copy); factory_args.context, user_format_settings);
} }
else else
{ {

View File

@ -33,7 +33,7 @@ IStorageURLBase::IStorageURLBase(
const Context & context_, const Context & context_,
const StorageID & table_id_, const StorageID & table_id_,
const String & format_name_, const String & format_name_,
std::optional<FormatSettings> format_settings_, const std::optional<FormatSettings> & format_settings_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
const String & compression_method_) const String & compression_method_)
@ -61,7 +61,7 @@ namespace
const std::string & method, const std::string & method,
std::function<void(std::ostream &)> callback, std::function<void(std::ostream &)> callback,
const String & format, const String & format,
std::optional<FormatSettings> format_settings, const std::optional<FormatSettings> & format_settings,
String name_, String name_,
const Block & sample_block, const Block & sample_block,
const Context & context, const Context & context,
@ -140,7 +140,7 @@ namespace
StorageURLBlockOutputStream::StorageURLBlockOutputStream(const Poco::URI & uri, StorageURLBlockOutputStream::StorageURLBlockOutputStream(const Poco::URI & uri,
const String & format, const String & format,
std::optional<FormatSettings> format_settings, const std::optional<FormatSettings> & format_settings,
const Block & sample_block_, const Block & sample_block_,
const Context & context, const Context & context,
const ConnectionTimeouts & timeouts, const ConnectionTimeouts & timeouts,
@ -276,10 +276,24 @@ void registerStorageURL(StorageFactory & factory)
FormatSettings format_settings; FormatSettings format_settings;
if (args.storage_def->settings) if (args.storage_def->settings)
{ {
Context global_context_copy = args.context; FormatFactorySettings user_format_settings;
global_context_copy.applySettingsChanges(
args.storage_def->settings->changes); // Apply changed settings from global context, but ignore the
format_settings = getFormatSettings(global_context_copy); // unknown ones, because we only have the format settings here.
const auto & changes = args.context.getSettingsRef().changes();
for (const auto & change : changes)
{
if (user_format_settings.has(change.name))
{
user_format_settings.set(change.name, change.value);
}
}
// Apply changes from SETTINGS clause, with validation.
user_format_settings.applyChanges(args.storage_def->settings->changes);
format_settings = getFormatSettings(args.context,
user_format_settings);
} }
else else
{ {

View File

@ -36,7 +36,7 @@ protected:
const Context & context_, const Context & context_,
const StorageID & id_, const StorageID & id_,
const String & format_name_, const String & format_name_,
std::optional<FormatSettings> format_settings_, const std::optional<FormatSettings> & format_settings_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
const String & compression_method_); const String & compression_method_);
@ -79,7 +79,7 @@ public:
StorageURLBlockOutputStream( StorageURLBlockOutputStream(
const Poco::URI & uri, const Poco::URI & uri,
const String & format, const String & format,
std::optional<FormatSettings> format_settings, const std::optional<FormatSettings> & format_settings,
const Block & sample_block_, const Block & sample_block_,
const Context & context, const Context & context,
const ConnectionTimeouts & timeouts, const ConnectionTimeouts & timeouts,
@ -107,7 +107,7 @@ public:
StorageURL(const Poco::URI & uri_, StorageURL(const Poco::URI & uri_,
const StorageID & table_id_, const StorageID & table_id_,
const String & format_name_, const String & format_name_,
std::optional<FormatSettings> format_settings_, const std::optional<FormatSettings> & format_settings_,
const ColumnsDescription & columns_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, const ConstraintsDescription & constraints_,
Context & context_, Context & context_,

View File

@ -1,2 +1,3 @@
1|1 1|1
1 1 1 1
1 2

View File

@ -21,3 +21,7 @@ ${CLICKHOUSE_LOCAL} --query "
create table t(a int, b int) engine File(CSV, '$the_file') settings format_csv_delimiter = '|'; create table t(a int, b int) engine File(CSV, '$the_file') settings format_csv_delimiter = '|';
select * from t; select * from t;
" "
# Also check that the File engine implicitly created by clickhouse-local
# uses the modified settings.
${CLICKHOUSE_LOCAL} --structure="a int, b int" --input-format=CSV --format_csv_delimiter="|" --query="select * from table" <<<"1|2"