Address comments

This commit is contained in:
avogar 2023-11-20 15:53:28 +00:00
parent f537bad469
commit 081fa9f3de
4 changed files with 22 additions and 8 deletions

View File

@ -80,7 +80,7 @@ namespace SettingsChangesHistory
/// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
{
{"23.10", {{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
{"23.11", {{"input_format_parquet_allow_missing_columns", false, true, "Allow missing columns in Parquet files by default"},
{"input_format_orc_allow_missing_columns", false, true, "Allow missing columns in ORC files by default"},
{"input_format_arrow_allow_missing_columns", false, true, "Allow missing columns in Arrow files by default"}}},
{"23.9", {{"optimize_group_by_constant_keys", false, true, "Optimize group by constant keys by default"},

View File

@ -55,7 +55,14 @@ try
NamesAndTypesList names_and_types;
SchemaInferenceMode mode = context->getSettingsRef().schema_inference_mode;
if (mode == SchemaInferenceMode::UNION && !FormatFactory::instance().checkIfFormatSupportsSubsetOfColumns(format_name, context, format_settings))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns", format_name);
{
String additional_message;
/// Better exception message for WithNames(AndTypes) formats.
if (format_name.ends_with("WithNames") || format_name.ends_with("WithNamesAndTypes"))
additional_message = " (formats -WithNames(AndTypes) support reading subset of columns only when setting input_format_with_names_use_header is enabled)";
throw Exception(ErrorCodes::BAD_ARGUMENTS, "UNION schema inference mode is not supported for format {}, because it doesn't support reading subset of columns{}", format_name, additional_message);
}
if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name))
{

View File

@ -57,11 +57,18 @@ private:
/// use it and won't create a read buffer.
/// For formats that have a schema reader from the data,
/// read buffer will be created by the provided iterator and
/// the schema will be extracted from the data. If schema reader
/// couldn't determine the schema we will try the next read buffer
/// from the provided iterator if it makes sense. If the format doesn't
/// have any schema reader or we couldn't determine the schema,
/// an exception will be thrown.
/// the schema will be extracted from the data. If the format doesn't
/// have any schema reader, an exception will be thrown.
/// Schema reading can be performed in 2 modes depending on the setting schema_inference_mode:
/// 1) Default mode. In this mode ClickHouse assumes that all files have the same schema
/// and tries to infer the schema by reading files one by one until it succeeds.
/// If schema reader couldn't determine the schema for some file, ClickHouse will try the next
/// file (next read buffer from the provided iterator) if it makes sense. If ClickHouse couldn't determine
/// the resulting schema, an exception will be thrown.
/// 2) Union mode. In this mode ClickHouse assumes that files can have different schemas,
/// so it infers the schemas of all files and then unions them into a common schema. In this mode
/// all read buffers from the provided iterator will be used. If ClickHouse couldn't determine
/// the schema for some file, an exception will be thrown.
ColumnsDescription readSchemaFromFormat(
const String & format_name,
const std::optional<FormatSettings> & format_settings,

View File

@ -577,7 +577,7 @@ namespace
element_types.reserve(names_to_types.size());
for (const auto & name : element_names)
{
auto types = names_to_types[name];
auto & types = names_to_types[name];
transformInferredTypesIfNeededImpl<true>(types, settings, json_info);
/// If some element has different types in different tuples, we can't do anything
if (!checkIfTypesAreEqual(types))