diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index 38b29bc6405..2bead318173 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -31,23 +31,35 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; } -String FormatFactory::getOriginalFormatNameIfExists(const String & name) const +bool FormatFactory::exists(const String & name) const { - String case_insensitive_format_name = boost::to_lower_copy(name); - auto it = file_extension_formats.find(case_insensitive_format_name); - if (file_extension_formats.end() != it) - return it->second; - return name; + return dict.find(boost::to_lower_copy(name)) != dict.end(); } const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); if (dict.end() != it) return it->second; throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } +FormatFactory::Creators & FormatFactory::getOrCreateCreators(const String & name) +{ + String lower_case = boost::to_lower_copy(name); + auto it = dict.find(lower_case); + if (dict.end() != it) + { + return it->second; + } + else + { + auto & creators = dict[lower_case]; + creators.name = name; + return creators; + } +} + FormatSettings getFormatSettings(const ContextPtr & context) { const auto & settings = context->getSettingsRef(); @@ -578,7 +590,7 @@ ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader( void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator) { chassert(input_creator); - auto & creators = dict[name]; + auto & creators = getOrCreateCreators(name); if (creators.input_creator || creators.random_access_input_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.input_creator = std::move(input_creator); @@ -589,7 +601,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomAccessInputCreator input_creator) { chassert(input_creator); - auto & creators = dict[name]; + auto & creators = getOrCreateCreators(name); if (creators.input_creator || creators.random_access_input_creator) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name); creators.random_access_input_creator = std::move(input_creator); @@ -599,7 +611,7 @@ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomA void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker) { - auto & target = dict[name].non_trivial_prefix_and_suffix_checker; + auto & target = getOrCreateCreators(name).non_trivial_prefix_and_suffix_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Non trivial prefix and suffix checker {} is already registered", name); target = std::move(non_trivial_prefix_and_suffix_checker); @@ -607,7 +619,7 @@ void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name void FormatFactory::registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker) { - auto & target = dict[name].append_support_checker; + auto & target = getOrCreateCreators(name).append_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Suffix checker {} is already registered", name); target = std::move(append_support_checker); @@ -628,7 +640,7 @@ bool FormatFactory::checkIfFormatSupportAppend(const String & name, const Contex void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator) { - auto & target = dict[name].output_creator; + auto & target = getOrCreateCreators(name).output_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already registered", name); target = std::move(output_creator); @@ -705,7 +717,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd) void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine) { - auto & target = dict[name].file_segmentation_engine_creator; + auto & target = getOrCreateCreators(name).file_segmentation_engine_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine {} is already registered", name); auto creator = [file_segmentation_engine](const FormatSettings &) @@ -717,7 +729,7 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm void FormatFactory::registerFileSegmentationEngineCreator(const String & name, FileSegmentationEngineCreator file_segmentation_engine_creator) { - auto & target = dict[name].file_segmentation_engine_creator; + auto & target = getOrCreateCreators(name).file_segmentation_engine_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine creator {} is already registered", name); target = std::move(file_segmentation_engine_creator); @@ -725,7 +737,7 @@ void FormatFactory::registerFileSegmentationEngineCreator(const String & name, F void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator) { - auto & target = dict[name].schema_reader_creator; + auto & target = getOrCreateCreators(name).schema_reader_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name); target = std::move(schema_reader_creator); @@ -733,7 +745,7 @@ void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreato void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator) { - auto & target = dict[name].external_schema_reader_creator; + auto & target = getOrCreateCreators(name).external_schema_reader_creator; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name); target = std::move(external_schema_reader_creator); @@ -741,7 +753,7 @@ void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSc void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name) { - auto & target = dict[name].supports_parallel_formatting; + auto & target = getOrCreateCreators(name).supports_parallel_formatting; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already marked as supporting parallel formatting", name); target = true; @@ -750,7 +762,7 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) { - auto & target = dict[name].subset_of_columns_support_checker; + auto & target = getOrCreateCreators(name).subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); target = [](const FormatSettings &){ return true; }; @@ -758,7 +770,7 @@ void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name) void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker) { - auto & target = dict[name].subset_of_columns_support_checker; + auto & target = getOrCreateCreators(name).subset_of_columns_support_checker; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name); target = std::move(subset_of_columns_support_checker); @@ -766,7 +778,7 @@ void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, S void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name) { - auto & target = dict[name].prefers_large_blocks; + auto & target = getOrCreateCreators(name).prefers_large_blocks; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as preferring large blocks", name); target = true; @@ -782,7 +794,7 @@ bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name, co void FormatFactory::registerAdditionalInfoForSchemaCacheGetter( const String & name, AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter) { - auto & target = dict[name].additional_info_for_schema_cache_getter; + auto & target = getOrCreateCreators(name).additional_info_for_schema_cache_getter; if (target) throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: additional info for schema cache getter {} is already registered", name); target = std::move(additional_info_for_schema_cache_getter); @@ -800,13 +812,13 @@ String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const bool FormatFactory::isInputFormat(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator); } bool FormatFactory::isOutputFormat(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); return it != dict.end() && it->second.output_creator; } @@ -835,8 +847,8 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const { - auto format_name = getOriginalFormatNameIfExists(name); - if (format_name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order) + auto format_name = boost::to_lower_copy(name); + if (format_name == "parquet" && context->getSettingsRef().input_format_parquet_preserve_order) return false; return true; @@ -844,7 +856,7 @@ bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, cons void FormatFactory::checkFormatName(const String & name) const { - auto it = dict.find(getOriginalFormatNameIfExists(name)); + auto it = dict.find(boost::to_lower_copy(name)); if (it == dict.end()) throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name); } diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 145f6258933..46c1b8ddcdd 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -132,6 +132,7 @@ private: struct Creators { + String name; InputCreator input_creator; RandomAccessInputCreator random_access_input_creator; OutputCreator output_creator; @@ -263,12 +264,14 @@ public: /// Check that format with specified name exists and throw an exception otherwise. void checkFormatName(const String & name) const; + bool exists(const String & name) const; private: FormatsDictionary dict; - FileExtensionFormats file_extension_formats; // Also used as a case-insensitive format_name mapping. + FileExtensionFormats file_extension_formats; const Creators & getCreators(const String & name) const; + Creators & getOrCreateCreators(const String & name); // Creates a ReadBuffer to give to an input format. Returns nullptr if we should use `buf` directly. std::unique_ptr wrapReadBufferIfNeeded( @@ -279,9 +282,6 @@ private: const Settings & settings, bool is_remote_fs, size_t max_download_threads) const; - - // Mapping case-insensitive format_name to a key in FormatsDictionary if exists. - String getOriginalFormatNameIfExists(const String & name) const; }; } diff --git a/src/Storages/StorageAzureBlob.cpp b/src/Storages/StorageAzureBlob.cpp index 94bb5d3cf60..d484fefc46f 100644 --- a/src/Storages/StorageAzureBlob.cpp +++ b/src/Storages/StorageAzureBlob.cpp @@ -68,7 +68,6 @@ namespace ErrorCodes extern const int CANNOT_DETECT_FORMAT; extern const int LOGICAL_ERROR; extern const int NOT_IMPLEMENTED; - extern const int UNKNOWN_FORMAT; } namespace @@ -167,7 +166,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine auto is_format_arg = [] (const std::string & s) -> bool { - return s == "auto" || FormatFactory::instance().checkFormatName(s); + return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) @@ -200,7 +199,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 6) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } @@ -218,7 +217,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine else if (engine_args.size() == 7) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "format/account_name"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments"); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 07f68072bb6..e59a09efb20 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -133,7 +133,6 @@ namespace ErrorCodes extern const int NOT_IMPLEMENTED; extern const int CANNOT_COMPILE_REGEXP; extern const int FILE_DOESNT_EXIST; - extern const int UNKNOWN_FORMAT; } @@ -1532,7 +1531,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C no_sign_request = true; engine_args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) engine_args_to_idx = {{"format", 1}, {"compression_method", 2}}; else engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -1553,7 +1552,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -1569,7 +1568,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C else if (count == 5) { auto fourth_arg = checkAndGetLiteralArgument(engine_args[3], "session_token/format"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}}; } diff --git a/src/Storages/System/StorageSystemFormats.cpp b/src/Storages/System/StorageSystemFormats.cpp index a360971e1f7..849e4eadf78 100644 --- a/src/Storages/System/StorageSystemFormats.cpp +++ b/src/Storages/System/StorageSystemFormats.cpp @@ -23,7 +23,8 @@ void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, co const auto & formats = FormatFactory::instance().getAllFormats(); for (const auto & pair : formats) { - const auto & [format_name, creators] = pair; + const auto & [name, creators] = pair; + String format_name = creators.name; UInt64 has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr); UInt64 has_output_format(creators.output_creator != nullptr); UInt64 supports_parallel_parsing(creators.file_segmentation_engine_creator != nullptr || creators.random_access_input_creator != nullptr); diff --git a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp index ac96364b5bd..8f558adb09b 100644 --- a/src/TableFunctions/TableFunctionAzureBlobStorage.cpp +++ b/src/TableFunctions/TableFunctionAzureBlobStorage.cpp @@ -32,7 +32,6 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int BAD_ARGUMENTS; - extern const int UNKNOWN_FORMAT; } namespace @@ -81,7 +80,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const configuration.blob_path = checkAndGetLiteralArgument(engine_args[2], "blobpath"); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; if (engine_args.size() == 4) { @@ -208,7 +207,7 @@ void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(AS arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); auto is_format_arg - = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); }; + = [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); }; /// (connection_string, container_name, blobpath) if (args.size() == 3) diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 04182fa4e68..c00b1e2e3e5 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -31,7 +31,6 @@ namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int LOGICAL_ERROR; - extern const int UNKNOWN_FORMAT; } @@ -101,7 +100,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}}; } - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) args_to_idx = {{"format", 1}, {"structure", 2}}; else args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}}; @@ -120,14 +119,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context no_sign_request = true; args_to_idx = {{"format", 2}, {"structure", 3}}; } - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}}; } else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}}; } @@ -154,7 +153,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}; } @@ -171,7 +170,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); - if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg)) + if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg)) { args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}; } @@ -301,7 +300,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args.push_back(structure_literal); } /// s3(source, format, structure) - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal; @@ -331,7 +330,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con args[3] = structure_literal; } /// s3(source, format, structure, compression_method) - else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg)) + else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal;