Use lower-case format names as keys in the FormatFactory dict

This commit is contained in:
HowePa 2024-02-27 00:48:34 +08:00
parent ea89fa0de9
commit dbd8d35f01
7 changed files with 59 additions and 50 deletions

View File

@ -31,23 +31,35 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
String FormatFactory::getOriginalFormatNameIfExists(const String & name) const
bool FormatFactory::exists(const String & name) const
{
String case_insensitive_format_name = boost::to_lower_copy(name);
auto it = file_extension_formats.find(case_insensitive_format_name);
if (file_extension_formats.end() != it)
return it->second;
return name;
return dict.find(boost::to_lower_copy(name)) != dict.end();
}
/// Returns the Creators entry stored in `dict` for the format `name`.
/// The dictionary is probed with the lower-cased name, so the lookup does not depend on the case of `name`.
/// Throws UNKNOWN_FORMAT (with the name as passed by the caller) when no entry is found.
const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const
{
auto it = dict.find(getOriginalFormatNameIfExists(name));
auto it = dict.find(boost::to_lower_copy(name));
if (dict.end() != it)
return it->second;
throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name);
}
/// Returns the Creators entry for the format `name`, creating it on first use.
///
/// Entries in `dict` are keyed by the lower-cased format name, so names that differ
/// only by case share a single entry. When the entry is created here, the name exactly
/// as passed by the caller is stored in `Creators::name`; later calls that hit the
/// existing entry leave that stored name untouched.
FormatFactory::Creators & FormatFactory::getOrCreateCreators(const String & name)
{
    /// try_emplace performs the lookup and the (default-constructed) insertion in one pass,
    /// instead of a find() followed by a second lookup through operator[].
    auto [it, inserted] = dict.try_emplace(boost::to_lower_copy(name));
    if (inserted)
        it->second.name = name;
    return it->second;
}
FormatSettings getFormatSettings(const ContextPtr & context)
{
const auto & settings = context->getSettingsRef();
@ -578,7 +590,7 @@ ExternalSchemaReaderPtr FormatFactory::getExternalSchemaReader(
void FormatFactory::registerInputFormat(const String & name, InputCreator input_creator)
{
chassert(input_creator);
auto & creators = dict[name];
auto & creators = getOrCreateCreators(name);
if (creators.input_creator || creators.random_access_input_creator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name);
creators.input_creator = std::move(input_creator);
@ -589,7 +601,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_
void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomAccessInputCreator input_creator)
{
chassert(input_creator);
auto & creators = dict[name];
auto & creators = getOrCreateCreators(name);
if (creators.input_creator || creators.random_access_input_creator)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Input format {} is already registered", name);
creators.random_access_input_creator = std::move(input_creator);
@ -599,7 +611,7 @@ void FormatFactory::registerRandomAccessInputFormat(const String & name, RandomA
void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker)
{
auto & target = dict[name].non_trivial_prefix_and_suffix_checker;
auto & target = getOrCreateCreators(name).non_trivial_prefix_and_suffix_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Non trivial prefix and suffix checker {} is already registered", name);
target = std::move(non_trivial_prefix_and_suffix_checker);
@ -607,7 +619,7 @@ void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name
void FormatFactory::registerAppendSupportChecker(const String & name, AppendSupportChecker append_support_checker)
{
auto & target = dict[name].append_support_checker;
auto & target = getOrCreateCreators(name).append_support_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Suffix checker {} is already registered", name);
target = std::move(append_support_checker);
@ -628,7 +640,7 @@ bool FormatFactory::checkIfFormatSupportAppend(const String & name, const Contex
void FormatFactory::registerOutputFormat(const String & name, OutputCreator output_creator)
{
auto & target = dict[name].output_creator;
auto & target = getOrCreateCreators(name).output_creator;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already registered", name);
target = std::move(output_creator);
@ -705,7 +717,7 @@ String FormatFactory::getFormatFromFileDescriptor(int fd)
void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine)
{
auto & target = dict[name].file_segmentation_engine_creator;
auto & target = getOrCreateCreators(name).file_segmentation_engine_creator;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine {} is already registered", name);
auto creator = [file_segmentation_engine](const FormatSettings &)
@ -717,7 +729,7 @@ void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegm
void FormatFactory::registerFileSegmentationEngineCreator(const String & name, FileSegmentationEngineCreator file_segmentation_engine_creator)
{
auto & target = dict[name].file_segmentation_engine_creator;
auto & target = getOrCreateCreators(name).file_segmentation_engine_creator;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: File segmentation engine creator {} is already registered", name);
target = std::move(file_segmentation_engine_creator);
@ -725,7 +737,7 @@ void FormatFactory::registerFileSegmentationEngineCreator(const String & name, F
void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator)
{
auto & target = dict[name].schema_reader_creator;
auto & target = getOrCreateCreators(name).schema_reader_creator;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name);
target = std::move(schema_reader_creator);
@ -733,7 +745,7 @@ void FormatFactory::registerSchemaReader(const String & name, SchemaReaderCreato
void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSchemaReaderCreator external_schema_reader_creator)
{
auto & target = dict[name].external_schema_reader_creator;
auto & target = getOrCreateCreators(name).external_schema_reader_creator;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Schema reader {} is already registered", name);
target = std::move(external_schema_reader_creator);
@ -741,7 +753,7 @@ void FormatFactory::registerExternalSchemaReader(const String & name, ExternalSc
void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & name)
{
auto & target = dict[name].supports_parallel_formatting;
auto & target = getOrCreateCreators(name).supports_parallel_formatting;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Output format {} is already marked as supporting parallel formatting", name);
target = true;
@ -750,7 +762,7 @@ void FormatFactory::markOutputFormatSupportsParallelFormatting(const String & na
void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name)
{
auto & target = dict[name].subset_of_columns_support_checker;
auto & target = getOrCreateCreators(name).subset_of_columns_support_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name);
target = [](const FormatSettings &){ return true; };
@ -758,7 +770,7 @@ void FormatFactory::markFormatSupportsSubsetOfColumns(const String & name)
void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, SubsetOfColumnsSupportChecker subset_of_columns_support_checker)
{
auto & target = dict[name].subset_of_columns_support_checker;
auto & target = getOrCreateCreators(name).subset_of_columns_support_checker;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as supporting subset of columns", name);
target = std::move(subset_of_columns_support_checker);
@ -766,7 +778,7 @@ void FormatFactory::registerSubsetOfColumnsSupportChecker(const String & name, S
void FormatFactory::markOutputFormatPrefersLargeBlocks(const String & name)
{
auto & target = dict[name].prefers_large_blocks;
auto & target = getOrCreateCreators(name).prefers_large_blocks;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: Format {} is already marked as preferring large blocks", name);
target = true;
@ -782,7 +794,7 @@ bool FormatFactory::checkIfFormatSupportsSubsetOfColumns(const String & name, co
void FormatFactory::registerAdditionalInfoForSchemaCacheGetter(
const String & name, AdditionalInfoForSchemaCacheGetter additional_info_for_schema_cache_getter)
{
auto & target = dict[name].additional_info_for_schema_cache_getter;
auto & target = getOrCreateCreators(name).additional_info_for_schema_cache_getter;
if (target)
throw Exception(ErrorCodes::LOGICAL_ERROR, "FormatFactory: additional info for schema cache getter {} is already registered", name);
target = std::move(additional_info_for_schema_cache_getter);
@ -800,13 +812,13 @@ String FormatFactory::getAdditionalInfoForSchemaCache(const String & name, const
/// Returns true when `name` has an entry in `dict` and that entry has either
/// an input creator or a random-access input creator set; false otherwise (never throws for unknown names).
bool FormatFactory::isInputFormat(const String & name) const
{
auto it = dict.find(getOriginalFormatNameIfExists(name));
auto it = dict.find(boost::to_lower_copy(name));
return it != dict.end() && (it->second.input_creator || it->second.random_access_input_creator);
}
/// Returns true when `name` has an entry in `dict` and that entry has an output creator set;
/// false otherwise (never throws for unknown names).
bool FormatFactory::isOutputFormat(const String & name) const
{
auto it = dict.find(getOriginalFormatNameIfExists(name));
auto it = dict.find(boost::to_lower_copy(name));
return it != dict.end() && it->second.output_creator;
}
@ -835,8 +847,8 @@ bool FormatFactory::checkIfOutputFormatPrefersLargeBlocks(const String & name) c
bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, const ContextPtr & context) const
{
auto format_name = getOriginalFormatNameIfExists(name);
if (format_name == "Parquet" && context->getSettingsRef().input_format_parquet_preserve_order)
auto format_name = boost::to_lower_copy(name);
if (format_name == "parquet" && context->getSettingsRef().input_format_parquet_preserve_order)
return false;
return true;
@ -844,7 +856,7 @@ bool FormatFactory::checkParallelizeOutputAfterReading(const String & name, cons
/// Validates that `name` has an entry in `dict`.
/// Returns normally when the entry exists; throws UNKNOWN_FORMAT otherwise.
/// The function returns void, so it cannot be used as a boolean predicate.
void FormatFactory::checkFormatName(const String & name) const
{
auto it = dict.find(getOriginalFormatNameIfExists(name));
auto it = dict.find(boost::to_lower_copy(name));
if (it == dict.end())
throw Exception(ErrorCodes::UNKNOWN_FORMAT, "Unknown format {}", name);
}

View File

@ -132,6 +132,7 @@ private:
struct Creators
{
String name;
InputCreator input_creator;
RandomAccessInputCreator random_access_input_creator;
OutputCreator output_creator;
@ -263,12 +264,14 @@ public:
/// Check that format with specified name exists and throw an exception otherwise.
void checkFormatName(const String & name) const;
bool exists(const String & name) const;
private:
FormatsDictionary dict;
FileExtensionFormats file_extension_formats; // Also used as a case-insensitive format_name mapping.
FileExtensionFormats file_extension_formats;
const Creators & getCreators(const String & name) const;
Creators & getOrCreateCreators(const String & name);
// Creates a ReadBuffer to give to an input format. Returns nullptr if we should use `buf` directly.
std::unique_ptr<ReadBuffer> wrapReadBufferIfNeeded(
@ -279,9 +282,6 @@ private:
const Settings & settings,
bool is_remote_fs,
size_t max_download_threads) const;
// Mapping case-insensitive format_name to a key in FormatsDictionary if exists.
String getOriginalFormatNameIfExists(const String & name) const;
};
}

View File

@ -68,7 +68,6 @@ namespace ErrorCodes
extern const int CANNOT_DETECT_FORMAT;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int UNKNOWN_FORMAT;
}
namespace
@ -167,7 +166,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine
auto is_format_arg = [] (const std::string & s) -> bool
{
return s == "auto" || FormatFactory::instance().checkFormatName(s);
return s == "auto" || FormatFactory::instance().exists(s);
};
if (engine_args.size() == 4)
@ -200,7 +199,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine
else if (engine_args.size() == 6)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments");
}
@ -218,7 +217,7 @@ StorageAzureBlob::Configuration StorageAzureBlob::getConfiguration(ASTs & engine
else if (engine_args.size() == 7)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "format/account_name");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Format and compression must be last arguments");
}

View File

@ -133,7 +133,6 @@ namespace ErrorCodes
extern const int NOT_IMPLEMENTED;
extern const int CANNOT_COMPILE_REGEXP;
extern const int FILE_DOESNT_EXIST;
extern const int UNKNOWN_FORMAT;
}
@ -1532,7 +1531,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
no_sign_request = true;
engine_args_to_idx = {{"format", 2}};
}
else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg))
else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
engine_args_to_idx = {{"format", 1}, {"compression_method", 2}};
else
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}};
@ -1553,7 +1552,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
else
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "session_token/format");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
}
@ -1569,7 +1568,7 @@ StorageS3::Configuration StorageS3::getConfiguration(ASTs & engine_args, const C
else if (count == 5)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(engine_args[3], "session_token/format");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
engine_args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"compression", 4}};
}

View File

@ -23,7 +23,8 @@ void StorageSystemFormats::fillData(MutableColumns & res_columns, ContextPtr, co
const auto & formats = FormatFactory::instance().getAllFormats();
for (const auto & pair : formats)
{
const auto & [format_name, creators] = pair;
const auto & [name, creators] = pair;
String format_name = creators.name;
UInt64 has_input_format(creators.input_creator != nullptr || creators.random_access_input_creator != nullptr);
UInt64 has_output_format(creators.output_creator != nullptr);
UInt64 supports_parallel_parsing(creators.file_segmentation_engine_creator != nullptr || creators.random_access_input_creator != nullptr);

View File

@ -32,7 +32,6 @@ namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int BAD_ARGUMENTS;
extern const int UNKNOWN_FORMAT;
}
namespace
@ -81,7 +80,7 @@ void TableFunctionAzureBlobStorage::parseArgumentsImpl(ASTs & engine_args, const
configuration.blob_path = checkAndGetLiteralArgument<String>(engine_args[2], "blobpath");
auto is_format_arg
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); };
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); };
if (engine_args.size() == 4)
{
@ -208,7 +207,7 @@ void TableFunctionAzureBlobStorage::updateStructureAndFormatArgumentsIfNeeded(AS
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
auto is_format_arg
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().checkFormatName(s); };
= [](const std::string & s) -> bool { return s == "auto" || FormatFactory::instance().exists(s); };
/// (connection_string, container_name, blobpath)
if (args.size() == 3)

View File

@ -31,7 +31,6 @@ namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_FORMAT;
}
@ -101,7 +100,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
no_sign_request = true;
args_to_idx = {{"format", 2}};
}
else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg))
else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
args_to_idx = {{"format", 1}, {"structure", 2}};
else
args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}};
@ -120,14 +119,14 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
no_sign_request = true;
args_to_idx = {{"format", 2}, {"structure", 3}};
}
else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg))
else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
{
args_to_idx = {{"format", 1}, {"structure", 2}, {"compression_method", 3}};
}
else
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/session_token");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}};
}
@ -154,7 +153,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
else
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/session_token");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}};
}
@ -171,7 +170,7 @@ void TableFunctionS3::parseArgumentsImpl(ASTs & args, const ContextPtr & context
else if (count == 6)
{
auto fourth_arg = checkAndGetLiteralArgument<String>(args[3], "format/session_token");
if (fourth_arg == "auto" || FormatFactory::instance().checkFormatName(fourth_arg))
if (fourth_arg == "auto" || FormatFactory::instance().exists(fourth_arg))
{
args_to_idx = {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}};
}
@ -301,7 +300,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con
args.push_back(structure_literal);
}
/// s3(source, format, structure)
else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg))
else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
{
if (second_arg == "auto")
args[1] = format_literal;
@ -331,7 +330,7 @@ void TableFunctionS3::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, con
args[3] = structure_literal;
}
/// s3(source, format, structure, compression_method)
else if (second_arg == "auto" || FormatFactory::instance().checkFormatName(second_arg))
else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg))
{
if (second_arg == "auto")
args[1] = format_literal;