From 90b7ae871ed9e4e418eec37f5ccfcd25ecae264e Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 19 Nov 2024 22:23:05 +0000 Subject: [PATCH 1/4] Add inferred format name to create query in File/S3/URL/HDFS/Azure engines --- src/Interpreters/InterpreterCreateQuery.cpp | 21 ++++- src/Storages/IStorage.h | 4 + .../ObjectStorage/Azure/Configuration.cpp | 54 ++++++++----- .../ObjectStorage/Azure/Configuration.h | 3 +- .../ObjectStorage/HDFS/Configuration.cpp | 16 ++-- .../ObjectStorage/HDFS/Configuration.h | 3 +- .../ObjectStorage/Local/Configuration.h | 2 +- .../ObjectStorage/S3/Configuration.cpp | 55 +++++++------ src/Storages/ObjectStorage/S3/Configuration.h | 3 +- .../ObjectStorage/StorageObjectStorage.cpp | 5 ++ .../ObjectStorage/StorageObjectStorage.h | 4 +- .../StorageObjectStorageCluster.cpp | 2 +- src/Storages/StorageFile.cpp | 5 ++ src/Storages/StorageFile.h | 2 + src/Storages/StorageURL.cpp | 7 ++ src/Storages/StorageURL.h | 2 + src/TableFunctions/ITableFunctionCluster.h | 2 +- src/TableFunctions/ITableFunctionFileLike.cpp | 12 +-- src/TableFunctions/ITableFunctionFileLike.h | 2 +- .../TableFunctionObjectStorage.h | 2 +- src/TableFunctions/TableFunctionURL.cpp | 6 +- src/TableFunctions/TableFunctionURL.h | 2 +- .../test_storage_azure_blob_storage/test.py | 79 +++++++++++++++++++ tests/integration/test_storage_hdfs/test.py | 28 ++++++- ...at_inference_create_query_s3_url.reference | 14 ++++ ...3_format_inference_create_query_s3_url.sql | 57 +++++++++++++ ...rmat_inference_create_query_file.reference | 1 + ...3274_format_inference_create_query_file.sh | 19 +++++ .../queries/0_stateless/data_minio/json_data | 1 + 29 files changed, 345 insertions(+), 68 deletions(-) create mode 100644 tests/queries/0_stateless/03273_format_inference_create_query_s3_url.reference create mode 100644 tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql create mode 100644 tests/queries/0_stateless/03274_format_inference_create_query_file.reference create mode 100755 tests/queries/0_stateless/03274_format_inference_create_query_file.sh create mode 100644 tests/queries/0_stateless/data_minio/json_data diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index f6586f8bfc2..67c4d2c0413 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1181,6 +1181,22 @@ namespace source_ast->children.push_back(source_ast->elements); dict.set(dict.source, source_ast); } + + ASTs * getEngineArgsFromCreateQuery(ASTCreateQuery & create_query) + { + ASTStorage * storage_def = create_query.storage; + if (!storage_def) + return nullptr; + + if (!storage_def->engine) + return nullptr; + + const ASTFunction & engine_def = *storage_def->engine; + if (!engine_def.arguments) + return nullptr; + + return &engine_def.arguments->children; + } } void InterpreterCreateQuery::setEngine(ASTCreateQuery & create) const @@ -1870,7 +1886,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, mode); /// If schema wes inferred while storage creation, add columns description to create query. - addColumnsDescriptionToCreateQueryIfNecessary(query_ptr->as(), res); + auto & create_query = query_ptr->as(); + addColumnsDescriptionToCreateQueryIfNecessary(create_query, res); + if (auto * engine_args = getEngineArgsFromCreateQuery(create_query)) + res->updateEngineArgsForCreateQuery(*engine_args, getContext()); } validateVirtualColumns(*res); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 0dc48634282..e99a66b9bda 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -284,6 +284,10 @@ public: /// Returns hints for serialization of columns accorsing to statistics accumulated by storage. virtual SerializationInfoByName getSerializationHints() const { return {}; } + /// Update engine args in create query that were inferred during storage creation to avoid the same + /// inference on server restart. For example - data format inference in File/URL/S3/etc engines. + virtual void updateEngineArgsForCreateQuery(ASTs & /*args*/, const ContextPtr & /*context*/) const {} + private: StorageID storage_id; diff --git a/src/Storages/ObjectStorage/Azure/Configuration.cpp b/src/Storages/ObjectStorage/Azure/Configuration.cpp index d08e0d9debc..faa1554dbe8 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.cpp +++ b/src/Storages/ObjectStorage/Azure/Configuration.cpp @@ -283,7 +283,7 @@ void StorageAzureConfiguration::fromAST(ASTs & engine_args, ContextPtr context, } void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) + ASTs & args, const String & structure_, const String & format_, ContextPtr context, bool with_structure) { if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { @@ -295,7 +295,7 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); args.push_back(format_equal_func); } - if (collection->getOrDefault("structure", "auto") == "auto") + if (with_structure && collection->getOrDefault("structure", "auto") == "auto") { ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); @@ -319,9 +319,12 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( if (args.size() == 3) { args.push_back(format_literal); - /// Add compression = "auto" before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + if (with_structure) + { + /// Add compression = "auto" before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } } /// (connection_string, container_name, blobpath, structure) or /// (connection_string, container_name, blobpath, format) @@ -334,12 +337,15 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( { if (fourth_arg == "auto") args[3] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + if (with_structure) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } } /// (..., structure) -> (..., format, compression, structure) - else + else if (with_structure) { auto structure_arg = args.back(); args[3] = format_literal; @@ -362,15 +368,19 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( { if (fourth_arg == "auto") args[3] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } /// (..., account_name, account_key) -> (..., account_name, account_key, format, compression, structure) else { args.push_back(format_literal); - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + if (with_structure) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } } } /// (connection_string, container_name, blobpath, format, compression, structure) or @@ -386,7 +396,7 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( { if (fourth_arg == "auto") args[3] = format_literal; - if (checkAndGetLiteralArgument(args[5], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[5], "structure") == "auto") args[5] = structure_literal; } /// (..., account_name, account_key, format) -> (..., account_name, account_key, format, compression, structure) @@ -394,12 +404,15 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( { if (sixth_arg == "auto") args[5] = format_literal; - /// Add compression=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + if (with_structure) + { + /// Add compression=auto before structure argument. + args.push_back(std::make_shared("auto")); + args.push_back(structure_literal); + } } /// (..., account_name, account_key, structure) -> (..., account_name, account_key, format, compression, structure) - else + else if (with_structure) { auto structure_arg = args.back(); args[5] = format_literal; @@ -417,14 +430,15 @@ void StorageAzureConfiguration::addStructureAndFormatToArgsIfNeeded( /// (..., format, compression) -> (..., format, compression, structure) if (checkAndGetLiteralArgument(args[5], "format") == "auto") args[5] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } /// (storage_account_url, container_name, blobpath, account_name, account_key, format, compression, structure) else if (args.size() == 8) { if (checkAndGetLiteralArgument(args[5], "format") == "auto") args[5] = format_literal; - if (checkAndGetLiteralArgument(args[7], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[7], "structure") == "auto") args[7] = structure_literal; } } diff --git a/src/Storages/ObjectStorage/Azure/Configuration.h b/src/Storages/ObjectStorage/Azure/Configuration.h index 21db81802c7..72124465c46 100644 --- a/src/Storages/ObjectStorage/Azure/Configuration.h +++ b/src/Storages/ObjectStorage/Azure/Configuration.h @@ -76,7 +76,8 @@ public: ASTs & args, const String & structure_, const String & format_, - ContextPtr context) override; + ContextPtr context, + bool with_structure) override; protected: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.cpp b/src/Storages/ObjectStorage/HDFS/Configuration.cpp index 6bee4154b2f..3c4897b5062 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.cpp +++ b/src/Storages/ObjectStorage/HDFS/Configuration.cpp @@ -174,7 +174,8 @@ void StorageHDFSConfiguration::addStructureAndFormatToArgsIfNeeded( ASTs & args, const String & structure_, const String & format_, - ContextPtr context) + ContextPtr context, + bool with_structure) { if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { @@ -186,7 +187,7 @@ void StorageHDFSConfiguration::addStructureAndFormatToArgsIfNeeded( auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); args.push_back(format_equal_func); } - if (collection->getOrDefault("structure", "auto") == "auto") + if (with_structure && collection->getOrDefault("structure", "auto") == "auto") { ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); @@ -209,23 +210,26 @@ void StorageHDFSConfiguration::addStructureAndFormatToArgsIfNeeded( if (count == 1) { /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + args.push_back(format_literal); + if (with_structure) + args.push_back(structure_literal); } /// hdfs(url, format) else if (count == 2) { if (checkAndGetLiteralArgument(args[1], "format") == "auto") args.back() = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } /// hdfs(url, format, structure) /// hdfs(url, format, structure, compression_method) + /// hdfs(url, format, compression_method) else if (count >= 3) { if (checkAndGetLiteralArgument(args[1], "format") == "auto") args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[2], "structure") == "auto") args[2] = structure_literal; } } diff --git a/src/Storages/ObjectStorage/HDFS/Configuration.h b/src/Storages/ObjectStorage/HDFS/Configuration.h index 90997292693..db8ab7f9e4d 100644 --- a/src/Storages/ObjectStorage/HDFS/Configuration.h +++ b/src/Storages/ObjectStorage/HDFS/Configuration.h @@ -62,7 +62,8 @@ public: ASTs & args, const String & structure_, const String & format_, - ContextPtr context) override; + ContextPtr context, + bool with_structure) override; private: void fromNamedCollection(const NamedCollection &, ContextPtr context) override; diff --git a/src/Storages/ObjectStorage/Local/Configuration.h b/src/Storages/ObjectStorage/Local/Configuration.h index 32a095bf7de..d679b4ad724 100644 --- a/src/Storages/ObjectStorage/Local/Configuration.h +++ b/src/Storages/ObjectStorage/Local/Configuration.h @@ -59,7 +59,7 @@ public: ObjectStoragePtr createObjectStorage(ContextPtr, bool) override { return std::make_shared("/"); } - void addStructureAndFormatToArgsIfNeeded(ASTs &, const String &, const String &, ContextPtr) override { } + void addStructureAndFormatToArgsIfNeeded(ASTs &, const String &, const String &, ContextPtr, bool) override { } private: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; diff --git a/src/Storages/ObjectStorage/S3/Configuration.cpp b/src/Storages/ObjectStorage/S3/Configuration.cpp index 02effe261d0..629628c762f 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.cpp +++ b/src/Storages/ObjectStorage/S3/Configuration.cpp @@ -395,7 +395,7 @@ void StorageS3Configuration::fromAST(ASTs & args, ContextPtr context, bool with_ } void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) + ASTs & args, const String & structure_, const String & format_, ContextPtr context, bool with_structure) { if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { @@ -407,7 +407,7 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); args.push_back(format_equal_func); } - if (collection->getOrDefault("structure", "auto") == "auto") + if (with_structure && collection->getOrDefault("structure", "auto") == "auto") { ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); @@ -429,8 +429,9 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( if (count == 1) { /// Add format=auto before structure argument. - args.push_back(std::make_shared("auto")); - args.push_back(structure_literal); + args.push_back(format_literal); + if (with_structure) + args.push_back(structure_literal); } /// s3(s3_url, format) or /// s3(s3_url, NOSIGN) @@ -444,11 +445,13 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( else if (checkAndGetLiteralArgument(args[1], "format") == "auto") args[1] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } /// s3(source, format, structure) or /// s3(source, access_key_id, secret_access_key) or - /// s3(source, NOSIGN, format) + /// s3(source, NOSIGN, format) or + /// s3(source, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. else if (count == 3) { @@ -457,26 +460,29 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[2], "format") == "auto") args[2] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[2], "structure") == "auto") args[2] = structure_literal; } else { /// Add format and structure arguments. args.push_back(format_literal); - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } } /// s3(source, format, structure, compression_method) or /// s3(source, access_key_id, secret_access_key, format) or /// s3(source, access_key_id, secret_access_key, session_token) or - /// s3(source, NOSIGN, format, structure) + /// s3(source, NOSIGN, format, structure) or + /// s3(source, NOSIGN, format, compression_method) /// We can distinguish them by looking at the 2-nd argument: check if it's NOSIGN, format name or neither. else if (count == 4) { @@ -485,14 +491,14 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[2], "format") == "auto") args[2] = format_literal; - if (checkAndGetLiteralArgument(args[3], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[3], "structure") == "auto") args[3] = structure_literal; } else if (second_arg == "auto" || FormatFactory::instance().exists(second_arg)) { if (second_arg == "auto") args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[2], "structure") == "auto") args[2] = structure_literal; } else @@ -502,18 +508,21 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[3], "format") == "auto") args[3] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } else { args.push_back(format_literal); - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } } } /// s3(source, access_key_id, secret_access_key, format, structure) or /// s3(source, access_key_id, secret_access_key, session_token, format) or - /// s3(source, NOSIGN, format, structure, compression_method) + /// s3(source, NOSIGN, format, structure, compression_method) or + /// s3(source, access_key_id, secret_access_key, format, compression) /// We can distinguish them by looking at the 2-nd argument: check if it's a NOSIGN keyword name or not. else if (count == 5) { @@ -522,7 +531,7 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[2], "format") == "auto") args[2] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[2], "structure") == "auto") args[3] = structure_literal; } else @@ -532,19 +541,21 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[3], "format") == "auto") args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[4], "structure") == "auto") args[4] = structure_literal; } else { if (checkAndGetLiteralArgument(args[4], "format") == "auto") args[4] = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } } } /// s3(source, access_key_id, secret_access_key, format, structure, compression) or - /// s3(source, access_key_id, secret_access_key, session_token, format, structure) + /// s3(source, access_key_id, secret_access_key, session_token, format, structure) or + /// s3(source, access_key_id, secret_access_key, session_token, format, compression_method) else if (count == 6) { auto fourth_arg = checkAndGetLiteralArgument(args[3], "format/session_token"); @@ -552,14 +563,14 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[3], "format") == "auto") args[3] = format_literal; - if (checkAndGetLiteralArgument(args[4], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[4], "structure") == "auto") args[4] = structure_literal; } else { if (checkAndGetLiteralArgument(args[4], "format") == "auto") args[4] = format_literal; - if (checkAndGetLiteralArgument(args[5], "format") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[5], "format") == "auto") args[5] = structure_literal; } } @@ -568,7 +579,7 @@ void StorageS3Configuration::addStructureAndFormatToArgsIfNeeded( { if (checkAndGetLiteralArgument(args[4], "format") == "auto") args[4] = format_literal; - if (checkAndGetLiteralArgument(args[5], "format") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[5], "format") == "auto") args[5] = structure_literal; } } diff --git a/src/Storages/ObjectStorage/S3/Configuration.h b/src/Storages/ObjectStorage/S3/Configuration.h index 87f2be1bf3e..c4614a28189 100644 --- a/src/Storages/ObjectStorage/S3/Configuration.h +++ b/src/Storages/ObjectStorage/S3/Configuration.h @@ -91,7 +91,8 @@ public: ASTs & args, const String & structure, const String & format, - ContextPtr context) override; + ContextPtr context, + bool with_structure) override; private: void fromNamedCollection(const NamedCollection & collection, ContextPtr context) override; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index fd2fe0400bb..419069dbb8d 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -479,6 +479,11 @@ std::pair StorageObjectStorage::resolveSchemaAn return std::pair(columns, format); } +void StorageObjectStorage::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +{ + configuration->addStructureAndFormatToArgsIfNeeded(args, "", configuration->format, context, /*with_structure=*/false); +} + SchemaCache & StorageObjectStorage::getSchemaCache(const ContextPtr & context, const std::string & storage_type_name) { if (storage_type_name == "s3") diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index e2bb41a4935..81f9828ccc8 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -122,6 +122,8 @@ public: std::string & sample_path, const ContextPtr & context); + void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + protected: String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); @@ -179,7 +181,7 @@ public: /// Add/replace structure and format arguments in the AST arguments if they have 'auto' values. virtual void addStructureAndFormatToArgsIfNeeded( - ASTs & args, const String & structure_, const String & format_, ContextPtr context) = 0; + ASTs & args, const String & structure_, const String & format_, ContextPtr context, bool with_structure) = 0; bool withPartitionWildcard() const; bool withGlobs() const { return isPathWithGlobs() || isNamespaceWithGlobs(); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp index 176f20cf5ca..07eecc65599 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorageCluster.cpp @@ -107,7 +107,7 @@ void StorageObjectStorageCluster::updateQueryToSendIfNeeded( ASTPtr cluster_name_arg = args.front(); args.erase(args.begin()); - configuration->addStructureAndFormatToArgsIfNeeded(args, structure, configuration->format, context); + configuration->addStructureAndFormatToArgsIfNeeded(args, structure, configuration->format, context, /*with_structure=*/true); args.insert(args.begin(), cluster_name_arg); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index eefd60128a6..91263180b5c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2114,6 +2114,11 @@ void StorageFile::truncate( } } +void StorageFile::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +{ + if (checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "format") == "auto") + args[0] = std::make_shared(format_name); +} void registerStorageFile(StorageFactory & factory) { diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 6b21353f161..5b95d1a9f8b 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -139,6 +139,8 @@ public: bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } + void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + protected: friend class StorageFileSource; friend class StorageFileSink; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 3ba8d1fa304..af91768f79e 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -38,6 +38,8 @@ #include #include +#include + #include #include #include @@ -1570,6 +1572,11 @@ void StorageURL::processNamedCollectionResult(Configuration & configuration, con configuration.structure = collection.getOrDefault("structure", "auto"); } +void StorageURL::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +{ + TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(args, "", format_name, context, /*with_structure=*/false); +} + StorageURL::Configuration StorageURL::getConfiguration(ASTs & args, const ContextPtr & local_context) { StorageURL::Configuration configuration; diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 6f1d544629a..4f3e920afac 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -305,6 +305,8 @@ public: bool supportsDynamicSubcolumns() const override { return true; } + void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); struct Configuration : public StatelessTableEngineConfiguration diff --git a/src/TableFunctions/ITableFunctionCluster.h b/src/TableFunctions/ITableFunctionCluster.h index 744d7139d16..3378cbe87d3 100644 --- a/src/TableFunctions/ITableFunctionCluster.h +++ b/src/TableFunctions/ITableFunctionCluster.h @@ -31,7 +31,7 @@ public: ASTPtr cluster_name_arg = args.front(); args.erase(args.begin()); - Base::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); + Base::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context, /*with_structure=*/true); args.insert(args.begin(), cluster_name_arg); } diff --git a/src/TableFunctions/ITableFunctionFileLike.cpp b/src/TableFunctions/ITableFunctionFileLike.cpp index 23e59494f61..b591ea97e3d 100644 --- a/src/TableFunctions/ITableFunctionFileLike.cpp +++ b/src/TableFunctions/ITableFunctionFileLike.cpp @@ -88,7 +88,7 @@ void ITableFunctionFileLike::parseArgumentsImpl(ASTs & args, const ContextPtr & compression_method = checkAndGetLiteralArgument(args[3], "compression_method"); } -void ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context) +void ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr & context, bool with_structure) { if (args.empty() || args.size() > getMaxNumberOfArguments()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Expected 1 to {} arguments in table function, got {}", getMaxNumberOfArguments(), args.size()); @@ -103,21 +103,23 @@ void ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(ASTs & ar if (args.size() == 1) { args.push_back(format_literal); - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } /// f(filename, format) else if (args.size() == 2) { if (checkAndGetLiteralArgument(args[1], "format") == "auto") args.back() = format_literal; - args.push_back(structure_literal); + if (with_structure) + args.push_back(structure_literal); } - /// f(filename, format, structure) or f(filename, format, structure, compression) + /// f(filename, format, structure) or f(filename, format, structure, compression) or f(filename, format, compression) else if (args.size() >= 3) { if (checkAndGetLiteralArgument(args[1], "format") == "auto") args[1] = format_literal; - if (checkAndGetLiteralArgument(args[2], "structure") == "auto") + if (with_structure && checkAndGetLiteralArgument(args[2], "structure") == "auto") args[2] = structure_literal; } } diff --git a/src/TableFunctions/ITableFunctionFileLike.h b/src/TableFunctions/ITableFunctionFileLike.h index 4c97507b8d1..f9b5e30e85e 100644 --- a/src/TableFunctions/ITableFunctionFileLike.h +++ b/src/TableFunctions/ITableFunctionFileLike.h @@ -35,7 +35,7 @@ public: static size_t getMaxNumberOfArguments() { return max_number_of_arguments; } - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure, const String & format, const ContextPtr &, bool with_structure); protected: diff --git a/src/TableFunctions/TableFunctionObjectStorage.h b/src/TableFunctions/TableFunctionObjectStorage.h index 19cd637bd80..855acca85ee 100644 --- a/src/TableFunctions/TableFunctionObjectStorage.h +++ b/src/TableFunctions/TableFunctionObjectStorage.h @@ -139,7 +139,7 @@ public: const String & format, const ContextPtr & context) { - Configuration().addStructureAndFormatToArgsIfNeeded(args, structure, format, context); + Configuration().addStructureAndFormatToArgsIfNeeded(args, structure, format, context, /*with_structure=*/true); } protected: diff --git a/src/TableFunctions/TableFunctionURL.cpp b/src/TableFunctions/TableFunctionURL.cpp index 8f4841a992b..5de3a302772 100644 --- a/src/TableFunctions/TableFunctionURL.cpp +++ b/src/TableFunctions/TableFunctionURL.cpp @@ -77,7 +77,7 @@ void TableFunctionURL::parseArgumentsImpl(ASTs & args, const ContextPtr & contex } } -void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context) +void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context, bool with_structure) { if (auto collection = tryGetNamedCollectionWithOverrides(args, context)) { @@ -89,7 +89,7 @@ void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, co auto format_equal_func = makeASTFunction("equals", std::move(format_equal_func_args)); args.push_back(format_equal_func); } - if (collection->getOrDefault("structure", "auto") == "auto") + if (with_structure && collection->getOrDefault("structure", "auto") == "auto") { ASTs structure_equal_func_args = {std::make_shared("structure"), std::make_shared(structure_)}; auto structure_equal_func = makeASTFunction("equals", std::move(structure_equal_func_args)); @@ -109,7 +109,7 @@ void TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(ASTs & args, co args.pop_back(); } - ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context); + ITableFunctionFileLike::updateStructureAndFormatArgumentsIfNeeded(args, structure_, format_, context, with_structure); if (headers_ast) args.push_back(headers_ast); diff --git a/src/TableFunctions/TableFunctionURL.h b/src/TableFunctions/TableFunctionURL.h index a1efddb84c6..d2c9d4d9ddf 100644 --- a/src/TableFunctions/TableFunctionURL.h +++ b/src/TableFunctions/TableFunctionURL.h @@ -34,7 +34,7 @@ public: ColumnsDescription getActualTableStructure(ContextPtr context, bool is_insert_query) const override; - static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context); + static void updateStructureAndFormatArgumentsIfNeeded(ASTs & args, const String & structure_, const String & format_, const ContextPtr & context, bool with_structure); protected: void parseArguments(const ASTPtr & ast, ContextPtr context) override; diff --git a/tests/integration/test_storage_azure_blob_storage/test.py b/tests/integration/test_storage_azure_blob_storage/test.py index 220ee13cb25..e22ce925f6c 100644 --- a/tests/integration/test_storage_azure_blob_storage/test.py +++ b/tests/integration/test_storage_azure_blob_storage/test.py @@ -1314,6 +1314,7 @@ def test_size_virtual_column(cluster): def test_format_detection(cluster): node = cluster.instances["node"] + connection_string = cluster.env_variables["AZURITE_CONNECTION_STRING"] storage_account_url = cluster.env_variables["AZURITE_STORAGE_ACCOUNT_URL"] account_name = "devstoreaccount1" account_key = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==" @@ -1381,6 +1382,84 @@ def test_format_detection(cluster): assert result == expected_result + azure_query( + node, + f"create table test_format_detection engine=AzureBlobStorage('{connection_string}', 'cont', 'test_format_detection1')", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n" + ) + + azure_query( + node, + f"create or replace table test_format_detection engine=AzureBlobStorage('{connection_string}', 'cont', 'test_format_detection1', auto)", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\')\n" + ) + + azure_query( + node, + f"create or replace table test_format_detection engine=AzureBlobStorage('{connection_string}', 'cont', 'test_format_detection1', auto, 'none')", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{connection_string}\\', \\'cont\\', \\'test_format_detection1\\', \\'JSON\\', \\'none\\')\n" + ) + + azure_query( + node, + f"create or replace table test_format_detection engine=AzureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}')", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\')\n" + ) + + azure_query( + node, + f"create or replace table test_format_detection engine=AzureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', auto)", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\')\n" + ) + + azure_query( + node, + f"create or replace table test_format_detection engine=AzureBlobStorage('{storage_account_url}', 'cont', 'test_format_detection1', '{account_name}', '{account_key}', auto, 'none')", + ) + result = azure_query( + node, + f"show create table test_format_detection", + ) + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = AzureBlobStorage(\\'{storage_account_url}\\', \\'cont\\', \\'test_format_detection1\\', \\'{account_name}\\', \\'{account_key}\\', \\'JSON\\', \\'none\\')\n" + ) + def test_write_to_globbed_partitioned_path(cluster): node = cluster.instances["node"] diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 366bc28d2c9..9f0afc57ad9 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -636,7 +636,7 @@ def test_multiple_inserts(started_cluster): node1.query(f"drop table test_multiple_inserts") -def test_format_detection(started_cluster): +def test_format_detection_from_file_name(started_cluster): node1.query( f"create table arrow_table (x UInt64) engine=HDFS('hdfs://hdfs1:9000/data.arrow')" ) @@ -1222,6 +1222,32 @@ def test_format_detection(started_cluster): assert expected_result == result + node.query( + f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1')" + ) + result = node.query( + f"show create table test_format_detection" + ) + assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" + + node.query("drop table test_format_detection") + node.query( + f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1', auto)" + ) + result = node.query( + f"show create table test_format_detection" + ) + assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" + + node.query("drop table test_format_detection") + node.query( + f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1', auto, 'none')" + ) + result = node.query( + f"show create table test_format_detection" + ) + assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\', \\'none\\')\n" + def test_write_to_globbed_partitioned_path(started_cluster): node = started_cluster.instances["node1"] diff --git a/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.reference b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.reference new file mode 100644 index 00000000000..72696cef342 --- /dev/null +++ b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.reference @@ -0,0 +1,14 @@ +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'NOSIGN\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'JSON\', \'none\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'NOSIGN\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'test\', \'[HIDDEN]\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'NOSIGN\', \'JSON\', \'none\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'test\', \'[HIDDEN]\', \'\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'test\', \'[HIDDEN]\', \'\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'test\', \'[HIDDEN]\', \'JSON\', \'none\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = S3(\'http://localhost:11111/test/json_data\', \'test\', \'[HIDDEN]\', \'\', \'JSON\', \'none\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = URL(\'http://localhost:11111/test/json_data\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = URL(\'http://localhost:11111/test/json_data\', \'JSON\') +CREATE TABLE default.test\n(\n `a` Nullable(Int64)\n)\nENGINE = URL(\'http://localhost:11111/test/json_data\', \'JSON\', \'none\') diff --git a/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql new file mode 100644 index 00000000000..353b78b1e15 --- /dev/null +++ b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql @@ -0,0 +1,57 @@ +drop table if exists test; + +create table test engine=S3('http://localhost:11111/test/json_data'); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', NOSIGN); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', auto); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', auto, 'none'); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', NOSIGN, auto); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', 'test', 'testtest'); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', NOSIGN, auto, 'none'); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', 'test', 'testtest', ''); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', 'test', 'testtest', '', auto); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', 'test', 'testtest', auto, 'none'); +show create table test; +drop table test; + +create table test engine=S3('http://localhost:11111/test/json_data', 'test', 'testtest', '', auto, 'none'); +show create table test; +drop table test; + +create table test engine=URL('http://localhost:11111/test/json_data'); +show create table test; +drop table test; + +create table test engine=URL('http://localhost:11111/test/json_data', auto); +show create table test; +drop table test; + +create table test engine=URL('http://localhost:11111/test/json_data', auto, 'none'); +show create table test; +drop table test; diff --git a/tests/queries/0_stateless/03274_format_inference_create_query_file.reference b/tests/queries/0_stateless/03274_format_inference_create_query_file.reference new file mode 100644 index 00000000000..0cfbf08886f --- /dev/null +++ b/tests/queries/0_stateless/03274_format_inference_create_query_file.reference @@ -0,0 +1 @@ +2 diff --git a/tests/queries/0_stateless/03274_format_inference_create_query_file.sh b/tests/queries/0_stateless/03274_format_inference_create_query_file.sh new file mode 100755 index 00000000000..10c0650144c --- /dev/null +++ b/tests/queries/0_stateless/03274_format_inference_create_query_file.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +$CLICKHOUSE_LOCAL -q "select 42 as a format JSONEachRow" > data_$CLICKHOUSE_TEST_UNIQUE_NAME +$CLICKHOUSE_LOCAL -nm -q " +create table test engine=File(auto, './data_$CLICKHOUSE_TEST_UNIQUE_NAME'); +show create table test; +drop table test; + +create table test engine=File(auto, './data_$CLICKHOUSE_TEST_UNIQUE_NAME', 'none'); +show create table test; +drop table test; +" | grep "JSON" -c + +rm data_$CLICKHOUSE_TEST_UNIQUE_NAME + diff --git a/tests/queries/0_stateless/data_minio/json_data b/tests/queries/0_stateless/data_minio/json_data new file mode 100644 index 00000000000..bfe33aeb603 --- /dev/null +++ b/tests/queries/0_stateless/data_minio/json_data @@ -0,0 +1 @@ +{"a" : 42} From 1fc4757c0829a9b013972858c2eddb293bd3cce8 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 19 Nov 2024 22:27:29 +0000 Subject: [PATCH 2/4] Better naming --- src/Interpreters/InterpreterCreateQuery.cpp | 3 ++- src/Storages/IStorage.h | 4 ++-- src/Storages/ObjectStorage/StorageObjectStorage.cpp | 2 +- src/Storages/ObjectStorage/StorageObjectStorage.h | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageURL.cpp | 2 +- src/Storages/StorageURL.h | 2 +- 8 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 67c4d2c0413..a8cbf83ff23 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1888,8 +1888,9 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, /// If schema wes inferred while storage creation, add columns description to create query. auto & create_query = query_ptr->as(); addColumnsDescriptionToCreateQueryIfNecessary(create_query, res); + /// Add any inferred engine args if needed. For example, data format for engines File/S3/URL/etc if (auto * engine_args = getEngineArgsFromCreateQuery(create_query)) - res->updateEngineArgsForCreateQuery(*engine_args, getContext()); + res->addInferredEngineArgsToCreateQuery(*engine_args, getContext()); } validateVirtualColumns(*res); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e99a66b9bda..37359aad290 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -284,9 +284,9 @@ public: /// Returns hints for serialization of columns accorsing to statistics accumulated by storage. virtual SerializationInfoByName getSerializationHints() const { return {}; } - /// Update engine args in create query that were inferred during storage creation to avoid the same + /// Add engine args that were inferred during storage creation to create query to avoid the same /// inference on server restart. For example - data format inference in File/URL/S3/etc engines. - virtual void updateEngineArgsForCreateQuery(ASTs & /*args*/, const ContextPtr & /*context*/) const {} + virtual void addInferredEngineArgsToCreateQuery(ASTs & /*args*/, const ContextPtr & /*context*/) const {} private: StorageID storage_id; diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.cpp b/src/Storages/ObjectStorage/StorageObjectStorage.cpp index 419069dbb8d..098a5549fae 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.cpp +++ b/src/Storages/ObjectStorage/StorageObjectStorage.cpp @@ -479,7 +479,7 @@ std::pair StorageObjectStorage::resolveSchemaAn return std::pair(columns, format); } -void StorageObjectStorage::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +void StorageObjectStorage::addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const { configuration->addStructureAndFormatToArgsIfNeeded(args, "", configuration->format, context, /*with_structure=*/false); } diff --git a/src/Storages/ObjectStorage/StorageObjectStorage.h b/src/Storages/ObjectStorage/StorageObjectStorage.h index 81f9828ccc8..e32dcc438cd 100644 --- a/src/Storages/ObjectStorage/StorageObjectStorage.h +++ b/src/Storages/ObjectStorage/StorageObjectStorage.h @@ -122,7 +122,7 @@ public: std::string & sample_path, const ContextPtr & context); - void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + void addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const override; protected: String getPathSample(StorageInMemoryMetadata metadata, ContextPtr context); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 91263180b5c..550ffa245e3 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -2114,7 +2114,7 @@ void StorageFile::truncate( } } -void StorageFile::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +void StorageFile::addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const { if (checkAndGetLiteralArgument(evaluateConstantExpressionOrIdentifierAsLiteral(args[0], context), "format") == "auto") args[0] = std::make_shared(format_name); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 5b95d1a9f8b..477d75a77a8 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -139,7 +139,7 @@ public: bool supportsTrivialCountOptimization(const StorageSnapshotPtr &, ContextPtr) const override { return true; } - void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + void addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const override; protected: friend class StorageFileSource; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index af91768f79e..5607054a149 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -1572,7 +1572,7 @@ void StorageURL::processNamedCollectionResult(Configuration & configuration, con configuration.structure = collection.getOrDefault("structure", "auto"); } -void StorageURL::updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const +void StorageURL::addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const { TableFunctionURL::updateStructureAndFormatArgumentsIfNeeded(args, "", format_name, context, /*with_structure=*/false); } diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 4f3e920afac..7df5b90653d 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -305,7 +305,7 @@ public: bool supportsDynamicSubcolumns() const override { return true; } - void updateEngineArgsForCreateQuery(ASTs & args, const ContextPtr & context) const override; + void addInferredEngineArgsToCreateQuery(ASTs & args, const ContextPtr & context) const override; static FormatSettings getFormatSettingsFromArgs(const StorageFactory::Arguments & args); From c185b5bc3813ad22d45ac29e6a1adc8b71b07265 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 19 Nov 2024 22:57:53 +0000 Subject: [PATCH 3/4] Fix style --- tests/integration/test_storage_hdfs/test.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_storage_hdfs/test.py b/tests/integration/test_storage_hdfs/test.py index 9f0afc57ad9..d37b0d7218f 100644 --- a/tests/integration/test_storage_hdfs/test.py +++ b/tests/integration/test_storage_hdfs/test.py @@ -1225,28 +1225,31 @@ def test_format_detection(started_cluster): node.query( f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1')" ) - result = node.query( - f"show create table test_format_detection" + result = node.query(f"show create table test_format_detection") + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" ) - assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" node.query("drop table test_format_detection") node.query( f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1', auto)" ) - result = node.query( - f"show create table test_format_detection" + result = node.query(f"show create table test_format_detection") + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" ) - assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\')\n" node.query("drop table test_format_detection") node.query( f"create table test_format_detection engine=HDFS('hdfs://hdfs1:9000/{dir}/test_format_detection1', auto, 'none')" ) - result = node.query( - f"show create table test_format_detection" + result = node.query(f"show create table test_format_detection") + assert ( + result + == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\', \\'none\\')\n" ) - assert result == f"CREATE TABLE default.test_format_detection\\n(\\n `x` Nullable(String),\\n `y` Nullable(String)\\n)\\nENGINE = HDFS(\\'hdfs://hdfs1:9000/{dir}/test_format_detection1\\', \\'JSON\\', \\'none\\')\n" def test_write_to_globbed_partitioned_path(started_cluster): From 130a1c52a252037120fdffa4d6f5b30657dba890 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov <48961922+Avogar@users.noreply.github.com> Date: Wed, 20 Nov 2024 12:11:51 +0100 Subject: [PATCH 4/4] Update 03273_format_inference_create_query_s3_url.sql --- .../0_stateless/03273_format_inference_create_query_s3_url.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql index 353b78b1e15..5d653843ec8 100644 --- a/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql +++ b/tests/queries/0_stateless/03273_format_inference_create_query_s3_url.sql @@ -1,3 +1,5 @@ +-- Tags: no-fasttest + drop table if exists test; create table test engine=S3('http://localhost:11111/test/json_data');