mirror of https://github.com/ClickHouse/ClickHouse.git, synced 2024-11-26 17:41:59 +00:00

Merge branch 'master' into formats-with-suffixes

This commit is contained in: commit a7df9cd53a

contrib/arrow (vendored, 2 changed lines)
@@ -1 +1 @@
-Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e
+Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e
@@ -45,7 +45,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
 ENV DOCKER_CHANNEL stable
 RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
-RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
+RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
 
 RUN apt-get update \
     && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
@@ -58,7 +58,9 @@ RUN apt-get update \
 
 RUN dockerd --version; docker --version
 
-RUN python3 -m pip install \
+ARG TARGETARCH
+# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
+RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
     PyMySQL \
     aerospike==4.0.0 \
     avro==1.10.2 \
@@ -88,7 +90,7 @@ RUN python3 -m pip install \
     urllib3 \
     requests-kerberos \
     pyhdfs \
-    azure-storage-blob
+    azure-storage-blob )
 
 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
@@ -102,8 +104,6 @@ RUN set -x \
     && echo 'dockremap:165536:65536' >> /etc/subuid \
    && echo 'dockremap:165536:65536' >> /etc/subgid
 
-RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts
-
 EXPOSE 2375
 ENTRYPOINT ["dockerd-entrypoint.sh"]
 CMD ["sh", "-c", "pytest $PYTEST_OPTS"]
@@ -178,7 +178,7 @@ toc_title: Adopters
 | <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
 | <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
 | <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
-| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
+| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
 | <a href="https://www.yellowfinbi.com" class="favicon">Yellowfin</a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
 | <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
 | <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |
@@ -1017,7 +1017,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
 
         String current_format = parsed_insert_query->format;
         if (current_format.empty())
-            current_format = FormatFactory::instance().getFormatFromFileName(in_file);
+            current_format = FormatFactory::instance().getFormatFromFileName(in_file, true);
 
         /// Create temporary storage file, to support globs and parallel reading
         StorageFile::CommonArguments args{
@@ -25,6 +25,7 @@ namespace ErrorCodes
     extern const int LOGICAL_ERROR;
     extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
     extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT;
+    extern const int BAD_ARGUMENTS;
 }
 
 const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const
@@ -382,6 +383,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_
     if (target)
         throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
     target = std::move(input_creator);
+    registerFileExtension(name, name);
 }
 
 void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker)
@@ -419,14 +421,15 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp
     if (target)
         throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
     target = std::move(output_creator);
+    registerFileExtension(name, name);
 }
 
 void FormatFactory::registerFileExtension(const String & extension, const String & format_name)
 {
-    file_extension_formats[extension] = format_name;
+    file_extension_formats[boost::to_lower_copy(extension)] = format_name;
 }
 
-String FormatFactory::getFormatFromFileName(String file_name)
+String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found)
 {
     CompressionMethod compression_method = chooseCompressionMethod(file_name, "");
     if (CompressionMethod::None != compression_method)
@@ -438,11 +441,22 @@ String FormatFactory::getFormatFromFileName(String file_name)
 
     auto pos = file_name.find_last_of('.');
     if (pos == String::npos)
+    {
+        if (throw_if_not_found)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension");
         return "";
+    }
 
     String file_extension = file_name.substr(pos + 1, String::npos);
     boost::algorithm::to_lower(file_extension);
-    return file_extension_formats[file_extension];
+    auto it = file_extension_formats.find(file_extension);
+    if (it == file_extension_formats.end())
+    {
+        if (throw_if_not_found)
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension");
+        return "";
+    }
+    return it->second;
 }
 
 void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine)
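A sketch of the resulting behavior from SQL (hypothetical file names, assuming the files exist):

    -- lookup is case-insensitive: boost::to_lower_copy on registration, to_lower on query
    SELECT * FROM file('data.CSV');
    -- with throw_if_not_found = true, an unknown extension raises BAD_ARGUMENTS
    SELECT * FROM file('data.unknown'); -- { serverError 36 }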
@@ -186,7 +186,7 @@ public:
 
     /// Register file extension for format
     void registerFileExtension(const String & extension, const String & format_name);
-    String getFormatFromFileName(String file_name);
+    String getFormatFromFileName(String file_name, bool throw_if_not_found = false);
 
     /// Register schema readers for format its name.
     void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator);
@@ -107,6 +107,8 @@ void registerTSKVSchemaReader(FormatFactory & factory);
 void registerValuesSchemaReader(FormatFactory & factory);
 void registerTemplateSchemaReader(FormatFactory & factory);
 
+void registerFileExtensions(FormatFactory & factory);
+
 void registerFormats()
 {
     auto & factory = FormatFactory::instance();
@@ -203,16 +205,6 @@ void registerFormats()
     registerTSKVSchemaReader(factory);
     registerValuesSchemaReader(factory);
     registerTemplateSchemaReader(factory);
-
-    factory.registerFileExtension("csv", "CSV");
-    factory.registerFileExtension("tsv", "TSV");
-    factory.registerFileExtension("parquet", "Parquet");
-    factory.registerFileExtension("orc", "ORC");
-    factory.registerFileExtension("native", "Native");
-    factory.registerFileExtension("json", "JSON");
-    factory.registerFileExtension("ndjson", "JSONEachRow");
-    factory.registerFileExtension("xml", "XML");
-    factory.registerFileExtension("avro", "Avro");
 }
 
 }
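With registerInputFormat and registerOutputFormat now registering every format name as an extension of its own, the blanket registrations removed above become redundant; only extensions spelled differently from their format name (bin, capnp, ndjson, md, messagepack, pb) still need an explicit registerFileExtension call, and those move next to the individual formats in the hunks that follow. A hypothetical example of one such alias in use:

    -- 'ndjson' is a registered alias for the JSONEachRow format
    SELECT * FROM file('events.ndjson');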
@@ -114,6 +114,7 @@ void registerInputFormatRowBinary(FormatFactory & factory)
     };
 
     registerWithNamesAndTypes("RowBinary", register_func);
+    factory.registerFileExtension("bin", "RowBinary");
 }
 
 void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)
@@ -310,6 +310,7 @@ void registerInputFormatCapnProto(FormatFactory & factory)
         return std::make_shared<CapnProtoRowInputFormat>(buf, sample, std::move(params),
             FormatSchemaInfo(settings, "CapnProto", true), settings);
     });
+    factory.registerFileExtension("capnp", "CapnProto");
 }
 
 void registerCapnProtoSchemaReader(FormatFactory & factory)
@@ -340,6 +340,8 @@ void registerInputFormatJSONEachRow(FormatFactory & factory)
         return std::make_shared<JSONEachRowRowInputFormat>(buf, sample, std::move(params), settings, false);
     });
 
+    factory.registerFileExtension("ndjson", "JSONEachRow");
+
     factory.registerInputFormat("JSONStringsEachRow", [](
         ReadBuffer & buf,
         const Block & sample,
@@ -67,6 +67,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory)
     });
 
     factory.markOutputFormatSupportsParallelFormatting("Markdown");
+    factory.registerFileExtension("md", "Markdown");
 }
 
 }
@@ -486,6 +486,7 @@ void registerInputFormatMsgPack(FormatFactory & factory)
     {
         return std::make_shared<MsgPackRowInputFormat>(sample, buf, params);
     });
+    factory.registerFileExtension("messagepack", "MsgPack");
 }
 
 void registerMsgPackSchemaReader(FormatFactory & factory)
@@ -95,6 +95,8 @@ void registerProtobufSchemaReader(FormatFactory & factory)
     {
         return std::make_shared<ProtobufSchemaReader>(settings);
     });
+    factory.registerFileExtension("pb", "Protobuf");
+
     factory.registerExternalSchemaReader("ProtobufSingle", [](const FormatSettings & settings)
     {
         return std::make_shared<ProtobufSchemaReader>(settings);
@@ -98,7 +98,7 @@ getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfigura
 struct URLBasedDataSourceConfiguration
 {
     String url;
-    String format;
+    String format = "auto";
     String compression_method = "auto";
     String structure = "auto";
@@ -620,17 +620,23 @@ void registerStorageHDFS(StorageFactory & factory)
 {
     ASTs & engine_args = args.engine_args;
 
-    if (engine_args.size() != 2 && engine_args.size() != 3)
+    if (engine_args.empty() || engine_args.size() > 3)
         throw Exception(
-            "Storage HDFS requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+            "Storage HDFS requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext());
 
     String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
 
-    engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext());
-
-    String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
+    String format_name = "auto";
+    if (engine_args.size() > 1)
+    {
+        engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext());
+        format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
+    }
+
+    if (format_name == "auto")
+        format_name = FormatFactory::instance().getFormatFromFileName(url, true);
 
     String compression_method;
     if (engine_args.size() == 3)
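A sketch of the one-argument form this enables (hypothetical HDFS URI):

    -- format omitted: Parquet is inferred from the .parquet extension
    CREATE TABLE hdfs_parquet (x UInt64)
        ENGINE = HDFS('hdfs://namenode:9000/path/data.parquet');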
@@ -789,9 +789,9 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
     }
     else
     {
-        if (engine_args.size() < 2 || engine_args.size() > 5)
+        if (engine_args.empty() || engine_args.size() > 5)
             throw Exception(
-                "Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
+                "Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
         for (auto & engine_arg : engine_args)
@@ -809,13 +809,16 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
             configuration.compression_method = engine_args.back()->as<ASTLiteral &>().value.safeGet<String>();
             configuration.format = engine_args[engine_args.size() - 2]->as<ASTLiteral &>().value.safeGet<String>();
         }
-        else
+        else if (engine_args.size() != 1)
        {
             configuration.compression_method = "auto";
             configuration.format = engine_args.back()->as<ASTLiteral &>().value.safeGet<String>();
         }
     }
 
+    if (configuration.format == "auto")
+        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
+
     return configuration;
 }
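The corresponding single-argument S3 form, sketched with a hypothetical endpoint:

    -- url only: CSV comes from the .csv extension, compression stays 'auto'
    CREATE TABLE s3_csv (x UInt64)
        ENGINE = S3('https://storage.example.com/bucket/data.csv');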
@@ -624,20 +624,24 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
     }
     else
     {
-        if (args.size() != 2 && args.size() != 3)
+        if (args.empty() || args.size() > 3)
             throw Exception(
-                "Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.",
+                "Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.",
                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
         for (auto & arg : args)
             arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, local_context);
 
         configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>();
-        configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
+        if (args.size() > 1)
+            configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
         if (args.size() == 3)
             configuration.compression_method = args[2]->as<ASTLiteral &>().value.safeGet<String>();
     }
 
+    if (configuration.format == "auto")
+        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
+
     return configuration;
 }
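And the same for URL, a minimal sketch (hypothetical address):

    -- one argument: JSONEachRow is inferred from the .ndjson extension
    CREATE TABLE url_ndjson (x UInt64)
        ENGINE = URL('https://example.com/feed.ndjson');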
@@ -53,23 +53,28 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
 
     ASTs & args = args_func.at(0)->children;
 
-    if (args.size() < 2)
-        throw Exception("Table function '" + getName() + "' requires at least 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+    if (args.empty())
+        throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     for (auto & arg : args)
         arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
 
     filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();
-    format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
 
-    if (args.size() == 2)
+    if (args.size() > 1)
+        format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
+
+    if (format == "auto")
+        format = FormatFactory::instance().getFormatFromFileName(filename, true);
+
+    if (args.size() <= 2)
     {
         checkIfFormatSupportsAutoStructure(getName(), format);
         return;
     }
 
     if (args.size() != 3 && args.size() != 4)
-        throw Exception("Table function '" + getName() + "' requires 2, 3 or 4 arguments: filename, format, structure (default auto) and compression method (default auto)",
+        throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)",
            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
     structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
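For the file-like table functions a bare file name is now enough; a sketch mirroring the new 02167 test below:

    INSERT INTO TABLE FUNCTION file('sample.tsv', 'auto', 'x UInt64') SELECT 1;
    -- no format argument: TSV is inferred from the extension
    SELECT * FROM file('sample.tsv');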
@@ -17,7 +17,7 @@ protected:
     void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
 
     String filename;
-    String format;
+    String format = "auto";
     String structure = "auto";
     String compression_method = "auto";
@@ -71,6 +71,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
     /// Size -> argument indexes
     static auto size_to_args = std::map<size_t, std::map<String, size_t>>
     {
+        {1, {{}}},
         {2, {{"format", 1}}},
         {3, {{"format", 1}, {"structure", 2}}},
         {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}},
@@ -113,6 +114,9 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
         configuration.secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
     }
 
+    if (configuration.format == "auto")
+        configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
+
     s3_configuration = std::move(configuration);
 }
@@ -8,6 +8,7 @@
 #include <TableFunctions/TableFunctionFactory.h>
 #include <TableFunctions/parseColumnsListForTableFunction.h>
 #include <Storages/StorageExternalDistributed.h>
+#include <Formats/FormatFactory.h>
 
 
 namespace DB
@@ -50,6 +51,8 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co
 
     filename = configuration.url;
     format = configuration.format;
+    if (format == "auto")
+        format = FormatFactory::instance().getFormatFromFileName(filename, true);
     structure = configuration.structure;
     compression_method = configuration.compression_method;
 }
@@ -398,6 +398,13 @@ def test_multiple_inserts(started_cluster):
     result = node1.query(f"select count() from test_multiple_inserts")
     assert(int(result) == 60)
 
+
+def test_format_detection(started_cluster):
+    node1.query(f"create table arrow_table (x UInt64) engine=HDFS('hdfs://hdfs1:9000/data.arrow')")
+    node1.query(f"insert into arrow_table select 1")
+    result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/data.arrow')")
+    assert(int(result) == 1)
+
 
 if __name__ == '__main__':
     cluster.start()
@@ -20,5 +20,10 @@
             <access_key_id>minio</access_key_id>
             <secret_access_key>minio123</secret_access_key>
         </s3_native>
+        <s3_arrow>
+            <url>http://minio1:9001/root/test.arrow</url>
+            <access_key_id>minio</access_key_id>
+            <secret_access_key>minio123</secret_access_key>
+        </s3_arrow>
     </named_collections>
 </clickhouse>
@@ -126,7 +126,7 @@ def run_query(instance, query, stdin=None, settings=None):
     pytest.param("'wrongid','wrongkey',", False, 'xz', id="xz"),
     pytest.param("'wrongid','wrongkey',", False, 'zstd', id="zstd")
 ])
-def _test_put(started_cluster, maybe_auth, positive, compression):
+def test_put(started_cluster, maybe_auth, positive, compression):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -148,7 +148,7 @@ def _test_put(started_cluster, maybe_auth, positive, compression):
     assert values_csv == get_s3_file_content(started_cluster, bucket, filename)
 
 
-def _test_partition_by(started_cluster):
+def test_partition_by(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -173,7 +173,7 @@ def _test_partition_by(started_cluster):
     assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv")
 
 
-def _test_partition_by_string_column(started_cluster):
+def test_partition_by_string_column(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "col_num UInt32, col_str String"
@@ -191,7 +191,7 @@ def _test_partition_by_string_column(started_cluster):
     assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv")
 
 
-def _test_partition_by_const_column(started_cluster):
+def test_partition_by_const_column(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -212,7 +212,7 @@ def _test_partition_by_const_column(started_cluster):
     "space",
     "plus"
 ])
-def _test_get_file_with_special(started_cluster, special):
+def test_get_file_with_special(started_cluster, special):
     symbol = {"space": " ", "plus": "+"}[special]
     urlsafe_symbol = {"space": "%20", "plus": "%2B"}[special]
     auth = "'minio','minio123',"
@@ -239,7 +239,7 @@ def _test_get_file_with_special(started_cluster, special):
     "plus",
     "plus2"
 ])
-def _test_get_path_with_special(started_cluster, special):
+def test_get_path_with_special(started_cluster, special):
     symbol = {"space": "%20", "plus": "%2B", "plus2": "%2B"}[special]
     safe_symbol = {"space": "%20", "plus": "+", "plus2": "%2B"}[special]
     auth = "'minio','minio123',"
@@ -253,7 +253,7 @@ def _test_get_path_with_special(started_cluster, special):
 @pytest.mark.parametrize("auth", [
     pytest.param("'minio','minio123',", id="minio")
 ])
-def _test_empty_put(started_cluster, auth):
+def test_empty_put(started_cluster, auth):
     # type: (ClickHouseCluster, str) -> None
 
     bucket = started_cluster.minio_bucket
@@ -291,7 +291,7 @@ def _test_empty_put(started_cluster, auth):
     pytest.param("'minio','minio123',", True, id="auth_positive"),
     pytest.param("'wrongid','wrongkey',", False, id="negative"),
 ])
-def _test_put_csv(started_cluster, maybe_auth, positive):
+def test_put_csv(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster, bool, str) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -313,7 +313,7 @@ def _test_put_csv(started_cluster, maybe_auth, positive):
 
 
 # Test put and get with S3 server redirect.
-def _test_put_get_with_redirect(started_cluster):
+def test_put_get_with_redirect(started_cluster):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket
@@ -340,7 +340,7 @@ def _test_put_get_with_redirect(started_cluster):
 
 
 # Test put with restricted S3 server redirect.
-def _test_put_with_zero_redirect(started_cluster):
+def test_put_with_zero_redirect(started_cluster):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket
@@ -367,7 +367,7 @@ def _test_put_with_zero_redirect(started_cluster):
     assert exception_raised
 
 
-def _test_put_get_with_globs(started_cluster):
+def test_put_get_with_globs(started_cluster):
     # type: (ClickHouseCluster) -> None
     unique_prefix = random.randint(1,10000)
     bucket = started_cluster.minio_bucket
@@ -399,7 +399,7 @@ def _test_put_get_with_globs(started_cluster):
     pytest.param("'wrongid','wrongkey'", False, id="negative"),
     # ("'minio','minio123',",True), Redirect with credentials not working with nginx.
 ])
-def _test_multipart_put(started_cluster, maybe_auth, positive):
+def test_multipart_put(started_cluster, maybe_auth, positive):
     # type: (ClickHouseCluster) -> None
 
     bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@@ -439,7 +439,7 @@ def _test_multipart_put(started_cluster, maybe_auth, positive):
     assert csv_data == get_s3_file_content(started_cluster, bucket, filename)
 
 
-def _test_remote_host_filter(started_cluster):
+def test_remote_host_filter(started_cluster):
     instance = started_cluster.instances["restricted_dummy"]
     format = "column1 UInt32, column2 UInt32, column3 UInt32"
 
@@ -453,20 +453,21 @@ def _test_remote_host_filter(started_cluster):
     assert "not allowed in configuration file" in instance.query_and_get_error(query)
 
 
-@pytest.mark.parametrize("s3_storage_args", [
-    pytest.param("''", id="1_argument"),
-    pytest.param("'','','','','',''", id="6_arguments"),
-])
-def _test_wrong_s3_syntax(started_cluster, s3_storage_args):
+def test_wrong_s3_syntax(started_cluster):
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     expected_err_msg = "Code: 42"  # NUMBER_OF_ARGUMENTS_DOESNT_MATCH
 
-    query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3({})".format(s3_storage_args)
+    query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '')"
     assert expected_err_msg in instance.query_and_get_error(query)
 
+    expected_err_msg = "Code: 36"  # BAD_ARGUMENTS
+
+    query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('')"
+    assert expected_err_msg in instance.query_and_get_error(query)
+
 
 # https://en.wikipedia.org/wiki/One_Thousand_and_One_Nights
-def _test_s3_glob_scheherazade(started_cluster):
+def test_s3_glob_scheherazade(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@@ -535,7 +536,7 @@ def replace_config(old, new):
     config.close()
 
 
-def _test_custom_auth_headers(started_cluster):
+def test_custom_auth_headers(started_cluster):
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
     get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format(
@@ -566,7 +567,7 @@ def _test_custom_auth_headers(started_cluster):
     instance.query("DROP TABLE test")
 
 
-def _test_custom_auth_headers_exclusion(started_cluster):
+def test_custom_auth_headers_exclusion(started_cluster):
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
     get_query = f"SELECT * FROM s3('http://resolver:8080/{started_cluster.minio_restricted_bucket}/restricteddirectory/{filename}', 'CSV', '{table_format}')"
@@ -580,7 +581,7 @@ def _test_custom_auth_headers_exclusion(started_cluster):
     assert 'Forbidden Error' in ei.value.stderr
 
 
-def _test_infinite_redirect(started_cluster):
+def test_infinite_redirect(started_cluster):
     bucket = "redirected"
     table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
     filename = "test.csv"
@@ -598,7 +599,7 @@ def _test_infinite_redirect(started_cluster):
     pytest.param("bin", "gzip", id="bin"),
     pytest.param("gz", "auto", id="gz"),
 ])
-def _test_storage_s3_get_gzip(started_cluster, extension, method):
+def test_storage_s3_get_gzip(started_cluster, extension, method):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = f"test_get_gzip.{extension}"
@@ -638,7 +639,7 @@ def _test_storage_s3_get_gzip(started_cluster, extension, method):
     run_query(instance, f"DROP TABLE {name}")
 
 
-def _test_storage_s3_get_unstable(started_cluster):
+def test_storage_s3_get_unstable(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64"
@@ -647,7 +648,7 @@ def _test_storage_s3_get_unstable(started_cluster):
     assert result.splitlines() == ["500001,500000,0"]
 
 
-def _test_storage_s3_put_uncompressed(started_cluster):
+def test_storage_s3_put_uncompressed(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = "test_put_uncompressed.bin"
@@ -684,7 +685,7 @@ def _test_storage_s3_put_uncompressed(started_cluster):
     pytest.param("bin", "gzip", id="bin"),
     pytest.param("gz", "auto", id="gz")
 ])
-def _test_storage_s3_put_gzip(started_cluster, extension, method):
+def test_storage_s3_put_gzip(started_cluster, extension, method):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     filename = f"test_put_gzip.{extension}"
@@ -721,7 +722,7 @@ def _test_storage_s3_put_gzip(started_cluster, extension, method):
     assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
 
 
-def _test_truncate_table(started_cluster):
+def test_truncate_table(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     name = "truncate"
@@ -745,7 +746,7 @@ def _test_truncate_table(started_cluster):
     assert instance.query("SELECT * FROM {}".format(name)) == ""
 
 
-def _test_predefined_connection_configuration(started_cluster):
+def test_predefined_connection_configuration(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
     name = "test_table"
@@ -762,7 +763,7 @@ def _test_predefined_connection_configuration(started_cluster):
 
 
 result = ""
-def _test_url_reconnect_in_the_middle(started_cluster):
+def test_url_reconnect_in_the_middle(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
     table_format = "id String, data String"
@@ -799,7 +800,7 @@ def _test_url_reconnect_in_the_middle(started_cluster):
     assert(int(result) == 3914219105369203805)
 
 
-def _test_seekable_formats(started_cluster):
+def test_seekable_formats(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]  # type: ClickHouseInstance
 
@@ -821,7 +822,7 @@ def _test_seekable_formats(started_cluster):
     assert(int(result[:3]) < 200)
 
 
-def _test_seekable_formats_url(started_cluster):
+def test_seekable_formats_url(started_cluster):
     bucket = started_cluster.minio_bucket
     instance = started_cluster.instances["dummy"]
 
@@ -957,3 +958,16 @@ def test_create_new_files_on_insert(started_cluster):
     result = instance.query(f"select count() from test_multiple_inserts")
     assert(int(result) == 60)
 
+
+def test_format_detection(started_cluster):
+    bucket = started_cluster.minio_bucket
+    instance = started_cluster.instances["dummy"]
+
+    instance.query(f"create table arrow_table_s3 (x UInt64) engine=S3(s3_arrow)")
+    instance.query(f"insert into arrow_table_s3 select 1")
+    result = instance.query(f"select * from s3(s3_arrow)")
+    assert(int(result) == 1)
+
+    result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')")
+    assert(int(result) == 1)
@@ -3,7 +3,7 @@
 drop table if exists test_table_hdfs_syntax
 ;
 create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('')
-; -- { serverError 42 }
+; -- { serverError 36 }
 create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('','','', '')
 ; -- { serverError 42 }
 drop table if exists test_table_hdfs_syntax
@@ -1,7 +1,7 @@
 drop table if exists test_table_url_syntax
 ;
 create table test_table_url_syntax (id UInt32) ENGINE = URL('')
-; -- { serverError 42 }
+; -- { serverError 36 }
 create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','')
 ; -- { serverError 42 }
 drop table if exists test_table_url_syntax
@@ -47,8 +47,8 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICK
 ${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;"
 ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;"
 
-${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...protobuf';"
-${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...protobuf' FORMAT TabSeparated;"
+${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';"
+${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';"
 ${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;"
 ${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;"
 
@@ -69,4 +69,4 @@ rm "${CLICKHOUSE_TMP}"/hello.world.csv
 rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz
 rm "${CLICKHOUSE_TMP}"/.htaccess.json
 rm "${CLICKHOUSE_TMP}"/example.com.
-rm "${CLICKHOUSE_TMP}"/museum...protobuf
+rm "${CLICKHOUSE_TMP}"/museum...JSONEachRow
@@ -0,0 +1,56 @@
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
+0
+1
27 tests/queries/0_stateless/02167_format_from_file_extension.sh (new executable file)
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+# Tags: no-parallel, no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+
+for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes
+do
+    $CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)"
+    $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')"
+    $CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')"
+done
+
+$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')"
+
+$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')"
+
+$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1"
+$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')"
+