Merge branch 'master' into formats-with-suffixes

Kruglov Pavel 2022-01-14 21:03:49 +03:00 committed by GitHub
commit a7df9cd53a
29 changed files with 224 additions and 76 deletions

contrib/arrow vendored

@ -1 +1 @@
Subproject commit aa9a7a698e33e278abe053f4634170b3b026e48e
Subproject commit 1d9cc51daa4e7e9fc6926320ef73759818bd736e

View File

@ -45,7 +45,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
ENV DOCKER_CHANNEL stable
RUN curl -fsSL https://download.docker.com/linux/ubuntu/gpg | apt-key add -
RUN add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
RUN add-apt-repository "deb https://download.docker.com/linux/ubuntu $(lsb_release -c -s) ${DOCKER_CHANNEL}"
RUN apt-get update \
&& env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
@ -58,7 +58,9 @@ RUN apt-get update \
RUN dockerd --version; docker --version
RUN python3 -m pip install \
ARG TARGETARCH
# FIXME: psycopg2-binary is not available for aarch64, we skip it for now
RUN test x$TARGETARCH = xarm64 || ( python3 -m pip install \
PyMySQL \
aerospike==4.0.0 \
avro==1.10.2 \
@ -88,7 +90,7 @@ RUN python3 -m pip install \
urllib3 \
requests-kerberos \
pyhdfs \
azure-storage-blob
azure-storage-blob )
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
@ -102,8 +104,6 @@ RUN set -x \
&& echo 'dockremap:165536:65536' >> /etc/subuid \
&& echo 'dockremap:165536:65536' >> /etc/subgid
RUN echo '127.0.0.1 localhost test.com' >> /etc/hosts
EXPOSE 2375
ENTRYPOINT ["dockerd-entrypoint.sh"]
CMD ["sh", "-c", "pytest $PYTEST_OPTS"]

View File

@ -178,7 +178,7 @@ toc_title: Adopters
| <a href="https://cloud.yandex.ru/services/managed-clickhouse" class="favicon">Yandex Cloud</a> | Public Cloud | Main product | — | — | [Talk in Russian, December 2019](https://www.youtube.com/watch?v=pgnak9e_E0o) |
| <a href="https://cloud.yandex.ru/services/datalens" class="favicon">Yandex DataLens</a> | Business Intelligence | Main product | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.com/meetup38/datalens.pdf) |
| <a href="https://market.yandex.ru/" class="favicon">Yandex Market</a> | e-Commerce | Metrics, Logging | — | — | [Talk in Russian, January 2019](https://youtu.be/_l1qP0DyBcA?t=478) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Macin product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://metrica.yandex.com" class="favicon">Yandex Metrica</a> | Web analytics | Main product | 630 servers in one cluster, 360 servers in another cluster, 1862 servers in one department | 133 PiB / 8.31 PiB / 120 trillion records | [Slides, February 2020](https://presentations.clickhouse.com/meetup40/introduction/#13) |
| <a href="https://www.yellowfinbi.com" class="favicon"><COMPANYNAME></a> | Analytics | Main product | - | - | [Integration](https://www.yellowfinbi.com/campaign/yellowfin-9-whats-new#el-30219e0e) |
| <a href="https://www.yotascale.com/" class="favicon">Yotascale</a> | Cloud | Data pipeline | — | 2 bn records/day | [LinkedIn (Accomplishments)](https://www.linkedin.com/in/adilsaleem/) |
| <a href="https://www.your-analytics.org/" class="favicon">Your Analytics</a> | Product Analytics | Main Product | — | - | [Tweet, November 2021](https://twitter.com/mikenikles/status/1459737241165565953) |

View File

@ -1017,7 +1017,7 @@ void ClientBase::sendData(Block & sample, const ColumnsDescription & columns_des
String current_format = parsed_insert_query->format;
if (current_format.empty())
current_format = FormatFactory::instance().getFormatFromFileName(in_file);
current_format = FormatFactory::instance().getFormatFromFileName(in_file, true);
/// Create temporary storage file, to support globs and parallel reading
StorageFile::CommonArguments args{

View File

@ -25,6 +25,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int FORMAT_IS_NOT_SUITABLE_FOR_INPUT;
extern const int FORMAT_IS_NOT_SUITABLE_FOR_OUTPUT;
extern const int BAD_ARGUMENTS;
}
const FormatFactory::Creators & FormatFactory::getCreators(const String & name) const
@ -382,6 +383,7 @@ void FormatFactory::registerInputFormat(const String & name, InputCreator input_
if (target)
throw Exception("FormatFactory: Input format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = std::move(input_creator);
registerFileExtension(name, name);
}
void FormatFactory::registerNonTrivialPrefixAndSuffixChecker(const String & name, NonTrivialPrefixAndSuffixChecker non_trivial_prefix_and_suffix_checker)
@ -419,14 +421,15 @@ void FormatFactory::registerOutputFormat(const String & name, OutputCreator outp
if (target)
throw Exception("FormatFactory: Output format " + name + " is already registered", ErrorCodes::LOGICAL_ERROR);
target = std::move(output_creator);
registerFileExtension(name, name);
}
void FormatFactory::registerFileExtension(const String & extension, const String & format_name)
{
file_extension_formats[extension] = format_name;
file_extension_formats[boost::to_lower_copy(extension)] = format_name;
}
String FormatFactory::getFormatFromFileName(String file_name)
String FormatFactory::getFormatFromFileName(String file_name, bool throw_if_not_found)
{
CompressionMethod compression_method = chooseCompressionMethod(file_name, "");
if (CompressionMethod::None != compression_method)
@ -438,11 +441,22 @@ String FormatFactory::getFormatFromFileName(String file_name)
auto pos = file_name.find_last_of('.');
if (pos == String::npos)
{
if (throw_if_not_found)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension");
return "";
}
String file_extension = file_name.substr(pos + 1, String::npos);
boost::algorithm::to_lower(file_extension);
return file_extension_formats[file_extension];
auto it = file_extension_formats.find(file_extension);
if (it == file_extension_formats.end())
{
if (throw_if_not_found)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot determine the file format by its extension");
return "";
}
return it->second;
}
void FormatFactory::registerFileSegmentationEngine(const String & name, FileSegmentationEngine file_segmentation_engine)
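
A minimal, self-contained sketch of the lookup that getFormatFromFileName performs after this change, assuming a plain std::map keyed by lower-cased extensions; detect_format_from_file_name, the map contents, and the use of std::invalid_argument are illustrative stand-ins, not the real FormatFactory API (which also strips a known compression suffix such as .gz before inspecting the extension):

#include <algorithm>
#include <cctype>
#include <map>
#include <stdexcept>
#include <string>

// Hypothetical extension table; the real one is filled by registerFileExtension
// and by the format registration functions themselves.
static const std::map<std::string, std::string> extension_to_format = {
    {"csv", "CSV"}, {"tsv", "TSV"}, {"parquet", "Parquet"}, {"orc", "ORC"},
    {"ndjson", "JSONEachRow"}, {"bin", "RowBinary"}, {"capnp", "CapnProto"},
};

std::string detect_format_from_file_name(const std::string & file_name, bool throw_if_not_found = false)
{
    auto pos = file_name.find_last_of('.');
    if (pos == std::string::npos)
    {
        if (throw_if_not_found)
            throw std::invalid_argument("Cannot determine the file format by its extension");
        return "";
    }

    // Extensions are matched case-insensitively, mirroring the lower-casing
    // added to registerFileExtension in the hunk above.
    std::string extension = file_name.substr(pos + 1);
    std::transform(extension.begin(), extension.end(), extension.begin(),
                   [](unsigned char c) { return static_cast<char>(std::tolower(c)); });

    auto it = extension_to_format.find(extension);
    if (it == extension_to_format.end())
    {
        if (throw_if_not_found)
            throw std::invalid_argument("Cannot determine the file format by its extension");
        return "";
    }
    return it->second;
}

Under these assumptions, detect_format_from_file_name("data.parquet") yields "Parquet", while a call on "data.unknown" returns an empty string, and the throwing variant raises instead of silently falling back.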

View File

@ -186,7 +186,7 @@ public:
/// Register file extension for format
void registerFileExtension(const String & extension, const String & format_name);
String getFormatFromFileName(String file_name);
String getFormatFromFileName(String file_name, bool throw_if_not_found = false);
/// Register schema readers for a format by its name.
void registerSchemaReader(const String & name, SchemaReaderCreator schema_reader_creator);

View File

@ -107,6 +107,8 @@ void registerTSKVSchemaReader(FormatFactory & factory);
void registerValuesSchemaReader(FormatFactory & factory);
void registerTemplateSchemaReader(FormatFactory & factory);
void registerFileExtensions(FormatFactory & factory);
void registerFormats()
{
auto & factory = FormatFactory::instance();
@ -203,16 +205,6 @@ void registerFormats()
registerTSKVSchemaReader(factory);
registerValuesSchemaReader(factory);
registerTemplateSchemaReader(factory);
factory.registerFileExtension("csv", "CSV");
factory.registerFileExtension("tsv", "TSV");
factory.registerFileExtension("parquet", "Parquet");
factory.registerFileExtension("orc", "ORC");
factory.registerFileExtension("native", "Native");
factory.registerFileExtension("json", "JSON");
factory.registerFileExtension("ndjson", "JSONEachRow");
factory.registerFileExtension("xml", "XML");
factory.registerFileExtension("avro", "Avro");
}
}

View File

@ -114,6 +114,7 @@ void registerInputFormatRowBinary(FormatFactory & factory)
};
registerWithNamesAndTypes("RowBinary", register_func);
factory.registerFileExtension("bin", "RowBinary");
}
void registerRowBinaryWithNamesAndTypesSchemaReader(FormatFactory & factory)

View File

@ -310,6 +310,7 @@ void registerInputFormatCapnProto(FormatFactory & factory)
return std::make_shared<CapnProtoRowInputFormat>(buf, sample, std::move(params),
FormatSchemaInfo(settings, "CapnProto", true), settings);
});
factory.registerFileExtension("capnp", "CapnProto");
}
void registerCapnProtoSchemaReader(FormatFactory & factory)

View File

@ -340,6 +340,8 @@ void registerInputFormatJSONEachRow(FormatFactory & factory)
return std::make_shared<JSONEachRowRowInputFormat>(buf, sample, std::move(params), settings, false);
});
factory.registerFileExtension("ndjson", "JSONEachRow");
factory.registerInputFormat("JSONStringsEachRow", [](
ReadBuffer & buf,
const Block & sample,

View File

@ -67,6 +67,7 @@ void registerOutputFormatMarkdown(FormatFactory & factory)
});
factory.markOutputFormatSupportsParallelFormatting("Markdown");
factory.registerFileExtension("md", "Markdown");
}
}

View File

@ -486,6 +486,7 @@ void registerInputFormatMsgPack(FormatFactory & factory)
{
return std::make_shared<MsgPackRowInputFormat>(sample, buf, params);
});
factory.registerFileExtension("messagepack", "MsgPack");
}
void registerMsgPackSchemaReader(FormatFactory & factory)

View File

@ -95,6 +95,8 @@ void registerProtobufSchemaReader(FormatFactory & factory)
{
return std::make_shared<ProtobufSchemaReader>(settings);
});
factory.registerFileExtension("pb", "Protobuf");
factory.registerExternalSchemaReader("ProtobufSingle", [](const FormatSettings & settings)
{
return std::make_shared<ProtobufSchemaReader>(settings);

View File

@ -98,7 +98,7 @@ getExternalDataSourceConfigurationByPriority(const Poco::Util::AbstractConfigura
struct URLBasedDataSourceConfiguration
{
String url;
String format;
String format = "auto";
String compression_method = "auto";
String structure = "auto";

View File

@ -620,17 +620,23 @@ void registerStorageHDFS(StorageFactory & factory)
{
ASTs & engine_args = args.engine_args;
if (engine_args.size() != 2 && engine_args.size() != 3)
if (engine_args.empty() || engine_args.size() > 3)
throw Exception(
"Storage HDFS requires 2 or 3 arguments: url, name of used format and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
"Storage HDFS requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
engine_args[0] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[0], args.getLocalContext());
String url = engine_args[0]->as<ASTLiteral &>().value.safeGet<String>();
engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext());
String format_name = "auto";
if (engine_args.size() > 1)
{
engine_args[1] = evaluateConstantExpressionOrIdentifierAsLiteral(engine_args[1], args.getLocalContext());
format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
}
String format_name = engine_args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (format_name == "auto")
format_name = FormatFactory::instance().getFormatFromFileName(url, true);
String compression_method;
if (engine_args.size() == 3)

View File

@ -789,9 +789,9 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
}
else
{
if (engine_args.size() < 2 || engine_args.size() > 5)
if (engine_args.empty() || engine_args.size() > 5)
throw Exception(
"Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
"Storage S3 requires 1 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & engine_arg : engine_args)
@ -809,13 +809,16 @@ StorageS3Configuration StorageS3::getConfiguration(ASTs & engine_args, ContextPt
configuration.compression_method = engine_args.back()->as<ASTLiteral &>().value.safeGet<String>();
configuration.format = engine_args[engine_args.size() - 2]->as<ASTLiteral &>().value.safeGet<String>();
}
else
else if (engine_args.size() != 1)
{
configuration.compression_method = "auto";
configuration.format = engine_args.back()->as<ASTLiteral &>().value.safeGet<String>();
}
}
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
return configuration;
}

View File

@ -624,20 +624,24 @@ URLBasedDataSourceConfiguration StorageURL::getConfiguration(ASTs & args, Contex
}
else
{
if (args.size() != 2 && args.size() != 3)
if (args.empty() || args.size() > 3)
throw Exception(
"Storage URL requires 2 or 3 arguments: url, name of used format and optional compression method.",
"Storage URL requires 1, 2 or 3 arguments: url, name of used format (taken from file extension by default) and optional compression method.",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, local_context);
configuration.url = args[0]->as<ASTLiteral &>().value.safeGet<String>();
configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (args.size() > 1)
configuration.format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (args.size() == 3)
configuration.compression_method = args[2]->as<ASTLiteral &>().value.safeGet<String>();
}
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
return configuration;
}
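
The HDFS, S3 and URL storages (and the file-like table functions further down) converge on the same pattern: the format argument becomes optional, defaults to "auto", and an "auto" format is resolved from the URL's file extension. Below is a hedged, self-contained C++ sketch of that pattern; parse_url_engine_args is a hypothetical helper, not the actual Storage* parsing code, and it reuses detect_format_from_file_name from the earlier sketch:

#include <stdexcept>
#include <string>
#include <vector>

// Provided by the earlier sketch: maps a file extension to a format name,
// returning "" or throwing when the extension is unknown.
std::string detect_format_from_file_name(const std::string & file_name, bool throw_if_not_found = false);

struct UrlEngineConfig
{
    std::string url;
    std::string format = "auto";
    std::string compression_method = "auto";
};

// Hypothetical parser mirroring the new behaviour: 1 to 3 arguments,
// url [, format [, compression_method]].
UrlEngineConfig parse_url_engine_args(const std::vector<std::string> & args)
{
    if (args.empty() || args.size() > 3)
        throw std::invalid_argument(
            "Storage URL requires 1, 2 or 3 arguments: url, name of used format "
            "(taken from file extension by default) and optional compression method.");

    UrlEngineConfig config;
    config.url = args[0];
    if (args.size() > 1)
        config.format = args[1];
    if (args.size() == 3)
        config.compression_method = args[2];

    // A format still set to "auto" is derived from the url's file extension;
    // an undetectable extension is a hard error at table creation time.
    if (config.format == "auto")
        config.format = detect_format_from_file_name(config.url, /*throw_if_not_found=*/true);

    return config;
}

Passing throw_if_not_found = true here is what turns a missing or unrecognised extension into the BAD_ARGUMENTS (code 36) error that the updated .sql tests below now expect, instead of an empty format name.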

View File

@ -53,23 +53,28 @@ void ITableFunctionFileLike::parseArguments(const ASTPtr & ast_function, Context
ASTs & args = args_func.at(0)->children;
if (args.size() < 2)
throw Exception("Table function '" + getName() + "' requires at least 2 arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
if (args.empty())
throw Exception("Table function '" + getName() + "' requires at least 1 argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);
filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();
format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (args.size() == 2)
if (args.size() > 1)
format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
if (format == "auto")
format = FormatFactory::instance().getFormatFromFileName(filename, true);
if (args.size() <= 2)
{
checkIfFormatSupportsAutoStructure(getName(), format);
return;
}
if (args.size() != 3 && args.size() != 4)
throw Exception("Table function '" + getName() + "' requires 2, 3 or 4 arguments: filename, format, structure (default auto) and compression method (default auto)",
throw Exception("Table function '" + getName() + "' requires 1, 2, 3 or 4 arguments: filename, format (default auto), structure (default auto) and compression method (default auto)",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();

View File

@ -17,7 +17,7 @@ protected:
void parseArguments(const ASTPtr & ast_function, ContextPtr context) override;
String filename;
String format;
String format = "auto";
String structure = "auto";
String compression_method = "auto";

View File

@ -71,6 +71,7 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
/// Size -> argument indexes
static auto size_to_args = std::map<size_t, std::map<String, size_t>>
{
{1, {{}}},
{2, {{"format", 1}}},
{3, {{"format", 1}, {"structure", 2}}},
{5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}},
@ -113,6 +114,9 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
configuration.secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
}
if (configuration.format == "auto")
configuration.format = FormatFactory::instance().getFormatFromFileName(configuration.url, true);
s3_configuration = std::move(configuration);
}

View File

@ -8,6 +8,7 @@
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
#include <Storages/StorageExternalDistributed.h>
#include <Formats/FormatFactory.h>
namespace DB
@ -50,6 +51,8 @@ void TableFunctionURL::parseArguments(const ASTPtr & ast_function, ContextPtr co
filename = configuration.url;
format = configuration.format;
if (format == "auto")
format = FormatFactory::instance().getFormatFromFileName(filename, true);
structure = configuration.structure;
compression_method = configuration.compression_method;
}

View File

@ -398,6 +398,13 @@ def test_multiple_inserts(started_cluster):
result = node1.query(f"select count() from test_multiple_inserts")
assert(int(result) == 60)
def test_format_detection(started_cluster):
node1.query(f"create table arrow_table (x UInt64) engine=HDFS('hdfs://hdfs1:9000/data.arrow')")
node1.query(f"insert into arrow_table select 1")
result = node1.query(f"select * from hdfs('hdfs://hdfs1:9000/data.arrow')")
assert(int(result) == 1)
if __name__ == '__main__':
cluster.start()

View File

@ -20,5 +20,10 @@
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3_native>
<s3_arrow>
<url>http://minio1:9001/root/test.arrow</url>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3_arrow>
</named_collections>
</clickhouse>

View File

@ -126,7 +126,7 @@ def run_query(instance, query, stdin=None, settings=None):
pytest.param("'wrongid','wrongkey',", False, 'xz', id="xz"),
pytest.param("'wrongid','wrongkey',", False, 'zstd', id="zstd")
])
def _test_put(started_cluster, maybe_auth, positive, compression):
def test_put(started_cluster, maybe_auth, positive, compression):
# type: (ClickHouseCluster) -> None
bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@ -148,7 +148,7 @@ def _test_put(started_cluster, maybe_auth, positive, compression):
assert values_csv == get_s3_file_content(started_cluster, bucket, filename)
def _test_partition_by(started_cluster):
def test_partition_by(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@ -173,7 +173,7 @@ def _test_partition_by(started_cluster):
assert "78,43,45\n" == get_s3_file_content(started_cluster, bucket, "test2_45.csv")
def _test_partition_by_string_column(started_cluster):
def test_partition_by_string_column(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
table_format = "col_num UInt32, col_str String"
@ -191,7 +191,7 @@ def _test_partition_by_string_column(started_cluster):
assert '78,"你好"\n' == get_s3_file_content(started_cluster, bucket, "test_你好.csv")
def _test_partition_by_const_column(started_cluster):
def test_partition_by_const_column(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@ -212,7 +212,7 @@ def _test_partition_by_const_column(started_cluster):
"space",
"plus"
])
def _test_get_file_with_special(started_cluster, special):
def test_get_file_with_special(started_cluster, special):
symbol = {"space": " ", "plus": "+"}[special]
urlsafe_symbol = {"space": "%20", "plus": "%2B"}[special]
auth = "'minio','minio123',"
@ -239,7 +239,7 @@ def _test_get_file_with_special(started_cluster, special):
"plus",
"plus2"
])
def _test_get_path_with_special(started_cluster, special):
def test_get_path_with_special(started_cluster, special):
symbol = {"space": "%20", "plus": "%2B", "plus2": "%2B"}[special]
safe_symbol = {"space": "%20", "plus": "+", "plus2": "%2B"}[special]
auth = "'minio','minio123',"
@ -253,7 +253,7 @@ def _test_get_path_with_special(started_cluster, special):
@pytest.mark.parametrize("auth", [
pytest.param("'minio','minio123',", id="minio")
])
def _test_empty_put(started_cluster, auth):
def test_empty_put(started_cluster, auth):
# type: (ClickHouseCluster, str) -> None
bucket = started_cluster.minio_bucket
@ -291,7 +291,7 @@ def _test_empty_put(started_cluster, auth):
pytest.param("'minio','minio123',", True, id="auth_positive"),
pytest.param("'wrongid','wrongkey',", False, id="negative"),
])
def _test_put_csv(started_cluster, maybe_auth, positive):
def test_put_csv(started_cluster, maybe_auth, positive):
# type: (ClickHouseCluster, bool, str) -> None
bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@ -313,7 +313,7 @@ def _test_put_csv(started_cluster, maybe_auth, positive):
# Test put and get with S3 server redirect.
def _test_put_get_with_redirect(started_cluster):
def test_put_get_with_redirect(started_cluster):
# type: (ClickHouseCluster) -> None
bucket = started_cluster.minio_bucket
@ -340,7 +340,7 @@ def _test_put_get_with_redirect(started_cluster):
# Test put with restricted S3 server redirect.
def _test_put_with_zero_redirect(started_cluster):
def test_put_with_zero_redirect(started_cluster):
# type: (ClickHouseCluster) -> None
bucket = started_cluster.minio_bucket
@ -367,7 +367,7 @@ def _test_put_with_zero_redirect(started_cluster):
assert exception_raised
def _test_put_get_with_globs(started_cluster):
def test_put_get_with_globs(started_cluster):
# type: (ClickHouseCluster) -> None
unique_prefix = random.randint(1,10000)
bucket = started_cluster.minio_bucket
@ -399,7 +399,7 @@ def _test_put_get_with_globs(started_cluster):
pytest.param("'wrongid','wrongkey'", False, id="negative"),
# ("'minio','minio123',",True), Redirect with credentials not working with nginx.
])
def _test_multipart_put(started_cluster, maybe_auth, positive):
def test_multipart_put(started_cluster, maybe_auth, positive):
# type: (ClickHouseCluster) -> None
bucket = started_cluster.minio_bucket if not maybe_auth else started_cluster.minio_restricted_bucket
@ -439,7 +439,7 @@ def _test_multipart_put(started_cluster, maybe_auth, positive):
assert csv_data == get_s3_file_content(started_cluster, bucket, filename)
def _test_remote_host_filter(started_cluster):
def test_remote_host_filter(started_cluster):
instance = started_cluster.instances["restricted_dummy"]
format = "column1 UInt32, column2 UInt32, column3 UInt32"
@ -453,20 +453,21 @@ def _test_remote_host_filter(started_cluster):
assert "not allowed in configuration file" in instance.query_and_get_error(query)
@pytest.mark.parametrize("s3_storage_args", [
pytest.param("''", id="1_argument"),
pytest.param("'','','','','',''", id="6_arguments"),
])
def _test_wrong_s3_syntax(started_cluster, s3_storage_args):
def test_wrong_s3_syntax(started_cluster):
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
expected_err_msg = "Code: 42" # NUMBER_OF_ARGUMENTS_DOESNT_MATCH
query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3({})".format(s3_storage_args)
query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('', '', '', '', '', '')"
assert expected_err_msg in instance.query_and_get_error(query)
expected_err_msg = "Code: 36" # BAD_ARGUMENTS
query = "create table test_table_s3_syntax (id UInt32) ENGINE = S3('')"
assert expected_err_msg in instance.query_and_get_error(query)
# https://en.wikipedia.org/wiki/One_Thousand_and_One_Nights
def _test_s3_glob_scheherazade(started_cluster):
def test_s3_glob_scheherazade(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
@ -535,7 +536,7 @@ def replace_config(old, new):
config.close()
def _test_custom_auth_headers(started_cluster):
def test_custom_auth_headers(started_cluster):
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
filename = "test.csv"
get_query = "select * from s3('http://resolver:8080/{bucket}/{file}', 'CSV', '{table_format}')".format(
@ -566,7 +567,7 @@ def _test_custom_auth_headers(started_cluster):
instance.query("DROP TABLE test")
def _test_custom_auth_headers_exclusion(started_cluster):
def test_custom_auth_headers_exclusion(started_cluster):
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
filename = "test.csv"
get_query = f"SELECT * FROM s3('http://resolver:8080/{started_cluster.minio_restricted_bucket}/restricteddirectory/{filename}', 'CSV', '{table_format}')"
@ -580,7 +581,7 @@ def _test_custom_auth_headers_exclusion(started_cluster):
assert 'Forbidden Error' in ei.value.stderr
def _test_infinite_redirect(started_cluster):
def test_infinite_redirect(started_cluster):
bucket = "redirected"
table_format = "column1 UInt32, column2 UInt32, column3 UInt32"
filename = "test.csv"
@ -598,7 +599,7 @@ def _test_infinite_redirect(started_cluster):
pytest.param("bin", "gzip", id="bin"),
pytest.param("gz", "auto", id="gz"),
])
def _test_storage_s3_get_gzip(started_cluster, extension, method):
def test_storage_s3_get_gzip(started_cluster, extension, method):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
filename = f"test_get_gzip.{extension}"
@ -638,7 +639,7 @@ def _test_storage_s3_get_gzip(started_cluster, extension, method):
run_query(instance, f"DROP TABLE {name}")
def _test_storage_s3_get_unstable(started_cluster):
def test_storage_s3_get_unstable(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
table_format = "column1 Int64, column2 Int64, column3 Int64, column4 Int64"
@ -647,7 +648,7 @@ def _test_storage_s3_get_unstable(started_cluster):
assert result.splitlines() == ["500001,500000,0"]
def _test_storage_s3_put_uncompressed(started_cluster):
def test_storage_s3_put_uncompressed(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
filename = "test_put_uncompressed.bin"
@ -684,7 +685,7 @@ def _test_storage_s3_put_uncompressed(started_cluster):
pytest.param("bin", "gzip", id="bin"),
pytest.param("gz", "auto", id="gz")
])
def _test_storage_s3_put_gzip(started_cluster, extension, method):
def test_storage_s3_put_gzip(started_cluster, extension, method):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
filename = f"test_put_gzip.{extension}"
@ -721,7 +722,7 @@ def _test_storage_s3_put_gzip(started_cluster, extension, method):
assert sum([ int(i.split(',')[1]) for i in uncompressed_content.splitlines() ]) == 708
def _test_truncate_table(started_cluster):
def test_truncate_table(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
name = "truncate"
@ -745,7 +746,7 @@ def _test_truncate_table(started_cluster):
assert instance.query("SELECT * FROM {}".format(name)) == ""
def _test_predefined_connection_configuration(started_cluster):
def test_predefined_connection_configuration(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
name = "test_table"
@ -762,7 +763,7 @@ def _test_predefined_connection_configuration(started_cluster):
result = ""
def _test_url_reconnect_in_the_middle(started_cluster):
def test_url_reconnect_in_the_middle(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
table_format = "id String, data String"
@ -799,7 +800,7 @@ def _test_url_reconnect_in_the_middle(started_cluster):
assert(int(result) == 3914219105369203805)
def _test_seekable_formats(started_cluster):
def test_seekable_formats(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"] # type: ClickHouseInstance
@ -821,7 +822,7 @@ def _test_seekable_formats(started_cluster):
assert(int(result[:3]) < 200)
def _test_seekable_formats_url(started_cluster):
def test_seekable_formats_url(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
@ -957,3 +958,16 @@ def test_create_new_files_on_insert(started_cluster):
result = instance.query(f"select count() from test_multiple_inserts")
assert(int(result) == 60)
def test_format_detection(started_cluster):
bucket = started_cluster.minio_bucket
instance = started_cluster.instances["dummy"]
instance.query(f"create table arrow_table_s3 (x UInt64) engine=S3(s3_arrow)")
instance.query(f"insert into arrow_table_s3 select 1")
result = instance.query(f"select * from s3(s3_arrow)")
assert(int(result) == 1)
result = instance.query(f"select * from url('http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/test.arrow')")
assert(int(result) == 1)

View File

@ -3,7 +3,7 @@
drop table if exists test_table_hdfs_syntax
;
create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('')
; -- { serverError 42 }
; -- { serverError 36 }
create table test_table_hdfs_syntax (id UInt32) ENGINE = HDFS('','','', '')
; -- { serverError 42 }
drop table if exists test_table_hdfs_syntax

View File

@ -1,7 +1,7 @@
drop table if exists test_table_url_syntax
;
create table test_table_url_syntax (id UInt32) ENGINE = URL('')
; -- { serverError 42 }
; -- { serverError 36 }
create table test_table_url_syntax (id UInt32) ENGINE = URL('','','','')
; -- { serverError 42 }
drop table if exists test_table_url_syntax

View File

@ -47,8 +47,8 @@ ${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICK
${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;"
${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...protobuf';"
${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...protobuf' FORMAT TabSeparated;"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_out_tb INTO OUTFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';"
${CLICKHOUSE_CLIENT} --query "INSERT INTO TABLE 02165_in_tb FROM INFILE '${CLICKHOUSE_TMP}/museum...JSONEachRow';"
${CLICKHOUSE_CLIENT} --query "SELECT * FROM 02165_in_tb;"
${CLICKHOUSE_CLIENT} --query "TRUNCATE TABLE 02165_in_tb;"
@ -69,4 +69,4 @@ rm "${CLICKHOUSE_TMP}"/hello.world.csv
rm "${CLICKHOUSE_TMP}"/hello.world.csv.xz
rm "${CLICKHOUSE_TMP}"/.htaccess.json
rm "${CLICKHOUSE_TMP}"/example.com.
rm "${CLICKHOUSE_TMP}"/museum...protobuf
rm "${CLICKHOUSE_TMP}"/museum...JSONEachRow

View File

@ -0,0 +1,56 @@
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1
0
1

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Tags: no-parallel, no-fasttest
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
for format in TSV TabSeparated TSVWithNames TSVWithNamesAndTypes CSV Parquet ORC Arrow JSONEachRow JSONCompactEachRow CustomSeparatedWithNamesAndTypes
do
$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.$format', 'auto', 'x UInt64') select * from numbers(2)"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.$format', '$format')"
done
$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.bin', 'auto', 'x UInt64') select * from numbers(2)"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'auto', 'x UInt64')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.bin', 'RowBinary', 'x UInt64')"
$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.ndjson', 'auto', 'x UInt64') select * from numbers(2)"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson')"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.ndjson', 'JSONEachRow', 'x UInt64')"
$CLICKHOUSE_CLIENT -q "insert into table function file('test_02167.messagepack', 'auto', 'x UInt64') select * from numbers(2)"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack') settings input_format_msgpack_number_of_columns=1"
$CLICKHOUSE_CLIENT -q "select * from file('test_02167.messagepack', 'MsgPack', 'x UInt64')"