Merge pull request #66899 from Avogar/fix-bad-format-detection

Fix invalid format detection in schema inference
This commit is contained in:
Kruglov Pavel 2024-07-24 12:57:36 +00:00 committed by GitHub
commit 0ce04f47f5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 26 additions and 5 deletions

View File

@ -163,7 +163,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (const auto & object_info : read_keys)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(object_info->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;
@ -221,7 +223,9 @@ ReadBufferIterator::Data ReadBufferIterator::next()
{
for (auto it = read_keys.begin() + prev_read_keys_size; it != read_keys.end(); ++it)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName()))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName((*it)->getFileName());
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;

View File

@ -427,7 +427,9 @@ namespace
{
for (const auto & path : paths)
{
if (auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path))
auto format_from_path = FormatFactory::instance().tryGetFormatFromFileName(path);
/// Use this format only if we have a schema reader for it.
if (format_from_path && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_path))
{
format = format_from_path;
break;
@ -716,7 +718,9 @@ namespace
/// If format is unknown we can try to determine it by the file name.
if (!format)
{
if (auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename))
auto format_from_file = FormatFactory::instance().tryGetFormatFromFileName(*filename);
/// Use this format only if we have a schema reader for it.
if (format_from_file && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file))
format = format_from_file;
}

View File

@ -737,7 +737,9 @@ namespace
{
for (const auto & url : options)
{
if (auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url))
auto format_from_file_name = FormatFactory::instance().tryGetFormatFromFileName(url);
/// Use this format only if we have a schema reader for it.
if (format_from_file_name && FormatFactory::instance().checkIfFormatHasAnySchemaReader(*format_from_file_name))
{
format = format_from_file_name;
break;

View File

@ -0,0 +1,10 @@
#!/usr/bin/env bash
# Regression test for format detection by file name in the file() table function.
# An empty "<unique name>.xml" file is created and then queried through a glob
# without an explicit format. The script prints how many output lines mention
# CANNOT_DETECT_FORMAT (the count is compared against the test's reference output).
# NOTE(review): presumably the ".xml" extension maps to a format that has no schema
# reader, which is the case the accompanying C++ change targets -- confirm.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Quote the path so the file name is never subject to word splitting or globbing.
touch "$CLICKHOUSE_TEST_UNIQUE_NAME.xml"

# $CLICKHOUSE_LOCAL is intentionally left unquoted: it may expand to a command plus arguments.
# stderr is merged into stdout so that the error text is visible to grep.
$CLICKHOUSE_LOCAL -q "select * from file('$CLICKHOUSE_TEST_UNIQUE_NAME.*')" 2>&1 | grep -c "CANNOT_DETECT_FORMAT"

# Remove the temporary file created above.
rm "$CLICKHOUSE_TEST_UNIQUE_NAME.xml"