mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-21 15:12:02 +00:00
Merge pull request #53135 from ClickHouse/file_diagnostics_while_reading_header
Add diagnostic info about file name during schema inference
This commit is contained in:
commit
fd7b92e90a
@ -1,13 +1,11 @@
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypeMap.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeTuple.h>
|
||||
#include <Formats/ReadSchemaUtils.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Processors/Formats/ISchemaReader.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Common/assert_cast.h>
|
||||
#include <IO/WithFileName.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -53,6 +51,7 @@ ColumnsDescription readSchemaFromFormat(
|
||||
bool retry,
|
||||
ContextPtr & context,
|
||||
std::unique_ptr<ReadBuffer> & buf)
|
||||
try
|
||||
{
|
||||
NamesAndTypesList names_and_types;
|
||||
if (FormatFactory::instance().checkIfFormatHasExternalSchemaReader(format_name))
|
||||
@ -209,12 +208,23 @@ ColumnsDescription readSchemaFromFormat(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"{} file format doesn't support schema inference. You must specify the structure manually",
|
||||
format_name);
|
||||
|
||||
/// Some formats like CSVWithNames can contain empty column names. We don't support empty column names and further processing can fail with an exception. Let's just remove columns with empty names from the structure.
|
||||
names_and_types.erase(
|
||||
std::remove_if(names_and_types.begin(), names_and_types.end(), [](const NameAndTypePair & pair) { return pair.name.empty(); }),
|
||||
names_and_types.end());
|
||||
return ColumnsDescription(names_and_types);
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
if (!buf)
|
||||
throw;
|
||||
auto file_name = getFileNameFromReadBuffer(*buf);
|
||||
if (!file_name.empty())
|
||||
e.addMessage(fmt::format("(in file/uri {})", file_name));
|
||||
throw;
|
||||
}
|
||||
|
||||
|
||||
ColumnsDescription readSchemaFromFormat(
|
||||
const String & format_name,
|
||||
|
@ -86,7 +86,21 @@ void IRowInputFormat::logError()
|
||||
Chunk IRowInputFormat::generate()
|
||||
{
|
||||
if (total_rows == 0)
|
||||
readPrefix();
|
||||
{
|
||||
try
|
||||
{
|
||||
readPrefix();
|
||||
}
|
||||
catch (Exception & e)
|
||||
{
|
||||
auto file_name = getFileNameFromReadBuffer(getReadBuffer());
|
||||
if (!file_name.empty())
|
||||
e.addMessage(fmt::format("(in file/uri {})", file_name));
|
||||
|
||||
e.addMessage("(while reading header)");
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
const Block & header = getPort().getHeader();
|
||||
|
||||
|
@ -0,0 +1,2 @@
|
||||
in file/uri
|
||||
test.csv
|
11
tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh
Executable file
11
tests/queries/0_stateless/02836_file_diagnostics_while_reading_header.sh
Executable file
@ -0,0 +1,11 @@
|
||||
#!/usr/bin/env bash
# Stateless test: runs a query over a CSV file with a deliberately odd header
# line and keeps only the "in file/uri" marker and the file name from the
# combined stdout/stderr output, so the reference output shows whether the
# diagnostic message names the file.
|
||||
|
||||
# Absolute path of the directory containing this script.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
# Source the shared test configuration from the parent directory.
# NOTE(review): presumably this is what defines CLICKHOUSE_TMP and
# CLICKHOUSE_LOCAL used below - confirm against shell_config.sh.
. "$CURDIR"/../shell_config.sh
|
||||
|
||||
# Input file created (and later removed) by this test.
FILENAME="${CLICKHOUSE_TMP}/test.csv"
|
||||
|
||||
# First line is "Bad<CR>Header" (a carriage return inside the header line),
# followed by a single data line "123".
# NOTE(review): presumably the embedded CR is what makes reading the header
# fail - confirm against the CSV reader.
printf 'Bad\rHeader\n123\n' > "${FILENAME}"
|
||||
# The file is addressed through a glob pattern (t*e*s*t.csv) rather than its
# literal name. stderr is merged into stdout, and grep -o prints only the
# matched fragments ("in file/uri" and "test.csv"), one per line, so the rest
# of the message does not affect the test output.
${CLICKHOUSE_LOCAL} --query "SELECT * FROM file('${CLICKHOUSE_TMP}/t*e*s*t.csv')" 2>&1 | grep -o -P 'in file/uri|test\.csv'
|
||||
# Clean up the temporary input file.
rm "${FILENAME}"
|
Loading…
Reference in New Issue
Block a user