mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 07:31:57 +00:00
parent
77c06447d5
commit
32d267ec6c
@ -18,6 +18,7 @@ BinaryRowInputFormat::BinaryRowInputFormat(ReadBuffer & in_, Block header, Param
|
||||
header,
|
||||
in_,
|
||||
params_,
|
||||
true,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -41,7 +41,15 @@ CSVRowInputFormat::CSVRowInputFormat(
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader_)
|
||||
: RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::move(format_reader_))
|
||||
: RowInputFormatWithNamesAndTypes(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
std::move(format_reader_))
|
||||
{
|
||||
const String bad_delimiters = " \t\"'.UL";
|
||||
if (bad_delimiters.find(format_settings.csv.delimiter) != String::npos)
|
||||
|
@ -47,6 +47,7 @@ CustomSeparatedRowInputFormat::CustomSeparatedRowInputFormat(
|
||||
header_,
|
||||
*buf_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -28,6 +28,7 @@ JSONCompactEachRowRowInputFormat::JSONCompactEachRowRowInputFormat(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
|
@ -40,7 +40,15 @@ TabSeparatedRowInputFormat::TabSeparatedRowInputFormat(
|
||||
bool with_types_,
|
||||
bool is_raw_,
|
||||
const FormatSettings & format_settings_)
|
||||
: RowInputFormatWithNamesAndTypes(header_, in_, params_, with_names_, with_types_, format_settings_, std::make_unique<TabSeparatedFormatReader>(in_, format_settings_, is_raw_))
|
||||
: RowInputFormatWithNamesAndTypes(
|
||||
header_,
|
||||
in_,
|
||||
params_,
|
||||
false,
|
||||
with_names_,
|
||||
with_types_,
|
||||
format_settings_,
|
||||
std::make_unique<TabSeparatedFormatReader>(in_, format_settings_, is_raw_))
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
|
||||
const Block & header_,
|
||||
ReadBuffer & in_,
|
||||
const Params & params_,
|
||||
bool is_binary_,
|
||||
bool with_names_,
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
@ -24,6 +25,7 @@ RowInputFormatWithNamesAndTypes::RowInputFormatWithNamesAndTypes(
|
||||
: RowInputFormatWithDiagnosticInfo(header_, in_, params_)
|
||||
, format_settings(format_settings_)
|
||||
, data_types(header_.getDataTypes())
|
||||
, is_binary(is_binary_)
|
||||
, with_names(with_names_)
|
||||
, with_types(with_types_)
|
||||
, format_reader(std::move(format_reader_))
|
||||
@ -38,10 +40,11 @@ void RowInputFormatWithNamesAndTypes::readPrefix()
|
||||
if (getCurrentUnitNumber() != 0)
|
||||
return;
|
||||
|
||||
if (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8())
|
||||
/// Search and remove BOM only in textual formats (CSV, TSV etc), not in binary ones (RowBinary*).
|
||||
/// Also, we assume that column name or type cannot contain BOM, so, if format has header,
|
||||
/// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
|
||||
if (!is_binary && (with_names || with_types || data_types.at(0)->textCanContainOnlyValidUTF8()))
|
||||
{
|
||||
/// We assume that column name or type cannot contain BOM, so, if format has header,
|
||||
/// then BOM at beginning of stream cannot be confused with name or type of field, and it is safe to skip it.
|
||||
skipBOMIfExists(*in);
|
||||
}
|
||||
|
||||
|
@ -24,13 +24,15 @@ class FormatWithNamesAndTypesReader;
|
||||
class RowInputFormatWithNamesAndTypes : public RowInputFormatWithDiagnosticInfo
|
||||
{
|
||||
protected:
|
||||
/** with_names - in the first line the header with column names
|
||||
/** is_binary - it is a binary format (e.g. don't search for BOM)
|
||||
* with_names - in the first line the header with column names
|
||||
* with_types - in the second line the header with column types
|
||||
*/
|
||||
RowInputFormatWithNamesAndTypes(
|
||||
const Block & header_,
|
||||
ReadBuffer & in_,
|
||||
const Params & params_,
|
||||
bool is_binary_,
|
||||
bool with_names_,
|
||||
bool with_types_,
|
||||
const FormatSettings & format_settings_,
|
||||
@ -51,6 +53,7 @@ private:
|
||||
bool parseRowAndPrintDiagnosticInfo(MutableColumns & columns, WriteBuffer & out) override;
|
||||
void tryDeserializeField(const DataTypePtr & type, IColumn & column, size_t file_column) override;
|
||||
|
||||
bool is_binary;
|
||||
bool with_names;
|
||||
bool with_types;
|
||||
std::unique_ptr<FormatWithNamesAndTypesReader> format_reader;
|
||||
|
@ -0,0 +1 @@
|
||||
1651760768976141295
|
10
tests/queries/0_stateless/02306_rowbinary_has_no_bom.sh
Executable file
10
tests/queries/0_stateless/02306_rowbinary_has_no_bom.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CUR_DIR"/../shell_config.sh
|
||||
|
||||
echo "DROP TABLE IF EXISTS table_with_uint64" | ${CLICKHOUSE_CURL} -d@- -sS "${CLICKHOUSE_URL}"
|
||||
echo "CREATE TABLE table_with_uint64(no UInt64) ENGINE = MergeTree ORDER BY no" | ${CLICKHOUSE_CURL} -d@- -sS "${CLICKHOUSE_URL}"
|
||||
# Insert a single 8-byte RowBinary UInt64 whose first three bytes (EF BB BF) are identical to the UTF-8 BOM.
# The SELECT below lets the test check that these bytes were stored as data rather than skipped as a BOM.
echo -en '\xef\xbb\xbf\x00\xab\x3b\xec\x16' | ${CLICKHOUSE_CURL} --data-binary @- "${CLICKHOUSE_URL}&query=INSERT+INTO+table_with_uint64(no)+FORMAT+RowBinary"
|
||||
echo "SELECT * FROM table_with_uint64" | ${CLICKHOUSE_CURL} -d@- -sS "${CLICKHOUSE_URL}"
|
Loading…
Reference in New Issue
Block a user