mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 09:32:01 +00:00
review fix
This commit is contained in:
parent
911f8ad8dc
commit
f3b99156ac
@ -468,7 +468,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
|
||||
- [input_format_csv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_first_lines) - skip the specified number of lines at the beginning of data. Default value - `0`.
|
||||
- [input_format_csv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_csv_detect_header) - automatically detect header with names and types in CSV format. Default value - `true`.
|
||||
- [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
|
||||
- [input_format_csv_use_whitespace_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_whitespace_tab_as_delimiter) - use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
|
||||
- [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_whitespace_or_tab_as_delimiter) - allow whitespace or tab to be used as the field delimiter in CSV strings. Default value - `false`.
|
||||
|
||||
## CSVWithNames {#csvwithnames}
|
||||
|
||||
|
@ -914,9 +914,9 @@ Result
|
||||
" string "
|
||||
```
|
||||
|
||||
### input_format_csv_use_whitespace_tab_as_delimiter {#input_format_csv_use_whitespace_tab_as_delimiter}
|
||||
### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}
|
||||
|
||||
Use whitespace or tab as field delimiter in CSV strings.
|
||||
Allows whitespace or tab to be used as the field delimiter in CSV strings.
|
||||
|
||||
Default value: `false`.
|
||||
|
||||
@ -925,7 +925,7 @@ Default value: `false`.
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_use_whitespace_tab_as_delimiter=true --format_csv_delimiter=' '
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter=' '
|
||||
```
|
||||
|
||||
Result
|
||||
@ -937,7 +937,7 @@ a b
|
||||
Query
|
||||
|
||||
```bash
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_use_whitespace_tab_as_delimiter=true --format_csv_delimiter='\t'
|
||||
echo 'a b' | ./clickhouse local -q "select * from table FORMAT CSV" --input-format="CSV" --input_format_csv_allow_whitespace_or_tab_as_delimiter=true --format_csv_delimiter='\t'
|
||||
```
|
||||
|
||||
Result
|
||||
|
@ -850,7 +850,7 @@ class IColumn;
|
||||
M(Bool, input_format_csv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in CSV format", 0) \
|
||||
M(Bool, input_format_tsv_use_best_effort_in_schema_inference, true, "Use some tweaks and heuristics to infer schema in TSV format", 0) \
|
||||
M(Bool, input_format_csv_detect_header, true, "Automatically detect header with names and types in CSV format", 0) \
|
||||
M(Bool, input_format_csv_use_whitespace_tab_as_delimiter, false, "Use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
|
||||
M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
|
||||
M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
|
||||
M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
|
||||
|
@ -70,7 +70,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.skip_first_lines = settings.input_format_csv_skip_first_lines;
|
||||
format_settings.csv.try_detect_header = settings.input_format_csv_detect_header;
|
||||
format_settings.csv.trim_whitespaces = settings.input_format_csv_trim_whitespaces;
|
||||
format_settings.csv.use_whitespace_tab_as_delimiter = settings.input_format_csv_use_whitespace_tab_as_delimiter;
|
||||
format_settings.csv.allow_whitespace_or_tab_as_delimiter = settings.input_format_csv_allow_whitespace_or_tab_as_delimiter;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
|
@ -137,7 +137,7 @@ struct FormatSettings
|
||||
String custom_delimiter;
|
||||
bool try_detect_header = true;
|
||||
bool trim_whitespaces = true;
|
||||
bool use_whitespace_tab_as_delimiter = false;
|
||||
bool allow_whitespace_or_tab_as_delimiter = false;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
|
@ -25,10 +25,14 @@ namespace ErrorCodes
|
||||
|
||||
namespace
|
||||
{
|
||||
void checkBadDelimiter(char delimiter, bool use_whitespace_tab_as_delimiter)
|
||||
void checkBadDelimiter(char delimiter, bool allow_whitespace_or_tab_as_delimiter)
|
||||
{
|
||||
if ((delimiter == ' ' || delimiter == '\t') && allow_whitespace_or_tab_as_delimiter)
|
||||
{
|
||||
return;
|
||||
}
|
||||
constexpr std::string_view bad_delimiters = " \t\"'.UL";
|
||||
if (bad_delimiters.find(delimiter) != std::string_view::npos && !use_whitespace_tab_as_delimiter)
|
||||
if (bad_delimiters.find(delimiter) != std::string_view::npos)
|
||||
throw Exception(
|
||||
ErrorCodes::BAD_ARGUMENTS,
|
||||
"CSV format may not work correctly with delimiter '{}'. Try use CustomSeparated format instead",
|
||||
@ -68,7 +72,7 @@ CSVRowInputFormat::CSVRowInputFormat(
|
||||
format_settings_.csv.try_detect_header),
|
||||
buf(std::move(in_))
|
||||
{
|
||||
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.use_whitespace_tab_as_delimiter);
|
||||
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
}
|
||||
|
||||
CSVRowInputFormat::CSVRowInputFormat(
|
||||
@ -90,7 +94,7 @@ CSVRowInputFormat::CSVRowInputFormat(
|
||||
format_settings_.csv.try_detect_header),
|
||||
buf(std::move(in_))
|
||||
{
|
||||
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.use_whitespace_tab_as_delimiter);
|
||||
checkBadDelimiter(format_settings_.csv.delimiter, format_settings_.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
}
|
||||
|
||||
void CSVRowInputFormat::syncAfterError()
|
||||
@ -134,9 +138,9 @@ static void skipEndOfLine(ReadBuffer & in)
|
||||
}
|
||||
|
||||
/// Skip `whitespace` symbols allowed in CSV.
|
||||
static inline void skipWhitespacesAndTabs(ReadBuffer & in, const bool & use_whitespace_tab_as_delimiter)
|
||||
static inline void skipWhitespacesAndTabs(ReadBuffer & in, const bool & allow_whitespace_or_tab_as_delimiter)
|
||||
{
|
||||
if (use_whitespace_tab_as_delimiter)
|
||||
if (allow_whitespace_or_tab_as_delimiter)
|
||||
{
|
||||
return;
|
||||
}
|
||||
@ -150,7 +154,7 @@ CSVFormatReader::CSVFormatReader(PeekableReadBuffer & buf_, const FormatSettings
|
||||
|
||||
void CSVFormatReader::skipFieldDelimiter()
|
||||
{
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
assertChar(format_settings.csv.delimiter, *buf);
|
||||
}
|
||||
|
||||
@ -158,7 +162,7 @@ template <bool read_string>
|
||||
String CSVFormatReader::readCSVFieldIntoString()
|
||||
{
|
||||
if (format_settings.csv.trim_whitespaces) [[likely]]
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
|
||||
String field;
|
||||
if constexpr (read_string)
|
||||
@ -170,14 +174,14 @@ String CSVFormatReader::readCSVFieldIntoString()
|
||||
|
||||
void CSVFormatReader::skipField()
|
||||
{
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
NullOutput out;
|
||||
readCSVStringInto(out, *buf, format_settings.csv);
|
||||
}
|
||||
|
||||
void CSVFormatReader::skipRowEndDelimiter()
|
||||
{
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
|
||||
if (buf->eof())
|
||||
return;
|
||||
@ -186,7 +190,7 @@ void CSVFormatReader::skipRowEndDelimiter()
|
||||
if (*buf->position() == format_settings.csv.delimiter)
|
||||
++buf->position();
|
||||
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
if (buf->eof())
|
||||
return;
|
||||
|
||||
@ -198,7 +202,7 @@ void CSVFormatReader::skipHeaderRow()
|
||||
do
|
||||
{
|
||||
skipField();
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
} while (checkChar(format_settings.csv.delimiter, *buf));
|
||||
|
||||
skipRowEndDelimiter();
|
||||
@ -211,7 +215,7 @@ std::vector<String> CSVFormatReader::readRowImpl()
|
||||
do
|
||||
{
|
||||
fields.push_back(readCSVFieldIntoString<is_header>());
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
} while (checkChar(format_settings.csv.delimiter, *buf));
|
||||
|
||||
skipRowEndDelimiter();
|
||||
@ -224,7 +228,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
|
||||
|
||||
try
|
||||
{
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
assertChar(delimiter, *buf);
|
||||
}
|
||||
catch (const DB::Exception &)
|
||||
@ -250,7 +254,7 @@ bool CSVFormatReader::parseFieldDelimiterWithDiagnosticInfo(WriteBuffer & out)
|
||||
|
||||
bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
|
||||
{
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
|
||||
if (buf->eof())
|
||||
return true;
|
||||
@ -259,7 +263,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
|
||||
if (*buf->position() == format_settings.csv.delimiter)
|
||||
{
|
||||
++buf->position();
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
if (buf->eof())
|
||||
return true;
|
||||
}
|
||||
@ -287,7 +291,7 @@ bool CSVFormatReader::readField(
|
||||
const String & /*column_name*/)
|
||||
{
|
||||
if (format_settings.csv.trim_whitespaces || !isStringOrFixedString(removeNullable(type))) [[likely]]
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.use_whitespace_tab_as_delimiter);
|
||||
skipWhitespacesAndTabs(*buf, format_settings.csv.allow_whitespace_or_tab_as_delimiter);
|
||||
|
||||
const bool at_delimiter = !buf->eof() && *buf->position() == format_settings.csv.delimiter;
|
||||
const bool at_last_column_line_end = is_last_file_column && (buf->eof() || *buf->position() == '\n' || *buf->position() == '\r');
|
||||
|
@ -10,8 +10,8 @@ $CLICKHOUSE_CLIENT -q "drop table if exists test_whitespace"
|
||||
$CLICKHOUSE_CLIENT -q "drop table if exists test_tab"
|
||||
$CLICKHOUSE_CLIENT -q "create table test_whitespace (x UInt32, y String, z String) engine=MergeTree order by x"
|
||||
$CLICKHOUSE_CLIENT -q "create table test_tab (x UInt32, y String, z String) engine=MergeTree order by x"
|
||||
cat $CURDIR/data_csv/csv_with_space_delimiter.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_whitespace SETTINGS format_csv_delimiter=' ', input_format_csv_use_whitespace_tab_as_delimiter=true FORMAT CSV"
|
||||
cat $CURDIR/data_csv/csv_with_tab_delimiter.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tab SETTINGS format_csv_delimiter='\t', input_format_csv_use_whitespace_tab_as_delimiter=true FORMAT CSV"
|
||||
cat $CURDIR/data_csv/csv_with_space_delimiter.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_whitespace SETTINGS format_csv_delimiter=' ', input_format_csv_allow_whitespace_or_tab_as_delimiter=true FORMAT CSV"
|
||||
cat $CURDIR/data_csv/csv_with_tab_delimiter.csv | ${CLICKHOUSE_CLIENT} -q "INSERT INTO test_tab SETTINGS format_csv_delimiter='\t', input_format_csv_allow_whitespace_or_tab_as_delimiter=true FORMAT CSV"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_whitespace"
|
||||
$CLICKHOUSE_CLIENT -q "select * from test_tab"
|
||||
$CLICKHOUSE_CLIENT -q "drop table test_whitespace"
|
||||
|
Loading…
Reference in New Issue
Block a user