mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-20 08:40:50 +00:00
Bug fix: CSV read fails when the end of line is not CRLF
This commit is contained in:
parent
0f76ba83e0
commit
ef30e6723d
@ -897,6 +897,12 @@ Use DOS/Windows-style line separator (CRLF) in CSV instead of Unix style (LF).
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_csv_crlf_end_of_line {#input_format_csv_crlf_end_of_line}
|
||||
|
||||
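When enabled, a carriage return (CR, `\r`) at the end of a line in CSV input must be followed by a line feed (LF, `\n`), i.e. the DOS/Windows-style separator (CRLF) is required; a CR that is not followed by LF causes a parsing error. When disabled, a CR that is not followed by LF is accepted.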
|
||||
|
||||
Disabled by default.
|
||||
|
||||
### input_format_csv_enum_as_number {#input_format_csv_enum_as_number}
|
||||
|
||||
When enabled, always treat enum values as enum ids for CSV input format. It's recommended to enable this setting if data contains only enum ids to optimize enum parsing.
|
||||
|
@ -884,6 +884,7 @@ class IColumn;
|
||||
M(Bool, format_csv_allow_single_quotes, false, "If it is set to true, allow strings in single quotes.", 0) \
|
||||
M(Bool, format_csv_allow_double_quotes, true, "If it is set to true, allow strings in double quotes.", 0) \
|
||||
M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
|
||||
M(Bool, input_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n", 0) \
|
||||
M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices", 0) \
|
||||
M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted.)", 0) \
|
||||
M(Bool, input_format_skip_unknown_fields, true, "Skip columns with unknown names from input data (it works for JSONEachRow, -WithNames, -WithNamesAndTypes and TSKV formats).", 0) \
|
||||
|
@ -58,7 +58,8 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.avro.output_rows_in_file = settings.output_format_avro_rows_in_file;
|
||||
format_settings.csv.allow_double_quotes = settings.format_csv_allow_double_quotes;
|
||||
format_settings.csv.allow_single_quotes = settings.format_csv_allow_single_quotes;
|
||||
format_settings.csv.crlf_end_of_line = settings.output_format_csv_crlf_end_of_line;
|
||||
format_settings.csv.crlf_end_of_line_for_output = settings.output_format_csv_crlf_end_of_line;
|
||||
format_settings.csv.crlf_end_of_line_for_input = settings.input_format_csv_crlf_end_of_line;
|
||||
format_settings.csv.delimiter = settings.format_csv_delimiter;
|
||||
format_settings.csv.tuple_delimiter = settings.format_csv_delimiter;
|
||||
format_settings.csv.empty_as_default = settings.input_format_csv_empty_as_default;
|
||||
|
@ -149,7 +149,8 @@ struct FormatSettings
|
||||
bool allow_single_quotes = true;
|
||||
bool allow_double_quotes = true;
|
||||
bool empty_as_default = false;
|
||||
bool crlf_end_of_line = false;
|
||||
bool crlf_end_of_line_for_output = false;
|
||||
bool crlf_end_of_line_for_input = false;
|
||||
bool enum_as_number = false;
|
||||
bool arrays_as_nested_csv = false;
|
||||
String null_representation = "\\N";
|
||||
|
@ -177,7 +177,7 @@ void CSVFormatReader::skipRow()
|
||||
}
|
||||
}
|
||||
|
||||
static void skipEndOfLine(ReadBuffer & in)
|
||||
static void skipEndOfLine(ReadBuffer & in, bool crlf_end_of_line)
|
||||
{
|
||||
/// \n (Unix) or \r\n (DOS/Windows) or \n\r (Mac OS Classic)
|
||||
|
||||
@ -192,7 +192,7 @@ static void skipEndOfLine(ReadBuffer & in)
|
||||
++in.position();
|
||||
if (!in.eof() && *in.position() == '\n')
|
||||
++in.position();
|
||||
else
|
||||
else if (crlf_end_of_line)
|
||||
throw Exception(ErrorCodes::INCORRECT_DATA,
|
||||
"Cannot parse CSV format: found \\r (CR) not followed by \\n (LF)."
|
||||
" Line must end by \\n (LF) or \\r\\n (CR LF) or \\n\\r.");
|
||||
@ -258,7 +258,7 @@ void CSVFormatReader::skipRowEndDelimiter()
|
||||
if (buf->eof())
|
||||
return;
|
||||
|
||||
skipEndOfLine(*buf);
|
||||
skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input);
|
||||
}
|
||||
|
||||
void CSVFormatReader::skipHeaderRow()
|
||||
@ -343,7 +343,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
|
||||
return false;
|
||||
}
|
||||
|
||||
skipEndOfLine(*buf);
|
||||
skipEndOfLine(*buf, format_settings.csv.crlf_end_of_line_for_input);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -56,7 +56,7 @@ void CSVRowOutputFormat::writeFieldDelimiter()
|
||||
|
||||
void CSVRowOutputFormat::writeRowEndDelimiter()
|
||||
{
|
||||
if (format_settings.csv.crlf_end_of_line)
|
||||
if (format_settings.csv.crlf_end_of_line_for_output)
|
||||
writeChar('\r', out);
|
||||
writeChar('\n', out);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user