mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 16:12:01 +00:00
Allow to skip some of the first rows in CSV/TSV formats
This commit is contained in:
parent
b50d4549c9
commit
4c9812d4c1
@ -673,6 +673,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value)
|
||||
M(Bool, input_format_json_read_bools_as_numbers, true, "Allow to parse bools as numbers in JSON input formats", 0) \
|
||||
M(Bool, input_format_protobuf_flatten_google_wrappers, false, "Enable Google wrappers for regular non-nested columns, e.g. google.protobuf.StringValue 'str' for String column 'str'. For Nullable columns empty wrappers are recognized as defaults, and missing as nulls", 0) \
|
||||
M(Bool, output_format_protobuf_nullables_with_google_wrappers, false, "When serializing Nullable columns with Google wrappers, serialize default values as empty wrappers. If turned off, default and null values are not serialized", 0) \
|
||||
M(UInt64, input_format_csv_skip_first_lines, 0, "Skip specified amount of lines in the beginning of data in CSV format", 0) \
|
||||
M(UInt64, input_format_tsv_skip_first_lines, 0, "Skip specified amount of lines in the beginning of data in TSV format", 0) \
|
||||
\
|
||||
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic', 'best_effort' and 'best_effort_us'.", 0) \
|
||||
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
|
||||
|
@ -66,6 +66,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.csv.null_representation = settings.format_csv_null_representation;
|
||||
format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
|
||||
format_settings.csv.input_format_use_best_effort_in_schema_inference = settings.input_format_csv_use_best_effort_in_schema_inference;
|
||||
format_settings.csv.skip_first_lines = settings.input_format_csv_skip_first_lines;
|
||||
format_settings.hive_text.fields_delimiter = settings.input_format_hive_text_fields_delimiter;
|
||||
format_settings.hive_text.collection_items_delimiter = settings.input_format_hive_text_collection_items_delimiter;
|
||||
format_settings.hive_text.map_keys_delimiter = settings.input_format_hive_text_map_keys_delimiter;
|
||||
@ -123,6 +124,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
|
||||
format_settings.tsv.input_format_enum_as_number = settings.input_format_tsv_enum_as_number;
|
||||
format_settings.tsv.null_representation = settings.format_tsv_null_representation;
|
||||
format_settings.tsv.input_format_use_best_effort_in_schema_inference = settings.input_format_tsv_use_best_effort_in_schema_inference;
|
||||
format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
|
||||
format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
|
||||
format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
|
||||
format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
|
||||
|
@ -109,6 +109,7 @@ struct FormatSettings
|
||||
String null_representation = "\\N";
|
||||
char tuple_delimiter = ',';
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
UInt64 skip_first_lines = 0;
|
||||
} csv;
|
||||
|
||||
struct HiveText
|
||||
@ -219,6 +220,7 @@ struct FormatSettings
|
||||
String null_representation = "\\N";
|
||||
bool input_format_enum_as_number = false;
|
||||
bool input_format_use_best_effort_in_schema_inference = true;
|
||||
UInt64 skip_first_lines = 0;
|
||||
} tsv;
|
||||
|
||||
struct
|
||||
|
@ -259,6 +259,12 @@ bool CSVFormatReader::readField(
|
||||
}
|
||||
}
|
||||
|
||||
void CSVFormatReader::skipPrefixBeforeHeader()
|
||||
{
|
||||
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)
|
||||
readRow();
|
||||
}
|
||||
|
||||
|
||||
CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_setting_)
|
||||
: FormatWithNamesAndTypesSchemaReader(
|
||||
|
@ -58,6 +58,7 @@ public:
|
||||
void skipTypes() override { skipHeaderRow(); }
|
||||
void skipFieldDelimiter() override;
|
||||
void skipRowEndDelimiter() override;
|
||||
void skipPrefixBeforeHeader() override;
|
||||
|
||||
std::vector<String> readNames() override { return readHeaderRow(); }
|
||||
std::vector<String> readTypes() override { return readHeaderRow(); }
|
||||
|
@ -230,6 +230,12 @@ void TabSeparatedFormatReader::checkNullValueForNonNullable(DataTypePtr type)
|
||||
}
|
||||
}
|
||||
|
||||
void TabSeparatedFormatReader::skipPrefixBeforeHeader()
|
||||
{
|
||||
for (size_t i = 0; i != format_settings.csv.skip_first_lines; ++i)
|
||||
readRow();
|
||||
}
|
||||
|
||||
void TabSeparatedRowInputFormat::syncAfterError()
|
||||
{
|
||||
skipToUnescapedNextLineOrEOF(*in);
|
||||
|
@ -43,6 +43,7 @@ public:
|
||||
void skipTypes() override { skipHeaderRow(); }
|
||||
void skipFieldDelimiter() override;
|
||||
void skipRowEndDelimiter() override;
|
||||
void skipPrefixBeforeHeader() override;
|
||||
|
||||
std::vector<String> readRow();
|
||||
std::vector<String> readNames() override { return readRow(); }
|
||||
|
@ -0,0 +1,16 @@
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(Float64)
|
||||
c3 Nullable(Float64)
|
||||
0 1 2
|
||||
1 2 3
|
||||
2 3 4
|
||||
3 4 5
|
||||
4 5 6
|
||||
c1 Nullable(Float64)
|
||||
c2 Nullable(Float64)
|
||||
c3 Nullable(Float64)
|
||||
0 1 2
|
||||
1 2 3
|
||||
2 3 4
|
||||
3 4 5
|
||||
4 5 6
|
12
tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql
Normal file
12
tests/queries/0_stateless/02314_csv_tsv_skip_first_lines.sql
Normal file
@ -0,0 +1,12 @@
|
||||
-- Tags: no-parallel

-- CSV: write 5 two-column rows, then append 5 three-column rows to the same file.
-- Skipping the first 5 lines must leave only the three-column rows.
insert into function file(data_02314.csv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.csv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.csv) settings input_format_csv_skip_first_lines=5;
select * from file(data_02314.csv) settings input_format_csv_skip_first_lines=5;

-- TSV: same scenario, but driven by the TSV-specific setting so that
-- input_format_tsv_skip_first_lines itself is exercised.
insert into function file(data_02314.tsv) select number, number + 1 from numbers(5) settings engine_file_truncate_on_insert=1;
insert into function file(data_02314.tsv) select number, number + 1, number + 2 from numbers(5);
desc file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;
select * from file(data_02314.tsv) settings input_format_tsv_skip_first_lines=5;

|
||||
|
Loading…
Reference in New Issue
Block a user