From 6d77d52dfe034afe196fa1219ddc8897d1070146 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 27 Jul 2023 18:02:29 +0000
Subject: [PATCH 1/4] Allow variable number of columns in
 TSV/CustomSeparated/JSONCompactEachRow, make schema inference work with
 variable number of columns

---
 docs/en/interfaces/formats.md                 | 11 ++--
 .../operations/settings/settings-formats.md   | 25 +++++++-
 src/Core/Settings.h                           |  5 +-
 src/Formats/FormatFactory.cpp                 |  3 +
 src/Formats/FormatSettings.h                  |  3 +
 src/Processors/Formats/ISchemaReader.cpp      | 46 +++++++++++---
 src/Processors/Formats/ISchemaReader.h        |  6 +-
 .../Formats/Impl/CSVRowInputFormat.cpp        | 13 ++--
 .../Formats/Impl/CSVRowInputFormat.h          |  8 ++-
 .../Impl/CustomSeparatedRowInputFormat.cpp    | 19 +++---
 .../Impl/CustomSeparatedRowInputFormat.h      | 10 +++-
 .../Impl/JSONCompactEachRowRowInputFormat.cpp |  8 ++-
 .../Impl/JSONCompactEachRowRowInputFormat.h   |  7 ++-
 .../Formats/Impl/MsgPackRowInputFormat.cpp    |  2 +-
 .../Formats/Impl/MsgPackRowInputFormat.h      |  2 +-
 .../Formats/Impl/MySQLDumpRowInputFormat.cpp  |  2 +-
 .../Formats/Impl/MySQLDumpRowInputFormat.h    |  2 +-
 .../Formats/Impl/RegexpRowInputFormat.cpp     |  2 +-
 .../Formats/Impl/RegexpRowInputFormat.h       |  2 +-
 .../Impl/TabSeparatedRowInputFormat.cpp       | 16 +++--
 .../Formats/Impl/TabSeparatedRowInputFormat.h |  9 ++-
 .../Formats/Impl/TemplateRowInputFormat.cpp   |  2 +-
 .../Formats/Impl/TemplateRowInputFormat.h     |  2 +-
 .../Formats/Impl/ValuesBlockInputFormat.cpp   |  2 +-
 .../Formats/Impl/ValuesBlockInputFormat.h     |  2 +-
 .../RowInputFormatWithNamesAndTypes.cpp       | 60 ++++++++++---------
 .../Formats/RowInputFormatWithNamesAndTypes.h | 10 ++--
 ..._with_variable_number_of_columns.reference | 52 ++++++++++++++++
 ...ormats_with_variable_number_of_columns.sql | 18 ++++++
 29 files changed, 264 insertions(+), 85 deletions(-)
 create mode 100644 tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
 create mode 100644 tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql

diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 15f9d1f47bf..95483068cb2 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -195,6 +195,7 @@ SELECT * FROM nestedt FORMAT TSV
 - [input_format_tsv_skip_first_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_first_lines) - skip specified number of lines at the beginning of data. Default value - `0`.
 - [input_format_tsv_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_detect_header) - automatically detect header with names and types in TSV format. Default value - `true`.
 - [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
+- [input_format_tsv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_tsv_allow_variable_number_of_columns) - allow variable number of columns in TSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
 
 ## TabSeparatedRaw {#tabseparatedraw}
 
@@ -472,7 +473,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
 - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
-- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.
+- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - allow variable number of columns in CSV format, ignore extra columns and use default values on missing columns. Default value - `false`.
 - [input_format_csv_use_default_on_bad_values](/docs/en/operations/settings/settings-formats.md/#input_format_csv_use_default_on_bad_values) - Allow to set default value to column when CSV field deserialization failed on bad value. Default value - `false`.
 
 ## CSVWithNames {#csvwithnames}
@@ -501,9 +502,10 @@ the types from input data will be compared with the types of the corresponding c
 
 Similar to [Template](#format-template), but it prints or reads all names and types of columns and uses escaping rule from [format_custom_escaping_rule](/docs/en/operations/settings/settings-formats.md/#format_custom_escaping_rule) setting and delimiters from [format_custom_field_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_field_delimiter), [format_custom_row_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_before_delimiter), [format_custom_row_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_after_delimiter), [format_custom_row_between_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_row_between_delimiter), [format_custom_result_before_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_before_delimiter) and [format_custom_result_after_delimiter](/docs/en/operations/settings/settings-formats.md/#format_custom_result_after_delimiter) settings, not from format strings.
 
-If setting [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, ClickHouse will automatically detect header with names and types if any.
-
-If setting [input_format_tsv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) is enabled, trailing empty lines at the end of file will be skipped.
+Additional settings:
+- [input_format_custom_detect_header](/docs/en/operations/settings/settings-formats.md/#input_format_custom_detect_header) - enables automatic detection of header with names and types if any. Default value - `true`.
+- [input_format_custom_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_custom_skip_trailing_empty_lines) - skip trailing empty lines at the end of file. Default value - `false`.
+- [input_format_custom_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_custom_allow_variable_number_of_columns) - allow variable number of columns in CustomSeparated format, ignore extra columns and use default values on missing columns. Default value - `false`.
 
 There is also `CustomSeparatedIgnoreSpaces` format, which is similar to [TemplateIgnoreSpaces](#templateignorespaces).
 
@@ -1261,6 +1263,7 @@ SELECT * FROM json_each_row_nested
 - [input_format_json_named_tuples_as_objects](/docs/en/operations/settings/settings-formats.md/#input_format_json_named_tuples_as_objects) - parse named tuple columns as JSON objects. Default value - `true`.
 - [input_format_json_defaults_for_missing_elements_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_defaults_for_missing_elements_in_named_tuple) - insert default values for missing elements in JSON object while parsing named tuple. Default value - `true`.
 - [input_format_json_ignore_unknown_keys_in_named_tuple](/docs/en/operations/settings/settings-formats.md/#input_format_json_ignore_unknown_keys_in_named_tuple) - Ignore unknown keys in json object for named tuples. Default value - `false`.
+- [input_format_json_compact_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_json_compact_allow_variable_number_of_columns) - allow variable number of columns in JSONCompact/JSONCompactEachRow format, ignore extra columns and use default values on missing columns. Default value - `false`.
 - [output_format_json_quote_64bit_integers](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_integers) - controls quoting of 64-bit integers in JSON output format. Default value - `true`.
 - [output_format_json_quote_64bit_floats](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_64bit_floats) - controls quoting of 64-bit floats in JSON output format. Default value - `false`.
 - [output_format_json_quote_denormals](/docs/en/operations/settings/settings-formats.md/#output_format_json_quote_denormals) - enables '+nan', '-nan', '+inf', '-inf' outputs in JSON output format. Default value - `false`.
diff --git a/docs/en/operations/settings/settings-formats.md b/docs/en/operations/settings/settings-formats.md
index ee8e0d547b8..8e3d6b74ffa 100644
--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@@ -623,6 +623,13 @@ Column type should be String. If value is empty, default names `row_{i}`will be
 
 Default value: ''.
 
+### input_format_json_compact_allow_variable_number_of_columns {#input_format_json_compact_allow_variable_number_of_columns}
+
+Allow variable number of columns in rows in JSONCompact/JSONCompactEachRow input formats.
+Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
+
+Disabled by default.
+
 ## TSV format settings {#tsv-format-settings}
 
 ### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
@@ -760,6 +767,13 @@ When enabled, trailing empty lines at the end of TSV file will be skipped.
 
 Disabled by default.
 
+### input_format_tsv_allow_variable_number_of_columns {#input_format_tsv_allow_variable_number_of_columns}
+
+Allow variable number of columns in rows in TSV input format.
+Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
+
+Disabled by default.
+
 ## CSV format settings {#csv-format-settings}
 
 ### format_csv_delimiter {#format_csv_delimiter}
@@ -951,9 +965,11 @@ Result
 ```text
 "  string  "
 ```
+
 ### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
 
-ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.
+Allow variable number of columns in rows in CSV input format.
+Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
 
 Disabled by default.
 
@@ -1589,6 +1605,13 @@ When enabled, trailing empty lines at the end of file in CustomSeparated format
 
 Disabled by default.
 
+### input_format_custom_allow_variable_number_of_columns {#input_format_custom_allow_variable_number_of_columns}
+
+Allow variable number of columns in rows in CustomSeparated input format.
+Ignore extra columns in rows with more columns than expected and treat missing columns as default values.
+
+Disabled by default.
+
 ## Regexp format settings {#regexp-format-settings}
 
 ### format_regexp_escaping_rule {#format_regexp_escaping_rule}
diff --git a/src/Core/Settings.h b/src/Core/Settings.h
index c69d132ea25..86146bfad07 100644
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@@ -879,6 +879,10 @@ class IColumn;
     M(Bool, input_format_csv_allow_whitespace_or_tab_as_delimiter, false, "Allow to use spaces and tabs(\\t) as field delimiter in the CSV strings", 0) \
     M(Bool, input_format_csv_trim_whitespaces, true, "Trims spaces and tabs (\\t) characters at the beginning and end in CSV strings", 0) \
     M(Bool, input_format_csv_use_default_on_bad_values, false, "Allow to set default value to column when CSV field deserialization failed on bad value", 0) \
+    M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
+    M(Bool, input_format_tsv_allow_variable_number_of_columns, false, "Ignore extra columns in TSV input (if file has more columns than expected) and treat missing fields in TSV input as default values", 0) \
+    M(Bool, input_format_custom_allow_variable_number_of_columns, false, "Ignore extra columns in CustomSeparated input (if file has more columns than expected) and treat missing fields in CustomSeparated input as default values", 0) \
+    M(Bool, input_format_json_compact_allow_variable_number_of_columns, false, "Ignore extra columns in JSONCompact(EachRow) input (if file has more columns than expected) and treat missing fields in JSONCompact(EachRow) input as default values", 0) \
     M(Bool, input_format_tsv_detect_header, true, "Automatically detect header with names and types in TSV format", 0) \
     M(Bool, input_format_custom_detect_header, true, "Automatically detect header with names and types in CustomSeparated format", 0) \
     M(Bool, input_format_parquet_skip_columns_with_unsupported_types_in_schema_inference, false, "Skip columns with unsupported types while schema inference for format Parquet", 0) \
@@ -1023,7 +1027,6 @@ class IColumn;
     M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
     \
     M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
-    M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \
 
 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp
index 663b7f1ba95..dff480d1f79 100644
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@@ -86,6 +86,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.custom.row_between_delimiter = settings.format_custom_row_between_delimiter;
     format_settings.custom.try_detect_header = settings.input_format_custom_detect_header;
     format_settings.custom.skip_trailing_empty_lines = settings.input_format_custom_skip_trailing_empty_lines;
+    format_settings.custom.allow_variable_number_of_columns = settings.input_format_custom_allow_variable_number_of_columns;
     format_settings.date_time_input_format = settings.date_time_input_format;
     format_settings.date_time_output_format = settings.date_time_output_format;
     format_settings.interval.output_format = settings.interval_output_format;
@@ -115,6 +116,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.json.validate_utf8 = settings.output_format_json_validate_utf8;
     format_settings.json_object_each_row.column_for_object_name = settings.format_json_object_each_row_column_for_object_name;
     format_settings.json.allow_object_type = context->getSettingsRef().allow_experimental_object_type;
+    format_settings.json.compact_allow_variable_number_of_columns = settings.input_format_json_compact_allow_variable_number_of_columns;
     format_settings.null_as_default = settings.input_format_null_as_default;
     format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
     format_settings.parquet.row_group_rows = settings.output_format_parquet_row_group_size;
@@ -161,6 +163,7 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
     format_settings.tsv.skip_first_lines = settings.input_format_tsv_skip_first_lines;
     format_settings.tsv.try_detect_header = settings.input_format_tsv_detect_header;
     format_settings.tsv.skip_trailing_empty_lines = settings.input_format_tsv_skip_trailing_empty_lines;
+    format_settings.tsv.allow_variable_number_of_columns = settings.input_format_tsv_allow_variable_number_of_columns;
     format_settings.values.accurate_types_of_literals = settings.input_format_values_accurate_types_of_literals;
     format_settings.values.deduce_templates_of_expressions = settings.input_format_values_deduce_templates_of_expressions;
     format_settings.values.interpret_expressions = settings.input_format_values_interpret_expressions;
diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h
index 3259c46e5ff..68cf9ad817d 100644
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@@ -176,6 +176,7 @@ struct FormatSettings
         EscapingRule escaping_rule = EscapingRule::Escaped;
         bool try_detect_header = true;
         bool skip_trailing_empty_lines = false;
+        bool allow_variable_number_of_columns = false;
     } custom;
 
     struct
@@ -198,6 +199,7 @@ struct FormatSettings
         bool validate_types_from_metadata = true;
         bool validate_utf8 = false;
         bool allow_object_type = false;
+        bool compact_allow_variable_number_of_columns = false;
     } json;
 
     struct
@@ -316,6 +318,7 @@ struct FormatSettings
         UInt64 skip_first_lines = 0;
         bool try_detect_header = true;
         bool skip_trailing_empty_lines = false;
+        bool allow_variable_number_of_columns = false;
     } tsv;
 
     struct
diff --git a/src/Processors/Formats/ISchemaReader.cpp b/src/Processors/Formats/ISchemaReader.cpp
index 1fa520eaaee..15b53c2a499 100644
--- a/src/Processors/Formats/ISchemaReader.cpp
+++ b/src/Processors/Formats/ISchemaReader.cpp
@@ -115,21 +115,24 @@ NamesAndTypesList IRowSchemaReader::readSchema()
             "Cannot read rows to determine the schema, the maximum number of rows (or bytes) to read is set to 0. "
             "Most likely setting input_format_max_rows_to_read_for_schema_inference or input_format_max_bytes_to_read_for_schema_inference is set to 0");
 
-    DataTypes data_types = readRowAndGetDataTypes();
+    auto data_types_maybe = readRowAndGetDataTypes();
 
     /// Check that we read at list one column.
-    if (data_types.empty())
+    if (!data_types_maybe)
         throw Exception(ErrorCodes::EMPTY_DATA_PASSED, "Cannot read rows from the data");
 
+    DataTypes data_types = std::move(*data_types_maybe);
+
     /// If column names weren't set, use default names 'c1', 'c2', ...
-    if (column_names.empty())
+    bool use_default_column_names = column_names.empty();
+    if (use_default_column_names)
     {
         column_names.reserve(data_types.size());
         for (size_t i = 0; i != data_types.size(); ++i)
             column_names.push_back("c" + std::to_string(i + 1));
     }
     /// If column names were set, check that the number of names match the number of types.
-    else if (column_names.size() != data_types.size())
+    else if (column_names.size() != data_types.size() && !allowVariableNumberOfColumns())
     {
         throw Exception(
             ErrorCodes::INCORRECT_DATA,
@@ -137,6 +140,9 @@ NamesAndTypesList IRowSchemaReader::readSchema()
     }
     else
     {
+        if (column_names.size() != data_types.size())
+            data_types.resize(column_names.size());
+
         std::unordered_set<std::string_view> names_set;
         for (const auto & name : column_names)
         {
@@ -155,13 +161,39 @@ NamesAndTypesList IRowSchemaReader::readSchema()
 
     for (rows_read = 1; rows_read < max_rows_to_read && in.count() < max_bytes_to_read; ++rows_read)
     {
-        DataTypes new_data_types = readRowAndGetDataTypes();
-        if (new_data_types.empty())
+        auto new_data_types_maybe = readRowAndGetDataTypes();
+        if (!new_data_types_maybe)
             /// We reached eof.
             break;
 
+        DataTypes new_data_types = std::move(*new_data_types_maybe);
+
         if (new_data_types.size() != data_types.size())
-            throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
+        {
+            if (!allowVariableNumberOfColumns())
+                throw Exception(ErrorCodes::INCORRECT_DATA, "Rows have different amount of values");
+
+            if (use_default_column_names)
+            {
+                /// Current row contains new columns, add new default names.
+                if (new_data_types.size() > data_types.size())
+                {
+                    for (size_t i = data_types.size(); i < new_data_types.size(); ++i)
+                        column_names.push_back("c" + std::to_string(i + 1));
+                    data_types.resize(new_data_types.size());
+                }
+                /// Current row contains fewer columns than previous rows.
+                else
+                {
+                    new_data_types.resize(data_types.size());
+                }
+            }
+            /// If names were explicitly set, ignore all extra columns.
+            else
+            {
+                new_data_types.resize(column_names.size());
+            }
+        }
 
         for (field_index = 0; field_index != data_types.size(); ++field_index)
         {
diff --git a/src/Processors/Formats/ISchemaReader.h b/src/Processors/Formats/ISchemaReader.h
index 40702198a57..0cc8b98f05e 100644
--- a/src/Processors/Formats/ISchemaReader.h
+++ b/src/Processors/Formats/ISchemaReader.h
@@ -93,11 +93,13 @@ protected:
     /// Read one row and determine types of columns in it.
     /// Return types in the same order in which the values were in the row.
     /// If it's impossible to determine the type for some column, return nullptr for it.
-    /// Return empty list if can't read more data.
-    virtual DataTypes readRowAndGetDataTypes() = 0;
+    /// Return std::nullopt if can't read more data.
+    virtual std::optional<DataTypes> readRowAndGetDataTypes() = 0;
 
     void setColumnNames(const std::vector<String> & names) { column_names = names; }
 
+    virtual bool allowVariableNumberOfColumns() const { return false; }
+
     size_t field_index;
 
 private:
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index 244b906549e..9092c7fceba 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -284,7 +284,7 @@ bool CSVFormatReader::parseRowEndWithDiagnosticInfo(WriteBuffer & out)
     return true;
 }
 
-bool CSVFormatReader::allowVariableNumberOfColumns()
+bool CSVFormatReader::allowVariableNumberOfColumns() const
 {
     return format_settings.csv.allow_variable_number_of_columns;
 }
@@ -410,19 +410,22 @@ CSVSchemaReader::CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_t
 {
 }
 
-std::pair<std::vector<String>, DataTypes> CSVSchemaReader::readRowAndGetFieldsAndDataTypes()
+std::optional<std::pair<std::vector<String>, DataTypes>> CSVSchemaReader::readRowAndGetFieldsAndDataTypes()
 {
     if (buf.eof())
         return {};
 
     auto fields = reader.readRow();
     auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV);
-    return {fields, data_types};
+    return std::make_pair(fields, data_types);
 }
 
-DataTypes CSVSchemaReader::readRowAndGetDataTypesImpl()
+std::optional<DataTypes> CSVSchemaReader::readRowAndGetDataTypesImpl()
 {
-    return std::move(readRowAndGetFieldsAndDataTypes().second);
+    auto fields_with_types = readRowAndGetFieldsAndDataTypes();
+    if (!fields_with_types)
+        return {};
+    return std::move(fields_with_types->second);
 }
 
 
diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.h b/src/Processors/Formats/Impl/CSVRowInputFormat.h
index 7b1a1fc433d..2444477b184 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.h
@@ -70,7 +70,7 @@ public:
     void skipPrefixBeforeHeader() override;
 
     bool checkForEndOfRow() override;
-    bool allowVariableNumberOfColumns() override;
+    bool allowVariableNumberOfColumns() const override;
 
     std::vector<String> readNames() override { return readHeaderRow(); }
     std::vector<String> readTypes() override { return readHeaderRow(); }
@@ -102,8 +102,10 @@ public:
     CSVSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, const FormatSettings & format_settings_);
 
 private:
-    DataTypes readRowAndGetDataTypesImpl() override;
-    std::pair<std::vector<String>, DataTypes> readRowAndGetFieldsAndDataTypes() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.csv.allow_variable_number_of_columns; }
+
+    std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
+    std::optional<std::pair<std::vector<String>, DataTypes>> readRowAndGetFieldsAndDataTypes() override;
 
     PeekableReadBuffer buf;
     CSVFormatReader reader;
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
index 1e67db79a2c..8f8e12e3c2a 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@@ -142,7 +142,7 @@ void CustomSeparatedFormatReader::skipField()
     skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
 }
 
-bool CustomSeparatedFormatReader::checkEndOfRow()
+bool CustomSeparatedFormatReader::checkForEndOfRow()
 {
     PeekableReadBufferCheckpoint checkpoint{*buf, true};
 
@@ -200,12 +200,12 @@ std::vector<String> CustomSeparatedFormatReader::readRowImpl()
     std::vector<String> values;
     skipRowStartDelimiter();
 
-    if (columns == 0)
+    if (columns == 0 || allowVariableNumberOfColumns())
     {
         do
         {
             values.push_back(readFieldIntoString<mode>(values.empty(), false, true));
-        } while (!checkEndOfRow());
+        } while (!checkForEndOfRow());
         columns = values.size();
     }
     else
@@ -230,7 +230,7 @@ void CustomSeparatedFormatReader::skipHeaderRow()
 
         skipField();
     }
-    while (!checkEndOfRow());
+    while (!checkForEndOfRow());
 
     skipRowEndDelimiter();
 }
@@ -369,7 +369,7 @@ CustomSeparatedSchemaReader::CustomSeparatedSchemaReader(
 {
 }
 
-std::pair<std::vector<String>, DataTypes> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes()
+std::optional<std::pair<std::vector<String>, DataTypes>> CustomSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes()
 {
     if (no_more_data || reader.checkForSuffix())
     {
@@ -385,12 +385,15 @@ std::pair<std::vector<String>, DataTypes> CustomSeparatedSchemaReader::readRowAn
 
     auto fields = reader.readRow();
     auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info);
-    return {fields, data_types};
+    return std::make_pair(fields, data_types);
 }
 
-DataTypes CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl()
+std::optional<DataTypes> CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl()
 {
-    return readRowAndGetFieldsAndDataTypes().second;
+    auto fields_with_types = readRowAndGetFieldsAndDataTypes();
+    if (!fields_with_types)
+        return {};
+    return std::move(fields_with_types->second);
 }
 
 void CustomSeparatedSchemaReader::transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type)
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
index 2acf35bd143..893f06409f6 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.h
@@ -74,7 +74,9 @@ public:
 
     std::vector<String> readRowForHeaderDetection() override { return readRowImpl<ReadFieldMode::AS_POSSIBLE_STRING>(); }
 
-    bool checkEndOfRow();
+    bool checkForEndOfRow() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; }
+
     bool checkForSuffixImpl(bool check_eof);
     inline void skipSpaces() { if (ignore_spaces) skipWhitespaceIfAny(*buf, true); }
 
@@ -109,9 +111,11 @@ public:
     CustomSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool ignore_spaces_, const FormatSettings & format_setting_);
 
 private:
-    DataTypes readRowAndGetDataTypesImpl() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.custom.allow_variable_number_of_columns; }
 
-    std::pair<std::vector<String>, DataTypes> readRowAndGetFieldsAndDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
+
+    std::optional<std::pair<std::vector<String>, DataTypes>> readRowAndGetFieldsAndDataTypes() override;
 
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
index b91345bebe3..e3583a3dff0 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp
@@ -112,6 +112,12 @@ bool JSONCompactEachRowFormatReader::readField(IColumn & column, const DataTypeP
     return JSONUtils::readField(*in, column, type, serialization, column_name, format_settings, yield_strings);
 }
 
+bool JSONCompactEachRowFormatReader::checkForEndOfRow()
+{
+    skipWhitespaceIfAny(*in);
+    return !in->eof() && *in->position() == ']';
+}
+
 bool JSONCompactEachRowFormatReader::parseRowStartWithDiagnosticInfo(WriteBuffer & out)
 {
     skipWhitespaceIfAny(*in);
@@ -187,7 +193,7 @@ JSONCompactEachRowRowSchemaReader::JSONCompactEachRowRowSchemaReader(
 {
 }
 
-DataTypes JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl()
+std::optional<DataTypes> JSONCompactEachRowRowSchemaReader::readRowAndGetDataTypesImpl()
 {
     if (first_row)
         first_row = false;
diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
index bb699f0ca2e..378a41e6471 100644
--- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
+++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.h
@@ -68,6 +68,9 @@ public:
     std::vector<String> readNames() override { return readHeaderRow(); }
     std::vector<String> readTypes() override { return readHeaderRow(); }
 
+    bool checkForEndOfRow() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.json.compact_allow_variable_number_of_columns; }
+
     bool yieldStrings() const { return yield_strings; }
 private:
     bool yield_strings;
@@ -79,7 +82,9 @@ public:
     JSONCompactEachRowRowSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool yield_strings_, const FormatSettings & format_settings_);
 
 private:
-    DataTypes readRowAndGetDataTypesImpl() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.json.compact_allow_variable_number_of_columns; }
+
+    std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
 
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
     void transformFinalTypeIfNeeded(DataTypePtr & type) override;
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
index eeca14176cc..a46f0018611 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.cpp
@@ -634,7 +634,7 @@ DataTypePtr MsgPackSchemaReader::getDataType(const msgpack::object & object)
     UNREACHABLE();
 }
 
-DataTypes MsgPackSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> MsgPackSchemaReader::readRowAndGetDataTypes()
 {
     if (buf.eof())
         return {};
diff --git a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
index 0b485d3b97c..028ab878ad0 100644
--- a/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
+++ b/src/Processors/Formats/Impl/MsgPackRowInputFormat.h
@@ -91,7 +91,7 @@ public:
 private:
     msgpack::object_handle readObject();
     DataTypePtr getDataType(const msgpack::object & object);
-    DataTypes readRowAndGetDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypes() override;
 
     PeekableReadBuffer buf;
     UInt64 number_of_columns;
diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
index 90dd07bd5a8..6c754f141da 100644
--- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.cpp
@@ -422,7 +422,7 @@ NamesAndTypesList MySQLDumpSchemaReader::readSchema()
     return IRowSchemaReader::readSchema();
 }
 
-DataTypes MySQLDumpSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> MySQLDumpSchemaReader::readRowAndGetDataTypes()
 {
     if (in.eof())
         return {};
diff --git a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
index c28355054d7..14a73bf83b0 100644
--- a/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/MySQLDumpRowInputFormat.h
@@ -33,7 +33,7 @@ public:
 
 private:
     NamesAndTypesList readSchema() override;
-    DataTypes readRowAndGetDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypes() override;
 
     String table_name;
 };
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
index d902a8be6a7..8e94a568b1e 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp
@@ -143,7 +143,7 @@ RegexpSchemaReader::RegexpSchemaReader(ReadBuffer & in_, const FormatSettings &
 {
 }
 
-DataTypes RegexpSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> RegexpSchemaReader::readRowAndGetDataTypes()
 {
     if (buf.eof())
         return {};
diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.h b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
index 2469774aaf9..7417d48d8c1 100644
--- a/src/Processors/Formats/Impl/RegexpRowInputFormat.h
+++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.h
@@ -79,7 +79,7 @@ public:
     RegexpSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings);
 
 private:
-    DataTypes readRowAndGetDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypes() override;
 
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
 
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
index 2239c8539e3..7fbad583ced 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp
@@ -300,6 +300,11 @@ bool TabSeparatedFormatReader::checkForSuffix()
     return false;
 }
 
+bool TabSeparatedFormatReader::checkForEndOfRow()
+{
+    return buf->eof() || *buf->position() == '\n'; /// eof() is tested first, so position() is dereferenced only when data is available; the '\n' is only peeked at.
+}
+
 TabSeparatedSchemaReader::TabSeparatedSchemaReader(
     ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings_)
     : FormatWithNamesAndTypesSchemaReader(
@@ -315,19 +320,22 @@ TabSeparatedSchemaReader::TabSeparatedSchemaReader(
 {
 }
 
-std::pair<std::vector<String>, DataTypes> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes()
+std::optional<std::pair<std::vector<String>, DataTypes>> TabSeparatedSchemaReader::readRowAndGetFieldsAndDataTypes()
 {
     if (buf.eof())
         return {};
 
     auto fields = reader.readRow();
     auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule());
-    return {fields, data_types};
+    return std::make_pair(std::move(fields), std::move(data_types)); /// Both locals are unused after this point, so move them instead of copying.
 }
 
-DataTypes TabSeparatedSchemaReader::readRowAndGetDataTypesImpl()
+std::optional<DataTypes> TabSeparatedSchemaReader::readRowAndGetDataTypesImpl()
 {
-    return readRowAndGetFieldsAndDataTypes().second;
+    auto fields_with_types = readRowAndGetFieldsAndDataTypes();
+    if (!fields_with_types)
+        return {};
+    return std::move(fields_with_types->second);
 }
 
 void registerInputFormatTabSeparated(FormatFactory & factory)
diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
index 8df57675cf5..e0234761d61 100644
--- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.h
@@ -76,6 +76,9 @@ public:
     void setReadBuffer(ReadBuffer & in_) override;
 
     bool checkForSuffix() override;
+    bool checkForEndOfRow() override;
+
+    bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; }
 
 private:
     template <bool is_header>
@@ -92,8 +95,10 @@ public:
     TabSeparatedSchemaReader(ReadBuffer & in_, bool with_names_, bool with_types_, bool is_raw_, const FormatSettings & format_settings);
 
 private:
-    DataTypes readRowAndGetDataTypesImpl() override;
-    std::pair<std::vector<String>, DataTypes> readRowAndGetFieldsAndDataTypes() override;
+    bool allowVariableNumberOfColumns() const override { return format_settings.tsv.allow_variable_number_of_columns; }
+
+    std::optional<DataTypes> readRowAndGetDataTypesImpl() override;
+    std::optional<std::pair<std::vector<String>, DataTypes>> readRowAndGetFieldsAndDataTypes() override;
 
     PeekableReadBuffer buf;
     TabSeparatedFormatReader reader;
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
index 8a09e800fa7..b065e00f5d1 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp
@@ -490,7 +490,7 @@ TemplateSchemaReader::TemplateSchemaReader(
     setColumnNames(row_format.column_names);
 }
 
-DataTypes TemplateSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> TemplateSchemaReader::readRowAndGetDataTypes()
 {
     if (first_row)
         format_reader.readPrefix();
diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.h b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
index 8f9088e2c47..2752cb13e50 100644
--- a/src/Processors/Formats/Impl/TemplateRowInputFormat.h
+++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.h
@@ -119,7 +119,7 @@ public:
                          std::string row_between_delimiter,
                          const FormatSettings & format_settings_);
 
-    DataTypes readRowAndGetDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypes() override;
 
 private:
     void transformTypesIfNeeded(DataTypePtr & type, DataTypePtr & new_type) override;
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
index 3a65a6fe4ea..6cb469afca1 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.cpp
@@ -638,7 +638,7 @@ ValuesSchemaReader::ValuesSchemaReader(ReadBuffer & in_, const FormatSettings &
 {
 }
 
-DataTypes ValuesSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> ValuesSchemaReader::readRowAndGetDataTypes()
 {
     if (first_row)
     {
diff --git a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
index 8f8d44ec088..7f1dbc0da66 100644
--- a/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
+++ b/src/Processors/Formats/Impl/ValuesBlockInputFormat.h
@@ -105,7 +105,7 @@ public:
     ValuesSchemaReader(ReadBuffer & in_, const FormatSettings & format_settings);
 
 private:
-    DataTypes readRowAndGetDataTypes() override;
+    std::optional<DataTypes> readRowAndGetDataTypes() override;
 
     PeekableReadBuffer buf;
     ParserExpression parser;
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
index fb49779e0af..cb5c11e2d3b 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@@ -212,8 +212,23 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
     format_reader->skipRowStartDelimiter();
 
     ext.read_columns.resize(data_types.size());
-    for (size_t file_column = 0; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
+    size_t file_column = 0;
+    for (; file_column < column_mapping->column_indexes_for_input_fields.size(); ++file_column)
     {
+        if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow())
+        {
+            while (file_column < column_mapping->column_indexes_for_input_fields.size())
+            {
+                const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column];
+                columns[*rem_column_index]->insertDefault();
+                ++file_column;
+            }
+            break;
+        }
+
+        if (file_column != 0)
+            format_reader->skipFieldDelimiter();
+
         const auto & column_index = column_mapping->column_indexes_for_input_fields[file_column];
         const bool is_last_file_column = file_column + 1 == column_mapping->column_indexes_for_input_fields.size();
         if (column_index)
@@ -225,22 +240,6 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
                 column_mapping->names_of_columns[file_column]);
         else
             format_reader->skipField(file_column);
-
-        if (!is_last_file_column)
-        {
-            if (format_reader->allowVariableNumberOfColumns() && format_reader->checkForEndOfRow())
-            {
-                ++file_column;
-                while (file_column < column_mapping->column_indexes_for_input_fields.size())
-                {
-                    const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column];
-                    columns[*rem_column_index]->insertDefault();
-                    ++file_column;
-                }
-            }
-            else
-                format_reader->skipFieldDelimiter();
-        }
     }
 
     if (format_reader->allowVariableNumberOfColumns() && !format_reader->checkForEndOfRow())
@@ -248,7 +247,7 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
         do
         {
             format_reader->skipFieldDelimiter();
-            format_reader->skipField(1);
+            format_reader->skipField(file_column++);
         }
         while (!format_reader->checkForEndOfRow());
     }
@@ -419,12 +418,14 @@ namespace
 
 void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> & column_names, std::vector<String> & type_names)
 {
-    auto [first_row_values, first_row_types] = readRowAndGetFieldsAndDataTypes();
+    auto first_row = readRowAndGetFieldsAndDataTypes();
 
     /// No data.
-    if (first_row_values.empty())
+    if (!first_row)
         return;
 
+    auto [first_row_values, first_row_types] = *first_row;
+
     /// The first row contains non String elements, it cannot be a header.
     if (!checkIfAllTypesAreString(first_row_types))
     {
@@ -432,15 +433,17 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
         return;
     }
 
-    auto [second_row_values, second_row_types] = readRowAndGetFieldsAndDataTypes();
+    auto second_row = readRowAndGetFieldsAndDataTypes();
 
     /// Data contains only 1 row, don't treat it as a header.
-    if (second_row_values.empty())
+    if (!second_row)
     {
         buffered_types = first_row_types;
         return;
     }
 
+    auto [second_row_values, second_row_types] = *second_row;
+
     DataTypes data_types;
     bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values));
     size_t row = 2;
@@ -450,15 +453,16 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
     }
     else
     {
-        data_types = readRowAndGetDataTypes();
+        auto data_types_maybe = readRowAndGetDataTypes();
         /// Data contains only 2 rows.
-        if (data_types.empty())
+        if (!data_types_maybe)
         {
             second_row_can_be_type_names = false;
             data_types = second_row_types;
         }
         else
         {
+            data_types = *data_types_maybe;
             ++row;
         }
     }
@@ -490,10 +494,10 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
             return;
         }
 
-        auto next_row_types = readRowAndGetDataTypes();
+        auto next_row_types_maybe = readRowAndGetDataTypes();
         /// Check if there are no more rows in data. It means that all rows contains only String values and Nulls,
         /// so, the first two rows with all String elements can be real data and we cannot use them as a header.
-        if (next_row_types.empty())
+        if (!next_row_types_maybe)
         {
             /// Buffer first data types from the first row, because it doesn't contain Nulls.
             buffered_types = first_row_types;
@@ -502,11 +506,11 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
 
         ++row;
         /// Combine types from current row and from previous rows.
-        chooseResultColumnTypes(*this, data_types, next_row_types, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row);
+        chooseResultColumnTypes(*this, data_types, *next_row_types_maybe, getDefaultDataTypeForEscapingRule(FormatSettings::EscapingRule::CSV), default_colum_names, row);
     }
 }
 
-DataTypes FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes()
+std::optional<DataTypes> FormatWithNamesAndTypesSchemaReader::readRowAndGetDataTypes()
 {
     /// Check if we tried to detect a header and have buffered types from read rows.
     if (!buffered_types.empty())
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index b5103d3db39..7b3e2cbea67 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -121,7 +121,7 @@ public:
 
     virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); }
 
-    virtual bool allowVariableNumberOfColumns() { return false; }
+    virtual bool allowVariableNumberOfColumns() const { return false; }
 
     const FormatSettings & getFormatSettings() const { return format_settings; }
 
@@ -160,15 +160,15 @@ public:
     NamesAndTypesList readSchema() override;
 
 protected:
-    virtual DataTypes readRowAndGetDataTypes() override;
+    virtual std::optional<DataTypes> readRowAndGetDataTypes() override;
 
-    virtual DataTypes readRowAndGetDataTypesImpl()
+    virtual std::optional<DataTypes> readRowAndGetDataTypesImpl()
     {
         throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetDataTypesImpl is not implemented"};
     }
 
-    /// Return column fields with inferred types. In case of no more rows, return empty vectors.
-    virtual std::pair<std::vector<String>, DataTypes> readRowAndGetFieldsAndDataTypes()
+    /// Return column fields with inferred types. In case of no more rows, return nullopt.
+    virtual std::optional<std::pair<std::vector<String>, DataTypes>> readRowAndGetFieldsAndDataTypes()
     {
         throw Exception{ErrorCodes::NOT_IMPLEMENTED, "Method readRowAndGetFieldsAndDataTypes is not implemented"};
     }
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
new file mode 100644
index 00000000000..39d24f2cbd2
--- /dev/null
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
@@ -0,0 +1,52 @@
+CSV
+1	1
+2	0
+0	0
+3	3
+1	1	\N	\N
+2	\N	\N	\N
+\N	\N	\N	\N
+3	3	3	3
+1	1
+2	\N
+\N	\N
+3	3
+TSV
+1	1
+2	0
+0	0
+3	3
+1	1	\N	\N
+2	\N	\N	\N
+\N	\N	\N	\N
+3	3	3	3
+1	1
+2	\N
+\N	\N
+3	3
+JSONCompactEachRow
+1	1
+2	0
+0	0
+3	3
+1	1	\N	\N
+2	\N	\N	\N
+\N	\N	\N	\N
+3	3	3	3
+1	1
+2	\N
+\N	\N
+3	3
+CustomSeparated
+1	1
+2	0
+0	0
+3	3
+1	1	\N	\N
+2	\N	\N	\N
+\N	\N	\N	\N
+3	3	3	3
+1	1
+2	\N
+\N	\N
+3	3
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
new file mode 100644
index 00000000000..c0a80bf2114
--- /dev/null
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
@@ -0,0 +1,18 @@
+select 'CSV';
+select * from format(CSV, 'x UInt32, y UInt32', '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
+select * from format(CSV, '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
+select * from format(CSVWithNames, '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
+select 'TSV';
+select * from format(TSV, 'x UInt32, y UInt32', '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
+select * from format(TSV, '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
+select * from format(TSVWithNames, 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
+select 'JSONCompactEachRow';
+select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select * from format(JSONCompactEachRow, '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select * from format(JSONCompactEachRowWithNames, '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select 'CustomSeparated';
+set format_custom_escaping_rule='CSV', format_custom_field_delimiter='<field_delimiter>', format_custom_row_before_delimiter='<row_before_delimiter>', format_custom_row_after_delimiter='<row_after_delimiter>', format_custom_row_between_delimiter='<row_between_delimiter>', format_custom_result_before_delimiter='<result_before_delimiter>', format_custom_result_after_delimiter='<result_after_delimiter>';
+select * from format(CustomSeparated, 'x UInt32, y UInt32', '<result_before_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
+select * from format(CustomSeparated, '<result_before_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
+select * from format(CustomSeparatedWithNames, '<result_before_delimiter><row_before_delimiter>"x"<field_delimiter>"y"<row_after_delimiter><row_between_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
+

From c3c64a7dd50ee0f25dd94eb1d1b645e0352471ec Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Fri, 28 Jul 2023 11:40:05 +0000
Subject: [PATCH 2/4] Fix CSV field skipping in CustomSeparated and default insertion for unmapped columns

---
 .../Impl/CustomSeparatedRowInputFormat.cpp       |  5 ++++-
 .../Formats/RowInputFormatWithNamesAndTypes.cpp  |  3 ++-
 ...ats_with_variable_number_of_columns.reference | 16 ++++++++++++++++
 ...4_formats_with_variable_number_of_columns.sql |  4 ++++
 4 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
index 8f8e12e3c2a..ff3d6d49199 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@@ -139,7 +139,10 @@ void CustomSeparatedFormatReader::skipRowBetweenDelimiter()
 void CustomSeparatedFormatReader::skipField()
 {
     skipSpaces();
-    skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
+    if (format_settings.custom.escaping_rule == FormatSettings::EscapingRule::CSV)
+        readCSVFieldWithTwoPossibleDelimiters(*buf, format_settings.csv, format_settings.custom.field_delimiter, format_settings.custom.row_after_delimiter);
+    else
+        skipFieldByEscapingRule(*buf, format_settings.custom.escaping_rule, format_settings);
 }
 
 bool CustomSeparatedFormatReader::checkForEndOfRow()
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
index cb5c11e2d3b..4000bd14ddc 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@@ -220,7 +220,8 @@ bool RowInputFormatWithNamesAndTypes::readRow(MutableColumns & columns, RowReadE
             while (file_column < column_mapping->column_indexes_for_input_fields.size())
             {
                 const auto & rem_column_index = column_mapping->column_indexes_for_input_fields[file_column];
-                columns[*rem_column_index]->insertDefault();
+                if (rem_column_index)
+                    columns[*rem_column_index]->insertDefault();
                 ++file_column;
             }
             break;
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
index 39d24f2cbd2..e9ff548e05c 100644
--- a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
@@ -11,6 +11,10 @@ CSV
 2	\N
 \N	\N
 3	3
+1	0
+2	0
+0	0
+3	0
 TSV
 1	1
 2	0
@@ -24,6 +28,10 @@ TSV
 2	\N
 \N	\N
 3	3
+1	0
+2	0
+0	0
+3	0
 JSONCompactEachRow
 1	1
 2	0
@@ -37,6 +45,10 @@ JSONCompactEachRow
 2	\N
 \N	\N
 3	3
+1	0
+2	0
+0	0
+3	0
 CustomSeparated
 1	1
 2	0
@@ -50,3 +62,7 @@ CustomSeparated
 2	\N
 \N	\N
 3	3
+1	0
+2	0
+0	0
+3	0
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
index c0a80bf2114..dea4c20db8a 100644
--- a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
@@ -2,17 +2,21 @@ select 'CSV';
 select * from format(CSV, 'x UInt32, y UInt32', '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
 select * from format(CSV, '1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
 select * from format(CSVWithNames, '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
+select * from format(CSVWithNames, 'x UInt32, z UInt32', '"x","y"\n1,1\n2\n\n3,3,3,3') settings input_format_csv_allow_variable_number_of_columns=1;
 select 'TSV';
 select * from format(TSV, 'x UInt32, y UInt32', '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
 select * from format(TSV, '1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
 select * from format(TSVWithNames, 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
+select * from format(TSVWithNames, 'x UInt32, z UInt32', 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
 select 'JSONCompactEachRow';
 select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select * from format(JSONCompactEachRow, '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select * from format(JSONCompactEachRowWithNames, '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select * from format(JSONCompactEachRowWithNames, 'x UInt32, z UInt32', '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select 'CustomSeparated';
 set format_custom_escaping_rule='CSV', format_custom_field_delimiter='<field_delimiter>', format_custom_row_before_delimiter='<row_before_delimiter>', format_custom_row_after_delimiter='<row_after_delimiter>', format_custom_row_between_delimiter='<row_between_delimiter>', format_custom_result_before_delimiter='<result_before_delimiter>', format_custom_result_after_delimiter='<result_after_delimiter>';
 select * from format(CustomSeparated, 'x UInt32, y UInt32', '<result_before_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
 select * from format(CustomSeparated, '<result_before_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
 select * from format(CustomSeparatedWithNames, '<result_before_delimiter><row_before_delimiter>"x"<field_delimiter>"y"<row_after_delimiter><row_between_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
+select * from format(CustomSeparatedWithNames, 'x UInt32, z UInt32', '<result_before_delimiter><row_before_delimiter>"x"<field_delimiter>"y"<row_after_delimiter><row_between_delimiter><row_before_delimiter>1<field_delimiter>1<row_after_delimiter><row_between_delimiter><row_before_delimiter>2<row_after_delimiter><row_between_delimiter><row_before_delimiter><row_after_delimiter><row_between_delimiter><row_before_delimiter>3<field_delimiter>3<field_delimiter>3<field_delimiter>3<row_after_delimiter><result_after_delimiter>') settings input_format_custom_allow_variable_number_of_columns=1;
 

From bb38918a263dd59307c463bf038ebf0c4d28d184 Mon Sep 17 00:00:00 2001
From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com>
Date: Thu, 10 Aug 2023 13:21:11 +0200
Subject: [PATCH 3/4] Apply suggestions from code review
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: János Benjamin Antal <antaljanosbenjamin@users.noreply.github.com>
---
 src/Processors/Formats/Impl/CSVRowInputFormat.cpp             | 2 +-
 src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp | 2 +-
 src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp    | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
index 9092c7fceba..52f9571f962 100644
--- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp
@@ -417,7 +417,7 @@ std::optional<std::pair<std::vector<String>, DataTypes>> CSVSchemaReader::readRo
 
     auto fields = reader.readRow();
     auto data_types = tryInferDataTypesByEscapingRule(fields, format_settings, FormatSettings::EscapingRule::CSV);
-    return std::make_pair(fields, data_types);
+    return std::make_pair(std::move(fields), std::move(data_types));
 }
 
 std::optional<DataTypes> CSVSchemaReader::readRowAndGetDataTypesImpl()
diff --git a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
index ff3d6d49199..17cc88425f5 100644
--- a/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
+++ b/src/Processors/Formats/Impl/CustomSeparatedRowInputFormat.cpp
@@ -388,7 +388,7 @@ std::optional<std::pair<std::vector<String>, DataTypes>> CustomSeparatedSchemaRe
 
     auto fields = reader.readRow();
     auto data_types = tryInferDataTypesByEscapingRule(fields, reader.getFormatSettings(), reader.getEscapingRule(), &json_inference_info);
-    return std::make_pair(fields, data_types);
+    return std::make_pair(std::move(fields), std::move(data_types));
 }
 
 std::optional<DataTypes> CustomSeparatedSchemaReader::readRowAndGetDataTypesImpl()
diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
index 4000bd14ddc..fc2b5cd8207 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.cpp
@@ -425,7 +425,7 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
     if (!first_row)
         return;
 
-    auto [first_row_values, first_row_types] = *first_row;
+    const auto & [first_row_values, first_row_types] = *first_row;
 
     /// The first row contains non String elements, it cannot be a header.
     if (!checkIfAllTypesAreString(first_row_types))
@@ -443,7 +443,7 @@ void FormatWithNamesAndTypesSchemaReader::tryDetectHeader(std::vector<String> &
         return;
     }
 
-    auto [second_row_values, second_row_types] = *second_row;
+    const auto & [second_row_values, second_row_types] = *second_row;
 
     DataTypes data_types;
     bool second_row_can_be_type_names = checkIfAllTypesAreString(second_row_types) && checkIfAllValuesAreTypeNames(readNamesFromFields(second_row_values));

From 82aff97dd04605233371c9c6de1e59933961cb78 Mon Sep 17 00:00:00 2001
From: avogar <avogar@clickhouse.com>
Date: Thu, 10 Aug 2023 11:51:36 +0000
Subject: [PATCH 4/4] Add comment, more tests

---
 src/Processors/Formats/RowInputFormatWithNamesAndTypes.h  | 1 +
 ...2834_formats_with_variable_number_of_columns.reference | 8 ++++++++
 .../02834_formats_with_variable_number_of_columns.sql     | 2 ++
 3 files changed, 11 insertions(+)

diff --git a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
index 7b3e2cbea67..377341da685 100644
--- a/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
+++ b/src/Processors/Formats/RowInputFormatWithNamesAndTypes.h
@@ -119,6 +119,7 @@ public:
     /// Check suffix.
     virtual bool checkForSuffix() { return in->eof(); }
 
+    /// Check whether we are at the end of the row rather than between fields.
     virtual bool checkForEndOfRow() { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method checkForEndOfRow is not implemented"); }
 
     virtual bool allowVariableNumberOfColumns() const { return false; }
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
index e9ff548e05c..50173c150c0 100644
--- a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.reference
@@ -37,6 +37,14 @@ JSONCompactEachRow
 2	0
 0	0
 3	3
+1	1
+2	0
+0	0
+3	3
+1	[1,2,3]
+2	[]
+0	[]
+3	[3]
 1	1	\N	\N
 2	\N	\N	\N
 \N	\N	\N	\N
diff --git a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
index dea4c20db8a..7c55cf2e9a7 100644
--- a/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
+++ b/tests/queries/0_stateless/02834_formats_with_variable_number_of_columns.sql
@@ -10,6 +10,8 @@ select * from format(TSVWithNames, 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input
 select * from format(TSVWithNames, 'x UInt32, z UInt32', 'x\ty\n1\t1\n2\n\n3\t3\t3\t3') settings input_format_tsv_allow_variable_number_of_columns=1;
 select 'JSONCompactEachRow';
 select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select * from format(JSONCompactEachRow, 'x UInt32, y UInt32', '[1,1,[1,2,3]]\n[2]\n[]\n[3,3,3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1;
+select * from format(JSONCompactEachRow, 'x UInt32, y Array(UInt32)', '[1,[1,2,3],1]\n[2]\n[]\n[3,[3],3,3,[1,2,3]]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select * from format(JSONCompactEachRow, '[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select * from format(JSONCompactEachRowWithNames, '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;
 select * from format(JSONCompactEachRowWithNames, 'x UInt32, z UInt32', '["x","y"]\n[1,1]\n[2]\n[]\n[3,3,3,3]') settings input_format_json_compact_allow_variable_number_of_columns=1;