Merge pull request #25169 from nikitamikhaylov/csv-with-names-bug

Fix insert a subset of columns using CSVWithNames
This commit is contained in:
tavplubix 2021-06-11 10:42:48 +03:00 committed by GitHub
commit e99662c68e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 36 additions and 9 deletions

View File

@ -12,7 +12,7 @@ struct ColumnMapping
{
/// Non-atomic because there is strict `happens-before` between read and write access
/// See InputFormatParallelParsing
bool is_set;
bool is_set{false};
/// Maps indexes of columns in the input file to indexes of table columns
using OptionalIndexes = std::vector<std::optional<size_t>>;
OptionalIndexes column_indexes_for_input_fields;
@ -22,6 +22,11 @@ struct ColumnMapping
/// read the file header, and never changed afterwards.
/// For other columns, it is updated on each read() call.
std::vector<UInt8> read_columns;
/// Whether we have any columns that are not read from file at all,
/// and must be always initialized with defaults.
bool have_always_default_columns{false};
};
using ColumnMappingPtr = std::shared_ptr<ColumnMapping>;

View File

@ -193,7 +193,7 @@ void CSVRowInputFormat::readPrefix()
{
if (!read_column)
{
have_always_default_columns = true;
column_mapping->have_always_default_columns = true;
break;
}
}
@ -221,7 +221,7 @@ bool CSVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ext
/// Track whether we have to fill any columns in this row with default
/// values. If not, we return an empty column mask to the caller, so that
/// it doesn't have to check it.
bool have_default_columns = have_always_default_columns;
bool have_default_columns = column_mapping->have_always_default_columns;
ext.read_columns.assign(column_mapping->read_columns.size(), true);
const auto delimiter = format_settings.csv.delimiter;
@ -416,7 +416,7 @@ void CSVRowInputFormat::resetParser()
RowInputFormatWithDiagnosticInfo::resetParser();
column_mapping->column_indexes_for_input_fields.clear();
column_mapping->read_columns.clear();
have_always_default_columns = false;
column_mapping->have_always_default_columns = false;
}

View File

@ -31,17 +31,13 @@ public:
void resetParser() override;
private:
/// These fields are computed in the constructor.
bool with_names;
const FormatSettings format_settings;
DataTypes data_types;
using IndexesMap = std::unordered_map<String, size_t>;
IndexesMap column_indexes_by_names;
/// Whether we have any columns that are not read from file at all,
/// and must be always initialized with defaults.
bool have_always_default_columns = false;
void addInputColumn(const String & column_name);
void setupAllColumnsByTableSchema();

View File

@ -0,0 +1,3 @@
1000000
2000000
3000000

View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
# Checks that INSERT ... FORMAT CSVWithNames / TSVWithNames accepts input that
# carries either all table columns or only a subset of them (col0 alone).
# After every insert the total row count is printed; it is expected to grow by
# exactly ROWS each time.

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# Number of data rows sent by each INSERT.
# NOTE(review): presumably large enough for the input to be split across
# several parsing chunks - confirm before lowering it.
ROWS=1000000

# Prints a header line followed by ROWS identical data lines.
#   $1 - header line (column names)
#   $2 - data line repeated for every row
function generate_input()
{
    local header=$1
    local row=$2
    local i

    echo "$header"
    # Arithmetic loop instead of `seq`: avoids expanding a million words.
    for ((i = 0; i < ROWS; ++i)); do
        echo "$row"
    done
}

$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_01903"
$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01903 (col0 Date, col1 Nullable(UInt8)) ENGINE MergeTree() PARTITION BY toYYYYMM(col0) ORDER BY col0;"

# All columns are present in the input.
generate_input 'col0,col1' '2021-05-05,1' | $CLICKHOUSE_CLIENT -q "INSERT INTO test_01903 FORMAT CSVWithNames"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01903"

# Only col0 is present in the input (CSV).
generate_input 'col0' '2021-05-05' | $CLICKHOUSE_CLIENT -q "INSERT INTO test_01903 (col0) FORMAT CSVWithNames"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01903"

# Only col0 is present in the input (TSV).
generate_input 'col0' '2021-05-05' | $CLICKHOUSE_CLIENT -q "INSERT INTO test_01903 (col0) FORMAT TSVWithNames"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01903"

$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_01903"