From eb0ae55e0297081a4e713ebdde6ddde232e71acc Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 28 Aug 2024 21:36:46 +0000 Subject: [PATCH] better rollbacks of columns --- src/Columns/ColumnTuple.cpp | 4 ++-- src/Columns/ColumnVariant.cpp | 4 ++-- src/Columns/IColumn.h | 15 ++++++++------- src/Processors/Formats/IRowInputFormat.cpp | 13 ++++++------- .../03231_bson_tuple_array_map.reference | 0 .../0_stateless/03231_bson_tuple_array_map.sh | 18 ++++++++++++++++++ 6 files changed, 36 insertions(+), 18 deletions(-) create mode 100644 tests/queries/0_stateless/03231_bson_tuple_array_map.reference create mode 100755 tests/queries/0_stateless/03231_bson_tuple_array_map.sh diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 3819ba3352b..65f3285bcfc 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -262,13 +262,13 @@ ColumnCheckpointPtr ColumnTuple::getCheckpoint() const for (const auto & column : columns) checkpoints.push_back(column->getCheckpoint()); - return std::make_shared<ColumnCheckpointWithNestedTuple>(size(), std::move(checkpoints)); + return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints)); } void ColumnTuple::rollback(const ColumnCheckpoint & checkpoint) { column_length = checkpoint.size; - const auto & checkpoints = assert_cast<const ColumnCheckpointWithNestedTuple &>(checkpoint).nested; + const auto & checkpoints = assert_cast<const ColumnCheckpointWithMultipleNested &>(checkpoint).nested; chassert(columns.size() == checkpoints.size()); for (size_t i = 0; i < columns.size(); ++i) diff --git a/src/Columns/ColumnVariant.cpp b/src/Columns/ColumnVariant.cpp index f73d074e726..a8cb779ad16 100644 --- a/src/Columns/ColumnVariant.cpp +++ b/src/Columns/ColumnVariant.cpp @@ -747,7 +747,7 @@ ColumnCheckpointPtr ColumnVariant::getCheckpoint() const for (const auto & column : variants) checkpoints.push_back(column->getCheckpoint()); - return std::make_shared<ColumnCheckpointWithNestedTuple>(size(), std::move(checkpoints)); + return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints)); } void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint) @@ -755,7 
+755,7 @@ void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint) getOffsets().resize_assume_reserved(checkpoint.size); getLocalDiscriminators().resize_assume_reserved(checkpoint.size); - const auto & checkpoints = assert_cast<const ColumnCheckpointWithNestedTuple &>(checkpoint).nested; + const auto & checkpoints = assert_cast<const ColumnCheckpointWithMultipleNested &>(checkpoint).nested; chassert(variants.size() == checkpoints.size()); for (size_t i = 0; i < variants.size(); ++i) diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index 61c1891a7a7..53f31d2b96d 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -54,8 +54,10 @@ using EqualRanges = std::vector; /// after failed parsing when column may be in inconsistent state. struct ColumnCheckpoint { + size_t size; + explicit ColumnCheckpoint(size_t size_) : size(size_) {} - size_t size = 0; + virtual ~ColumnCheckpoint() = default; }; using ColumnCheckpointPtr = std::shared_ptr; @@ -64,19 +66,17 @@ using ColumnCheckpoints = std::vector; struct ColumnCheckpointWithNested : public ColumnCheckpoint { ColumnCheckpointWithNested(size_t size_, ColumnCheckpointPtr nested_) - : ColumnCheckpoint(size_) - , nested(std::move(nested_)) + : ColumnCheckpoint(size_), nested(std::move(nested_)) { } ColumnCheckpointPtr nested; }; -struct ColumnCheckpointWithNestedTuple : public ColumnCheckpoint +struct ColumnCheckpointWithMultipleNested : public ColumnCheckpoint { - ColumnCheckpointWithNestedTuple(size_t size_, ColumnCheckpoints nested_) - : ColumnCheckpoint(size_) - , nested(std::move(nested_)) + ColumnCheckpointWithMultipleNested(size_t size_, ColumnCheckpoints nested_) + : ColumnCheckpoint(size_), nested(std::move(nested_)) { } @@ -548,6 +548,7 @@ public: /// Rollbacks column to the checkpoint. /// Unlike 'popBack' this method should work correctly even if column has invalid state. + /// Sizes of columns in checkpoint must be less than or equal to the current sizes. 
virtual void rollback(const ColumnCheckpoint & checkpoint) { popBack(size() - checkpoint.size); } /// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them. diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index 0b6c81923db..2a0695764b2 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -104,6 +104,7 @@ Chunk IRowInputFormat::read() size_t num_columns = header.columns(); MutableColumns columns = header.cloneEmptyColumns(); + ColumnCheckpoints checkpoints(columns.size()); block_missing_values.clear(); @@ -130,6 +131,9 @@ Chunk IRowInputFormat::read() { try { + for (size_t column_idx = 0; column_idx < columns.size(); ++column_idx) + checkpoints[column_idx] = columns[column_idx]->getCheckpoint(); + info.read_columns.clear(); continue_reading = readRow(columns, info); @@ -193,14 +197,9 @@ Chunk IRowInputFormat::read() syncAfterError(); - /// Truncate all columns in block to initial size (remove values, that was appended to only part of columns). - + /// Roll back all columns in block to their initial size (remove values that were appended to only some of the columns). 
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) - { - auto & column = columns[column_idx]; - if (column->size() > num_rows) - column->popBack(column->size() - num_rows); - } + columns[column_idx]->rollback(*checkpoints[column_idx]); } } } diff --git a/tests/queries/0_stateless/03231_bson_tuple_array_map.reference b/tests/queries/0_stateless/03231_bson_tuple_array_map.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03231_bson_tuple_array_map.sh b/tests/queries/0_stateless/03231_bson_tuple_array_map.sh new file mode 100755 index 00000000000..600b15fb70a --- /dev/null +++ b/tests/queries/0_stateless/03231_bson_tuple_array_map.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +DATA_FILE=$CLICKHOUSE_TEST_UNIQUE_NAME.data + +$CLICKHOUSE_LOCAL -q "select tuple(1, x'00000000000000000000FFFF0000000000') as x format BSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Tuple(UInt32, IPv6)') settings input_format_allow_errors_num=1" + +$CLICKHOUSE_LOCAL -q "select [x'00000000000000000000FFFF00000000', x'00000000000000000000FFFF0000000000'] as x format BSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Array(IPv6)') settings input_format_allow_errors_num=1" + +$CLICKHOUSE_LOCAL -q "select map('key1', x'00000000000000000000FFFF00000000', 'key2', x'00000000000000000000FFFF0000000000') as x format BSONEachRow" > $DATA_FILE +$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Map(String, IPv6)') settings input_format_allow_errors_num=1" + +rm $DATA_FILE