better rollbacks of columns

This commit is contained in:
Anton Popov 2024-08-28 21:36:46 +00:00
parent c39d7092d0
commit eb0ae55e02
6 changed files with 36 additions and 18 deletions

View File

@ -262,13 +262,13 @@ ColumnCheckpointPtr ColumnTuple::getCheckpoint() const
for (const auto & column : columns)
checkpoints.push_back(column->getCheckpoint());
return std::make_shared<ColumnCheckpointWithNestedTuple>(size(), std::move(checkpoints));
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
}
void ColumnTuple::rollback(const ColumnCheckpoint & checkpoint)
{
column_length = checkpoint.size;
const auto & checkpoints = assert_cast<const ColumnCheckpointWithNestedTuple &>(checkpoint).nested;
const auto & checkpoints = assert_cast<const ColumnCheckpointWithMultipleNested &>(checkpoint).nested;
chassert(columns.size() == checkpoints.size());
for (size_t i = 0; i < columns.size(); ++i)

View File

@ -747,7 +747,7 @@ ColumnCheckpointPtr ColumnVariant::getCheckpoint() const
for (const auto & column : variants)
checkpoints.push_back(column->getCheckpoint());
return std::make_shared<ColumnCheckpointWithNestedTuple>(size(), std::move(checkpoints));
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
}
void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint)
@ -755,7 +755,7 @@ void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint)
getOffsets().resize_assume_reserved(checkpoint.size);
getLocalDiscriminators().resize_assume_reserved(checkpoint.size);
const auto & checkpoints = assert_cast<const ColumnCheckpointWithNestedTuple &>(checkpoint).nested;
const auto & checkpoints = assert_cast<const ColumnCheckpointWithMultipleNested &>(checkpoint).nested;
chassert(variants.size() == checkpoints.size());
for (size_t i = 0; i < variants.size(); ++i)

View File

@ -54,8 +54,10 @@ using EqualRanges = std::vector<EqualRange>;
/// after failed parsing when column may be in inconsistent state.
struct ColumnCheckpoint
{
size_t size;
explicit ColumnCheckpoint(size_t size_) : size(size_) {}
size_t size = 0;
virtual ~ColumnCheckpoint() = default;
};
using ColumnCheckpointPtr = std::shared_ptr<const ColumnCheckpoint>;
@ -64,19 +66,17 @@ using ColumnCheckpoints = std::vector<ColumnCheckpointPtr>;
struct ColumnCheckpointWithNested : public ColumnCheckpoint
{
ColumnCheckpointWithNested(size_t size_, ColumnCheckpointPtr nested_)
: ColumnCheckpoint(size_)
, nested(std::move(nested_))
: ColumnCheckpoint(size_), nested(std::move(nested_))
{
}
ColumnCheckpointPtr nested;
};
struct ColumnCheckpointWithNestedTuple : public ColumnCheckpoint
struct ColumnCheckpointWithMultipleNested : public ColumnCheckpoint
{
ColumnCheckpointWithNestedTuple(size_t size_, ColumnCheckpoints nested_)
: ColumnCheckpoint(size_)
, nested(std::move(nested_))
ColumnCheckpointWithMultipleNested(size_t size_, ColumnCheckpoints nested_)
: ColumnCheckpoint(size_), nested(std::move(nested_))
{
}
@ -548,6 +548,7 @@ public:
/// Restores the column to the state described by the checkpoint.
/// In contrast to 'popBack', this method is expected to work correctly even if the column is in an invalid state.
/// The sizes recorded in the checkpoint must be less than or equal to the current sizes of the columns.
/// The default implementation calls 'popBack' with the difference between the current size and the checkpoint size.
virtual void rollback(const ColumnCheckpoint & checkpoint)
{
    const auto rows_to_remove = size() - checkpoint.size;
    popBack(rows_to_remove);
}
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.

View File

@ -104,6 +104,7 @@ Chunk IRowInputFormat::read()
size_t num_columns = header.columns();
MutableColumns columns = header.cloneEmptyColumns();
ColumnCheckpoints checkpoints(columns.size());
block_missing_values.clear();
@ -130,6 +131,9 @@ Chunk IRowInputFormat::read()
{
try
{
for (size_t column_idx = 0; column_idx < columns.size(); ++column_idx)
checkpoints[column_idx] = columns[column_idx]->getCheckpoint();
info.read_columns.clear();
continue_reading = readRow(columns, info);
@ -193,14 +197,9 @@ Chunk IRowInputFormat::read()
syncAfterError();
/// Truncate all columns in block to initial size (remove values, that was appended to only part of columns).
/// Roll back all columns in the block to their initial size (remove values that were appended to only some of the columns).
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
{
auto & column = columns[column_idx];
if (column->size() > num_rows)
column->popBack(column->size() - num_rows);
}
columns[column_idx]->rollback(*checkpoints[column_idx]);
}
}
}

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash

# Test for reading BSONEachRow data with input_format_allow_errors_num=1.
# Each case writes a file whose value contains a 17-byte binary string (one byte longer than the
# 16-byte strings used next to it) and reads it back with the field declared as IPv6 inside a
# Tuple, an Array and a Map.
# NOTE(review): presumably the 17-byte value fails to parse as IPv6 and the row is skipped,
# exercising the rollback of nested columns - confirm against the reference output.

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

# Scratch file shared by all cases; it is overwritten before each read.
DATA_FILE="$CLICKHOUSE_TEST_UNIQUE_NAME.data"

# $CLICKHOUSE_LOCAL is left unquoted below.
# NOTE(review): presumably it may expand to a command with arguments - confirm in shell_config.sh.

# Case 1: Tuple(UInt32, IPv6) whose second element is the 17-byte value.
$CLICKHOUSE_LOCAL -q "select tuple(1, x'00000000000000000000FFFF0000000000') as x format BSONEachRow" > "$DATA_FILE"
$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Tuple(UInt32, IPv6)') settings input_format_allow_errors_num=1"

# Case 2: Array(IPv6) with a 16-byte element followed by the 17-byte one.
$CLICKHOUSE_LOCAL -q "select [x'00000000000000000000FFFF00000000', x'00000000000000000000FFFF0000000000'] as x format BSONEachRow" > "$DATA_FILE"
$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Array(IPv6)') settings input_format_allow_errors_num=1"

# Case 3: Map(String, IPv6) with a 16-byte value followed by the 17-byte one.
$CLICKHOUSE_LOCAL -q "select map('key1', x'00000000000000000000FFFF00000000', 'key2', x'00000000000000000000FFFF0000000000') as x format BSONEachRow" > "$DATA_FILE"
$CLICKHOUSE_LOCAL -q "select * from file('$DATA_FILE', BSONEachRow, 'x Map(String, IPv6)') settings input_format_allow_errors_num=1"

rm "$DATA_FILE"