mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-01 12:01:58 +00:00
fix performance of parsing row formats
This commit is contained in:
parent
b7fccd8617
commit
d932d0ae4f
@ -374,6 +374,12 @@ ColumnCheckpointPtr ColumnArray::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnCheckpointWithNested>(size(), getData().getCheckpoint());
|
return std::make_shared<ColumnCheckpointWithNested>(size(), getData().getCheckpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnArray::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
checkpoint.size = size();
|
||||||
|
getData().updateCheckpoint(*assert_cast<ColumnCheckpointWithNested &>(checkpoint).nested);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnArray::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnArray::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
getOffsets().resize_assume_reserved(checkpoint.size);
|
getOffsets().resize_assume_reserved(checkpoint.size);
|
||||||
|
@ -162,6 +162,7 @@ public:
|
|||||||
ColumnPtr compress() const override;
|
ColumnPtr compress() const override;
|
||||||
|
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
|
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||||
|
@ -309,6 +309,11 @@ public:
|
|||||||
return variant_column_ptr->getCheckpoint();
|
return variant_column_ptr->getCheckpoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override
|
||||||
|
{
|
||||||
|
variant_column_ptr->updateCheckpoint(checkpoint);
|
||||||
|
}
|
||||||
|
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override
|
void rollback(const ColumnCheckpoint & checkpoint) override
|
||||||
{
|
{
|
||||||
variant_column_ptr->rollback(checkpoint);
|
variant_column_ptr->rollback(checkpoint);
|
||||||
|
@ -317,6 +317,11 @@ ColumnCheckpointPtr ColumnMap::getCheckpoint() const
|
|||||||
return nested->getCheckpoint();
|
return nested->getCheckpoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnMap::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
nested->updateCheckpoint(checkpoint);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnMap::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnMap::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
nested->rollback(checkpoint);
|
nested->rollback(checkpoint);
|
||||||
|
@ -103,6 +103,7 @@ public:
|
|||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||||
|
@ -310,6 +310,12 @@ ColumnCheckpointPtr ColumnNullable::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnCheckpointWithNested>(size(), nested_column->getCheckpoint());
|
return std::make_shared<ColumnCheckpointWithNested>(size(), nested_column->getCheckpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnNullable::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
checkpoint.size = size();
|
||||||
|
nested_column->updateCheckpoint(*assert_cast<ColumnCheckpointWithNested &>(checkpoint).nested);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnNullable::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnNullable::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
getNullMapData().resize_assume_reserved(checkpoint.size);
|
getNullMapData().resize_assume_reserved(checkpoint.size);
|
||||||
|
@ -144,6 +144,7 @@ public:
|
|||||||
ColumnPtr compress() const override;
|
ColumnPtr compress() const override;
|
||||||
|
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
|
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override
|
void forEachSubcolumn(MutableColumnCallback callback) override
|
||||||
|
@ -686,22 +686,44 @@ ColumnCheckpointPtr ColumnObject::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnObjectCheckpoint>(size(), get_checkpoints(typed_paths), get_checkpoints(dynamic_paths_ptrs), shared_data->getCheckpoint());
|
return std::make_shared<ColumnObjectCheckpoint>(size(), get_checkpoints(typed_paths), get_checkpoints(dynamic_paths_ptrs), shared_data->getCheckpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnObject::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
auto & object_checkpoint = assert_cast<ColumnObjectCheckpoint &>(checkpoint);
|
||||||
|
|
||||||
|
auto update_checkpoints = [&](const auto & columns_map, auto & checkpoints_map)
|
||||||
|
{
|
||||||
|
for (const auto & [name, column] : columns_map)
|
||||||
|
{
|
||||||
|
auto & nested = checkpoints_map[name];
|
||||||
|
if (!nested)
|
||||||
|
nested = column->getCheckpoint();
|
||||||
|
else
|
||||||
|
column->updateCheckpoint(*nested);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
checkpoint.size = size();
|
||||||
|
update_checkpoints(typed_paths, object_checkpoint.typed_paths);
|
||||||
|
update_checkpoints(dynamic_paths, object_checkpoint.dynamic_paths);
|
||||||
|
shared_data->updateCheckpoint(*object_checkpoint.shared_data);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnObject::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnObject::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
const auto & object_checkpoint = assert_cast<const ColumnObjectCheckpoint &>(checkpoint);
|
const auto & object_checkpoint = assert_cast<const ColumnObjectCheckpoint &>(checkpoint);
|
||||||
|
|
||||||
for (auto & [name, column] : typed_paths)
|
for (auto & [name, column] : typed_paths)
|
||||||
{
|
{
|
||||||
const auto & nested_checkpoint = object_checkpoint.typed_paths.at(name);
|
const auto & nested = object_checkpoint.typed_paths.at(name);
|
||||||
chassert(nested_checkpoint);
|
chassert(nested);
|
||||||
column->rollback(*nested_checkpoint);
|
column->rollback(*nested);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (auto & [name, column] : dynamic_paths_ptrs)
|
for (auto & [name, column] : dynamic_paths_ptrs)
|
||||||
{
|
{
|
||||||
const auto & nested_checkpoint = object_checkpoint.dynamic_paths.at(name);
|
const auto & nested = object_checkpoint.dynamic_paths.at(name);
|
||||||
chassert(nested_checkpoint);
|
chassert(nested);
|
||||||
column->rollback(*nested_checkpoint);
|
column->rollback(*nested);
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_data->rollback(*object_checkpoint.shared_data);
|
shared_data->rollback(*object_checkpoint.shared_data);
|
||||||
|
@ -160,6 +160,7 @@ public:
|
|||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
|
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||||
|
@ -313,6 +313,12 @@ ColumnCheckpointPtr ColumnSparse::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnCheckpointWithNested>(size(), values->getCheckpoint());
|
return std::make_shared<ColumnCheckpointWithNested>(size(), values->getCheckpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnSparse::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
checkpoint.size = size();
|
||||||
|
values->updateCheckpoint(*assert_cast<ColumnCheckpointWithNested &>(checkpoint).nested);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnSparse::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnSparse::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
_size = checkpoint.size;
|
_size = checkpoint.size;
|
||||||
|
@ -150,6 +150,7 @@ public:
|
|||||||
ColumnPtr compress() const override;
|
ColumnPtr compress() const override;
|
||||||
|
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
|
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||||
|
@ -265,6 +265,16 @@ ColumnCheckpointPtr ColumnTuple::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
|
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnTuple::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
auto & checkpoints = assert_cast<ColumnCheckpointWithMultipleNested &>(checkpoint).nested;
|
||||||
|
chassert(checkpoints.size() == columns.size());
|
||||||
|
|
||||||
|
checkpoint.size = size();
|
||||||
|
for (size_t i = 0; i < columns.size(); ++i)
|
||||||
|
columns[i]->updateCheckpoint(*checkpoints[i]);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnTuple::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnTuple::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
column_length = checkpoint.size;
|
column_length = checkpoint.size;
|
||||||
|
@ -119,6 +119,7 @@ public:
|
|||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||||
|
@ -750,6 +750,16 @@ ColumnCheckpointPtr ColumnVariant::getCheckpoint() const
|
|||||||
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
|
return std::make_shared<ColumnCheckpointWithMultipleNested>(size(), std::move(checkpoints));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColumnVariant::updateCheckpoint(ColumnCheckpoint & checkpoint) const
|
||||||
|
{
|
||||||
|
auto & checkpoints = assert_cast<ColumnCheckpointWithMultipleNested &>(checkpoint).nested;
|
||||||
|
chassert(checkpoints.size() == variants.size());
|
||||||
|
|
||||||
|
checkpoint.size = size();
|
||||||
|
for (size_t i = 0; i < variants.size(); ++i)
|
||||||
|
variants[i]->updateCheckpoint(*checkpoints[i]);
|
||||||
|
}
|
||||||
|
|
||||||
void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint)
|
void ColumnVariant::rollback(const ColumnCheckpoint & checkpoint)
|
||||||
{
|
{
|
||||||
getOffsets().resize_assume_reserved(checkpoint.size);
|
getOffsets().resize_assume_reserved(checkpoint.size);
|
||||||
|
@ -249,6 +249,7 @@ public:
|
|||||||
size_t allocatedBytes() const override;
|
size_t allocatedBytes() const override;
|
||||||
void protect() override;
|
void protect() override;
|
||||||
ColumnCheckpointPtr getCheckpoint() const override;
|
ColumnCheckpointPtr getCheckpoint() const override;
|
||||||
|
void updateCheckpoint(ColumnCheckpoint & checkpoint) const override;
|
||||||
void rollback(const ColumnCheckpoint & checkpoint) override;
|
void rollback(const ColumnCheckpoint & checkpoint) override;
|
||||||
void forEachSubcolumn(MutableColumnCallback callback) override;
|
void forEachSubcolumn(MutableColumnCallback callback) override;
|
||||||
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
void forEachSubcolumnRecursively(RecursiveMutableColumnCallback callback) override;
|
||||||
|
@ -60,7 +60,7 @@ struct ColumnCheckpoint
|
|||||||
virtual ~ColumnCheckpoint() = default;
|
virtual ~ColumnCheckpoint() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
using ColumnCheckpointPtr = std::shared_ptr<const ColumnCheckpoint>;
|
using ColumnCheckpointPtr = std::shared_ptr<ColumnCheckpoint>;
|
||||||
using ColumnCheckpoints = std::vector<ColumnCheckpointPtr>;
|
using ColumnCheckpoints = std::vector<ColumnCheckpointPtr>;
|
||||||
|
|
||||||
struct ColumnCheckpointWithNested : public ColumnCheckpoint
|
struct ColumnCheckpointWithNested : public ColumnCheckpoint
|
||||||
@ -546,9 +546,12 @@ public:
|
|||||||
/// Returns checkpoint of current state of column.
|
/// Returns checkpoint of current state of column.
|
||||||
virtual ColumnCheckpointPtr getCheckpoint() const { return std::make_shared<ColumnCheckpoint>(size()); }
|
virtual ColumnCheckpointPtr getCheckpoint() const { return std::make_shared<ColumnCheckpoint>(size()); }
|
||||||
|
|
||||||
|
/// Updates the checkpoint with current state. It is used to avoid extra allocations in 'getCheckpoint'.
|
||||||
|
virtual void updateCheckpoint(ColumnCheckpoint & checkpoint) const { checkpoint.size = size(); }
|
||||||
|
|
||||||
/// Rollbacks column to the checkpoint.
|
/// Rollbacks column to the checkpoint.
|
||||||
/// Unlike 'popBack' this method should work correctly even if column has invalid state.
|
/// Unlike 'popBack' this method should work correctly even if column has invalid state.
|
||||||
/// Sizes of columns in checkpoint must be less or equal than current.
|
/// Sizes of columns in checkpoint must be less or equal than current size.
|
||||||
virtual void rollback(const ColumnCheckpoint & checkpoint) { popBack(size() - checkpoint.size); }
|
virtual void rollback(const ColumnCheckpoint & checkpoint) { popBack(size() - checkpoint.size); }
|
||||||
|
|
||||||
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
|
/// If the column contains subcolumns (such as Array, Nullable, etc), do callback on them.
|
||||||
|
@ -25,6 +25,9 @@ StreamingFormatExecutor::StreamingFormatExecutor(
|
|||||||
, checkpoints(result_columns.size())
|
, checkpoints(result_columns.size())
|
||||||
{
|
{
|
||||||
connect(format->getPort(), port);
|
connect(format->getPort(), port);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < result_columns.size(); ++i)
|
||||||
|
checkpoints[i] = result_columns[i]->getCheckpoint();
|
||||||
}
|
}
|
||||||
|
|
||||||
MutableColumns StreamingFormatExecutor::getResultColumns()
|
MutableColumns StreamingFormatExecutor::getResultColumns()
|
||||||
@ -54,7 +57,8 @@ size_t StreamingFormatExecutor::execute(ReadBuffer & buffer)
|
|||||||
|
|
||||||
size_t StreamingFormatExecutor::execute()
|
size_t StreamingFormatExecutor::execute()
|
||||||
{
|
{
|
||||||
setCheckpoints();
|
for (size_t i = 0; i < result_columns.size(); ++i)
|
||||||
|
result_columns[i]->updateCheckpoint(*checkpoints[i]);
|
||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
@ -117,11 +121,4 @@ size_t StreamingFormatExecutor::insertChunk(Chunk chunk)
|
|||||||
return chunk_rows;
|
return chunk_rows;
|
||||||
}
|
}
|
||||||
|
|
||||||
void StreamingFormatExecutor::setCheckpoints()
|
|
||||||
{
|
|
||||||
for (size_t i = 0; i < result_columns.size(); ++i)
|
|
||||||
checkpoints[i] = result_columns[i]->getCheckpoint();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -43,8 +43,6 @@ public:
|
|||||||
void setQueryParameters(const NameToNameMap & parameters);
|
void setQueryParameters(const NameToNameMap & parameters);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void setCheckpoints();
|
|
||||||
|
|
||||||
const Block header;
|
const Block header;
|
||||||
const InputFormatPtr format;
|
const InputFormatPtr format;
|
||||||
const ErrorCallback on_error;
|
const ErrorCallback on_error;
|
||||||
|
@ -104,7 +104,10 @@ Chunk IRowInputFormat::read()
|
|||||||
|
|
||||||
size_t num_columns = header.columns();
|
size_t num_columns = header.columns();
|
||||||
MutableColumns columns = header.cloneEmptyColumns();
|
MutableColumns columns = header.cloneEmptyColumns();
|
||||||
|
|
||||||
ColumnCheckpoints checkpoints(columns.size());
|
ColumnCheckpoints checkpoints(columns.size());
|
||||||
|
for (size_t column_idx = 0; column_idx < columns.size(); ++column_idx)
|
||||||
|
checkpoints[column_idx] = columns[column_idx]->getCheckpoint();
|
||||||
|
|
||||||
block_missing_values.clear();
|
block_missing_values.clear();
|
||||||
|
|
||||||
@ -132,7 +135,7 @@ Chunk IRowInputFormat::read()
|
|||||||
try
|
try
|
||||||
{
|
{
|
||||||
for (size_t column_idx = 0; column_idx < columns.size(); ++column_idx)
|
for (size_t column_idx = 0; column_idx < columns.size(); ++column_idx)
|
||||||
checkpoints[column_idx] = columns[column_idx]->getCheckpoint();
|
columns[column_idx]->updateCheckpoint(*checkpoints[column_idx]);
|
||||||
|
|
||||||
info.read_columns.clear();
|
info.read_columns.clear();
|
||||||
continue_reading = readRow(columns, info);
|
continue_reading = readRow(columns, info);
|
||||||
|
Loading…
Reference in New Issue
Block a user