From 3c1e6b5d2c52d8f54705428199d007a0f58857bb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 7 Mar 2024 13:11:42 +0000 Subject: [PATCH 001/139] init --- src/Interpreters/InterpreterInsertQuery.cpp | 17 +- src/Interpreters/SquashingTransform.cpp | 194 +++++++++++++++ src/Interpreters/SquashingTransform.h | 63 +++++ .../Transforms/BalancingTransform.cpp | 223 ++++++++++++++++++ .../Transforms/BalancingTransform.h | 128 ++++++++++ .../Transforms/SquashingChunksTransform.cpp | 136 ++++++++++- .../Transforms/SquashingChunksTransform.h | 34 ++- 7 files changed, 787 insertions(+), 8 deletions(-) create mode 100644 src/Processors/Transforms/BalancingTransform.cpp create mode 100644 src/Processors/Transforms/BalancingTransform.h diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index e27a8bd414b..0041a0f0846 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -604,9 +605,15 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + settings.max_memory_usage, presink_chains.size())); + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -668,6 +675,14 @@ BlockIO InterpreterInsertQuery::execute() table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); + + // auto balancing = std::make_shared( + // chain.getInputHeader(), + // table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + // table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, + // settings.max_memory_usage, true); + + // chain.addSource(std::move(balancing)); } auto context_ptr = getContext(); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 4ed0dddc191..0d976bd967a 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,4 +1,12 @@ +#include +#include #include +#include "DataTypes/Serializations/ISerialization.h" +#include "Processors/Chunk.h" +#include "base/sleep.h" +#include "base/types.h" +#include +#include namespace DB @@ -126,4 +134,190 @@ bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +NewSquashingTransform::NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block NewSquashingTransform::add(Chunk && input_chunk) +{ + return addImpl(std::move(input_chunk)); +} + +const ChunksToSquash * getInfoFromChunk(const Chunk & chunk) +{ + auto info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); + + return agg_info; +} + +template +Block NewSquashingTransform::addImpl(ReferenceType input_chunk) +{ + if (!input_chunk.hasChunkInfo()) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + const auto *info = getInfoFromChunk(input_chunk); + for (auto & one : info->chunks) + { + append(std::move(one), info->data_type); + } + + // if (isEnoughSize(accumulated_block)) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } +} + +template +void NewSquashingTransform::append(ReferenceType input_chunk, DataTypePtr data_type) +{ + if (input_chunk.getNumColumns() == 0) + return; + if (!accumulated_block) + { + for (const ColumnPtr& column : input_chunk.getColumns()) + { + ColumnWithTypeAndName col = ColumnWithTypeAndName(column, data_type, " "); + accumulated_block.insert(accumulated_block.columns(), col); + } + return; + } + + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_chunk.getColumns()[i]; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } +} + + + +BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) + , header(std::move(header_)) +{ + // Use query-level memory tracker + if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) + memory_tracker = memory_tracker_child->getParent(); +} + +Chunk BalanceTransform::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Chunk BalanceTransform::convertToChunk(std::vector &chunks) +{ + if (chunks.empty()) + return {}; + + auto info = std::make_shared(); + for (auto &chunk : chunks) + info->chunks.push_back(chunk.clone()); + info->data_type = data_type; + + if (!info->chunks.empty()) /// Note: This if is only for debugging, structure of chunk copies the structure of info + { /// it's possible to use only 'Chunk(header.cloneEmptyColumns(), 0, info)' + return Chunk({info->chunks[0].getColumns(), info->chunks[0].getNumRows(), info}); + 
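+        // The group travels to the squashing step as ChunkInfo only:
+        // NewSquashingTransform::addImpl() reads it back via getInfoFromChunk()
+        // and merges the buffered chunks into a single block.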
} + + return Chunk(header.cloneEmptyColumns(), 0, info); +} + + +template +Chunk BalanceTransform::addImpl(ReferenceType input_block) +{ + Chunk input_chunk(input_block.getColumns(), input_block.rows()); + if (!data_type && !input_block.getDataTypes().empty()) + data_type = input_block.getDataTypes()[0]; + // /// End of input stream. + if (!input_chunk) + { + Chunk res_chunk = convertToChunk(chunks_to_merge_vec); + // // std::cerr << "end of stream. Adding info to chunk " << std::endl; + return res_chunk; + } + + if (isEnoughSize(chunks_to_merge_vec)) + chunks_to_merge_vec.clear(); + + if (input_chunk) + chunks_to_merge_vec.push_back(input_chunk.clone()); + // std::cerr << "pushing back data. size: " << chunks_to_merge_vec.size() << std::endl; + + if (isEnoughSize(chunks_to_merge_vec)) + { + // // // std::cerr << "enough size" << std::endl; + Chunk res_chunk = convertToChunk(chunks_to_merge_vec); + return res_chunk; + } + return input_chunk; +} + +bool BalanceTransform::isEnoughSize(const std::vector & chunks) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const Chunk & chunk : chunks) + { + rows += chunk.getNumRows(); + bytes += chunk.bytes(); + } + auto free_memory = memory_tracker->getHardLimit() - memory_tracker->get(); + std::cerr << "========Just memory representation, free memory: " << free_memory << ", chunk size: " << bytes << std::endl + << " hardLimit: " << memory_tracker->getHardLimit() << " get(): " << memory_tracker->get() << std::endl; + checkAndWaitMemoryAvailability(bytes); + + free_memory = memory_tracker->getHardLimit() - memory_tracker->get(); + std::cerr << "========Just memory representation after, free memory: " << free_memory << ", chunk size: " << bytes << std::endl + << ", hardLimit: " << memory_tracker->getHardLimit() << ", get(): " << memory_tracker->get() << std::endl; + + return isEnoughSize(rows, bytes); +} + +void BalanceTransform::checkAndWaitMemoryAvailability(size_t bytes) +{ + // bytes_used += bytes; + if (const auto hard_limit = memory_tracker->getHardLimit() != 0) + { + auto free_memory = hard_limit - memory_tracker->get(); + while (Int64(bytes) >= free_memory) + { + // std::cerr << "========Waiting a while from memory, free memory: " << free_memory << ", chunk size: " << bytes << std::endl; + // sleepForMilliseconds(10); + // checkAndWaitMemoryAvailability(bytes); + free_memory = hard_limit - memory_tracker->get(); + } + } +} + +bool BalanceTransform::isEnoughSize(const Chunk & chunk) +{ + return isEnoughSize(chunk.getNumRows(), chunk.bytes()); +} + + +bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} + } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index b04d012bcd1..0c2fe1ef12b 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -1,11 +1,22 @@ #pragma once +#include +#include +#include #include +#include +#include "Common/MemoryTracker.h" +#include "DataTypes/Serializations/ISerialization.h" namespace DB { +struct ChunksToSquash : public ChunkInfo +{ + mutable std::vector chunks = {}; + DataTypePtr data_type = nullptr; +}; /** Merging consecutive passed blocks to specified minimum size. 
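 *
 * isEnoughSize() fires when rows >= min_block_size_rows or bytes >= min_block_size_bytes;
 * a threshold of 0 disables that check, and with both thresholds 0 every block is
 * passed through unchanged. Illustrative sketch (the values and the read() source
 * are made up for the example):
 *
 *     SquashingTransform squashing(1048576, 0);    /// squash until ~1M rows
 *     while (Block block = source.read())
 *         if (Block ready = squashing.add(std::move(block)))
 *             process(ready);                      /// merged block of >= 1M rows
 *     if (Block remainder = squashing.add({}))     /// empty block flushes the rest
 *         process(remainder);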
* @@ -47,4 +58,56 @@ private: bool isEnoughSize(size_t rows, size_t bytes) const; }; +class NewSquashingTransform +{ +public: + NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + Block add(Chunk && input_chunk); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + template + Block addImpl(ReferenceType chunk); + + template + void append(ReferenceType input_chunk, DataTypePtr data_type); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + +class BalanceTransform +{ +public: + BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + + Chunk add(Block && input_block); + +private: + std::vector chunks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Chunk accumulated_block; + const Block header; + + template + Chunk addImpl(ReferenceType input_block); + + bool isEnoughSize(const Chunk & chunk); + bool isEnoughSize(const std::vector & chunks); + bool isEnoughSize(size_t rows, size_t bytes) const; + void checkAndWaitMemoryAvailability(size_t bytes); + DataTypePtr data_type = nullptr; + + MemoryTracker * memory_tracker; + + Chunk convertToChunk(std::vector &chunks); +}; + } diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp new file mode 100644 index 00000000000..b899702561e --- /dev/null +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -0,0 +1,223 @@ +#include +#include +#include "Common/Logger.h" +#include "Common/logger_useful.h" +#include "Interpreters/SquashingTransform.h" +#include "Processors/Chunk.h" + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int MEMORY_LIMIT_EXCEEDED; +} + +LBalancingChunksTransform::LBalancingChunksTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, [[maybe_unused]] bool skip_empty_chunks_) + : ISimpleTransform(header, header, false), max_memory_usage(max_memory_usage_), squashing(min_block_size_rows, min_block_size_bytes), balance(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void LBalancingChunksTransform::transform(Chunk & chunk) +{ + if (!finished) + { + Chunk res_chunk = balance.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); + if (res_chunk.hasChunkInfo()) + { + // std::cerr << "BalancingTransform: adding chunk " << std::endl; + + // { + // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); + // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; + // if (!agg_info->chunks.empty()) + // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; + // } + + } + else + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk, not finished.", reinterpret_cast(this));/// ISSUE: it's not clear why finished label is not set + std::swap(res_chunk, chunk); + } + else + { + Chunk res_chunk = balance.add({}); + if (res_chunk.hasChunkInfo()) + { + // std::cerr << "BalancingTransform: finished adding, NumRows:" << res_chunk.getNumRows() << ", HasInfo: " << res_chunk.hasChunkInfo() << std::endl; + // { + // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); + // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; + // if 
(!agg_info->chunks.empty()) + // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; + // } + + } + else + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk on finished", reinterpret_cast(this)); + std::swap(res_chunk, chunk); + } + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: struct of output chunk: {}", reinterpret_cast(this), chunk.dumpStructure()); +} + +IProcessor::Status LBalancingChunksTransform::prepare() +{ + if (!finished && input.isFinished()) + { + finished = true; + return Status::Ready; + } + return ISimpleTransform::prepare(); +} + + +BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), max_memory_usage(max_memory_usage_), squashing(min_block_size_rows, min_block_size_bytes), balance(header, min_block_size_rows, min_block_size_bytes) +{ +} + +IProcessor::Status BalancingChunksTransform::prepare() +{ + Status status = Status::Ready; + + while (status == Status::Ready) + { + status = !has_data ? prepareConsume() + : prepareSend(); + } + + return status; +} + +IProcessor::Status BalancingChunksTransform::prepareConsume() +{ + LOG_TRACE(getLogger("balancingProcessor"), "prepareConsume"); + for (auto & input : inputs) + { + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) + { + input.close(); + return Status::Finished; + } + + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + chunk = input.pull(); + was_output_processed.assign(outputs.size(), false); + transform(chunk); + if (chunk.hasChunkInfo()) + { + LOG_TRACE(getLogger("balancingProcessor"), "hasData"); + has_data = true; + } + else + { + finished = true; + LOG_TRACE(getLogger("balancingProcessor"), "hasData, finished"); + transform(chunk); + has_data = true; + } + } + return Status::Ready; +} + +void BalancingChunksTransform::transform(Chunk & chunk_) +{ + if (!finished) + { + Chunk res_chunk = balance.add(getInputPorts().front().getHeader().cloneWithColumns(chunk_.detachColumns())); + if (res_chunk.hasChunkInfo()) + { + // std::cerr << "BalancingTransform: adding chunk " << std::endl; + + // { + // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); + // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; + // if (!agg_info->chunks.empty()) + // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; + // } + } + else + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk, not finished.", reinterpret_cast(this));/// ISSUE: it's not clear why finished label is not set + std::swap(res_chunk, chunk_); + } + else + { + Chunk res_chunk = balance.add({}); + if (res_chunk.hasChunkInfo()) + { + // std::cerr << "BalancingTransform: finished adding, NumRows:" << res_chunk.getNumRows() << ", HasInfo: " << res_chunk.hasChunkInfo() << std::endl; + // { + // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); + // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() 
<< std::endl; + // if (!agg_info->chunks.empty()) + // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; + // } + } + else + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk on finished", reinterpret_cast(this)); + std::swap(res_chunk, chunk_); + } + LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: struct of output chunk: {}, hasInfo: {}", reinterpret_cast(this), chunk_.dumpStructure(), chunk.hasChunkInfo()); +} + +IProcessor::Status BalancingChunksTransform::prepareSend() +{ + LOG_TRACE(getLogger("balancingProcessor"), "prepareGenerate {}", chunk.dumpStructure()); + bool all_outputs_processed = true; + + size_t chunk_number = 0; + for (auto &output : outputs) + { + auto & was_processed = was_output_processed[chunk_number]; + ++chunk_number; + + if (!chunk.hasChunkInfo()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info must be not empty in prepareGenerate()"); + + if (was_processed) + continue; + + if (output.isFinished()) + continue; + + if (!output.canPush()) + { + all_outputs_processed = false; + continue; + } + + LOG_TRACE(getLogger("balancingProcessor"), "chunk struct: {}", chunk.dumpStructure()); + output.push(chunk.clone()); + was_processed = true; + } + + if (all_outputs_processed) + { + has_data = false; + return Status::Ready; + } + + return Status::PortFull; +} +} diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/BalancingTransform.h new file mode 100644 index 00000000000..d992a14cdd4 --- /dev/null +++ b/src/Processors/Transforms/BalancingTransform.h @@ -0,0 +1,128 @@ +#pragma once + +#include +#include +#include "Processors/Chunk.h" +#include "Processors/IProcessor.h" +#include "Processors/Transforms/ExceptionKeepingTransform.h" +#include + +namespace DB +{ + +class BalancingTransform : public ExceptionKeepingTransform +{ +public: + explicit BalancingTransform( + const Block & header, size_t max_memory_usage_); + + String getName() const override { return "BalancingTransform"; } + + void work() override; + + const Chunks & getChunks() const + { + return chunks; + } + +protected: + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; + void onFinish() override; + +private: + size_t CalculateBlockSize(const Block & block); + Chunks chunks; + Blocks blocks; + size_t blocks_size; + Chunk cur_chunk; + Chunk finish_chunk; + size_t max_memory_usage; +}; + +/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. + + +/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. 
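+/// Single-input/single-output (ISimpleTransform) flavour of the balancing step:
+/// each input chunk is fed to BalanceTransform, which buffers chunks until the
+/// configured row/byte thresholds are reached and then emits one chunk whose
+/// ChunksToSquash info carries the whole group for the squashing step to merge.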
+class LBalancingChunksTransform : public ISimpleTransform +{ +public: + explicit LBalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage, bool skip_empty_chunks_); + + String getName() const override { return "LBalancingChunksTransform"; } + + const Chunks & getChunks() const + { + return chunks; + } + +protected: + void transform(Chunk &) override; + + IProcessor::Status prepare() override; + +private: + size_t CalculateBlockSize(const Block & block); + [[maybe_unused]] ChunksToSquash chunks_to_merge; + Chunks chunks; + Blocks blocks; + [[maybe_unused]] size_t blocks_size; + Chunk cur_chunk; + Chunk finish_chunk; + [[maybe_unused]] size_t max_memory_usage; + SquashingTransform squashing; + BalanceTransform balance; + [[maybe_unused]]size_t acc_size = 0; + + /// When consumption is finished we need to release the final chunk regardless of its size. + bool finished = false; +}; + +class BalancingChunksTransform : public IProcessor +{ +public: + BalancingChunksTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports); + // explicit BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage, bool skip_empty_chunks_); + + String getName() const override { return "BalancingChunksTransform"; } + + const Chunks & getChunks() const + { + return chunks; + } + + InputPorts & getInputPorts() { return inputs; } + OutputPorts & getOutputPorts() { return outputs; } + + Status prepare() override; + Status prepareConsume(); + Status prepareSend(); + + // void work() override; + void transform(Chunk & chunk); + +protected: + // void transform(Chunk &) ; + +private: + size_t CalculateBlockSize(const Block & block); + [[maybe_unused]] ChunksToSquash chunks_to_merge; + Chunks chunks; + Chunk chunk; + Blocks blocks; + [[maybe_unused]] size_t blocks_size; + Chunk cur_chunk; + Chunk finish_chunk; + [[maybe_unused]] size_t max_memory_usage; + SquashingTransform squashing; + BalanceTransform balance; + [[maybe_unused]]size_t acc_size = 0; + bool has_data = false; + std::vector was_output_processed; + + /// When consumption is finished we need to release the final chunk regardless of its size. 
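+    /// (prepare() sets this once the input port reports isFinished(); transform()
+    /// then calls balance.add({}) so that BalanceTransform flushes whatever it
+    /// still buffers.)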
+ bool finished = false; +}; +} + diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 7de9538e435..22ce3ba9359 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,4 +1,6 @@ #include +#include +#include "Common/logger_useful.h" namespace DB { @@ -12,7 +14,8 @@ SquashingChunksTransform::SquashingChunksTransform( void SquashingChunksTransform::onConsume(Chunk chunk) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); + if (auto block = squashing.add(std::move(chunk))) { cur_chunk.setColumns(block.getColumns(), block.rows()); } @@ -29,7 +32,9 @@ SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, structure of block: {}", reinterpret_cast(this), block.dumpStructure()); finish_chunk.setColumns(block.getColumns(), block.rows()); + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); } void SquashingChunksTransform::work() @@ -50,8 +55,8 @@ void SquashingChunksTransform::work() } SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, true), squashing(min_block_size_rows, min_block_size_bytes) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, [[maybe_unused]] bool skip_empty_chunks_) + : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) { } @@ -59,11 +64,13 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) { if (!finished) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); + if (auto block = squashing.add(std::move(chunk))) chunk.setColumns(block.getColumns(), block.rows()); } else { + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); } @@ -79,4 +86,125 @@ IProcessor::Status SimpleSquashingChunksTransform::prepare() return ISimpleTransform::prepare(); } +//maybe it makes sense to pass not the IProcessor entity, but the SimpleTransform? anyway we have one input and one output +ProcessorSquashingTransform::ProcessorSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, [[maybe_unused]]size_t num_ports) + : IProcessor(InputPorts(1, header), OutputPorts(1, header)), squashing(min_block_size_rows, min_block_size_bytes) +{ +} + +IProcessor::Status ProcessorSquashingTransform::prepare() +{ + Status status = Status::Ready; + + while (status == Status::Ready) + { + status = !has_data ? 
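+            // Two-phase state machine: while Ready, alternate between pulling
+            // and squashing input (prepareConsume) and pushing the result to
+            // the output (prepareGenerate, once has_data is set).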
prepareConsume() + : prepareGenerate(); + } + + return status; +} + +IProcessor::Status ProcessorSquashingTransform::prepareConsume() +{ + LOG_TRACE(getLogger("balancing"), "prepareConsume"); + for (auto & input : getInputPorts()) + { + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) + { + input.close(); + return Status::Finished; + } + + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + return Status::NeedData; + + chunk = input.pull(); + has_data = true; + was_output_processed.assign(outputs.size(), false); + transform(chunk); + // if (chunk) + // chunks.push_back(std::move(chunk)); + } + return Status::Ready; +} + +void ProcessorSquashingTransform::transform(Chunk & chunk_) +{ + // [[maybe_unused]]const auto * agg_info = typeid_cast(chunk.getChunkInfo().get()); + // if (agg_info) + // { + // std::cerr << "Beginning of SquashingTransform: size of one group: " << agg_info->chunks.size() << std::endl; + // if (!agg_info->chunks.empty()) + // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl; + // } + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: Struct of input chunk: {}", reinterpret_cast(this), chunk_.dumpStructure()); + if (!finished) + { + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk_.hasChunkInfo()); + if (auto block = squashing.add(std::move(chunk_))) + chunk_.setColumns(block.getColumns(), block.rows()); + } + else + { + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), chunk_.hasChunkInfo()); + auto block = squashing.add({}); + chunk_.setColumns(block.getColumns(), block.rows()); + } +} + +IProcessor::Status ProcessorSquashingTransform::prepareGenerate() +{ + LOG_TRACE(getLogger("squashingProcessor"), "prepareGenerate"); + bool all_outputs_processed = true; + + size_t chunk_number = 0; + for (auto &output : getOutputPorts()) + { + auto & was_processed = was_output_processed[chunk_number]; + ++chunk_number; + + if (was_processed) + continue; + + if (output.isFinished()) + continue; + + if (!output.canPush()) + { + all_outputs_processed = false; + continue; + } + + LOG_TRACE(getLogger("squashingProcessor"), "chunk struct: {}", chunk.dumpStructure()); + output.push(chunk.clone()); + was_processed = true; + } + + if (all_outputs_processed) + { + has_data = false; + return Status::Ready; + } + return Status::PortFull; +} } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f82e9e46a61..f140f5274d7 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -23,7 +23,7 @@ protected: void onFinish() override; private: - SquashingTransform squashing; + NewSquashingTransform squashing; Chunk cur_chunk; Chunk finish_chunk; }; @@ -32,7 +32,7 @@ private: class SimpleSquashingChunksTransform : public ISimpleTransform { public: - explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, bool skip_empty_chunks_ = true); String getName() const override { return 
"SimpleSquashingTransform"; } @@ -42,7 +42,35 @@ protected: IProcessor::Status prepare() override; private: - SquashingTransform squashing; + NewSquashingTransform squashing; + + bool finished = false; +}; + + +class ProcessorSquashingTransform : public IProcessor +{ +public: + explicit ProcessorSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports); + + String getName() const override { return "ProcessorSquashingTransform"; } + +protected: + InputPorts & getInputPorts() { return inputs; } + OutputPorts & getOutputPorts() { return outputs; } + + Status prepare() override; + Status prepareConsume(); + Status prepareGenerate(); + + // void work() override; + void transform(Chunk & chunk); + +private: + NewSquashingTransform squashing; + Chunk chunk; + bool has_data = false; + std::vector was_output_processed; /// When consumption is finished we need to release the final chunk regardless of its size. bool finished = false; From 852dd4c059f7163a28207be84c133212f683fa68 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 7 Mar 2024 13:42:01 +0000 Subject: [PATCH 002/139] refactor --- src/Interpreters/SquashingTransform.cpp | 27 +--- src/Interpreters/SquashingTransform.h | 3 +- .../Transforms/BalancingTransform.cpp | 93 +------------ .../Transforms/BalancingTransform.h | 90 +------------ .../Transforms/SquashingChunksTransform.cpp | 127 +----------------- .../Transforms/SquashingChunksTransform.h | 29 ---- 6 files changed, 9 insertions(+), 360 deletions(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 0d976bd967a..a52b54653c1 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,11 +1,4 @@ -#include -#include #include -#include "DataTypes/Serializations/ISerialization.h" -#include "Processors/Chunk.h" -#include "base/sleep.h" -#include "base/types.h" -#include #include @@ -135,6 +128,7 @@ bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const } + NewSquashingTransform::NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) @@ -170,7 +164,6 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) append(std::move(one), info->data_type); } - // if (isEnoughSize(accumulated_block)) { Block to_return; std::swap(to_return, accumulated_block); @@ -203,8 +196,6 @@ void NewSquashingTransform::append(ReferenceType input_chunk, DataTypePtr data_t } } - - BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) @@ -245,11 +236,9 @@ Chunk BalanceTransform::addImpl(ReferenceType input_block) Chunk input_chunk(input_block.getColumns(), input_block.rows()); if (!data_type && !input_block.getDataTypes().empty()) data_type = input_block.getDataTypes()[0]; - // /// End of input stream. if (!input_chunk) { Chunk res_chunk = convertToChunk(chunks_to_merge_vec); - // // std::cerr << "end of stream. Adding info to chunk " << std::endl; return res_chunk; } @@ -258,11 +247,9 @@ Chunk BalanceTransform::addImpl(ReferenceType input_block) if (input_chunk) chunks_to_merge_vec.push_back(input_chunk.clone()); - // std::cerr << "pushing back data. 
size: " << chunks_to_merge_vec.size() << std::endl; if (isEnoughSize(chunks_to_merge_vec)) { - // // // std::cerr << "enough size" << std::endl; Chunk res_chunk = convertToChunk(chunks_to_merge_vec); return res_chunk; } @@ -279,15 +266,8 @@ bool BalanceTransform::isEnoughSize(const std::vector & chunks) rows += chunk.getNumRows(); bytes += chunk.bytes(); } - auto free_memory = memory_tracker->getHardLimit() - memory_tracker->get(); - std::cerr << "========Just memory representation, free memory: " << free_memory << ", chunk size: " << bytes << std::endl - << " hardLimit: " << memory_tracker->getHardLimit() << " get(): " << memory_tracker->get() << std::endl; checkAndWaitMemoryAvailability(bytes); - free_memory = memory_tracker->getHardLimit() - memory_tracker->get(); - std::cerr << "========Just memory representation after, free memory: " << free_memory << ", chunk size: " << bytes << std::endl - << ", hardLimit: " << memory_tracker->getHardLimit() << ", get(): " << memory_tracker->get() << std::endl; - return isEnoughSize(rows, bytes); } @@ -298,12 +278,7 @@ void BalanceTransform::checkAndWaitMemoryAvailability(size_t bytes) { auto free_memory = hard_limit - memory_tracker->get(); while (Int64(bytes) >= free_memory) - { - // std::cerr << "========Waiting a while from memory, free memory: " << free_memory << ", chunk size: " << bytes << std::endl; - // sleepForMilliseconds(10); - // checkAndWaitMemoryAvailability(bytes); free_memory = hard_limit - memory_tracker->get(); - } } } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index 0c2fe1ef12b..fb6834e03be 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -5,8 +5,7 @@ #include #include #include -#include "Common/MemoryTracker.h" -#include "DataTypes/Serializations/ISerialization.h" +#include namespace DB diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index b899702561e..52d24fc9d01 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -1,9 +1,6 @@ -#include #include -#include "Common/Logger.h" -#include "Common/logger_useful.h" -#include "Interpreters/SquashingTransform.h" -#include "Processors/Chunk.h" +#include +#include namespace DB { @@ -13,67 +10,8 @@ namespace ErrorCodes extern const int MEMORY_LIMIT_EXCEEDED; } -LBalancingChunksTransform::LBalancingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, [[maybe_unused]] bool skip_empty_chunks_) - : ISimpleTransform(header, header, false), max_memory_usage(max_memory_usage_), squashing(min_block_size_rows, min_block_size_bytes), balance(header, min_block_size_rows, min_block_size_bytes) -{ -} - -void LBalancingChunksTransform::transform(Chunk & chunk) -{ - if (!finished) - { - Chunk res_chunk = balance.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns())); - if (res_chunk.hasChunkInfo()) - { - // std::cerr << "BalancingTransform: adding chunk " << std::endl; - - // { - // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); - // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; - // if (!agg_info->chunks.empty()) - // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; - // } - - } - else - LOG_TRACE(getLogger("balancing"), "{}, 
BalancingTransform: not adding chunk, not finished.", reinterpret_cast(this));/// ISSUE: it's not clear why finished label is not set - std::swap(res_chunk, chunk); - } - else - { - Chunk res_chunk = balance.add({}); - if (res_chunk.hasChunkInfo()) - { - // std::cerr << "BalancingTransform: finished adding, NumRows:" << res_chunk.getNumRows() << ", HasInfo: " << res_chunk.hasChunkInfo() << std::endl; - // { - // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); - // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; - // if (!agg_info->chunks.empty()) - // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; - // } - - } - else - LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk on finished", reinterpret_cast(this)); - std::swap(res_chunk, chunk); - } - LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: struct of output chunk: {}", reinterpret_cast(this), chunk.dumpStructure()); -} - -IProcessor::Status LBalancingChunksTransform::prepare() -{ - if (!finished && input.isFinished()) - { - finished = true; - return Status::Ready; - } - return ISimpleTransform::prepare(); -} - - BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), max_memory_usage(max_memory_usage_), squashing(min_block_size_rows, min_block_size_bytes), balance(header, min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), max_memory_usage(max_memory_usage_), balance(header, min_block_size_rows, min_block_size_bytes) { } @@ -146,36 +84,11 @@ void BalancingChunksTransform::transform(Chunk & chunk_) if (!finished) { Chunk res_chunk = balance.add(getInputPorts().front().getHeader().cloneWithColumns(chunk_.detachColumns())); - if (res_chunk.hasChunkInfo()) - { - // std::cerr << "BalancingTransform: adding chunk " << std::endl; - - // { - // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); - // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; - // if (!agg_info->chunks.empty()) - // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; - // } - } - else - LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk, not finished.", reinterpret_cast(this));/// ISSUE: it's not clear why finished label is not set std::swap(res_chunk, chunk_); } else { Chunk res_chunk = balance.add({}); - if (res_chunk.hasChunkInfo()) - { - // std::cerr << "BalancingTransform: finished adding, NumRows:" << res_chunk.getNumRows() << ", HasInfo: " << res_chunk.hasChunkInfo() << std::endl; - // { - // [[maybe_unused]]const auto * agg_info = typeid_cast(res_chunk.getChunkInfo().get()); - // std::cerr << "End of BalancingTransform: size of one group: " << agg_info->chunks.size() << std::endl; - // if (!agg_info->chunks.empty()) - // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl << std::endl; - // } - } - else - LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: not adding chunk on finished", reinterpret_cast(this)); std::swap(res_chunk, chunk_); } LOG_TRACE(getLogger("balancing"), "{}, 
BalancingTransform: struct of output chunk: {}, hasInfo: {}", reinterpret_cast(this), chunk_.dumpStructure(), chunk.hasChunkInfo()); diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/BalancingTransform.h index d992a14cdd4..8812a0b8c17 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/BalancingTransform.h @@ -1,96 +1,22 @@ #pragma once -#include #include -#include "Processors/Chunk.h" -#include "Processors/IProcessor.h" -#include "Processors/Transforms/ExceptionKeepingTransform.h" +#include #include namespace DB { -class BalancingTransform : public ExceptionKeepingTransform -{ -public: - explicit BalancingTransform( - const Block & header, size_t max_memory_usage_); - - String getName() const override { return "BalancingTransform"; } - - void work() override; - - const Chunks & getChunks() const - { - return chunks; - } - -protected: - void onConsume(Chunk chunk) override; - GenerateResult onGenerate() override; - void onFinish() override; - -private: - size_t CalculateBlockSize(const Block & block); - Chunks chunks; - Blocks blocks; - size_t blocks_size; - Chunk cur_chunk; - Chunk finish_chunk; - size_t max_memory_usage; -}; - -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. - - -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. -class LBalancingChunksTransform : public ISimpleTransform -{ -public: - explicit LBalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage, bool skip_empty_chunks_); - - String getName() const override { return "LBalancingChunksTransform"; } - - const Chunks & getChunks() const - { - return chunks; - } - -protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; - -private: - size_t CalculateBlockSize(const Block & block); - [[maybe_unused]] ChunksToSquash chunks_to_merge; - Chunks chunks; - Blocks blocks; - [[maybe_unused]] size_t blocks_size; - Chunk cur_chunk; - Chunk finish_chunk; - [[maybe_unused]] size_t max_memory_usage; - SquashingTransform squashing; - BalanceTransform balance; - [[maybe_unused]]size_t acc_size = 0; - - /// When consumption is finished we need to release the final chunk regardless of its size. 
- bool finished = false; -}; class BalancingChunksTransform : public IProcessor { public: BalancingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports); - // explicit BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage, bool skip_empty_chunks_); String getName() const override { return "BalancingChunksTransform"; } - const Chunks & getChunks() const - { - return chunks; - } + InputPorts & getInputPorts() { return inputs; } OutputPorts & getOutputPorts() { return outputs; } @@ -99,25 +25,15 @@ public: Status prepareConsume(); Status prepareSend(); - // void work() override; void transform(Chunk & chunk); protected: - // void transform(Chunk &) ; private: size_t CalculateBlockSize(const Block & block); - [[maybe_unused]] ChunksToSquash chunks_to_merge; - Chunks chunks; Chunk chunk; - Blocks blocks; - [[maybe_unused]] size_t blocks_size; - Chunk cur_chunk; - Chunk finish_chunk; - [[maybe_unused]] size_t max_memory_usage; - SquashingTransform squashing; + size_t max_memory_usage; BalanceTransform balance; - [[maybe_unused]]size_t acc_size = 0; bool has_data = false; std::vector was_output_processed; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 22ce3ba9359..5b68df6b6c6 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -1,6 +1,5 @@ #include -#include -#include "Common/logger_useful.h" +#include namespace DB { @@ -16,9 +15,7 @@ void SquashingChunksTransform::onConsume(Chunk chunk) { LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); if (auto block = squashing.add(std::move(chunk))) - { cur_chunk.setColumns(block.getColumns(), block.rows()); - } } SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() @@ -85,126 +82,4 @@ IProcessor::Status SimpleSquashingChunksTransform::prepare() } return ISimpleTransform::prepare(); } - -//maybe it makes sense to pass not the IProcessor entity, but the SimpleTransform? anyway we have one input and one output -ProcessorSquashingTransform::ProcessorSquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, [[maybe_unused]]size_t num_ports) - : IProcessor(InputPorts(1, header), OutputPorts(1, header)), squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -IProcessor::Status ProcessorSquashingTransform::prepare() -{ - Status status = Status::Ready; - - while (status == Status::Ready) - { - status = !has_data ? 
prepareConsume() - : prepareGenerate(); - } - - return status; -} - -IProcessor::Status ProcessorSquashingTransform::prepareConsume() -{ - LOG_TRACE(getLogger("balancing"), "prepareConsume"); - for (auto & input : getInputPorts()) - { - bool all_finished = true; - for (auto & output : outputs) - { - if (output.isFinished()) - continue; - - all_finished = false; - } - - if (all_finished) - { - input.close(); - return Status::Finished; - } - - if (input.isFinished()) - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; - - chunk = input.pull(); - has_data = true; - was_output_processed.assign(outputs.size(), false); - transform(chunk); - // if (chunk) - // chunks.push_back(std::move(chunk)); - } - return Status::Ready; -} - -void ProcessorSquashingTransform::transform(Chunk & chunk_) -{ - // [[maybe_unused]]const auto * agg_info = typeid_cast(chunk.getChunkInfo().get()); - // if (agg_info) - // { - // std::cerr << "Beginning of SquashingTransform: size of one group: " << agg_info->chunks.size() << std::endl; - // if (!agg_info->chunks.empty()) - // std::cerr << "!group is not empty, first column: " << agg_info->chunks[0].dumpStructure() << std::endl; - // } - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: Struct of input chunk: {}", reinterpret_cast(this), chunk_.dumpStructure()); - if (!finished) - { - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk_.hasChunkInfo()); - if (auto block = squashing.add(std::move(chunk_))) - chunk_.setColumns(block.getColumns(), block.rows()); - } - else - { - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), chunk_.hasChunkInfo()); - auto block = squashing.add({}); - chunk_.setColumns(block.getColumns(), block.rows()); - } -} - -IProcessor::Status ProcessorSquashingTransform::prepareGenerate() -{ - LOG_TRACE(getLogger("squashingProcessor"), "prepareGenerate"); - bool all_outputs_processed = true; - - size_t chunk_number = 0; - for (auto &output : getOutputPorts()) - { - auto & was_processed = was_output_processed[chunk_number]; - ++chunk_number; - - if (was_processed) - continue; - - if (output.isFinished()) - continue; - - if (!output.canPush()) - { - all_outputs_processed = false; - continue; - } - - LOG_TRACE(getLogger("squashingProcessor"), "chunk struct: {}", chunk.dumpStructure()); - output.push(chunk.clone()); - was_processed = true; - } - - if (all_outputs_processed) - { - has_data = false; - return Status::Ready; - } - return Status::PortFull; -} } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index f140f5274d7..5c7ad12889f 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -46,33 +46,4 @@ private: bool finished = false; }; - - -class ProcessorSquashingTransform : public IProcessor -{ -public: - explicit ProcessorSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports); - - String getName() const override { return "ProcessorSquashingTransform"; } - -protected: - InputPorts & getInputPorts() { return inputs; } - OutputPorts & getOutputPorts() { return outputs; } - - Status prepare() override; - Status prepareConsume(); - Status prepareGenerate(); - - // void work() override; - void transform(Chunk & 
chunk); - -private: - NewSquashingTransform squashing; - Chunk chunk; - bool has_data = false; - std::vector was_output_processed; - - /// When consumption is finished we need to release the final chunk regardless of its size. - bool finished = false; -}; } From 47efd981f04e21d3a8cad6ff86c91f0a4531d90e Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:29:07 +0000 Subject: [PATCH 003/139] style fix --- src/Interpreters/SquashingTransform.cpp | 6 +----- src/Processors/Transforms/BalancingTransform.cpp | 2 +- src/Processors/Transforms/BalancingTransform.h | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index a52b54653c1..4a259d0b7c9 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -128,7 +128,6 @@ bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const } - NewSquashingTransform::NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) @@ -160,10 +159,8 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) const auto *info = getInfoFromChunk(input_chunk); for (auto & one : info->chunks) - { append(std::move(one), info->data_type); - } - + { Block to_return; std::swap(to_return, accumulated_block); @@ -294,5 +291,4 @@ bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - } diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index 52d24fc9d01..58c2f052c1a 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -7,7 +7,7 @@ namespace DB namespace ErrorCodes { - extern const int MEMORY_LIMIT_EXCEEDED; + extern const int LOGICAL_ERROR; } BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports) diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/BalancingTransform.h index 8812a0b8c17..1b1d3ec6295 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/BalancingTransform.h @@ -16,8 +16,6 @@ public: String getName() const override { return "BalancingChunksTransform"; } - - InputPorts & getInputPorts() { return inputs; } OutputPorts & getOutputPorts() { return outputs; } From 149e2af36925ebc36bdd9bee2466ed7424bb259c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 10 Apr 2024 17:46:30 +0000 Subject: [PATCH 004/139] fix for one thread --- src/Interpreters/SquashingTransform.cpp | 5 +- src/Interpreters/SquashingTransform.h | 4 + .../Transforms/BalancingTransform.cpp | 86 +++++++++++-------- 3 files changed, 54 insertions(+), 41 deletions(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 4a259d0b7c9..62c819a6105 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -218,10 +218,7 @@ Chunk BalanceTransform::convertToChunk(std::vector &chunks) info->chunks.push_back(chunk.clone()); info->data_type = data_type; - if (!info->chunks.empty()) /// Note: This if is only for debugging, structure of chunk 
copies the structure of info - { /// it's possible to use only 'Chunk(header.cloneEmptyColumns(), 0, info)' - return Chunk({info->chunks[0].getColumns(), info->chunks[0].getNumRows(), info}); - } + chunks.clear(); return Chunk(header.cloneEmptyColumns(), 0, info); } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index fb6834e03be..4551b76e25f 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -86,6 +86,10 @@ public: BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Chunk add(Block && input_block); + bool isDataLeft() + { + return !chunks_to_merge_vec.empty(); + } private: std::vector chunks_to_merge_vec = {}; diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index 58c2f052c1a..a6a79f65ea4 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "Processors/IProcessor.h" namespace DB { @@ -17,6 +18,7 @@ BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t IProcessor::Status BalancingChunksTransform::prepare() { + LOG_TRACE(getLogger("balancingProcessor"), "prepare"); Status status = Status::Ready; while (status == Status::Ready) @@ -31,49 +33,58 @@ IProcessor::Status BalancingChunksTransform::prepare() IProcessor::Status BalancingChunksTransform::prepareConsume() { LOG_TRACE(getLogger("balancingProcessor"), "prepareConsume"); - for (auto & input : inputs) + finished = false; + while (!chunk.hasChunkInfo()) { - bool all_finished = true; - for (auto & output : outputs) - { - if (output.isFinished()) - continue; - - all_finished = false; - } - - if (all_finished) - { - input.close(); - return Status::Finished; - } - - if (input.isFinished()) + for (auto & input : inputs) { + bool all_finished = true; for (auto & output : outputs) - output.finish(); + { + if (output.isFinished()) + continue; - return Status::Finished; - } + all_finished = false; + } - input.setNeeded(); - if (!input.hasData()) - return Status::NeedData; + if (all_finished) + { + input.close(); + return Status::Finished; + } - chunk = input.pull(); - was_output_processed.assign(outputs.size(), false); - transform(chunk); - if (chunk.hasChunkInfo()) - { - LOG_TRACE(getLogger("balancingProcessor"), "hasData"); - has_data = true; - } - else - { - finished = true; - LOG_TRACE(getLogger("balancingProcessor"), "hasData, finished"); + if (input.isFinished()) + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + + input.setNeeded(); + if (!input.hasData()) + { + finished = true; + if (!balance.isDataLeft()) + return Status::NeedData; + else + { + transform(chunk); + has_data = true; + return Status::Ready; + } + } + + chunk = input.pull(); transform(chunk); - has_data = true; + was_output_processed.assign(outputs.size(), false); + if (chunk.hasChunkInfo()) + { + LOG_TRACE(getLogger("balancingProcessor"), "hasData"); + has_data = true; + return Status::Ready; + } + } } return Status::Ready; @@ -121,8 +132,9 @@ IProcessor::Status BalancingChunksTransform::prepareSend() } LOG_TRACE(getLogger("balancingProcessor"), "chunk struct: {}", chunk.dumpStructure()); - output.push(chunk.clone()); + output.push(std::move(chunk)); was_processed = true; + break; } if (all_outputs_processed) From 94eb0782a945f6276481dc14262cd90d27dd1ebd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 
10 Apr 2024 22:22:13 +0000 Subject: [PATCH 005/139] fix for multiple threads --- src/Processors/Transforms/BalancingTransform.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index a6a79f65ea4..73672be5da4 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -53,7 +53,7 @@ IProcessor::Status BalancingChunksTransform::prepareConsume() return Status::Finished; } - if (input.isFinished()) + if (input.isFinished() && !balance.isDataLeft()) { for (auto & output : outputs) output.finish(); @@ -117,7 +117,10 @@ IProcessor::Status BalancingChunksTransform::prepareSend() ++chunk_number; if (!chunk.hasChunkInfo()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk info must be not empty in prepareGenerate()"); + { + has_data = false; + return Status::Ready; + } if (was_processed) continue; From e5e076a778c951eb66e07e2b85de1d82fbd60bff Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 10 Apr 2024 22:34:50 +0000 Subject: [PATCH 006/139] style fix --- src/Processors/Transforms/BalancingTransform.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index 73672be5da4..deb1abdb2fe 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -6,11 +6,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports) : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), max_memory_usage(max_memory_usage_), balance(header, min_block_size_rows, min_block_size_bytes) { From 59718eafebcc23255d20ef73b400a4f9e4aa6219 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 11 Apr 2024 14:59:39 +0000 Subject: [PATCH 007/139] fix for unmatching types --- src/Interpreters/SquashingTransform.cpp | 26 ++++++++++++++++--------- src/Interpreters/SquashingTransform.h | 6 +++--- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index 62c819a6105..ca74bb7894a 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,5 +1,7 @@ #include +#include "Common/logger_useful.h" #include +#include "IO/WriteHelpers.h" namespace DB @@ -141,7 +143,7 @@ Block NewSquashingTransform::add(Chunk && input_chunk) const ChunksToSquash * getInfoFromChunk(const Chunk & chunk) { - auto info = chunk.getChunkInfo(); + const auto& info = chunk.getChunkInfo(); const auto * agg_info = typeid_cast(info.get()); return agg_info; @@ -158,8 +160,10 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) } const auto *info = getInfoFromChunk(input_chunk); - for (auto & one : info->chunks) - append(std::move(one), info->data_type); + for (size_t i = 0; i < info->chunks.size(); i++) + append(std::move(info->chunks[i]), info->data_types); + // for (auto & one : info->chunks) + // append(std::move(one), info->data_types); { Block to_return; @@ -169,15 +173,19 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) } template -void NewSquashingTransform::append(ReferenceType input_chunk, DataTypePtr data_type) +void 
NewSquashingTransform::append(ReferenceType input_chunk, DataTypes data_types) { + // LOG_TRACE(getLogger("Squashing"), "data_type: {}", data_type->getName()); if (input_chunk.getNumColumns() == 0) return; if (!accumulated_block) { - for (const ColumnPtr& column : input_chunk.getColumns()) + // for (const ColumnPtr& column : input_chunk.getColumns()) + for (size_t i = 0; i < input_chunk.getNumColumns(); ++ i) { - ColumnWithTypeAndName col = ColumnWithTypeAndName(column, data_type, " "); + String name = data_types[i]->getName() + toString(i); + LOG_TRACE(getLogger("Squashing"), "data_type: {}", data_types[i]->getName()); + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], data_types[i], name); accumulated_block.insert(accumulated_block.columns(), col); } return; @@ -216,7 +224,7 @@ Chunk BalanceTransform::convertToChunk(std::vector &chunks) auto info = std::make_shared(); for (auto &chunk : chunks) info->chunks.push_back(chunk.clone()); - info->data_type = data_type; + info->data_types = data_types; chunks.clear(); @@ -228,8 +236,8 @@ template Chunk BalanceTransform::addImpl(ReferenceType input_block) { Chunk input_chunk(input_block.getColumns(), input_block.rows()); - if (!data_type && !input_block.getDataTypes().empty()) - data_type = input_block.getDataTypes()[0]; + if (!input_block.getDataTypes().empty()) + data_types = input_block.getDataTypes(); if (!input_chunk) { Chunk res_chunk = convertToChunk(chunks_to_merge_vec); diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index 4551b76e25f..b04d12b9bcd 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -14,7 +14,7 @@ namespace DB struct ChunksToSquash : public ChunkInfo { mutable std::vector chunks = {}; - DataTypePtr data_type = nullptr; + DataTypes data_types = {}; }; /** Merging consecutive passed blocks to specified minimum size. 
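A note on the data_types plumbing above: a Chunk carries bare column pointers and a row count, so once a Block has been turned into a Chunk the type and name metadata is gone. That is why this patch has the balancing side record the source block's DataTypes inside ChunksToSquash, and the squashing side rebuild each ColumnWithTypeAndName from them. Below is a minimal standalone analogue of this ChunkInfo side-channel; MiniChunk, MiniInfo and TypedBatch are illustrative names, not ClickHouse types.

// Minimal sketch of passing metadata alongside raw data via an info object.
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct MiniInfo                        // plays the role of ChunkInfo
{
    virtual ~MiniInfo() = default;
};

struct TypedBatch : MiniInfo           // plays the role of ChunksToSquash
{
    std::vector<std::string> type_names;   // stands in for DataTypes
};

struct MiniChunk                       // data plus an optional info pointer
{
    std::vector<std::vector<int>> columns;
    std::shared_ptr<const MiniInfo> info;
};

int main()
{
    auto batch = std::make_shared<TypedBatch>();
    batch->type_names = {"UInt64"};
    MiniChunk chunk{{{1, 2, 3}}, batch};

    // The consumer recovers the metadata with a downcast, much like the
    // typeid_cast lookup on ChunksToSquash in the patch above.
    if (auto * typed = dynamic_cast<const TypedBatch *>(chunk.info.get()))
        std::cout << "column 0 is " << typed->type_names[0] << '\n';
}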
@@ -74,7 +74,7 @@ private: Block addImpl(ReferenceType chunk); template - void append(ReferenceType input_chunk, DataTypePtr data_type); + void append(ReferenceType input_chunk, DataTypes data_types); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; @@ -106,7 +106,7 @@ private: bool isEnoughSize(const std::vector & chunks); bool isEnoughSize(size_t rows, size_t bytes) const; void checkAndWaitMemoryAvailability(size_t bytes); - DataTypePtr data_type = nullptr; + DataTypes data_types = {}; MemoryTracker * memory_tracker; From 635b17aad2f8b53ae284a76698847774ef91a6e1 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 11 Apr 2024 15:46:09 +0000 Subject: [PATCH 008/139] chunk -> block in chunkInfo --- src/Interpreters/SquashingTransform.cpp | 150 ++++++++++++++++++ src/Interpreters/SquashingTransform.h | 57 +++++++ .../Transforms/BalancingTransform.h | 2 +- 3 files changed, 208 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index ca74bb7894a..ebd8a5f0c6e 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -296,4 +296,154 @@ bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } + +NewSquashingBlockTransform::NewSquashingBlockTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) +{ +} + +Block NewSquashingBlockTransform::add(Chunk && input_chunk) +{ + return addImpl(std::move(input_chunk)); +} + +const BlocksToSquash * getInfoFromChunkBlock(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); + + return agg_info; +} + +Block NewSquashingBlockTransform::addImpl(Chunk && input_chunk) +{ + if (!input_chunk.hasChunkInfo()) + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } + + const auto *info = getInfoFromChunkBlock(input_chunk); + for (auto & block : info->blocks) + append(std::move(block)); + + { + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; + } +} + +void NewSquashingBlockTransform::append(Block && input_block) +{ + if (input_block.columns() == 0) + return; + if (!accumulated_block) + { + for (size_t i = 0; i < input_block.columns(); ++ i) + { + LOG_TRACE(getLogger("Squashing"), "data_type: {}", input_block.getDataTypeNames()[i]); + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_block.getColumns()[i], input_block.getDataTypes()[i], input_block.getNames()[i]); + accumulated_block.insert(accumulated_block.columns(), col); + } + return; + } + + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_block.getColumns()[i]; + + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); + } +} + +BalanceBlockTransform::BalanceBlockTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) + , header(std::move(header_)) +{ + // Use query-level memory tracker + if (auto * memory_tracker_child = 
CurrentThread::getMemoryTracker()) + memory_tracker = memory_tracker_child->getParent(); +} + +Chunk BalanceBlockTransform::add(Block && input_block) +{ + return addImpl(std::move(input_block)); +} + +Chunk BalanceBlockTransform::addImpl(Block && input_block) +{ + Chunk input_chunk(input_block.getColumns(), input_block.rows()); + + if (!input_chunk) + { + Chunk res_chunk = convertToChunk(blocks_to_merge_vec); + return res_chunk; + } + + if (isEnoughSize(blocks_to_merge_vec)) + blocks_to_merge_vec.clear(); + + if (input_chunk) + blocks_to_merge_vec.push_back(std::move(input_block)); + + if (isEnoughSize(blocks_to_merge_vec)) + { + Chunk res_chunk = convertToChunk(blocks_to_merge_vec); + return res_chunk; + } + return input_chunk; +} + +Chunk BalanceBlockTransform::convertToChunk(std::vector &blocks) +{ + if (blocks.empty()) + return {}; + + auto info = std::make_shared(); + for (auto &block : blocks) + info->blocks.push_back(std::move(block)); + + blocks.clear(); // we can remove this + + return Chunk(header.cloneEmptyColumns(), 0, info); +} + +bool BalanceBlockTransform::isEnoughSize(const std::vector & blocks) +{ + size_t rows = 0; + size_t bytes = 0; + + for (const Block & block : blocks) + { + rows += block.rows(); + bytes += block.bytes(); + } + checkAndWaitMemoryAvailability(bytes); + + return isEnoughSize(rows, bytes); +} + +void BalanceBlockTransform::checkAndWaitMemoryAvailability(size_t bytes) +{ + // bytes_used += bytes; + if (const auto hard_limit = memory_tracker->getHardLimit() != 0) + { + auto free_memory = hard_limit - memory_tracker->get(); + while (Int64(bytes) >= free_memory) + free_memory = hard_limit - memory_tracker->get(); + } +} + +bool BalanceBlockTransform::isEnoughSize(size_t rows, size_t bytes) const +{ + return (!min_block_size_rows && !min_block_size_bytes) + || (min_block_size_rows && rows >= min_block_size_rows) + || (min_block_size_bytes && bytes >= min_block_size_bytes); +} } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index b04d12b9bcd..792a8c2efcf 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -113,4 +113,61 @@ private: Chunk convertToChunk(std::vector &chunks); }; +class NewSquashingBlockTransform +{ +public: + NewSquashingBlockTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + + Block add(Chunk && input_chunk); + +private: + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + + Block addImpl(Chunk && chunk); + + void append(Block && input_block); + + bool isEnoughSize(const Block & block); + bool isEnoughSize(size_t rows, size_t bytes) const; +}; + +struct BlocksToSquash : public ChunkInfo +{ + mutable std::vector blocks = {}; +}; + +class BalanceBlockTransform +{ +public: + BalanceBlockTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + + Chunk add(Block && input_block); + bool isDataLeft() + { + return !blocks_to_merge_vec.empty(); + } + +private: + std::vector blocks_to_merge_vec = {}; + size_t min_block_size_rows; + size_t min_block_size_bytes; + + Block accumulated_block; + const Block header; + + // template + Chunk addImpl(Block && input_block); + + bool isEnoughSize(const std::vector & blocks); + bool isEnoughSize(size_t rows, size_t bytes) const; + void checkAndWaitMemoryAvailability(size_t bytes); + + MemoryTracker * memory_tracker; + + Chunk convertToChunk(std::vector &blocks); +}; + } diff --git a/src/Processors/Transforms/BalancingTransform.h 
b/src/Processors/Transforms/BalancingTransform.h index 1b1d3ec6295..a50b38c773f 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/BalancingTransform.h @@ -31,7 +31,7 @@ private: size_t CalculateBlockSize(const Block & block); Chunk chunk; size_t max_memory_usage; - BalanceTransform balance; + BalanceBlockTransform balance; bool has_data = false; std::vector was_output_processed; From 958b83a76a588c98b76f8c310b63cf2798fdbc1a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 11 Apr 2024 18:36:54 +0000 Subject: [PATCH 009/139] changed Block -> Chunk and removed dataTypes, beautified --- src/Interpreters/InterpreterInsertQuery.cpp | 14 +- src/Interpreters/SquashingTransform.cpp | 245 +++--------------- src/Interpreters/SquashingTransform.h | 75 +----- .../Transforms/BalancingTransform.cpp | 10 +- .../Transforms/BalancingTransform.h | 2 +- .../Transforms/SquashingChunksTransform.cpp | 11 +- .../Transforms/SquashingChunksTransform.h | 4 +- 7 files changed, 60 insertions(+), 301 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 0041a0f0846..31fef267afc 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -669,20 +669,20 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); - // auto balancing = std::make_shared( - // chain.getInputHeader(), - // table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - // table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - // settings.max_memory_usage, true); + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, + settings.max_memory_usage, true); - // chain.addSource(std::move(balancing)); + chain.addSource(std::move(balancing)); } auto context_ptr = getContext(); diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index ebd8a5f0c6e..d4370b45119 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -1,7 +1,5 @@ #include -#include "Common/logger_useful.h" #include -#include "IO/WriteHelpers.h" namespace DB @@ -129,28 +127,19 @@ bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -NewSquashingTransform::NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) +NewSquashingTransform::NewSquashingTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(std::move(header_)) { } Block NewSquashingTransform::add(Chunk && input_chunk) { - return addImpl(std::move(input_chunk)); + return addImpl(std::move(input_chunk)); } -const ChunksToSquash * getInfoFromChunk(const Chunk & chunk) -{ - const auto& info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast(info.get()); - - return agg_info; -} - -template -Block NewSquashingTransform::addImpl(ReferenceType input_chunk) +Block NewSquashingTransform::addImpl(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) { @@ -160,10 +149,8 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) } const auto *info = getInfoFromChunk(input_chunk); - for (size_t i = 0; i < info->chunks.size(); i++) - append(std::move(info->chunks[i]), info->data_types); - // for (auto & one : info->chunks) - // append(std::move(one), info->data_types); + for (auto & chunk : info->chunks) + append(chunk.clone()); { Block to_return; @@ -172,20 +159,15 @@ Block NewSquashingTransform::addImpl(ReferenceType input_chunk) } } -template -void NewSquashingTransform::append(ReferenceType input_chunk, DataTypes data_types) +void NewSquashingTransform::append(Chunk && input_chunk) { - // LOG_TRACE(getLogger("Squashing"), "data_type: {}", data_type->getName()); if (input_chunk.getNumColumns() == 0) return; if (!accumulated_block) { - // for (const ColumnPtr& column : input_chunk.getColumns()) for (size_t i = 0; i < input_chunk.getNumColumns(); ++ i) { - String name = data_types[i]->getName() + toString(i); - LOG_TRACE(getLogger("Squashing"), "data_type: {}", data_types[i]->getName()); - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], data_types[i], name); + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); accumulated_block.insert(accumulated_block.columns(), col); } return; @@ -201,6 +183,14 @@ void NewSquashingTransform::append(ReferenceType input_chunk, DataTypes data_typ } } +const ChunksToSquash* NewSquashingTransform::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); + + return agg_info; +} + BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) @@ -213,31 +203,13 @@ BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, s Chunk 
BalanceTransform::add(Block && input_block) { - return addImpl(std::move(input_block)); + return addImpl(std::move(input_block)); } -Chunk BalanceTransform::convertToChunk(std::vector &chunks) -{ - if (chunks.empty()) - return {}; - - auto info = std::make_shared(); - for (auto &chunk : chunks) - info->chunks.push_back(chunk.clone()); - info->data_types = data_types; - - chunks.clear(); - - return Chunk(header.cloneEmptyColumns(), 0, info); -} - - -template -Chunk BalanceTransform::addImpl(ReferenceType input_block) +Chunk BalanceTransform::addImpl(Block && input_block) { Chunk input_chunk(input_block.getColumns(), input_block.rows()); - if (!input_block.getDataTypes().empty()) - data_types = input_block.getDataTypes(); + if (!input_chunk) { Chunk res_chunk = convertToChunk(chunks_to_merge_vec); @@ -248,7 +220,7 @@ Chunk BalanceTransform::addImpl(ReferenceType input_block) chunks_to_merge_vec.clear(); if (input_chunk) - chunks_to_merge_vec.push_back(input_chunk.clone()); + chunks_to_merge_vec.push_back(std::move(input_chunk)); if (isEnoughSize(chunks_to_merge_vec)) { @@ -258,6 +230,20 @@ Chunk BalanceTransform::addImpl(ReferenceType input_block) return input_chunk; } +Chunk BalanceTransform::convertToChunk(std::vector &chunks) +{ + if (chunks.empty()) + return {}; + + auto info = std::make_shared(); + for (auto &chunk : chunks) + info->chunks.push_back(std::move(chunk)); + + chunks.clear(); // we can remove this + + return Chunk(header.cloneEmptyColumns(), 0, info); +} + bool BalanceTransform::isEnoughSize(const std::vector & chunks) { size_t rows = 0; @@ -268,14 +254,17 @@ bool BalanceTransform::isEnoughSize(const std::vector & chunks) rows += chunk.getNumRows(); bytes += chunk.bytes(); } + + if (!isEnoughSize(rows, bytes)) + return false; + checkAndWaitMemoryAvailability(bytes); - return isEnoughSize(rows, bytes); + return true; } void BalanceTransform::checkAndWaitMemoryAvailability(size_t bytes) { - // bytes_used += bytes; if (const auto hard_limit = memory_tracker->getHardLimit() != 0) { auto free_memory = hard_limit - memory_tracker->get(); @@ -284,166 +273,10 @@ void BalanceTransform::checkAndWaitMemoryAvailability(size_t bytes) } } -bool BalanceTransform::isEnoughSize(const Chunk & chunk) -{ - return isEnoughSize(chunk.getNumRows(), chunk.bytes()); -} - - bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } - -NewSquashingBlockTransform::NewSquashingBlockTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block NewSquashingBlockTransform::add(Chunk && input_chunk) -{ - return addImpl(std::move(input_chunk)); -} - -const BlocksToSquash * getInfoFromChunkBlock(const Chunk & chunk) -{ - const auto& info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast(info.get()); - - return agg_info; -} - -Block NewSquashingBlockTransform::addImpl(Chunk && input_chunk) -{ - if (!input_chunk.hasChunkInfo()) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - const auto *info = getInfoFromChunkBlock(input_chunk); - for (auto & block : info->blocks) - append(std::move(block)); - - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } -} - -void NewSquashingBlockTransform::append(Block && input_block) -{ - if 
(input_block.columns() == 0) - return; - if (!accumulated_block) - { - for (size_t i = 0; i < input_block.columns(); ++ i) - { - LOG_TRACE(getLogger("Squashing"), "data_type: {}", input_block.getDataTypeNames()[i]); - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_block.getColumns()[i], input_block.getDataTypes()[i], input_block.getNames()[i]); - accumulated_block.insert(accumulated_block.columns(), col); - } - return; - } - - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getColumns()[i]; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } -} - -BalanceBlockTransform::BalanceBlockTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) - , header(std::move(header_)) -{ - // Use query-level memory tracker - if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) - memory_tracker = memory_tracker_child->getParent(); -} - -Chunk BalanceBlockTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Chunk BalanceBlockTransform::addImpl(Block && input_block) -{ - Chunk input_chunk(input_block.getColumns(), input_block.rows()); - - if (!input_chunk) - { - Chunk res_chunk = convertToChunk(blocks_to_merge_vec); - return res_chunk; - } - - if (isEnoughSize(blocks_to_merge_vec)) - blocks_to_merge_vec.clear(); - - if (input_chunk) - blocks_to_merge_vec.push_back(std::move(input_block)); - - if (isEnoughSize(blocks_to_merge_vec)) - { - Chunk res_chunk = convertToChunk(blocks_to_merge_vec); - return res_chunk; - } - return input_chunk; -} - -Chunk BalanceBlockTransform::convertToChunk(std::vector &blocks) -{ - if (blocks.empty()) - return {}; - - auto info = std::make_shared(); - for (auto &block : blocks) - info->blocks.push_back(std::move(block)); - - blocks.clear(); // we can remove this - - return Chunk(header.cloneEmptyColumns(), 0, info); -} - -bool BalanceBlockTransform::isEnoughSize(const std::vector & blocks) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const Block & block : blocks) - { - rows += block.rows(); - bytes += block.bytes(); - } - checkAndWaitMemoryAvailability(bytes); - - return isEnoughSize(rows, bytes); -} - -void BalanceBlockTransform::checkAndWaitMemoryAvailability(size_t bytes) -{ - // bytes_used += bytes; - if (const auto hard_limit = memory_tracker->getHardLimit() != 0) - { - auto free_memory = hard_limit - memory_tracker->get(); - while (Int64(bytes) >= free_memory) - free_memory = hard_limit - memory_tracker->get(); - } -} - -bool BalanceBlockTransform::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} } diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index 792a8c2efcf..ce54c49e441 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -14,7 +14,6 @@ namespace DB struct ChunksToSquash : public ChunkInfo { mutable std::vector chunks = {}; - DataTypes data_types = {}; }; /** Merging consecutive passed blocks to specified minimum size. 
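One line in checkAndWaitMemoryAvailability() above is worth a second look: in `if (const auto hard_limit = memory_tracker->getHardLimit() != 0)`, the initializer binds hard_limit to the result of the comparison rather than to the limit itself, so the later `hard_limit - memory_tracker->get()` arithmetic runs on a bool promoted to 1. The snippet below is self-contained, with a stubbed getHardLimit() standing in for the real MemoryTracker call, and shows the if-with-initializer form that keeps the intended value.

#include <cstdint>
#include <iostream>

int64_t getHardLimit() { return 1'000'000; }   // stub, not the real tracker

int main()
{
    // As written in the patch: hard_limit is the comparison result (a bool),
    // so any arithmetic on it starts from 1, not from the limit.
    if (const auto hard_limit = getHardLimit() != 0)
        std::cout << "base value as written: " << int64_t{hard_limit} << '\n';   // 1

    // An if-with-initializer keeps the value and the test separate.
    if (const auto hard_limit = getHardLimit(); hard_limit != 0)
        std::cout << "intended base value:   " << hard_limit << '\n';            // 1000000
}

Separately, the surrounding while loop re-reads the tracker in a tight spin without yielding; whether that matters here depends on how often the limit is actually hit.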
@@ -60,7 +59,7 @@ private: class NewSquashingTransform { public: - NewSquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + NewSquashingTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Block add(Chunk && input_chunk); @@ -69,12 +68,13 @@ private: size_t min_block_size_bytes; Block accumulated_block; + const Block header; - template - Block addImpl(ReferenceType chunk); + Block addImpl(Chunk && chunk); - template - void append(ReferenceType input_chunk, DataTypes data_types); + const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + + void append(Chunk && input_chunk); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; @@ -96,78 +96,17 @@ private: size_t min_block_size_rows; size_t min_block_size_bytes; - Chunk accumulated_block; const Block header; - template - Chunk addImpl(ReferenceType input_block); + Chunk addImpl(Block && input_block); - bool isEnoughSize(const Chunk & chunk); bool isEnoughSize(const std::vector & chunks); bool isEnoughSize(size_t rows, size_t bytes) const; void checkAndWaitMemoryAvailability(size_t bytes); - DataTypes data_types = {}; MemoryTracker * memory_tracker; Chunk convertToChunk(std::vector &chunks); }; -class NewSquashingBlockTransform -{ -public: - NewSquashingBlockTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); - - Block add(Chunk && input_chunk); - -private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - - Block addImpl(Chunk && chunk); - - void append(Block && input_block); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; - -struct BlocksToSquash : public ChunkInfo -{ - mutable std::vector blocks = {}; -}; - -class BalanceBlockTransform -{ -public: - BalanceBlockTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); - - Chunk add(Block && input_block); - bool isDataLeft() - { - return !blocks_to_merge_vec.empty(); - } - -private: - std::vector blocks_to_merge_vec = {}; - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - const Block header; - - // template - Chunk addImpl(Block && input_block); - - bool isEnoughSize(const std::vector & blocks); - bool isEnoughSize(size_t rows, size_t bytes) const; - void checkAndWaitMemoryAvailability(size_t bytes); - - MemoryTracker * memory_tracker; - - Chunk convertToChunk(std::vector &blocks); -}; - } diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index deb1abdb2fe..7a9edbe5d86 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -1,7 +1,5 @@ #include -#include -#include -#include "Processors/IProcessor.h" +#include namespace DB { @@ -13,7 +11,6 @@ BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t IProcessor::Status BalancingChunksTransform::prepare() { - LOG_TRACE(getLogger("balancingProcessor"), "prepare"); Status status = Status::Ready; while (status == Status::Ready) @@ -27,7 +24,6 @@ IProcessor::Status BalancingChunksTransform::prepare() IProcessor::Status BalancingChunksTransform::prepareConsume() { - LOG_TRACE(getLogger("balancingProcessor"), "prepareConsume"); finished = false; while (!chunk.hasChunkInfo()) { @@ -75,7 +71,6 @@ IProcessor::Status BalancingChunksTransform::prepareConsume() was_output_processed.assign(outputs.size(), false); if 
(chunk.hasChunkInfo()) { - LOG_TRACE(getLogger("balancingProcessor"), "hasData"); has_data = true; return Status::Ready; } @@ -97,12 +92,10 @@ void BalancingChunksTransform::transform(Chunk & chunk_) Chunk res_chunk = balance.add({}); std::swap(res_chunk, chunk_); } - LOG_TRACE(getLogger("balancing"), "{}, BalancingTransform: struct of output chunk: {}, hasInfo: {}", reinterpret_cast(this), chunk_.dumpStructure(), chunk.hasChunkInfo()); } IProcessor::Status BalancingChunksTransform::prepareSend() { - LOG_TRACE(getLogger("balancingProcessor"), "prepareGenerate {}", chunk.dumpStructure()); bool all_outputs_processed = true; size_t chunk_number = 0; @@ -129,7 +122,6 @@ IProcessor::Status BalancingChunksTransform::prepareSend() continue; } - LOG_TRACE(getLogger("balancingProcessor"), "chunk struct: {}", chunk.dumpStructure()); output.push(std::move(chunk)); was_processed = true; break; diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/BalancingTransform.h index a50b38c773f..1b1d3ec6295 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/BalancingTransform.h @@ -31,7 +31,7 @@ private: size_t CalculateBlockSize(const Block & block); Chunk chunk; size_t max_memory_usage; - BalanceBlockTransform balance; + BalanceTransform balance; bool has_data = false; std::vector was_output_processed; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 5b68df6b6c6..ec226a56548 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -7,13 +7,12 @@ namespace DB SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingChunksTransform::onConsume(Chunk chunk) { - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); if (auto block = squashing.add(std::move(chunk))) cur_chunk.setColumns(block.getColumns(), block.rows()); } @@ -29,9 +28,7 @@ SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, structure of block: {}", reinterpret_cast(this), block.dumpStructure()); finish_chunk.setColumns(block.getColumns(), block.rows()); - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); } void SquashingChunksTransform::work() @@ -52,7 +49,7 @@ void SquashingChunksTransform::work() } SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, [[maybe_unused]] bool skip_empty_chunks_) + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) { } @@ -61,13 +58,11 @@ void SimpleSquashingChunksTransform::transform(Chunk & chunk) { if (!finished) { - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: !finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); - if (auto block = 
squashing.add(std::move(chunk))) + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) chunk.setColumns(block.getColumns(), block.rows()); } else { - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), chunk.hasChunkInfo()); auto block = squashing.add({}); chunk.setColumns(block.getColumns(), block.rows()); } diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 5c7ad12889f..4bcf2216182 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -32,7 +32,7 @@ private: class SimpleSquashingChunksTransform : public ISimpleTransform { public: - explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, bool skip_empty_chunks_ = true); + explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); String getName() const override { return "SimpleSquashingTransform"; } @@ -42,7 +42,7 @@ protected: IProcessor::Status prepare() override; private: - NewSquashingTransform squashing; + SquashingTransform squashing; bool finished = false; }; From ba8af1fcd87c1d0bd419249cab37ccd7a0add564 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 12 Apr 2024 16:21:41 +0000 Subject: [PATCH 010/139] fix for projections --- src/Interpreters/InterpreterInsertQuery.cpp | 4 ++-- src/Processors/Transforms/BalancingTransform.cpp | 4 ++-- src/Processors/Transforms/BalancingTransform.h | 3 +-- src/Storages/ProjectionsDescription.cpp | 3 +++ 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 31fef267afc..412fba0c763 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -609,7 +609,7 @@ BlockIO InterpreterInsertQuery::execute() header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - settings.max_memory_usage, presink_chains.size())); + presink_chains.size())); pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { @@ -680,7 +680,7 @@ BlockIO InterpreterInsertQuery::execute() chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, - settings.max_memory_usage, true); + true); chain.addSource(std::move(balancing)); } diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index 7a9edbe5d86..dab73eae2c0 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -4,8 +4,8 @@ namespace DB { -BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), max_memory_usage(max_memory_usage_), balance(header, min_block_size_rows, min_block_size_bytes) +BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/BalancingTransform.h index 1b1d3ec6295..a8a8bc5cfab 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/BalancingTransform.h @@ -12,7 +12,7 @@ class BalancingChunksTransform : public IProcessor { public: BalancingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t max_memory_usage_, size_t num_ports); + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports); String getName() const override { return "BalancingChunksTransform"; } @@ -30,7 +30,6 @@ protected: private: size_t CalculateBlockSize(const Block & block); Chunk chunk; - size_t max_memory_usage; BalanceTransform balance; bool has_data = false; std::vector<bool> was_output_processed; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 08ebe3a10d0..55639641c95 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -20,6 +20,8 @@ #include #include #include +#include "Interpreters/SquashingTransform.h" +#include "Processors/Transforms/BalancingTransform.h" namespace DB @@ -310,6 +312,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation.
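With max_memory_usage gone from the constructor, the literal true passed as the last argument at call sites (including the ProjectionsDescription line just below) binds to the trailing size_t num_ports parameter through an implicit bool-to-size_t conversion, yielding a single port. A standalone illustration; makeBalancer is a stand-in signature, not the real constructor.

#include <cstddef>
#include <iostream>

// Stand-in for a constructor whose parameter list ends in `size_t num_ports`.
void makeBalancer(size_t min_rows, size_t min_bytes, size_t num_ports)
{
    std::cout << "rows " << min_rows << ", bytes " << min_bytes
              << ", ports: " << num_ports << '\n';
}

int main()
{
    makeBalancer(8192, 0, true);            // compiles; bool converts to 1
    makeBalancer(8192, 0, /*num_ports=*/1); // same effect, intent spelled out
}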
+ builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, true)); builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); From 24432f875abd5b5f77050f986e999bc15fda595d Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 24 Apr 2024 16:15:47 +0200 Subject: [PATCH 011/139] empty commit From 0065a4cc6e8ac7eff3e72765f5ae0a7eb593ed2d Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 24 Apr 2024 14:55:01 +0000 Subject: [PATCH 012/139] fixing bugs by adding old transform to some parts --- src/Interpreters/InterpreterInsertQuery.cpp | 4 +- .../Transforms/SquashingChunksTransform.cpp | 50 ++++++++++++++++++- .../Transforms/SquashingChunksTransform.h | 23 ++++++++- .../Transforms/buildPushingToViewsChain.cpp | 9 +++- src/Storages/ProjectionsDescription.cpp | 2 +- 5 files changed, 81 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 412fba0c763..06ae92f7f0b 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -613,7 +613,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -669,7 +669,7 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index ec226a56548..00e430933be 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -7,13 +7,13 @@ namespace DB SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } void SquashingChunksTransform::onConsume(Chunk chunk) { - if (auto block = squashing.add(std::move(chunk))) + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) cur_chunk.setColumns(block.getColumns(), block.rows()); } @@ -29,6 +29,7 @@ void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); finish_chunk.setColumns(block.getColumns(), block.rows()); + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); } void SquashingChunksTransform::work() @@ -48,6 +49,51 @@ void SquashingChunksTransform::work() } } +NewSquashingChunksTransform::NewSquashingChunksTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void NewSquashingChunksTransform::onConsume(Chunk chunk) +{ + if (auto block = squashing.add(std::move(chunk))) + cur_chunk.setColumns(block.getColumns(), block.rows()); +} + +NewSquashingChunksTransform::GenerateResult NewSquashingChunksTransform::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + +void NewSquashingChunksTransform::onFinish() +{ + auto block = squashing.add({}); + finish_chunk.setColumns(block.getColumns(), block.rows()); + LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); +} + +void NewSquashingChunksTransform::work() +{ + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } +} + SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 4bcf2216182..1db3d46371c 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -7,6 +7,27 @@ namespace DB { +class NewSquashingChunksTransform : public ExceptionKeepingTransform +{ +public: + explicit NewSquashingChunksTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const override { return "SquashingTransform"; } + + void work() override; + +protected: + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; + void onFinish() override; + +private: + NewSquashingTransform squashing; + Chunk 
cur_chunk; + Chunk finish_chunk; +}; + class SquashingChunksTransform : public ExceptionKeepingTransform { public: @@ -23,7 +44,7 @@ protected: void onFinish() override; private: - NewSquashingTransform squashing; + SquashingTransform squashing; Chunk cur_chunk; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index dd07d043599..5c16cdbe84c 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -368,10 +369,16 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + true)); } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 55639641c95..155c32e30ae 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -313,7 +313,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
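A note on ordering in the buildPushingToViewsChain hunk above: the squashing source is added first and the balancing source second, yet the balancer has to see the data first, because the squashing stage only knows how to unpack the ChunksToSquash info the balancer attaches. That works out if Chain::addSource prepends each new processor, making the last-added source the first stage in data flow; the toy below demonstrates that assumed semantic, not the real Chain API.

#include <deque>
#include <iostream>
#include <string>

int main()
{
    std::deque<std::string> chain;
    chain.push_front("squashing");   // out.addSource(squashing transform)
    chain.push_front("balancing");   // out.addSource(balancing transform)

    for (const auto & stage : chain) // iterate in data-flow order
        std::cout << stage << " -> ";
    std::cout << "sink\n";           // prints: balancing -> squashing -> sink
}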
builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, true)); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); From 7ce5e5a38c4e0eb6e16695aba4708106330a66c9 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 24 Apr 2024 15:36:11 +0000 Subject: [PATCH 013/139] SquashingChunksTranform -> (BalancingChunksTransform && SquashingChunksTranform) || SimpleSquashingChunksTranform --- programs/copier/Internals.cpp | 3 +- src/Interpreters/InterpreterInsertQuery.cpp | 4 +- .../Transforms/SquashingChunksTransform.cpp | 50 +------------------ .../Transforms/SquashingChunksTransform.h | 23 +-------- .../Transforms/buildPushingToViewsChain.cpp | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 4 +- src/Storages/WindowView/StorageWindowView.cpp | 4 +- 8 files changed, 14 insertions(+), 80 deletions(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0cfff7e3f6c..bb672f5cfbd 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -62,7 +63,7 @@ std::shared_ptr createASTStorageDistributed( Block getBlockWithAllStreamData(QueryPipelineBuilder builder) { - builder.addTransform(std::make_shared( + builder.addTransform(std::make_shared( builder.getHeader(), std::numeric_limits::max(), std::numeric_limits::max())); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 06ae92f7f0b..412fba0c763 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -613,7 +613,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -669,7 +669,7 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index 00e430933be..ec226a56548 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -7,13 +7,13 @@ namespace DB SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingChunksTransform::onConsume(Chunk chunk) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + if (auto block = squashing.add(std::move(chunk))) cur_chunk.setColumns(block.getColumns(), block.rows()); } @@ -29,7 +29,6 @@ void SquashingChunksTransform::onFinish() { auto block = squashing.add({}); finish_chunk.setColumns(block.getColumns(), block.rows()); - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); } void SquashingChunksTransform::work() @@ -49,51 +48,6 @@ void SquashingChunksTransform::work() } } -NewSquashingChunksTransform::NewSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) -{ -} - -void NewSquashingChunksTransform::onConsume(Chunk chunk) -{ - if (auto block = squashing.add(std::move(chunk))) - cur_chunk.setColumns(block.getColumns(), block.rows()); -} - -NewSquashingChunksTransform::GenerateResult NewSquashingChunksTransform::onGenerate() -{ - GenerateResult res; - res.chunk = std::move(cur_chunk); - res.is_done = true; - return res; -} - -void NewSquashingChunksTransform::onFinish() -{ - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); - LOG_TRACE(getLogger("squashing"), "{}, SquashingTransform: finished, hasInfo: {}", reinterpret_cast(this), finish_chunk.hasChunkInfo()); -} - -void NewSquashingChunksTransform::work() -{ - if (stage == Stage::Exception) - { - data.chunk.clear(); - ready_input = false; - return; - } - - ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } -} - SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 1db3d46371c..4bcf2216182 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -7,27 +7,6 @@ namespace DB { -class NewSquashingChunksTransform : public ExceptionKeepingTransform -{ -public: - explicit NewSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); - - String getName() const override { return "SquashingTransform"; } - - void work() override; - -protected: - void onConsume(Chunk chunk) override; - GenerateResult onGenerate() override; - void onFinish() override; - -private: - NewSquashingTransform squashing; - Chunk 
cur_chunk; - Chunk finish_chunk; -}; - class SquashingChunksTransform : public ExceptionKeepingTransform { public: @@ -44,7 +23,7 @@ protected: void onFinish() override; private: - SquashingTransform squashing; + NewSquashingTransform squashing; Chunk cur_chunk; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 5c16cdbe84c..da7f10990e5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -369,7 +369,7 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); @@ -625,7 +625,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 2f011567b90..b019660cf5e 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -667,7 +667,7 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) /// and two-level aggregation is triggered). builder.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared( + return std::make_shared( cur_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 155c32e30ae..14713d151d7 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -16,12 +16,12 @@ #include #include #include +#include #include #include #include #include #include "Interpreters/SquashingTransform.h" -#include "Processors/Transforms/BalancingTransform.h" namespace DB @@ -313,7 +313,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
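The flush predicate shared by all of these transforms treats the two thresholds as alternatives: with both unset, every batch counts as big enough, and otherwise reaching either one triggers a flush. Here it is lifted into a free function with a few worked cases; the numeric values are in the ballpark of the usual min_insert_block_size_rows / min_insert_block_size_bytes settings (about a million rows and 256 MiB).

#include <cassert>
#include <cstddef>

bool isEnoughSize(size_t rows, size_t bytes, size_t min_rows, size_t min_bytes)
{
    return (!min_rows && !min_bytes)
        || (min_rows && rows >= min_rows)
        || (min_bytes && bytes >= min_bytes);
}

int main()
{
    assert(isEnoughSize(10, 100, 0, 0));                      // no thresholds: always flush
    assert(!isEnoughSize(100, 1 << 20, 1048576, 0));          // row threshold not reached
    assert(isEnoughSize(1048576, 0, 1048576, 0));             // row threshold reached
    assert(isEnoughSize(10, 268435456, 1048576, 268435456));  // byte threshold alone suffices
}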
builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, true)); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b1984a947c8..a3bc09f8d72 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -610,7 +610,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) }); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); @@ -1498,7 +1498,7 @@ void StorageWindowView::writeIntoWindowView( builder = select_block.buildQueryPipeline(); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, local_context->getSettingsRef().min_insert_block_size_rows, local_context->getSettingsRef().min_insert_block_size_bytes); From 91b189ab534936fafdaf522b71d02fd4ba3e60cc Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 24 Apr 2024 17:40:15 +0200 Subject: [PATCH 014/139] Update Internals.cpp --- programs/copier/Internals.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index bb672f5cfbd..650d606295f 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include From 2eba133b536a7f22ba100792e6c594d42afb9278 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 24 Apr 2024 15:51:23 +0000 Subject: [PATCH 015/139] implement SquashingChunksTransformForBalancing --- programs/copier/Internals.cpp | 2 +- src/Interpreters/InterpreterInsertQuery.cpp | 4 +- .../Transforms/SquashingChunksTransform.cpp | 48 ++++++++++++++++++- .../Transforms/SquashingChunksTransform.h | 21 ++++++++ .../Transforms/buildPushingToViewsChain.cpp | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/ProjectionsDescription.cpp | 3 +- src/Storages/WindowView/StorageWindowView.cpp | 4 +- 8 files changed, 76 insertions(+), 12 deletions(-) diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 650d606295f..0cfff7e3f6c 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -62,7 +62,7 @@ std::shared_ptr createASTStorageDistributed( Block getBlockWithAllStreamData(QueryPipelineBuilder builder) { - builder.addTransform(std::make_shared( + builder.addTransform(std::make_shared( builder.getHeader(), std::numeric_limits::max(), std::numeric_limits::max())); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 412fba0c763..50f7bc91d90 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -613,7 +613,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -669,7 +669,7 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp index ec226a56548..62b87061344 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ b/src/Processors/Transforms/SquashingChunksTransform.cpp @@ -7,13 +7,13 @@ namespace DB SquashingChunksTransform::SquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } void SquashingChunksTransform::onConsume(Chunk chunk) { - if (auto block = squashing.add(std::move(chunk))) + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) cur_chunk.setColumns(block.getColumns(), block.rows()); } @@ -48,6 +48,50 @@ void SquashingChunksTransform::work() } } +SquashingChunksTransformForBalancing::SquashingChunksTransformForBalancing( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(header, min_block_size_rows, min_block_size_bytes) +{ +} + +void SquashingChunksTransformForBalancing::onConsume(Chunk chunk) +{ + if (auto block = squashing.add(std::move(chunk))) + cur_chunk.setColumns(block.getColumns(), block.rows()); +} + +SquashingChunksTransformForBalancing::GenerateResult SquashingChunksTransformForBalancing::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + +void SquashingChunksTransformForBalancing::onFinish() +{ + auto block = squashing.add({}); + finish_chunk.setColumns(block.getColumns(), block.rows()); +} + +void SquashingChunksTransformForBalancing::work() +{ + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } +} + SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h index 4bcf2216182..7c7948d1af9 100644 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ b/src/Processors/Transforms/SquashingChunksTransform.h @@ -22,6 +22,27 @@ protected: GenerateResult onGenerate() override; void onFinish() override; +private: + SquashingTransform squashing; + Chunk cur_chunk; + Chunk finish_chunk; +}; + +class SquashingChunksTransformForBalancing : public ExceptionKeepingTransform +{ +public: + explicit SquashingChunksTransformForBalancing( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const 
override { return "SquashingTransform"; } + + void work() override; + +protected: + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; + void onFinish() override; + private: NewSquashingTransform squashing; Chunk cur_chunk; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index da7f10990e5..e05f3643874 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -369,7 +369,7 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); @@ -625,7 +625,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index b019660cf5e..2f011567b90 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -667,7 +667,7 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) /// and two-level aggregation is triggered). builder.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared( + return std::make_shared( cur_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 14713d151d7..f6922efc272 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -21,7 +21,6 @@ #include #include #include -#include "Interpreters/SquashingTransform.h" namespace DB @@ -313,7 +312,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
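// Illustrative aside, not part of the patch: the "only one output block"
// guarantee above rests on the OR-ed size predicate used throughout this
// series (same shape as SquashingTransform::isEnoughSize). With
// min_block_size_rows == block.rows() and min_block_size_bytes == 0, the
// row threshold cannot be met before every input row has been accumulated,
// so exactly one squashed block is flushed at the end.
bool isEnoughSize(size_t rows, size_t bytes) const
{
    return (!min_block_size_rows && !min_block_size_bytes)
        || (min_block_size_rows && rows >= min_block_size_rows)
        || (min_block_size_bytes && bytes >= min_block_size_bytes);
}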
builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, true)); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index a3bc09f8d72..b1984a947c8 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -610,7 +610,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) }); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); @@ -1498,7 +1498,7 @@ void StorageWindowView::writeIntoWindowView( builder = select_block.buildQueryPipeline(); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, local_context->getSettingsRef().min_insert_block_size_rows, local_context->getSettingsRef().min_insert_block_size_bytes); From 6c8d7b866142842dd4b1a508c2b5af12b41d1f32 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 24 Apr 2024 17:04:26 +0000 Subject: [PATCH 016/139] mv balancing fix --- src/Processors/Transforms/buildPushingToViewsChain.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index e05f3643874..804af037c58 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -625,7 +625,12 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). 
- pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( + pipeline.getHeader(), + context->getSettingsRef().min_insert_block_size_rows, + context->getSettingsRef().min_insert_block_size_bytes, + true)); + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); From 3501348e1fe4c6fe95bf3c9670be31e65f612458 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 29 Apr 2024 18:32:16 +0200 Subject: [PATCH 017/139] empty commit From 0bc664ac5bd0104219e061660cb6cd1cb0698b7c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 29 Apr 2024 18:21:19 +0000 Subject: [PATCH 018/139] added correct number of input ports for balancing --- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Processors/Transforms/buildPushingToViewsChain.cpp | 4 ++-- src/Storages/ProjectionsDescription.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 50f7bc91d90..a30616b0f3e 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -680,7 +680,7 @@ BlockIO InterpreterInsertQuery::execute() chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - true); + presink_chains.size()); chain.addSource(std::move(balancing)); } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 804af037c58..3b4304dc39b 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -378,7 +378,7 @@ std::optional generateViewChain( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - true)); + out.getNumThreads())); } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); @@ -629,7 +629,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes, - true)); + pipeline.getNumStreams())); pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index f6922efc272..731ac04a8c8 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -311,7 +311,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
- builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, true)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, builder.getNumStreams())); builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); From 7dc4f1778bd8690f62e378ba3c26a013e6ae208b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 6 May 2024 19:44:59 +0000 Subject: [PATCH 019/139] fixes + remove memory tracker --- src/Interpreters/SquashingTransform.cpp | 20 +------------------ src/Interpreters/SquashingTransform.h | 4 ---- .../Transforms/buildPushingToViewsChain.cpp | 2 +- 3 files changed, 2 insertions(+), 24 deletions(-) diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp index d4370b45119..dc2ce69ed2f 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/SquashingTransform.cpp @@ -196,9 +196,6 @@ BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, s , min_block_size_bytes(min_block_size_bytes_) , header(std::move(header_)) { - // Use query-level memory tracker - if (auto * memory_tracker_child = CurrentThread::getMemoryTracker()) - memory_tracker = memory_tracker_child->getParent(); } Chunk BalanceTransform::add(Block && input_block) @@ -255,22 +252,7 @@ bool BalanceTransform::isEnoughSize(const std::vector & chunks) bytes += chunk.bytes(); } - if (!isEnoughSize(rows, bytes)) - return false; - - checkAndWaitMemoryAvailability(bytes); - - return true; -} - -void BalanceTransform::checkAndWaitMemoryAvailability(size_t bytes) -{ - if (const auto hard_limit = memory_tracker->getHardLimit() != 0) - { - auto free_memory = hard_limit - memory_tracker->get(); - while (Int64(bytes) >= free_memory) - free_memory = hard_limit - memory_tracker->get(); - } + return isEnoughSize(rows, bytes); } bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/SquashingTransform.h index ce54c49e441..95e8c2a8fa1 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/SquashingTransform.h @@ -5,7 +5,6 @@ #include #include #include -#include namespace DB @@ -102,9 +101,6 @@ private: bool isEnoughSize(const std::vector & chunks); bool isEnoughSize(size_t rows, size_t bytes) const; - void checkAndWaitMemoryAvailability(size_t bytes); - - MemoryTracker * memory_tracker; Chunk convertToChunk(std::vector &chunks); }; diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 3b4304dc39b..93cfc8f6d10 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -378,7 +378,7 @@ std::optional generateViewChain( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, - out.getNumThreads())); + 1)); // Chain requires a single input } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); From 0b939044087f9494cafab57ac1377ed58ed95971 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 8 May 2024 12:16:42 +0000 Subject: [PATCH 020/139] fix problems with ports --- .../Transforms/BalancingTransform.cpp | 70 ++++++++++++------- 1 file changed, 46 insertions(+), 24 deletions(-) diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/BalancingTransform.cpp index dab73eae2c0..def12407019 100644 --- a/src/Processors/Transforms/BalancingTransform.cpp +++ b/src/Processors/Transforms/BalancingTransform.cpp @@ -25,41 +25,63 @@ IProcessor::Status BalancingChunksTransform::prepare() IProcessor::Status BalancingChunksTransform::prepareConsume() { finished = false; + bool all_finished = true; + for (auto & output : outputs) + { + if (output.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) /// If all outputs are closed, we close inputs (just in case) + { + for (auto & in : inputs) + in.close(); + return Status::Finished; + } + + all_finished = true; + for (auto & input : inputs) + { + if (input.isFinished()) + continue; + + all_finished = false; + } + + if (all_finished) /// If all inputs are closed, we check if we have data in balancing + { + if (balance.isDataLeft()) /// If we have data in balancing, we process this data + { + finished = false; + transform(chunk); + has_data = true; + } + else /// If we don't have data, We send FINISHED + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + } + while (!chunk.hasChunkInfo()) { for (auto & input : inputs) { - bool all_finished = true; - for (auto & output : outputs) - { - if (output.isFinished()) - continue; - - all_finished = false; - } - - if (all_finished) - { - input.close(); - return Status::Finished; - } - - if (input.isFinished() && !balance.isDataLeft()) - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } + if (input.isFinished()) + continue; input.setNeeded(); if (!input.hasData()) { - finished = true; if (!balance.isDataLeft()) return Status::NeedData; else { + finished = true; transform(chunk); has_data = true; return Status::Ready; @@ -68,7 +90,7 @@ IProcessor::Status BalancingChunksTransform::prepareConsume() chunk = input.pull(); transform(chunk); - was_output_processed.assign(outputs.size(), false); + was_output_processed.assign(inputs.size(), false); if (chunk.hasChunkInfo()) { has_data = true; From e1ed0af3d2598f6511a8d804ed52f0822d06b5b5 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 8 May 2024 14:28:09 +0000 Subject: [PATCH 021/139] Added pointer check, Chunk as argument to PlanSquashingTransform, fully refactored names of functions --- programs/copier/Internals.cpp | 4 +- src/Interpreters/InterpreterInsertQuery.cpp | 12 +- .../{SquashingTransform.cpp => Squashing.cpp} | 45 +++---- .../{SquashingTransform.h => Squashing.h} | 16 +-- .../Transforms/AggregatingTransform.cpp | 4 +- .../Transforms/ApplySquashingTransform.h | 63 +++++++++ ...ansform.cpp => PlanSquashingTransform.cpp} | 16 +-- ...ngTransform.h => PlanSquashingTransform.h} | 10 +- .../Transforms/SquashingChunksTransform.cpp | 124 ------------------ .../Transforms/SquashingChunksTransform.h | 70 ---------- .../Transforms/SquashingTransform.cpp | 80 +++++++++++ .../Transforms/SquashingTransform.h | 50 +++++++ 
.../Transforms/buildPushingToViewsChain.cpp | 12 +- src/Server/TCPHandler.cpp | 8 +- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/MergeTree/MutateTask.cpp | 4 +- src/Storages/ProjectionsDescription.cpp | 8 +- src/Storages/WindowView/StorageWindowView.cpp | 6 +- src/Storages/buildQueryTreeForShard.cpp | 4 +- 19 files changed, 270 insertions(+), 270 deletions(-) rename src/Interpreters/{SquashingTransform.cpp => Squashing.cpp} (80%) rename src/Interpreters/{SquashingTransform.h => Squashing.h} (84%) create mode 100644 src/Processors/Transforms/ApplySquashingTransform.h rename src/Processors/Transforms/{BalancingTransform.cpp => PlanSquashingTransform.cpp} (83%) rename src/Processors/Transforms/{BalancingTransform.h => PlanSquashingTransform.h} (76%) delete mode 100644 src/Processors/Transforms/SquashingChunksTransform.cpp delete mode 100644 src/Processors/Transforms/SquashingChunksTransform.h create mode 100644 src/Processors/Transforms/SquashingTransform.cpp create mode 100644 src/Processors/Transforms/SquashingTransform.h diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp index 0cfff7e3f6c..1e118bd6a32 100644 --- a/programs/copier/Internals.cpp +++ b/programs/copier/Internals.cpp @@ -2,7 +2,7 @@ #include #include #include -#include +#include #include #include #include @@ -62,7 +62,7 @@ std::shared_ptr createASTStorageDistributed( Block getBlockWithAllStreamData(QueryPipelineBuilder builder) { - builder.addTransform(std::make_shared( + builder.addTransform(std::make_shared( builder.getHeader(), std::numeric_limits::max(), std::numeric_limits::max())); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a30616b0f3e..e39af9e2804 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -25,8 +25,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -605,7 +605,7 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, @@ -613,7 +613,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( + return std::make_shared( in_header, table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); @@ -669,14 +669,14 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); - auto balancing = std::make_shared( + auto balancing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/Squashing.cpp similarity index 80% rename from src/Interpreters/SquashingTransform.cpp rename to src/Interpreters/Squashing.cpp index dc2ce69ed2f..6063714e8db 100644 --- a/src/Interpreters/SquashingTransform.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,4 +1,4 @@ -#include +#include #include @@ -9,18 +9,18 @@ namespace ErrorCodes extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; } -SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) +Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) { } -Block SquashingTransform::add(Block && input_block) +Block Squashing::add(Block && input_block) { return addImpl(std::move(input_block)); } -Block SquashingTransform::add(const Block & input_block) +Block Squashing::add(const Block & input_block) { return addImpl(input_block); } @@ -32,7 +32,7 @@ Block SquashingTransform::add(const Block & input_block) * have to. */ template -Block SquashingTransform::addImpl(ReferenceType input_block) +Block Squashing::addImpl(ReferenceType input_block) { /// End of input stream. if (!input_block) @@ -80,7 +80,7 @@ Block SquashingTransform::addImpl(ReferenceType input_block) template -void SquashingTransform::append(ReferenceType input_block) +void Squashing::append(ReferenceType input_block) { if (!accumulated_block) { @@ -101,7 +101,7 @@ void SquashingTransform::append(ReferenceType input_block) } -bool SquashingTransform::isEnoughSize(const Block & block) +bool Squashing::isEnoughSize(const Block & block) { size_t rows = 0; size_t bytes = 0; @@ -120,26 +120,26 @@ bool SquashingTransform::isEnoughSize(const Block & block) } -bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const +bool Squashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); } -NewSquashingTransform::NewSquashingTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) +ApplySquashing::ApplySquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) , header(std::move(header_)) { } -Block NewSquashingTransform::add(Chunk && input_chunk) +Block ApplySquashing::add(Chunk && input_chunk) { return addImpl(std::move(input_chunk)); } -Block NewSquashingTransform::addImpl(Chunk && input_chunk) +Block ApplySquashing::addImpl(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) { @@ -159,7 +159,7 @@ Block NewSquashingTransform::addImpl(Chunk && input_chunk) } } -void NewSquashingTransform::append(Chunk && input_chunk) +void ApplySquashing::append(Chunk && input_chunk) { if (input_chunk.getNumColumns() == 0) return; @@ -183,30 +183,31 @@ void NewSquashingTransform::append(Chunk && input_chunk) } } -const ChunksToSquash* NewSquashingTransform::getInfoFromChunk(const Chunk & chunk) +const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk) { const auto& info = chunk.getChunkInfo(); const auto * agg_info = typeid_cast(info.get()); + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + return agg_info; } 
-BalanceTransform::BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) +PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) , header(std::move(header_)) { } -Chunk BalanceTransform::add(Block && input_block) +Chunk PlanSquashing::add(Chunk && input_chunk) { - return addImpl(std::move(input_block)); + return addImpl(std::move(input_chunk)); } -Chunk BalanceTransform::addImpl(Block && input_block) +Chunk PlanSquashing::addImpl(Chunk && input_chunk) { - Chunk input_chunk(input_block.getColumns(), input_block.rows()); - if (!input_chunk) { Chunk res_chunk = convertToChunk(chunks_to_merge_vec); @@ -227,7 +228,7 @@ Chunk BalanceTransform::addImpl(Block && input_block) return input_chunk; } -Chunk BalanceTransform::convertToChunk(std::vector &chunks) +Chunk PlanSquashing::convertToChunk(std::vector &chunks) { if (chunks.empty()) return {}; @@ -241,7 +242,7 @@ Chunk BalanceTransform::convertToChunk(std::vector &chunks) return Chunk(header.cloneEmptyColumns(), 0, info); } -bool BalanceTransform::isEnoughSize(const std::vector & chunks) +bool PlanSquashing::isEnoughSize(const std::vector & chunks) { size_t rows = 0; size_t bytes = 0; @@ -255,7 +256,7 @@ bool BalanceTransform::isEnoughSize(const std::vector & chunks) return isEnoughSize(rows, bytes); } -bool BalanceTransform::isEnoughSize(size_t rows, size_t bytes) const +bool PlanSquashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) diff --git a/src/Interpreters/SquashingTransform.h b/src/Interpreters/Squashing.h similarity index 84% rename from src/Interpreters/SquashingTransform.h rename to src/Interpreters/Squashing.h index 95e8c2a8fa1..82d7fe616f6 100644 --- a/src/Interpreters/SquashingTransform.h +++ b/src/Interpreters/Squashing.h @@ -27,11 +27,11 @@ struct ChunksToSquash : public ChunkInfo * * Order of data is kept. */ -class SquashingTransform +class Squashing { public: /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. - SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_); + Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); /** Add next block and possibly returns squashed block. * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. 
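// A minimal usage sketch of the contract described above (illustrative only:
// `source`, `consume` and the two threshold values are hypothetical stand-ins,
// not part of this patch). The class buffers input until a size threshold is
// met, and an empty argument at end of stream flushes the last, possibly
// undersized, block.
Squashing squashing(min_block_size_rows, min_block_size_bytes);
while (Block block = source.read())
{
    if (Block ready = squashing.add(std::move(block)))
        consume(std::move(ready)); /// a threshold was reached, a squashed block is ready
}
if (Block tail = squashing.add({})) /// end of input: flush whatever is still buffered
    consume(std::move(tail));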
@@ -55,10 +55,10 @@ private:
     bool isEnoughSize(size_t rows, size_t bytes) const;
 };
 
-class NewSquashingTransform
+class ApplySquashing
 {
 public:
-    NewSquashingTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_);
+    ApplySquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_);
 
     Block add(Chunk && input_chunk);
 
@@ -79,12 +79,12 @@ private:
     bool isEnoughSize(size_t rows, size_t bytes) const;
 };
 
-class BalanceTransform
+class PlanSquashing
 {
 public:
-    BalanceTransform(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_);
+    PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_);
 
-    Chunk add(Block && input_block);
+    Chunk add(Chunk && input_chunk);
 
     bool isDataLeft()
     {
         return !chunks_to_merge_vec.empty();
@@ -97,7 +97,7 @@ private:
 
     const Block header;
 
-    Chunk addImpl(Block && input_block);
+    Chunk addImpl(Chunk && input_chunk);
 
     bool isEnoughSize(const std::vector<Chunk> & chunks);
     bool isEnoughSize(size_t rows, size_t bytes) const;
 
diff --git a/src/Processors/Transforms/AggregatingTransform.cpp b/src/Processors/Transforms/AggregatingTransform.cpp
index 74da97f2199..7ca9cd754b2 100644
--- a/src/Processors/Transforms/AggregatingTransform.cpp
+++ b/src/Processors/Transforms/AggregatingTransform.cpp
@@ -9,7 +9,7 @@
 #include
 #include
-#include <Processors/Transforms/SquashingChunksTransform.h>
+#include <Processors/Transforms/SquashingTransform.h>
 
 namespace ProfileEvents
@@ -773,7 +773,7 @@ void AggregatingTransform::initGenerate()
         {
             /// Just a reasonable constant, matches default value for the setting `preferred_block_size_bytes`
             static constexpr size_t oneMB = 1024 * 1024;
-            return std::make_shared<SimpleSquashingChunksTransform>(header, params->params.max_block_size, oneMB);
+            return std::make_shared<SimpleSquashingTransform>(header, params->params.max_block_size, oneMB);
         });
     }
     /// AggregatingTransform::expandPipeline expects single output port.
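A note on the mechanism the new file below relies on (a sketch assembled from this series' own definitions; `chunks_to_merge_vec`, `header` and the surrounding calls are quoted from the planning side, not a complete program). The planning stage never concatenates columns itself: it parks whole chunks inside a ChunksToSquash info object and emits a zero-row carrier chunk, which the apply stage unpacks before doing the actual column work.

// Plan side (cf. PlanSquashing::convertToChunk): only the plan travels, no rows.
auto info = std::make_shared<ChunksToSquash>();
for (auto & chunk : chunks_to_merge_vec)
    info->chunks.push_back(std::move(chunk));
Chunk carrier(header.cloneEmptyColumns(), 0, info);

// Apply side (cf. ApplySquashing::getInfoFromChunk): recover the plan or fail loudly.
const auto * plan = typeid_cast<const ChunksToSquash *>(carrier.getChunkInfo().get());
if (!plan)
    throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr");
// each buffered chunk in plan->chunks is then appended into one accumulated block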
diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h
new file mode 100644
index 00000000000..26507d9c496
--- /dev/null
+++ b/src/Processors/Transforms/ApplySquashingTransform.h
@@ -0,0 +1,63 @@
+#include <Interpreters/Squashing.h>
+#include <Processors/ISimpleTransform.h>
+#include <Processors/Sinks/SinkToStorage.h>
+
+namespace DB
+{
+
+class ApplySquashingTransform : public ExceptionKeepingTransform
+{
+public:
+    ApplySquashingTransform(
+        const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes)
+        : ExceptionKeepingTransform(header, header, false)
+        , squashing(header, min_block_size_rows, min_block_size_bytes)
+    {
+    }
+
+    String getName() const override { return "ApplySquashingTransform"; }
+
+    void work() override
+    {
+        if (stage == Stage::Exception)
+        {
+            data.chunk.clear();
+            ready_input = false;
+            return;
+        }
+
+        ExceptionKeepingTransform::work();
+        if (finish_chunk)
+        {
+            data.chunk = std::move(finish_chunk);
+            ready_output = true;
+        }
+    }
+
+protected:
+    void onConsume(Chunk chunk) override
+    {
+        if (auto block = squashing.add(std::move(chunk)))
+            cur_chunk.setColumns(block.getColumns(), block.rows());
+    }
+
+    GenerateResult onGenerate() override
+    {
+        GenerateResult res;
+        res.chunk = std::move(cur_chunk);
+        res.is_done = true;
+        return res;
+    }
+    void onFinish() override
+    {
+        auto block = squashing.add({});
+        finish_chunk.setColumns(block.getColumns(), block.rows());
+    }
+
+private:
+    ApplySquashing squashing;
+    Chunk cur_chunk;
+    Chunk finish_chunk;
+};
+
+}

diff --git a/src/Processors/Transforms/BalancingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp
similarity index 83%
rename from src/Processors/Transforms/BalancingTransform.cpp
rename to src/Processors/Transforms/PlanSquashingTransform.cpp
index def12407019..62ff3a0bf39 100644
--- a/src/Processors/Transforms/BalancingTransform.cpp
+++ b/src/Processors/Transforms/PlanSquashingTransform.cpp
@@ -1,15 +1,15 @@
-#include <Processors/Transforms/BalancingTransform.h>
+#include <Processors/Transforms/PlanSquashingTransform.h>
 #include
 
 namespace DB
 {
 
-BalancingChunksTransform::BalancingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports)
+PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports)
     : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes)
 {
 }
 
-IProcessor::Status BalancingChunksTransform::prepare()
+IProcessor::Status PlanSquashingTransform::prepare()
 {
     Status status = Status::Ready;
 
@@ -22,7 +22,7 @@ IProcessor::Status BalancingChunksTransform::prepare()
     return status;
 }
 
-IProcessor::Status BalancingChunksTransform::prepareConsume()
+IProcessor::Status PlanSquashingTransform::prepareConsume()
 {
     finished = false;
     bool all_finished = true;
@@ -90,7 +90,7 @@ IProcessor::Status PlanSquashingTransform::prepareConsume()
     chunk = input.pull();
     transform(chunk);
 
-    was_output_processed.assign(inputs.size(), false);
+    was_output_processed.assign(outputs.size(), false);
     if (chunk.hasChunkInfo())
     {
         has_data = true;
@@ -102,11 +102,11 @@ IProcessor::Status PlanSquashingTransform::prepareConsume()
     return Status::Ready;
 }
 
-void BalancingChunksTransform::transform(Chunk & chunk_)
+void PlanSquashingTransform::transform(Chunk & chunk_)
 {
     if (!finished)
     {
-        Chunk res_chunk = balance.add(getInputPorts().front().getHeader().cloneWithColumns(chunk_.detachColumns()));
+        Chunk res_chunk = balance.add(std::move(chunk_));
         std::swap(res_chunk, chunk_);
     }
     else
@@ -116,7 +116,7 @@ void
BalancingChunksTransform::transform(Chunk & chunk_) } } -IProcessor::Status BalancingChunksTransform::prepareSend() +IProcessor::Status PlanSquashingTransform::prepareSend() { bool all_outputs_processed = true; diff --git a/src/Processors/Transforms/BalancingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h similarity index 76% rename from src/Processors/Transforms/BalancingTransform.h rename to src/Processors/Transforms/PlanSquashingTransform.h index a8a8bc5cfab..c30569fffb5 100644 --- a/src/Processors/Transforms/BalancingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -2,19 +2,19 @@ #include #include -#include +#include namespace DB { -class BalancingChunksTransform : public IProcessor +class PlanSquashingTransform : public IProcessor { public: - BalancingChunksTransform( + PlanSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports); - String getName() const override { return "BalancingChunksTransform"; } + String getName() const override { return "PlanSquashingTransform"; } InputPorts & getInputPorts() { return inputs; } OutputPorts & getOutputPorts() { return outputs; } @@ -30,7 +30,7 @@ protected: private: size_t CalculateBlockSize(const Block & block); Chunk chunk; - BalanceTransform balance; + PlanSquashing balance; bool has_data = false; std::vector was_output_processed; diff --git a/src/Processors/Transforms/SquashingChunksTransform.cpp b/src/Processors/Transforms/SquashingChunksTransform.cpp deleted file mode 100644 index 62b87061344..00000000000 --- a/src/Processors/Transforms/SquashingChunksTransform.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include -#include - -namespace DB -{ - -SquashingChunksTransform::SquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SquashingChunksTransform::onConsume(Chunk chunk) -{ - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - cur_chunk.setColumns(block.getColumns(), block.rows()); -} - -SquashingChunksTransform::GenerateResult SquashingChunksTransform::onGenerate() -{ - GenerateResult res; - res.chunk = std::move(cur_chunk); - res.is_done = true; - return res; -} - -void SquashingChunksTransform::onFinish() -{ - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); -} - -void SquashingChunksTransform::work() -{ - if (stage == Stage::Exception) - { - data.chunk.clear(); - ready_input = false; - return; - } - - ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } -} - -SquashingChunksTransformForBalancing::SquashingChunksTransformForBalancing( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) -{ -} - -void SquashingChunksTransformForBalancing::onConsume(Chunk chunk) -{ - if (auto block = squashing.add(std::move(chunk))) - cur_chunk.setColumns(block.getColumns(), block.rows()); -} - -SquashingChunksTransformForBalancing::GenerateResult SquashingChunksTransformForBalancing::onGenerate() -{ - GenerateResult res; - res.chunk = std::move(cur_chunk); - res.is_done = true; - return res; -} - -void SquashingChunksTransformForBalancing::onFinish() -{ - auto block = 
squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); -} - -void SquashingChunksTransformForBalancing::work() -{ - if (stage == Stage::Exception) - { - data.chunk.clear(); - ready_input = false; - return; - } - - ExceptionKeepingTransform::work(); - if (finish_chunk) - { - data.chunk = std::move(finish_chunk); - ready_output = true; - } -} - -SimpleSquashingChunksTransform::SimpleSquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) -{ -} - -void SimpleSquashingChunksTransform::transform(Chunk & chunk) -{ - if (!finished) - { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); - } - else - { - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); - } -} - -IProcessor::Status SimpleSquashingChunksTransform::prepare() -{ - if (!finished && input.isFinished()) - { - finished = true; - return Status::Ready; - } - return ISimpleTransform::prepare(); -} -} diff --git a/src/Processors/Transforms/SquashingChunksTransform.h b/src/Processors/Transforms/SquashingChunksTransform.h deleted file mode 100644 index 7c7948d1af9..00000000000 --- a/src/Processors/Transforms/SquashingChunksTransform.h +++ /dev/null @@ -1,70 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -class SquashingChunksTransform : public ExceptionKeepingTransform -{ -public: - explicit SquashingChunksTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); - - String getName() const override { return "SquashingTransform"; } - - void work() override; - -protected: - void onConsume(Chunk chunk) override; - GenerateResult onGenerate() override; - void onFinish() override; - -private: - SquashingTransform squashing; - Chunk cur_chunk; - Chunk finish_chunk; -}; - -class SquashingChunksTransformForBalancing : public ExceptionKeepingTransform -{ -public: - explicit SquashingChunksTransformForBalancing( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); - - String getName() const override { return "SquashingTransform"; } - - void work() override; - -protected: - void onConsume(Chunk chunk) override; - GenerateResult onGenerate() override; - void onFinish() override; - -private: - NewSquashingTransform squashing; - Chunk cur_chunk; - Chunk finish_chunk; -}; - -/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. 
-class SimpleSquashingChunksTransform : public ISimpleTransform -{ -public: - explicit SimpleSquashingChunksTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); - - String getName() const override { return "SimpleSquashingTransform"; } - -protected: - void transform(Chunk &) override; - - IProcessor::Status prepare() override; - -private: - SquashingTransform squashing; - - bool finished = false; -}; -} diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp new file mode 100644 index 00000000000..43f72262846 --- /dev/null +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -0,0 +1,80 @@ +#include +#include + +namespace DB +{ + +SquashingTransform::SquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ExceptionKeepingTransform(header, header, false) + , squashing(min_block_size_rows, min_block_size_bytes) +{ +} + +void SquashingTransform::onConsume(Chunk chunk) +{ + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + cur_chunk.setColumns(block.getColumns(), block.rows()); +} + +SquashingTransform::GenerateResult SquashingTransform::onGenerate() +{ + GenerateResult res; + res.chunk = std::move(cur_chunk); + res.is_done = true; + return res; +} + +void SquashingTransform::onFinish() +{ + auto block = squashing.add({}); + finish_chunk.setColumns(block.getColumns(), block.rows()); +} + +void SquashingTransform::work() +{ + if (stage == Stage::Exception) + { + data.chunk.clear(); + ready_input = false; + return; + } + + ExceptionKeepingTransform::work(); + if (finish_chunk) + { + data.chunk = std::move(finish_chunk); + ready_output = true; + } +} + +SimpleSquashingTransform::SimpleSquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) +{ +} + +void SimpleSquashingTransform::transform(Chunk & chunk) +{ + if (!finished) + { + if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) + chunk.setColumns(block.getColumns(), block.rows()); + } + else + { + auto block = squashing.add({}); + chunk.setColumns(block.getColumns(), block.rows()); + } +} + +IProcessor::Status SimpleSquashingTransform::prepare() +{ + if (!finished && input.isFinished()) + { + finished = true; + return Status::Ready; + } + return ISimpleTransform::prepare(); +} +} diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h new file mode 100644 index 00000000000..c5b727ac6ec --- /dev/null +++ b/src/Processors/Transforms/SquashingTransform.h @@ -0,0 +1,50 @@ +#pragma once + +#include +#include +#include +#include + +namespace DB +{ + +class SquashingTransform : public ExceptionKeepingTransform +{ +public: + explicit SquashingTransform( + const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const override { return "SquashingTransform"; } + + void work() override; + +protected: + void onConsume(Chunk chunk) override; + GenerateResult onGenerate() override; + void onFinish() override; + +private: + Squashing squashing; + Chunk cur_chunk; + Chunk finish_chunk; +}; + +/// Doesn't care about propagating exceptions and thus doesn't throw LOGICAL_ERROR if the following transform closes its input port. 
+class SimpleSquashingTransform : public ISimpleTransform +{ +public: + explicit SimpleSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes); + + String getName() const override { return "SimpleSquashingTransform"; } + +protected: + void transform(Chunk &) override; + + IProcessor::Status prepare() override; + +private: + Squashing squashing; + + bool finished = false; +}; +} diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 93cfc8f6d10..e106cbf8d42 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -6,8 +6,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -369,12 +369,12 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, @@ -625,12 +625,12 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes, pipeline.getNumStreams())); - pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes)); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d883029408c..08d8b1b9fbc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -1,6 +1,6 @@ -#include "Interpreters/AsynchronousInsertQueue.h" -#include "Interpreters/SquashingTransform.h" -#include "Parsers/ASTInsertQuery.h" +#include +#include +#include #include #include #include @@ -876,7 +876,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - SquashingTransform squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 2f011567b90..b274518e248 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -21,7 +21,7 @@ limitations under the License. 
*/ #include #include #include -#include +#include #include #include #include @@ -667,7 +667,7 @@ QueryPipelineBuilder StorageLiveView::completeQuery(Pipes pipes) /// and two-level aggregation is triggered). builder.addSimpleTransform([&](const Block & cur_header) { - return std::make_shared( + return std::make_shared( cur_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1c33f018a5d..514e7b8299b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -6,7 +6,7 @@ #include #include #include -#include +#include #include #include #include @@ -1223,7 +1223,7 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squashes; + std::vector projection_squashes; const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 731ac04a8c8..d1bcc89cbe0 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -16,8 +16,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -311,8 +311,8 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, builder.getNumStreams())); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, builder.getNumStreams())); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); diff --git a/src/Storages/WindowView/StorageWindowView.cpp b/src/Storages/WindowView/StorageWindowView.cpp index b1984a947c8..f6ffaf679ed 100644 --- a/src/Storages/WindowView/StorageWindowView.cpp +++ b/src/Storages/WindowView/StorageWindowView.cpp @@ -35,7 +35,7 @@ #include #include #include -#include +#include #include #include #include @@ -610,7 +610,7 @@ std::pair StorageWindowView::getNewBlocks(UInt32 watermark) }); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, getContext()->getSettingsRef().min_insert_block_size_rows, getContext()->getSettingsRef().min_insert_block_size_bytes); @@ -1498,7 +1498,7 @@ void StorageWindowView::writeIntoWindowView( builder = select_block.buildQueryPipeline(); builder.addSimpleTransform([&](const Block & current_header) { - return std::make_shared( + return std::make_shared( current_header, local_context->getSettingsRef().min_insert_block_size_rows, local_context->getSettingsRef().min_insert_block_size_bytes); diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index c87a1b216ca..977a803bd28 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -288,7 +288,7 @@ TableNodePtr executeSubqueryNode(const 
QueryTreeNodePtr & subquery_node, size_t min_block_size_rows = mutable_context->getSettingsRef().min_external_table_block_size_rows; size_t min_block_size_bytes = mutable_context->getSettingsRef().min_external_table_block_size_bytes; - auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); + auto squashing = std::make_shared(builder->getHeader(), min_block_size_rows, min_block_size_bytes); builder->resize(1); builder->addTransform(std::move(squashing)); From 8c0786bd80a2aad2934395124d9c1213fe79e0cc Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 8 May 2024 19:43:22 +0000 Subject: [PATCH 022/139] fix for projections --- src/Interpreters/Squashing.cpp | 10 +--- .../Transforms/PlanSquashingTransform.cpp | 60 +++++++++---------- src/Storages/ProjectionsDescription.cpp | 2 +- 3 files changed, 31 insertions(+), 41 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 6063714e8db..ece124e8a15 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -142,11 +142,7 @@ Block ApplySquashing::add(Chunk && input_chunk) Block ApplySquashing::addImpl(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } + return Block(); const auto *info = getInfoFromChunk(input_chunk); for (auto & chunk : info->chunks) @@ -225,7 +221,7 @@ Chunk PlanSquashing::addImpl(Chunk && input_chunk) Chunk res_chunk = convertToChunk(chunks_to_merge_vec); return res_chunk; } - return input_chunk; + return {}; } Chunk PlanSquashing::convertToChunk(std::vector &chunks) @@ -237,7 +233,7 @@ Chunk PlanSquashing::convertToChunk(std::vector &chunks) for (auto &chunk : chunks) info->chunks.push_back(std::move(chunk)); - chunks.clear(); // we can remove this + chunks.clear(); return Chunk(header.cloneEmptyColumns(), 0, info); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 62ff3a0bf39..fe0f6ed39f5 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -41,34 +41,32 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() return Status::Finished; } - all_finished = true; - for (auto & input : inputs) - { - if (input.isFinished()) - continue; - - all_finished = false; - } - - if (all_finished) /// If all inputs are closed, we check if we have data in balancing - { - if (balance.isDataLeft()) /// If we have data in balancing, we process this data - { - finished = false; - transform(chunk); - has_data = true; - } - else /// If we don't have data, We send FINISHED - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - } - while (!chunk.hasChunkInfo()) { + all_finished = true; + for (auto & input : inputs) + { + if (!input.isFinished()) + all_finished = false; + } + + if (all_finished) /// If all inputs are closed, we check if we have data in balancing + { + if (balance.isDataLeft()) /// If we have data in balancing, we process this data + { + finished = false; + transform(chunk); + has_data = true; + } + else /// If we don't have data, We send FINISHED + { + for (auto & output : outputs) + output.finish(); + + return Status::Finished; + } + } + for (auto & input : inputs) { if (input.isFinished()) @@ -80,12 +78,7 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() if (!balance.isDataLeft()) return Status::NeedData; else - { - finished = 
true; - transform(chunk); - has_data = true; - return Status::Ready; - } + continue; } chunk = input.pull(); @@ -96,7 +89,8 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() has_data = true; return Status::Ready; } - + else + return Status::NeedData; } } return Status::Ready; diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index d1bcc89cbe0..87e203e8665 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -311,7 +311,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, builder.getNumStreams())); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); From e39213a8879abbb54ed286f954dc3de6702c61db Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Thu, 9 May 2024 00:45:16 +0200 Subject: [PATCH 023/139] empty commit From 37c67aba9f933b949c3cf27f246e71174ed0d8a6 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 10 May 2024 13:32:34 +0200 Subject: [PATCH 024/139] remove squashingTransform.cpp --- src/Interpreters/SquashingTransform.cpp | 145 ------------------------ 1 file changed, 145 deletions(-) delete mode 100644 src/Interpreters/SquashingTransform.cpp diff --git a/src/Interpreters/SquashingTransform.cpp b/src/Interpreters/SquashingTransform.cpp deleted file mode 100644 index 41f024df7a7..00000000000 --- a/src/Interpreters/SquashingTransform.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; - extern const int LOGICAL_ERROR; -} - -SquashingTransform::SquashingTransform(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block SquashingTransform::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Block SquashingTransform::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. - */ -template -Block SquashingTransform::addImpl(ReferenceType input_block) -{ - /// End of input stream. - if (!input_block) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Just read block is already enough. - if (isEnoughSize(input_block)) - { - /// If no accumulated data, return just read block. - if (!accumulated_block) - { - return std::move(input_block); - } - - /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Accumulated block is already enough. 
- if (isEnoughSize(accumulated_block)) - { - /// Return accumulated data and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Squashed block is not ready. - return {}; -} - - -template -void SquashingTransform::append(ReferenceType input_block) -{ - if (!accumulated_block) - { - accumulated_block = std::move(input_block); - return; - } - - assert(blocksHaveEqualStructure(input_block, accumulated_block)); - - try - { - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getByPosition(i).column; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } - } - catch (...) - { - /// add() may be called again even after a previous add() threw an exception. - /// Keep accumulated_block in a valid state. - /// Seems ok to discard accumulated data because we're throwing an exception, which the caller will - /// hopefully interpret to mean "this block and all *previous* blocks are potentially lost". - accumulated_block.clear(); - throw; - } -} - - -bool SquashingTransform::isEnoughSize(const Block & block) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const auto & [column, type, name] : block) - { - if (!column) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid column in block."); - - if (!rows) - rows = column->size(); - else if (rows != column->size()) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); - - bytes += column->byteSize(); - } - - return isEnoughSize(rows, bytes); -} - - -bool SquashingTransform::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} - -} From bcd5482c5b73743ec958a2f818c6e72dfd784832 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 10 May 2024 13:34:09 +0200 Subject: [PATCH 025/139] remove internals.cpp --- programs/copier/Internals.cpp | 280 ---------------------------------- 1 file changed, 280 deletions(-) delete mode 100644 programs/copier/Internals.cpp diff --git a/programs/copier/Internals.cpp b/programs/copier/Internals.cpp deleted file mode 100644 index 1e118bd6a32..00000000000 --- a/programs/copier/Internals.cpp +++ /dev/null @@ -1,280 +0,0 @@ -#include "Internals.h" -#include -#include -#include -#include -#include -#include -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; -} - -using ConfigurationPtr = Poco::AutoPtr; - -ConfigurationPtr getConfigurationFromXMLString(const std::string & xml_data) -{ - std::stringstream ss(xml_data); // STYLE_CHECK_ALLOW_STD_STRING_STREAM - Poco::XML::InputSource input_source{ss}; - return {new Poco::Util::XMLConfiguration{&input_source}}; -} - -String getQuotedTable(const String & database, const String & table) -{ - if (database.empty()) - return backQuoteIfNeed(table); - - return backQuoteIfNeed(database) + "." 
+ backQuoteIfNeed(table); -} - -String getQuotedTable(const DatabaseAndTableName & db_and_table) -{ - return getQuotedTable(db_and_table.first, db_and_table.second); -} - - -// Creates AST representing 'ENGINE = Distributed(cluster, db, table, [sharding_key]) -std::shared_ptr createASTStorageDistributed( - const String & cluster_name, const String & database, const String & table, - const ASTPtr & sharding_key_ast) -{ - auto args = std::make_shared(); - args->children.emplace_back(std::make_shared(cluster_name)); - args->children.emplace_back(std::make_shared(database)); - args->children.emplace_back(std::make_shared(table)); - if (sharding_key_ast) - args->children.emplace_back(sharding_key_ast); - - auto engine = std::make_shared(); - engine->name = "Distributed"; - engine->arguments = args; - - auto storage = std::make_shared(); - storage->set(storage->engine, engine); - - return storage; -} - - -Block getBlockWithAllStreamData(QueryPipelineBuilder builder) -{ - builder.addTransform(std::make_shared( - builder.getHeader(), - std::numeric_limits::max(), - std::numeric_limits::max())); - - auto cur_pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); - Block block; - PullingPipelineExecutor executor(cur_pipeline); - executor.pull(block); - - return block; -} - -bool isExtendedDefinitionStorage(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - return storage.partition_by || storage.order_by || storage.sample_by; -} - -ASTPtr extractPartitionKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (isExtendedDefinitionStorage(storage_ast)) - { - if (storage.partition_by) - return storage.partition_by->clone(); - - static const char * all = "all"; - return std::make_shared(Field(all, strlen(all))); - } - else - { - bool is_replicated = startsWith(engine.name, "Replicated"); - size_t min_args = is_replicated ? 3 : 1; - - if (!engine.arguments) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected arguments in {}", storage_str); - - ASTPtr arguments_ast = engine.arguments->clone(); - ASTs & arguments = arguments_ast->children; - - if (arguments.size() < min_args) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected at least {} arguments in {}", min_args, storage_str); - - ASTPtr & month_arg = is_replicated ? 
arguments[2] : arguments[1]; - return makeASTFunction("toYYYYMM", month_arg->clone()); - } -} - -ASTPtr extractPrimaryKey(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended definition storage {} Will be fixed later.", storage_str); - } - - if (storage.primary_key) - return storage.primary_key->clone(); - - return nullptr; -} - - -ASTPtr extractOrderBy(const ASTPtr & storage_ast) -{ - String storage_str = queryToString(storage_ast); - - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - if (!isExtendedDefinitionStorage(storage_ast)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Is not extended definition storage {} Will be fixed later.", storage_str); - } - - if (storage.order_by) - return storage.order_by->clone(); - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "ORDER BY cannot be empty"); -} - -/// Wraps only identifiers with backticks. -std::string wrapIdentifiersWithBackticks(const ASTPtr & root) -{ - if (auto identifier = std::dynamic_pointer_cast(root)) - return backQuote(identifier->name()); - - if (auto function = std::dynamic_pointer_cast(root)) - return function->name + '(' + wrapIdentifiersWithBackticks(function->arguments) + ')'; - - if (auto expression_list = std::dynamic_pointer_cast(root)) - { - Names function_arguments(expression_list->children.size()); - for (size_t i = 0; i < expression_list->children.size(); ++i) - function_arguments[i] = wrapIdentifiersWithBackticks(expression_list->children[i]); - return boost::algorithm::join(function_arguments, ", "); - } - - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key could be represented only as columns or functions from columns."); -} - - -Names extractPrimaryKeyColumnNames(const ASTPtr & storage_ast) -{ - const auto sorting_key_ast = extractOrderBy(storage_ast); - const auto primary_key_ast = extractPrimaryKey(storage_ast); - - const auto sorting_key_expr_list = extractKeyExpressionList(sorting_key_ast); - const auto primary_key_expr_list = primary_key_ast - ? extractKeyExpressionList(primary_key_ast) : sorting_key_expr_list->clone(); - - /// Maybe we have to handle VersionedCollapsing engine separately. But in our case it looks pointless. - - size_t primary_key_size = primary_key_expr_list->children.size(); - size_t sorting_key_size = sorting_key_expr_list->children.size(); - - if (primary_key_size > sorting_key_size) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key must be a prefix of the sorting key, but its length: " - "{} is greater than the sorting key length: {}", - primary_key_size, sorting_key_size); - - Names primary_key_columns; - NameSet primary_key_columns_set; - - for (size_t i = 0; i < sorting_key_size; ++i) - { - /// Column name could be represented as a f_1(f_2(...f_n(column_name))). - /// Each f_i could take one or more parameters. - /// We will wrap identifiers with backticks to allow non-standard identifier names.
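/// Illustration (hypothetical example, not from the deleted file): for a sorting key
/// of (toYYYYMM(date), `user id`), wrapIdentifiersWithBackticks() returns
/// "toYYYYMM(`date`)" and "`user id`": identifiers get back-quoted, function names
/// stay bare, and the children of an expression list are joined with ", ".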
- String sorting_key_column = sorting_key_expr_list->children[i]->getColumnName(); - - if (i < primary_key_size) - { - String pk_column = primary_key_expr_list->children[i]->getColumnName(); - if (pk_column != sorting_key_column) - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Primary key must be a prefix of the sorting key, " - "but the column in the position {} is {}, not {}", i, sorting_key_column, pk_column); - - if (!primary_key_columns_set.emplace(pk_column).second) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Primary key contains duplicate columns"); - - primary_key_columns.push_back(wrapIdentifiersWithBackticks(primary_key_expr_list->children[i])); - } - } - - return primary_key_columns; -} - -bool isReplicatedTableEngine(const ASTPtr & storage_ast) -{ - const auto & storage = storage_ast->as(); - const auto & engine = storage.engine->as(); - - if (!endsWith(engine.name, "MergeTree")) - { - String storage_str = queryToString(storage_ast); - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unsupported engine was specified in {}, only *MergeTree engines are supported", storage_str); - } - - return startsWith(engine.name, "Replicated"); -} - -ShardPriority getReplicasPriority(const Cluster::Addresses & replicas, const std::string & local_hostname, UInt8 random) -{ - ShardPriority res; - - if (replicas.empty()) - return res; - - res.is_remote = 1; - for (const auto & replica : replicas) - { - if (isLocalAddress(DNSResolver::instance().resolveHost(replica.host_name))) - { - res.is_remote = 0; - break; - } - } - - res.hostname_difference = std::numeric_limits::max(); - for (const auto & replica : replicas) - { - size_t difference = getHostNamePrefixDistance(local_hostname, replica.host_name); - res.hostname_difference = std::min(difference, res.hostname_difference); - } - - res.random = random; - return res; -} - -} From 2a5671d8819787d4d675a9131c9e3c491110c409 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 10 May 2024 13:41:30 +0200 Subject: [PATCH 026/139] fix style --- src/Interpreters/Squashing.cpp | 1 + src/Processors/Transforms/ApplySquashingTransform.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index ece124e8a15..78d1b9fc643 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -7,6 +7,7 @@ namespace DB namespace ErrorCodes { extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; + extern const int LOGICAL_ERROR; } Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 26507d9c496..584fb72cccb 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -1,3 +1,4 @@ +#pragma once #include #include #include From 568c6dfd8039dc389760f3060106e15f96c72d46 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Fri, 10 May 2024 14:35:32 +0200 Subject: [PATCH 027/139] fix link in RecursiveCTESource --- src/Processors/Sources/RecursiveCTESource.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Sources/RecursiveCTESource.cpp b/src/Processors/Sources/RecursiveCTESource.cpp index b94cb188086..2f7568c2bb0 100644 --- a/src/Processors/Sources/RecursiveCTESource.cpp +++ b/src/Processors/Sources/RecursiveCTESource.cpp @@ -5,7 +5,7 @@ #include #include 
#include -#include +#include #include #include From d833c9cce05cf508596ef5191a9ee179c59a1c6f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 13 May 2024 16:45:38 +0000 Subject: [PATCH 028/139] full refactoring of planning --- .../Transforms/PlanSquashingTransform.cpp | 270 ++++++++++++------ .../Transforms/PlanSquashingTransform.h | 26 +- 2 files changed, 203 insertions(+), 93 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index fe0f6ed39f5..89b59354722 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include namespace DB { @@ -15,85 +16,172 @@ IProcessor::Status PlanSquashingTransform::prepare() while (status == Status::Ready) { - status = !has_data ? prepareConsume() - : prepareSend(); + switch (planning_status) + { + case PlanningStatus::INIT: + { + status = init(); + break; + } + case PlanningStatus::READ_IF_CAN: + { + status = prepareConsume(); + break; + } + case PlanningStatus::WAIT_IN: + { + status = waitForDataIn(); + break; + } + case PlanningStatus::WAIT_OUT_AND_PUSH: + { + status = prepareSend(); + break; + } + case PlanningStatus::PUSH: + { + status = prepareSend(); + break; + } + case PlanningStatus::WAIT_OUT_FLUSH: + { + status = prepareSendFlush(); + break; + } + case FINISH: + { + status = finish(); + break; + } + } } return status; } +IProcessor::Status PlanSquashingTransform::init() +{ + for (auto input : inputs) + { + input.setNeeded(); + if (input.hasData()) + available_inputs++; + } + + planning_status = PlanningStatus::READ_IF_CAN; + return Status::Ready; +} + IProcessor::Status PlanSquashingTransform::prepareConsume() { + if (available_inputs == 0) + { + planning_status = PlanningStatus::WAIT_IN; + return Status::NeedData; + } finished = false; + + bool inputs_have_no_data = true; + for (auto & input : inputs) + { + if (input.hasData()) + { + inputs_have_no_data = false; + chunk = input.pull(); + transform(chunk); + + available_inputs--; + if (chunk.hasChunkInfo()) + { + planning_status = PlanningStatus::WAIT_OUT_AND_PUSH; + return Status::Ready; + } + } + + if (available_inputs == 0) + { + planning_status = PlanningStatus::WAIT_IN; + return Status::NeedData; + } + } + + if (inputs_have_no_data) + { + if (checkInputs()) + return Status::Ready; + + planning_status = PlanningStatus::WAIT_IN; + return Status::NeedData; + } + return Status::Ready; +} + +bool PlanSquashingTransform::checkInputs() +{ bool all_finished = true; + for (auto & output : outputs) { - if (output.isFinished()) + if (!output.isFinished()) + all_finished = false; + } + if (all_finished) /// If all outputs are closed, we close inputs (just in case) + { + planning_status = PlanningStatus::FINISH; + return true; + } + + all_finished = true; + for (auto & input : inputs) + { + + if (!input.isFinished()) + all_finished = false; + } + + if (all_finished) /// If all inputs are closed, we check if we have data in balancing + { + if (balance.isDataLeft()) /// If we have data in balancing, we process this data + { + planning_status = PlanningStatus::WAIT_OUT_FLUSH; + finished = false; + transform(chunk); + } + else /// If we don't have data, We send FINISHED + planning_status = PlanningStatus::FINISH; + return true; + } + return false; +} + +IProcessor::Status PlanSquashingTransform::waitForDataIn() +{ + bool all_finished = true; + for (auto & input : inputs) + { + if (input.isFinished()) continue; 
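// Annotation (not part of the commit): the planning automaton this patch
// introduces, read off the switch in prepare() above:
//   INIT                -> READ_IF_CAN        after marking every input as needed
//   READ_IF_CAN         -> WAIT_OUT_AND_PUSH  once balance produces a planned chunk
//   READ_IF_CAN         -> WAIT_IN            while no input has data available
//   WAIT_IN             -> READ_IF_CAN        when some input receives data
//   all inputs finished -> WAIT_OUT_FLUSH     if balance.isDataLeft(), else FINISH
//   WAIT_OUT_*          -> returns PortFull   until an output accepts the chunk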
all_finished = false; - } - if (all_finished) /// If all outputs are closed, we close inputs (just in case) + if (!input.hasData()) + continue; + + available_inputs++; + } + if (all_finished) { - for (auto & in : inputs) - in.close(); - return Status::Finished; + checkInputs(); + return Status::Ready; } - - while (!chunk.hasChunkInfo()) + + if (available_inputs > 0) { - all_finished = true; - for (auto & input : inputs) - { - if (!input.isFinished()) - all_finished = false; - } - - if (all_finished) /// If all inputs are closed, we check if we have data in balancing - { - if (balance.isDataLeft()) /// If we have data in balancing, we process this data - { - finished = false; - transform(chunk); - has_data = true; - } - else /// If we don't have data, We send FINISHED - { - for (auto & output : outputs) - output.finish(); - - return Status::Finished; - } - } - - for (auto & input : inputs) - { - if (input.isFinished()) - continue; - - input.setNeeded(); - if (!input.hasData()) - { - if (!balance.isDataLeft()) - return Status::NeedData; - else - continue; - } - - chunk = input.pull(); - transform(chunk); - was_output_processed.assign(outputs.size(), false); - if (chunk.hasChunkInfo()) - { - has_data = true; - return Status::Ready; - } - else - return Status::NeedData; - } + planning_status = PlanningStatus::READ_IF_CAN; + return Status::Ready; } - return Status::Ready; + + return Status::NeedData; } void PlanSquashingTransform::transform(Chunk & chunk_) @@ -112,43 +200,47 @@ void PlanSquashingTransform::transform(Chunk & chunk_) IProcessor::Status PlanSquashingTransform::prepareSend() { - bool all_outputs_processed = true; + if (!chunk) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should be available in prepareSend"); - size_t chunk_number = 0; for (auto &output : outputs) { - auto & was_processed = was_output_processed[chunk_number]; - ++chunk_number; - if (!chunk.hasChunkInfo()) + if (output.canPush()) { - has_data = false; + planning_status = PlanningStatus::READ_IF_CAN; + output.push(std::move(chunk)); return Status::Ready; } - - if (was_processed) - continue; - - if (output.isFinished()) - continue; - - if (!output.canPush()) - { - all_outputs_processed = false; - continue; - } - - output.push(std::move(chunk)); - was_processed = true; - break; } - - if (all_outputs_processed) - { - has_data = false; - return Status::Ready; - } - return Status::PortFull; } + +IProcessor::Status PlanSquashingTransform::prepareSendFlush() +{ + if (!chunk) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should be available in prepareSendFlush"); + + for (auto &output : outputs) + { + + if (output.canPush()) + { + planning_status = PlanningStatus::FINISH; + output.push(std::move(chunk)); + return Status::Ready; + } + } + return Status::PortFull; +} + +IProcessor::Status PlanSquashingTransform::finish() +{ + for (auto & in : inputs) + in.close(); + for (auto & output : outputs) + output.finish(); + + return Status::Finished; +} } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index c30569fffb5..a500787ad0c 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -4,9 +4,23 @@ #include #include +enum PlanningStatus +{ + INIT, + READ_IF_CAN, + WAIT_IN, + PUSH, + WAIT_OUT_AND_PUSH, + WAIT_OUT_FLUSH, + FINISH +}; + namespace DB { - +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} class PlanSquashingTransform : public IProcessor { @@ -20,19 +34,23 @@ 
public: OutputPorts & getOutputPorts() { return outputs; } Status prepare() override; + Status init(); Status prepareConsume(); Status prepareSend(); + Status prepareSendFlush(); + Status waitForDataIn(); + Status finish(); + bool checkInputs(); void transform(Chunk & chunk); protected: private: - size_t CalculateBlockSize(const Block & block); Chunk chunk; PlanSquashing balance; - bool has_data = false; - std::vector was_output_processed; + PlanningStatus planning_status = PlanningStatus::INIT; + size_t available_inputs = 0; /// When consumption is finished we need to release the final chunk regardless of its size. bool finished = false; From 00f9355ede76b5cf5a207d5043201b0d6473f64e Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 13 May 2024 16:57:46 +0000 Subject: [PATCH 029/139] fix style + PUSH removal --- .../Transforms/PlanSquashingTransform.cpp | 22 ++++++++----------- .../Transforms/PlanSquashingTransform.h | 5 ----- 2 files changed, 9 insertions(+), 18 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 89b59354722..5125c28fb06 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { @@ -38,11 +43,6 @@ IProcessor::Status PlanSquashingTransform::prepare() status = prepareSend(); break; } - case PlanningStatus::PUSH: - { - status = prepareSend(); - break; - } case PlanningStatus::WAIT_OUT_FLUSH: { status = prepareSendFlush(); @@ -121,10 +121,9 @@ bool PlanSquashingTransform::checkInputs() bool all_finished = true; for (auto & output : outputs) - { if (!output.isFinished()) all_finished = false; - } + if (all_finished) /// If all outputs are closed, we close inputs (just in case) { planning_status = PlanningStatus::FINISH; @@ -133,11 +132,8 @@ bool PlanSquashingTransform::checkInputs() all_finished = true; for (auto & input : inputs) - { - if (!input.isFinished()) all_finished = false; - } if (all_finished) /// If all inputs are closed, we check if we have data in balancing { @@ -166,7 +162,7 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() if (!input.hasData()) continue; - + available_inputs++; } if (all_finished) @@ -174,13 +170,13 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() checkInputs(); return Status::Ready; } - + if (available_inputs > 0) { planning_status = PlanningStatus::READ_IF_CAN; return Status::Ready; } - + return Status::NeedData; } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index a500787ad0c..39f3a70a4a2 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -9,7 +9,6 @@ enum PlanningStatus INIT, READ_IF_CAN, WAIT_IN, - PUSH, WAIT_OUT_AND_PUSH, WAIT_OUT_FLUSH, FINISH @@ -17,10 +16,6 @@ enum PlanningStatus namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} class PlanSquashingTransform : public IProcessor { From 38e71274d58be6356e03d76189076ba5dc7a556a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 14 May 2024 13:58:14 +0000 Subject: [PATCH 
030/139] improve automata state transitions --- .../Transforms/PlanSquashingTransform.cpp | 46 +++++++++++-------- .../Transforms/PlanSquashingTransform.h | 1 + 2 files changed, 29 insertions(+), 18 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 5125c28fb06..5600c30b1ba 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -19,7 +19,7 @@ IProcessor::Status PlanSquashingTransform::prepare() { Status status = Status::Ready; - while (status == Status::Ready) + while (planning_status != PlanningStatus::FINISH) { switch (planning_status) { @@ -119,18 +119,6 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() bool PlanSquashingTransform::checkInputs() { bool all_finished = true; - - for (auto & output : outputs) - if (!output.isFinished()) - all_finished = false; - - if (all_finished) /// If all outputs are closed, we close inputs (just in case) - { - planning_status = PlanningStatus::FINISH; - return true; - } - - all_finished = true; for (auto & input : inputs) if (!input.isFinished()) all_finished = false; @@ -140,11 +128,27 @@ bool PlanSquashingTransform::checkInputs() if (balance.isDataLeft()) /// If we have data in balancing, we process this data { planning_status = PlanningStatus::WAIT_OUT_FLUSH; - finished = false; + finished = true; transform(chunk); } - else /// If we don't have data, We send FINISHED - planning_status = PlanningStatus::FINISH; + // else /// If we don't have data, We send FINISHED + // planning_status = PlanningStatus::FINISH; + return true; + } + return false; +} + +bool PlanSquashingTransform::checkOutputs() +{ + bool all_finished = true; + + for (auto & output : outputs) + if (!output.isFinished()) + all_finished = false; + + if (all_finished) /// If all outputs are closed, we close inputs (just in case) + { + planning_status = PlanningStatus::FINISH; return true; } return false; @@ -197,7 +201,10 @@ void PlanSquashingTransform::transform(Chunk & chunk_) IProcessor::Status PlanSquashingTransform::prepareSend() { if (!chunk) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should be available in prepareSend"); + { + planning_status = PlanningStatus::FINISH; + return Status::Ready; + } for (auto &output : outputs) { @@ -215,7 +222,10 @@ IProcessor::Status PlanSquashingTransform::prepareSend() IProcessor::Status PlanSquashingTransform::prepareSendFlush() { if (!chunk) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk should be available in prepareSendFlush"); + { + planning_status = PlanningStatus::FINISH; + return Status::Ready; + } for (auto &output : outputs) { diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 39f3a70a4a2..57c77274863 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -37,6 +37,7 @@ public: Status finish(); bool checkInputs(); + bool checkOutputs(); void transform(Chunk & chunk); protected: From a8a2aa21b289bc6467f45f92ba3b7b76ebd172cc Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 14 May 2024 16:03:05 +0000 Subject: [PATCH 031/139] change automata algorithm --- .../Transforms/PlanSquashingTransform.cpp | 61 +++++++++++-------- .../Transforms/PlanSquashingTransform.h | 7 ++- 2 files changed, 43 insertions(+), 25 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp 
b/src/Processors/Transforms/PlanSquashingTransform.cpp index 5600c30b1ba..95d3b454a4c 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -23,42 +23,41 @@ IProcessor::Status PlanSquashingTransform::prepare() { switch (planning_status) { - case PlanningStatus::INIT: + case INIT: { status = init(); break; } - case PlanningStatus::READ_IF_CAN: + case READ_IF_CAN: { status = prepareConsume(); break; } - case PlanningStatus::WAIT_IN: + case PUSH: { - status = waitForDataIn(); - break; - } - case PlanningStatus::WAIT_OUT_AND_PUSH: - { - status = prepareSend(); - break; - } - case PlanningStatus::WAIT_OUT_FLUSH: - { - status = prepareSendFlush(); + status = push(); break; } + case WAIT_IN: + return waitForDataIn(); + case WAIT_OUT: + return prepareSend(); + case WAIT_OUT_FLUSH: + return prepareSendFlush(); case FINISH: - { - status = finish(); - break; - } + break; /// never reached } } + status = finish(); return status; } +void PlanSquashingTransform::work() +{ + prepare(); +} + IProcessor::Status PlanSquashingTransform::init() { for (auto input : inputs) @@ -93,7 +92,7 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() available_inputs--; if (chunk.hasChunkInfo()) { - planning_status = PlanningStatus::WAIT_OUT_AND_PUSH; + planning_status = PlanningStatus::WAIT_OUT; return Status::Ready; } } @@ -198,6 +197,21 @@ void PlanSquashingTransform::transform(Chunk & chunk_) } } +IProcessor::Status PlanSquashingTransform::push() +{ + if (!free_output) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There should be a free output in push()"); + + if (finished) + planning_status = PlanningStatus::FINISH; + else + planning_status = PlanningStatus::READ_IF_CAN; + + free_output->push(std::move(chunk)); + free_output = nullptr; + return Status::Ready; +} + IProcessor::Status PlanSquashingTransform::prepareSend() { if (!chunk) @@ -208,11 +222,10 @@ IProcessor::Status PlanSquashingTransform::prepareSend() for (auto &output : outputs) { - if (output.canPush()) { - planning_status = PlanningStatus::READ_IF_CAN; - output.push(std::move(chunk)); + planning_status = PlanningStatus::PUSH; + free_output = &output; return Status::Ready; } } @@ -232,8 +245,8 @@ IProcessor::Status PlanSquashingTransform::prepareSendFlush() if (output.canPush()) { - planning_status = PlanningStatus::FINISH; - output.push(std::move(chunk)); + planning_status = PlanningStatus::PUSH; + free_output = &output; return Status::Ready; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 57c77274863..55685b0c532 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -3,13 +3,15 @@ #include #include #include +#include "Processors/Port.h" enum PlanningStatus { INIT, READ_IF_CAN, WAIT_IN, - WAIT_OUT_AND_PUSH, + WAIT_OUT, + PUSH, WAIT_OUT_FLUSH, FINISH }; @@ -29,9 +31,11 @@ public: OutputPorts & getOutputPorts() { return outputs; } Status prepare() override; + void work() override; Status init(); Status prepareConsume(); Status prepareSend(); + Status push(); Status prepareSendFlush(); Status waitForDataIn(); Status finish(); @@ -47,6 +51,7 @@ private: PlanSquashing balance; PlanningStatus planning_status = PlanningStatus::INIT; size_t available_inputs = 0; + OutputPort* free_output = nullptr; /// When consumption is finished we need to release the final chunk regardless of its size. 
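/// (Annotation: in this revision the final chunk travels the WAIT_OUT_FLUSH
/// path: once every input is closed, checkInputs() sees balance.isDataLeft()
/// and schedules one last planned chunk before reaching FINISH.)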
bool finished = false; From 0619b0921f195951b8e72c02dcc0ad06094811b2 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 15 May 2024 15:56:24 +0000 Subject: [PATCH 032/139] removed memory from automata, refactored the code --- src/Interpreters/Squashing.cpp | 10 +- src/Interpreters/Squashing.h | 1 + .../Transforms/PlanSquashingTransform.cpp | 166 ++++-------------- .../Transforms/PlanSquashingTransform.h | 19 +- 4 files changed, 49 insertions(+), 147 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 78d1b9fc643..2d87b47798c 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -198,6 +198,11 @@ PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t { } +Chunk PlanSquashing::flush() +{ + return convertToChunk(chunks_to_merge_vec); +} + Chunk PlanSquashing::add(Chunk && input_chunk) { return addImpl(std::move(input_chunk)); @@ -206,10 +211,7 @@ Chunk PlanSquashing::add(Chunk && input_chunk) Chunk PlanSquashing::addImpl(Chunk && input_chunk) { if (!input_chunk) - { - Chunk res_chunk = convertToChunk(chunks_to_merge_vec); - return res_chunk; - } + return {}; if (isEnoughSize(chunks_to_merge_vec)) chunks_to_merge_vec.clear(); diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 82d7fe616f6..0bb6acf9372 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -85,6 +85,7 @@ public: PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Chunk add(Chunk && input_chunk); + Chunk flush(); bool isDataLeft() { return !chunks_to_merge_vec.empty(); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 95d3b454a4c..9c42b846a7b 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,5 +1,6 @@ #include #include +#include "Common/logger_useful.h" #include namespace DB @@ -24,26 +25,18 @@ IProcessor::Status PlanSquashingTransform::prepare() switch (planning_status) { case INIT: - { - status = init(); + init(); break; - } case READ_IF_CAN: - { status = prepareConsume(); break; - } - case PUSH: - { - status = push(); - break; - } case WAIT_IN: - return waitForDataIn(); - case WAIT_OUT: - return prepareSend(); - case WAIT_OUT_FLUSH: - return prepareSendFlush(); + planning_status = PlanningStatus::READ_IF_CAN; + return Status::NeedData; + case PUSH: + return sendOrFlush(); + case FLUSH: + return sendOrFlush(); case FINISH: break; /// never reached } @@ -58,104 +51,58 @@ void PlanSquashingTransform::work() prepare(); } -IProcessor::Status PlanSquashingTransform::init() +void PlanSquashingTransform::init() { - for (auto input : inputs) - { - input.setNeeded(); - if (input.hasData()) - available_inputs++; - } + for (auto input: inputs) + if (!input.isFinished()) + input.setNeeded(); planning_status = PlanningStatus::READ_IF_CAN; - return Status::Ready; } IProcessor::Status PlanSquashingTransform::prepareConsume() { - if (available_inputs == 0) - { - planning_status = PlanningStatus::WAIT_IN; - return Status::NeedData; - } - finished = false; - - bool inputs_have_no_data = true; + bool inputs_have_no_data = true, all_finished = true; for (auto & input : inputs) { + if (!input.isFinished()) + all_finished = false; + if (input.hasData()) { inputs_have_no_data = false; chunk = input.pull(); transform(chunk); - available_inputs--; if (chunk.hasChunkInfo()) { - planning_status = 
PlanningStatus::WAIT_OUT; + planning_status = PlanningStatus::PUSH; return Status::Ready; } } - - if (available_inputs == 0) - { - planning_status = PlanningStatus::WAIT_IN; - return Status::NeedData; - } } - if (inputs_have_no_data) - { - if (checkInputs()) - return Status::Ready; - - planning_status = PlanningStatus::WAIT_IN; - return Status::NeedData; - } - return Status::Ready; -} - -bool PlanSquashingTransform::checkInputs() -{ - bool all_finished = true; - for (auto & input : inputs) - if (!input.isFinished()) - all_finished = false; - if (all_finished) /// If all inputs are closed, we check if we have data in balancing { if (balance.isDataLeft()) /// If we have data in balancing, we process this data { - planning_status = PlanningStatus::WAIT_OUT_FLUSH; - finished = true; - transform(chunk); + planning_status = PlanningStatus::FLUSH; + flushChunk(); } - // else /// If we don't have data, We send FINISHED - // planning_status = PlanningStatus::FINISH; - return true; + planning_status = PlanningStatus::PUSH; + return Status::Ready; } - return false; -} -bool PlanSquashingTransform::checkOutputs() -{ - bool all_finished = true; + if (inputs_have_no_data) + planning_status = PlanningStatus::WAIT_IN; - for (auto & output : outputs) - if (!output.isFinished()) - all_finished = false; - - if (all_finished) /// If all outputs are closed, we close inputs (just in case) - { - planning_status = PlanningStatus::FINISH; - return true; - } - return false; + return Status::Ready; } IProcessor::Status PlanSquashingTransform::waitForDataIn() { bool all_finished = true; + bool inputs_have_no_data = true; for (auto & input : inputs) { if (input.isFinished()) @@ -163,18 +110,17 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() all_finished = false; - if (!input.hasData()) - continue; + if (input.hasData()) + inputs_have_no_data = false; - available_inputs++; } if (all_finished) { - checkInputs(); + planning_status = PlanningStatus::READ_IF_CAN; return Status::Ready; } - if (available_inputs > 0) + if (!inputs_have_no_data) { planning_status = PlanningStatus::READ_IF_CAN; return Status::Ready; @@ -185,34 +131,17 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() void PlanSquashingTransform::transform(Chunk & chunk_) { - if (!finished) - { - Chunk res_chunk = balance.add(std::move(chunk_)); - std::swap(res_chunk, chunk_); - } - else - { - Chunk res_chunk = balance.add({}); - std::swap(res_chunk, chunk_); - } + Chunk res_chunk = balance.add(std::move(chunk_)); + std::swap(res_chunk, chunk_); } -IProcessor::Status PlanSquashingTransform::push() +void PlanSquashingTransform::flushChunk() { - if (!free_output) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There should be a free output in push()"); - - if (finished) - planning_status = PlanningStatus::FINISH; - else - planning_status = PlanningStatus::READ_IF_CAN; - - free_output->push(std::move(chunk)); - free_output = nullptr; - return Status::Ready; + Chunk res_chunk = balance.flush(); + std::swap(res_chunk, chunk); } -IProcessor::Status PlanSquashingTransform::prepareSend() +IProcessor::Status PlanSquashingTransform::sendOrFlush() { if (!chunk) { @@ -224,29 +153,10 @@ IProcessor::Status PlanSquashingTransform::prepareSend() { if (output.canPush()) { - planning_status = PlanningStatus::PUSH; - free_output = &output; - return Status::Ready; - } - } - return Status::PortFull; -} + if (planning_status == PlanningStatus::PUSH) + planning_status = PlanningStatus::READ_IF_CAN; -IProcessor::Status PlanSquashingTransform::prepareSendFlush() 
-{ - if (!chunk) - { - planning_status = PlanningStatus::FINISH; - return Status::Ready; - } - - for (auto &output : outputs) - { - - if (output.canPush()) - { - planning_status = PlanningStatus::PUSH; - free_output = &output; + output.push(std::move(chunk)); return Status::Ready; } } diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 55685b0c532..dc5b6d669b1 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -10,9 +10,8 @@ enum PlanningStatus INIT, READ_IF_CAN, WAIT_IN, - WAIT_OUT, PUSH, - WAIT_OUT_FLUSH, + FLUSH, FINISH }; @@ -32,29 +31,19 @@ public: Status prepare() override; void work() override; - Status init(); + void init(); Status prepareConsume(); - Status prepareSend(); - Status push(); - Status prepareSendFlush(); + Status sendOrFlush(); Status waitForDataIn(); Status finish(); - bool checkInputs(); - bool checkOutputs(); void transform(Chunk & chunk); - -protected: + void flushChunk(); private: Chunk chunk; PlanSquashing balance; PlanningStatus planning_status = PlanningStatus::INIT; - size_t available_inputs = 0; - OutputPort* free_output = nullptr; - - /// When consumption is finished we need to release the final chunk regardless of its size. - bool finished = false; }; } From 04dd58430a75871d742ba8b424023307108eac10 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Wed, 15 May 2024 18:09:33 +0200 Subject: [PATCH 033/139] style fix --- src/Processors/Transforms/PlanSquashingTransform.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 9c42b846a7b..f4106204462 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -6,11 +6,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int LOGICAL_ERROR; -} - PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { From fed6c65858f26e31ad8f3d63a2cb6e9a0b404ff7 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 16 May 2024 17:57:01 +0000 Subject: [PATCH 034/139] add setting to enable planning --- src/Core/Settings.h | 1 + src/Interpreters/InterpreterInsertQuery.cpp | 72 +++++++++++++------ .../Transforms/buildPushingToViewsChain.cpp | 28 +++++--- src/Server/TCPHandler.cpp | 67 +++++++++++++++++ src/Storages/ProjectionsDescription.cpp | 11 ++- 5 files changed, 145 insertions(+), 34 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4a0de354a03..393d8202d05 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -668,6 +668,7 @@ class IColumn; M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true nondeterministic function are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ + M(Bool, allow_insert_threads_reduction_optimizaion, 
false, "If true it allows to apply additional single-insert-transformer for insertion of data", 0) \ \ M(Float, create_replicated_merge_tree_fault_injection_probability, 0.0f, "The probability of a fault injection during table creation after creating metadata in ZooKeeper", 0) \ \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 55f84080b13..f0340bd1f48 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -620,19 +620,32 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - pipeline.addTransform(std::make_shared( - header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size())); - - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + if (settings.allow_insert_threads_reduction_optimizaion) { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + presink_chains.size())); + + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } + else + { + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr + { + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + }); + } } size_t num_select_threads = pipeline.getNumThreads(); @@ -684,20 +697,33 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - - auto balancing = std::make_shared( + if (settings.allow_insert_threads_reduction_optimizaion) + { + auto squashing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size()); + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - chain.addSource(std::move(balancing)); + chain.addSource(std::move(squashing)); + + auto balancing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + presink_chains.size()); + + chain.addSource(std::move(balancing)); + } + else + { + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? 
settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + + chain.addSource(std::move(squashing)); + } + } auto context_ptr = getContext(); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index e7c831c3e0e..1d0098f0cd9 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -372,16 +372,26 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + if (settings.allow_insert_threads_reduction_optimizaion) + { + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - 1)); // Chain requires a single input + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + 1)); // Chain requires a single input + } + else + { + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + } } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index be3c1384501..56c97d0305e 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ #include #include #include +#include #include #include @@ -181,6 +183,7 @@ void validateClientInfo(const ClientInfo & session_client_info, const ClientInfo namespace DB { +using Which = Field::Types::Which; TCPHandler::TCPHandler( IServer & server_, @@ -1602,6 +1605,70 @@ void TCPHandler::sendHello() nonce.emplace(thread_local_rng()); writeIntBinary(nonce.value(), *out); } + + /// If client is Clickhouse-client we will send server profile settings of this user + if (client_name == (std::string(VERSION_NAME) + " client")) + { + const auto & user = session->sessionContext()->getUser(); + String query = fmt::format( + R"(SELECT setting_name, value FROM system.settings_profile_elements WHERE user_name = '{0}')", + escapeString(user->getName())); + const auto & res_const = executeQuery(query,server.context(), QueryFlags{ .internal = true }).second; + auto & res = const_cast(res_const); + PullingPipelineExecutor pulling_executor(res.pipeline); + Block block; + pulling_executor.pull(block); + /// filter data + std::map server_settings; + for (size_t row = 0; row < block.rows(); ++row) + { + size_t col_index = 0; + String name; + Field value_field; + for (const auto & name_value: block) + { + Field field; + name_value.column->get(row, field); + if (!field.isNull()) + { + if (col_index == 0) + name = field.safeGet(); + else + value_field = field; + } + else + continue; + + ++col_index; + } + if (!name.empty()) + server_settings[name] = value_field; + + } + + writeVarUInt(server_settings.size(), *out); + if (!server_settings.empty()) + { + for (const auto & setting : server_settings) + { + writeStringBinary(setting.first, *out); + writeVarUInt(setting.second.getType(), *out); + switch (setting.second.getType()) + { + case Which::UInt64: + writeVarUInt(setting.second.safeGet(), *out);break; + case Which::String: + writeStringBinary(setting.second.safeGet(), *out);break; + case Which::Bool: + writeVarUInt(setting.second.get(), *out);break; + default: + break; + } + + } + } + } + out->next(); } diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index b31cc1e94f1..8d28d68dc39 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -311,8 +311,15 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
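// (Annotation: passing block.rows() as min_block_size_rows and 0 as
// min_block_size_bytes makes the squash threshold exactly the input's row
// count, so isEnoughSize() only fires once every row has been accumulated;
// that is what guarantees the single output block promised above.)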
- builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + if (mut_context->getSettings().allow_insert_threads_reduction_optimizaion) + { + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + } + else + { + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + } auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); From 1f7198b3d3576de29485cd7b96bbc9bf97d181bb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 May 2024 12:15:58 +0000 Subject: [PATCH 035/139] style fix + resize optimization for merging columns --- src/Interpreters/InterpreterInsertQuery.cpp | 4 +- src/Interpreters/Squashing.cpp | 63 ++++++++++++--------- src/Interpreters/Squashing.h | 7 +-- 3 files changed, 41 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f0340bd1f48..3a6329997bd 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -711,7 +711,7 @@ BlockIO InterpreterInsertQuery::execute() table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, presink_chains.size()); - + chain.addSource(std::move(balancing)); } else @@ -723,7 +723,7 @@ BlockIO InterpreterInsertQuery::execute() chain.addSource(std::move(squashing)); } - + } auto context_ptr = getContext(); diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 2d87b47798c..526af3db2e4 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,5 +1,8 @@ +#include #include #include +#include "Core/Block.h" +#include "Core/ColumnsWithTypeAndName.h" namespace DB @@ -128,10 +131,8 @@ bool Squashing::isEnoughSize(size_t rows, size_t bytes) const || (min_block_size_bytes && bytes >= min_block_size_bytes); } -ApplySquashing::ApplySquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) - , header(std::move(header_)) +ApplySquashing::ApplySquashing(Block header_) + : header(std::move(header_)) { } @@ -146,37 +147,47 @@ Block ApplySquashing::addImpl(Chunk && input_chunk) return Block(); const auto *info = getInfoFromChunk(input_chunk); - for (auto & chunk : info->chunks) - append(chunk.clone()); + append(info->chunks); - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } + Block to_return; + std::swap(to_return, accumulated_block); + return to_return; } -void ApplySquashing::append(Chunk && input_chunk) +void ApplySquashing::append(const std::vector & input_chunks) { - if (input_chunk.getNumColumns() == 0) - return; - if (!accumulated_block) + std::vector mutable_columns; + size_t rows = 0; + for (const Chunk & chunk : input_chunks) + rows += chunk.getNumRows(); + + // add here resize of mutable_column + for (const auto & input_chunk : input_chunks) { - for (size_t i = 0; i < input_chunk.getNumColumns(); ++ i) + if (!accumulated_block) { - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); - 
accumulated_block.insert(accumulated_block.columns(), col); + for (size_t i = 0; i < input_chunks[0].getNumColumns(); ++ i) + { + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunks[0].getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); + mutable_columns.push_back(IColumn::mutate(col.column)); + accumulated_block.insert(col); + } } - return; - } - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_chunk.getColumns()[i]; + if (input_chunk.getNumColumns() == 0) + continue; - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); + for (auto & column : mutable_columns) + column->reserve(rows); + + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_chunk.getColumns()[i]; + + mutable_columns[i] = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_columns[i]->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = mutable_columns[i]->cloneFinalized(); + } } } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 0bb6acf9372..a68b581d40a 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -58,14 +58,11 @@ private: class ApplySquashing { public: - ApplySquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit ApplySquashing(Block header_); Block add(Chunk && input_chunk); private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - Block accumulated_block; const Block header; @@ -73,7 +70,7 @@ private: const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - void append(Chunk && input_chunk); + void append(const std::vector & input_chunk); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; From 8d235a4a399b3489ff3a8672134c8905511562a3 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 May 2024 15:11:21 +0000 Subject: [PATCH 036/139] remove trash from the code --- src/Server/TCPHandler.cpp | 65 --------------------------------------- 1 file changed, 65 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 56c97d0305e..3660b4c1187 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -43,7 +43,6 @@ #include #include #include -#include #include #include @@ -183,7 +182,6 @@ void validateClientInfo(const ClientInfo & session_client_info, const ClientInfo namespace DB { -using Which = Field::Types::Which; TCPHandler::TCPHandler( IServer & server_, @@ -1606,69 +1604,6 @@ void TCPHandler::sendHello() writeIntBinary(nonce.value(), *out); } - /// If client is Clickhouse-client we will send server profile settings of this user - if (client_name == (std::string(VERSION_NAME) + " client")) - { - const auto & user = session->sessionContext()->getUser(); - String query = fmt::format( - R"(SELECT setting_name, value FROM system.settings_profile_elements WHERE user_name = '{0}')", - escapeString(user->getName())); - const auto & res_const = executeQuery(query,server.context(), QueryFlags{ .internal = true }).second; - auto & res = const_cast(res_const); - PullingPipelineExecutor pulling_executor(res.pipeline); - Block block; - pulling_executor.pull(block); - /// filter data - std::map server_settings; - for 
(size_t row = 0; row < block.rows(); ++row) - { - size_t col_index = 0; - String name; - Field value_field; - for (const auto & name_value: block) - { - Field field; - name_value.column->get(row, field); - if (!field.isNull()) - { - if (col_index == 0) - name = field.safeGet(); - else - value_field = field; - } - else - continue; - - ++col_index; - } - if (!name.empty()) - server_settings[name] = value_field; - - } - - writeVarUInt(server_settings.size(), *out); - if (!server_settings.empty()) - { - for (const auto & setting : server_settings) - { - writeStringBinary(setting.first, *out); - writeVarUInt(setting.second.getType(), *out); - switch (setting.second.getType()) - { - case Which::UInt64: - writeVarUInt(setting.second.safeGet(), *out);break; - case Which::String: - writeStringBinary(setting.second.safeGet(), *out);break; - case Which::Bool: - writeVarUInt(setting.second.get(), *out);break; - default: - break; - } - - } - } - } - out->next(); } From 6b835522b3ab12ffc6b210b61c11bb49f4fab918 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 May 2024 15:22:32 +0000 Subject: [PATCH 037/139] fix build --- src/Processors/Transforms/ApplySquashingTransform.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 584fb72cccb..abb3a0aad41 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -9,10 +9,9 @@ namespace DB class ApplySquashingTransform : public ExceptionKeepingTransform { public: - ApplySquashingTransform( - const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) + explicit ApplySquashingTransform(const Block & header) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(header) { } From ef9bfbd85a0fa1dbd387f5fa3869be8d2614bb70 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 May 2024 15:58:40 +0000 Subject: [PATCH 038/139] fix build --- src/Interpreters/InterpreterInsertQuery.cpp | 10 ++-------- src/Processors/Transforms/buildPushingToViewsChain.cpp | 10 ++-------- src/Storages/ProjectionsDescription.cpp | 2 +- 3 files changed, 5 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 3a6329997bd..47a0567dfec 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -630,10 +630,7 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); + return std::make_shared(in_header); }); } else @@ -699,10 +696,7 @@ BlockIO InterpreterInsertQuery::execute() if (settings.allow_insert_threads_reduction_optimizaion) { - auto squashing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL); + auto squashing = std::make_shared(chain.getInputHeader()); chain.addSource(std::move(squashing)); diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 1d0098f0cd9..4e703828554 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -374,10 +374,7 @@ std::optional generateViewChain( if (settings.allow_insert_threads_reduction_optimizaion) { - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared(out.getInputHeader())); out.addSource(std::make_shared( out.getInputHeader(), @@ -643,10 +640,7 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat context->getSettingsRef().min_insert_block_size_rows, context->getSettingsRef().min_insert_block_size_bytes, pipeline.getNumStreams())); - pipeline.addTransform(std::make_shared( - pipeline.getHeader(), - context->getSettingsRef().min_insert_block_size_rows, - context->getSettingsRef().min_insert_block_size_bytes)); + pipeline.addTransform(std::make_shared(pipeline.getHeader())); auto converting = ActionsDAG::makeConvertingActions( pipeline.getHeader().getColumnsWithTypeAndName(), diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 8d28d68dc39..45add4332ff 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -314,7 +314,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) if (mut_context->getSettings().allow_insert_threads_reduction_optimizaion) { builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader())); } else { From 40a78df96a44552c2548e67aef93601a14afec57 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 17 May 2024 17:10:48 +0000 Subject: [PATCH 039/139] small improvements --- src/Interpreters/Squashing.cpp | 46 +++++++------------ src/Interpreters/Squashing.h | 2 +- .../Transforms/PlanSquashingTransform.cpp | 1 - src/Storages/ProjectionsDescription.cpp | 2 +- 4 files changed, 19 insertions(+), 32 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 526af3db2e4..8f0964403b1 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -147,47 +147,35 @@ Block ApplySquashing::addImpl(Chunk && input_chunk) return Block(); const auto *info = getInfoFromChunk(input_chunk); - append(info->chunks); + for (auto & chunk : info->chunks) + append(chunk); Block to_return; std::swap(to_return, accumulated_block); return to_return; } -void ApplySquashing::append(const std::vector & input_chunks) +void ApplySquashing::append(Chunk & input_chunk) { - std::vector mutable_columns; - size_t rows = 0; - for (const Chunk & chunk : input_chunks) - rows += chunk.getNumRows(); - - // add here resize of mutable_column - for (const auto & input_chunk : input_chunks) + if (input_chunk.getNumColumns() == 0) + return; + if (!accumulated_block) { - if (!accumulated_block) + for (size_t i = 0; i < input_chunk.getNumColumns(); ++ i) { - for (size_t i = 0; i < input_chunks[0].getNumColumns(); 
++ i) - { - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunks[0].getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); - mutable_columns.push_back(IColumn::mutate(col.column)); - accumulated_block.insert(col); - } + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); + accumulated_block.insert(accumulated_block.columns(), col); } + return; + } - if (input_chunk.getNumColumns() == 0) - continue; + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_chunk.getColumns()[i]; - for (auto & column : mutable_columns) - column->reserve(rows); - - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_chunk.getColumns()[i]; - - mutable_columns[i] = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_columns[i]->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = mutable_columns[i]->cloneFinalized(); - } + auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); + mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = std::move(mutable_column); } } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index a68b581d40a..8273ae8cc8e 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -70,7 +70,7 @@ private: const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - void append(const std::vector & input_chunk); + void append(Chunk & input_chunk); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index f4106204462..51781b03853 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -1,6 +1,5 @@ #include #include -#include "Common/logger_useful.h" #include namespace DB diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 45add4332ff..070245ba1b4 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -318,7 +318,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) } else { - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); } auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); From 330bac788e64fd0a50f7f55d8a52231d640abb49 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 21 May 2024 12:05:22 +0000 Subject: [PATCH 040/139] remove setting --- src/Core/Settings.h | 1 - src/Interpreters/InterpreterInsertQuery.cpp | 54 +++++-------------- src/Interpreters/Squashing.cpp | 2 - .../Transforms/buildPushingToViewsChain.cpp | 29 +++------- src/Storages/ProjectionsDescription.cpp | 12 ++--- 5 files changed, 25 insertions(+), 73 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 393d8202d05..4a0de354a03 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -668,7 +668,6 @@ class IColumn; M(Bool, mutations_execute_nondeterministic_on_initiator, false, "If true nondeterministic function are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) 
\ M(Bool, mutations_execute_subqueries_on_initiator, false, "If true scalar subqueries are executed on initiator and replaced to literals in UPDATE and DELETE queries", 0) \ M(UInt64, mutations_max_literal_size_to_replace, 16384, "The maximum size of serialized literal in bytes to replace in UPDATE and DELETE queries", 0) \ - M(Bool, allow_insert_threads_reduction_optimizaion, false, "If true it allows to apply additional single-insert-transformer for insertion of data", 0) \ \ M(Float, create_replicated_merge_tree_fault_injection_probability, 0.0f, "The probability of a fault injection during table creation after creating metadata in ZooKeeper", 0) \ \ diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 47a0567dfec..c7729f3985b 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -620,29 +620,16 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - if (settings.allow_insert_threads_reduction_optimizaion) - { - pipeline.addTransform(std::make_shared( - header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size())); + pipeline.addTransform(std::make_shared( + header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + presink_chains.size())); - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared(in_header); - }); - } - else + pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr - { - return std::make_shared( - in_header, - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - }); - } + return std::make_shared(in_header); + }); } size_t num_select_threads = pipeline.getNumThreads(); @@ -694,30 +681,17 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - if (settings.allow_insert_threads_reduction_optimizaion) - { - auto squashing = std::make_shared(chain.getInputHeader()); + auto squashing = std::make_shared(chain.getInputHeader()); - chain.addSource(std::move(squashing)); + chain.addSource(std::move(squashing)); - auto balancing = std::make_shared( - chain.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - presink_chains.size()); - - chain.addSource(std::move(balancing)); - } - else - { - auto squashing = std::make_shared( + auto balancing = std::make_shared( chain.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); - - chain.addSource(std::move(squashing)); - } + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL, + presink_chains.size()); + chain.addSource(std::move(balancing)); } auto context_ptr = getContext(); diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 8f0964403b1..9e398febdca 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,8 +1,6 @@ #include #include #include -#include "Core/Block.h" -#include "Core/ColumnsWithTypeAndName.h" namespace DB diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 4e703828554..951f40dadb9 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,29 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) - { - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - if (settings.allow_insert_threads_reduction_optimizaion) - { - out.addSource(std::make_shared(out.getInputHeader())); + out.addSource(std::make_shared(out.getInputHeader())); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - 1)); // Chain requires a single input - } - else - { - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - } - } + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, + 1)); // Chain requires a single input auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index 070245ba1b4..c88582a8a1a 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -311,15 +311,9 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) builder.resize(1); // Generate aggregated blocks with rows less or equal than the original block. // There should be only one output block after this transformation. 
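
Note: the hunk below drops the settings-gated branch and always installs the two-stage pair. The two processors only make sense together, plan stage first; a minimal sketch of the resulting wiring, with the transform types assumed from the file names used elsewhere in this series:

    // Plan stage: buffer whole chunks until block.rows() rows accumulate
    // (byte threshold 0 = disabled); one input/output port in this context.
    builder.addTransform(std::make_shared<PlanSquashingTransform>(builder.getHeader(), block.rows(), 0, 1));
    // Apply stage: physically merge the chunks the planner selected.
    builder.addTransform(std::make_shared<ApplySquashingTransform>(builder.getHeader()));
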
- if (mut_context->getSettings().allow_insert_threads_reduction_optimizaion) - { - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); - builder.addTransform(std::make_shared(builder.getHeader())); - } - else - { - builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); - } + + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); + builder.addTransform(std::make_shared(builder.getHeader())); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); From 319542f85bc5c36bbc7c810c0883b721956250be Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 22 May 2024 19:07:14 +0000 Subject: [PATCH 041/139] fix planning algorithm + switch matView to old squashing --- src/Interpreters/Squashing.cpp | 42 ++++++++++++++++--- src/Interpreters/Squashing.h | 2 +- .../Transforms/buildPushingToViewsChain.cpp | 13 ++---- 3 files changed, 41 insertions(+), 16 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 9e398febdca..7ebe4a930c9 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB @@ -197,7 +198,7 @@ PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t Chunk PlanSquashing::flush() { - return convertToChunk(chunks_to_merge_vec); + return convertToChunk(std::move(chunks_to_merge_vec)); } Chunk PlanSquashing::add(Chunk && input_chunk) @@ -210,21 +211,49 @@ Chunk PlanSquashing::addImpl(Chunk && input_chunk) if (!input_chunk) return {}; - if (isEnoughSize(chunks_to_merge_vec)) + /// Just read block is already enough. + if (isEnoughSize(input_chunk.getNumRows(), input_chunk.bytes())) + { + /// If no accumulated data, return just read block. + if (chunks_to_merge_vec.empty()) + { + chunks_to_merge_vec.push_back(std::move(input_chunk)); + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + return res_chunk; + } + + /// Return accumulated data (maybe it has small size) and place new block to accumulated data. + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); chunks_to_merge_vec.clear(); - - if (input_chunk) chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + /// Accumulated block is already enough. if (isEnoughSize(chunks_to_merge_vec)) { - Chunk res_chunk = convertToChunk(chunks_to_merge_vec); + /// Return accumulated data and place new block to accumulated data. 
+ Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); + chunks_to_merge_vec.push_back(std::move(input_chunk)); + return res_chunk; + } + + /// Pushing data into accumulating vector + chunks_to_merge_vec.push_back(std::move(input_chunk)); + + /// If accumulated data is big enough, we send it + if (isEnoughSize(chunks_to_merge_vec)) + { + Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + chunks_to_merge_vec.clear(); return res_chunk; } return {}; } -Chunk PlanSquashing::convertToChunk(std::vector &chunks) +Chunk PlanSquashing::convertToChunk(std::vector && chunks) { if (chunks.empty()) return {}; @@ -254,6 +283,7 @@ bool PlanSquashing::isEnoughSize(const std::vector & chunks) bool PlanSquashing::isEnoughSize(size_t rows, size_t bytes) const { + LOG_TRACE(getLogger("Planning"), "rows: {}, bytes: {}", rows, bytes); return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 8273ae8cc8e..0e9f001762f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -100,7 +100,7 @@ private: bool isEnoughSize(const std::vector & chunks); bool isEnoughSize(size_t rows, size_t bytes) const; - Chunk convertToChunk(std::vector &chunks); + Chunk convertToChunk(std::vector && chunks); }; } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 951f40dadb9..cf407a75879 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -370,13 +370,10 @@ std::optional generateViewChain( bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared(out.getInputHeader())); - - out.addSource(std::make_shared( + out.addSource(std::make_shared( out.getInputHeader(), table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL, - 1)); // Chain requires a single input + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); @@ -622,12 +619,10 @@ static QueryPipeline process(Block block, ViewRuntimeData & view, const ViewsDat /// Squashing is needed here because the materialized view query can generate a lot of blocks /// even when only one block is inserted into the parent table (e.g. if the query is a GROUP BY /// and two-level aggregation is triggered). 
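
Note: the hunk below switches this spot back to the single-processor squashing (the subject of this patch calls it "old squashing"). A plausible reason is that this view pipeline is strictly single-streamed, so the multi-port planning stage adds nothing here. The restored call, with the transform type assumed from the pre-series code, would look like:

    pipeline.addTransform(std::make_shared<SquashingChunksTransform>(
        pipeline.getHeader(),
        context->getSettingsRef().min_insert_block_size_rows,
        context->getSettingsRef().min_insert_block_size_bytes));
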
- pipeline.addTransform(std::make_shared( + pipeline.addTransform(std::make_shared( pipeline.getHeader(), context->getSettingsRef().min_insert_block_size_rows, - context->getSettingsRef().min_insert_block_size_bytes, - pipeline.getNumStreams())); - pipeline.addTransform(std::make_shared(pipeline.getHeader())); + context->getSettingsRef().min_insert_block_size_bytes)); auto converting = ActionsDAG::makeConvertingActions( pipeline.getHeader().getColumnsWithTypeAndName(), From 2db07e64e3a9a4897220f453e78c1d82d1a75d42 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 23 May 2024 11:59:31 +0000 Subject: [PATCH 042/139] fix tidy --- src/Processors/Transforms/PlanSquashingTransform.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 51781b03853..51c637f745b 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -35,7 +35,10 @@ IProcessor::Status PlanSquashingTransform::prepare() break; /// never reached } } - status = finish(); + if (status == Status::Ready) + status = finish(); + else + throw Exception(ErrorCodes::LOGICAL_ERROR, "There should be a Ready status to finish the PlanSquashing"); return status; } From 58000be1a7b0e6fd659073f383017cf8b913baaa Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 23 May 2024 13:55:06 +0000 Subject: [PATCH 043/139] fix style --- src/Processors/Transforms/PlanSquashingTransform.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 51c637f745b..2cb0a19ecdb 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -5,6 +5,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { From f632636f210c34841a7634790e32ba2153633ebf Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 23 May 2024 19:12:02 +0000 Subject: [PATCH 044/139] apply double-phased squashing in all transformers, resize optimization --- src/Interpreters/Squashing.cpp | 50 ++++++++++--------- src/Interpreters/Squashing.h | 8 ++- .../Transforms/ApplySquashingTransform.h | 8 +-- .../Transforms/PlanSquashingTransform.h | 1 - .../Transforms/SquashingTransform.cpp | 30 +++++++---- .../Transforms/SquashingTransform.h | 6 ++- 6 files changed, 57 insertions(+), 46 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 7ebe4a930c9..46e21635a30 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include namespace DB @@ -135,46 +134,52 @@ ApplySquashing::ApplySquashing(Block header_) { } -Block ApplySquashing::add(Chunk && input_chunk) +Chunk ApplySquashing::add(Chunk && input_chunk) { return addImpl(std::move(input_chunk)); } -Block ApplySquashing::addImpl(Chunk && input_chunk) +Chunk ApplySquashing::addImpl(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) - return Block(); + return Chunk(); const auto *info = getInfoFromChunk(input_chunk); - for (auto & chunk : info->chunks) - append(chunk); + 
append(info->chunks); Block to_return; std::swap(to_return, accumulated_block); - return to_return; + return Chunk(to_return.getColumns(), to_return.rows()); } -void ApplySquashing::append(Chunk & input_chunk) +void ApplySquashing::append(const std::vector & input_chunks) { - if (input_chunk.getNumColumns() == 0) - return; - if (!accumulated_block) + std::vector mutable_columns; + size_t rows = 0; + for (const Chunk & chunk : input_chunks) + rows += chunk.getNumRows(); + + for (const auto & input_chunk : input_chunks) { - for (size_t i = 0; i < input_chunk.getNumColumns(); ++ i) + if (!accumulated_block) { - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunk.getColumns()[i], header.getDataTypes()[i], header.getNames()[i]); - accumulated_block.insert(accumulated_block.columns(), col); + for (size_t i = 0; i < input_chunks[0].getNumColumns(); ++i) + { // We can put this part of code out of the cycle, but it will consume more memory + ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunks[0].getColumns()[i],header.getDataTypes()[i], header.getNames()[i]); + mutable_columns.push_back(IColumn::mutate(col.column)); + mutable_columns[i]->reserve(rows); + accumulated_block.insert(col); + } + continue; } - return; - } - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_chunk.getColumns()[i]; + for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + { + const auto source_column = input_chunk.getColumns()[i]; - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); + mutable_columns[i]->insertRangeFrom(*source_column, 0, source_column->size()); + accumulated_block.getByPosition(i).column = mutable_columns[i]->cloneFinalized(); + } } } @@ -283,7 +288,6 @@ bool PlanSquashing::isEnoughSize(const std::vector & chunks) bool PlanSquashing::isEnoughSize(size_t rows, size_t bytes) const { - LOG_TRACE(getLogger("Planning"), "rows: {}, bytes: {}", rows, bytes); return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) || (min_block_size_bytes && bytes >= min_block_size_bytes); diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 0e9f001762f..d116ff1eddd 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -1,7 +1,5 @@ #pragma once -#include -#include #include #include #include @@ -60,17 +58,17 @@ class ApplySquashing public: explicit ApplySquashing(Block header_); - Block add(Chunk && input_chunk); + Chunk add(Chunk && input_chunk); private: Block accumulated_block; const Block header; - Block addImpl(Chunk && chunk); + Chunk addImpl(Chunk && chunk); const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - void append(Chunk & input_chunk); + void append(const std::vector & input_chunks); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index abb3a0aad41..e63691fcc6a 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -37,8 +37,8 @@ public: protected: void onConsume(Chunk chunk) override { - if (auto block = squashing.add(std::move(chunk))) - cur_chunk.setColumns(block.getColumns(), 
block.rows()); + if (auto res_chunk = squashing.add(std::move(chunk))) + cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } GenerateResult onGenerate() override @@ -50,8 +50,8 @@ protected: } void onFinish() override { - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); + auto chunk = squashing.add({}); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } private: diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index dc5b6d669b1..7afc942a7f2 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -3,7 +3,6 @@ #include #include #include -#include "Processors/Port.h" enum PlanningStatus { diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 8f7f6488d3e..a516811bf45 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -1,5 +1,5 @@ #include -#include +#include namespace DB { @@ -12,14 +12,16 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) + , applySquashing(header) { } void SquashingTransform::onConsume(Chunk chunk) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - cur_chunk.setColumns(block.getColumns(), block.rows()); + Chunk planned_chunk = planSquashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + cur_chunk = applySquashing.add(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -32,8 +34,10 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - auto block = squashing.add({}); - finish_chunk.setColumns(block.getColumns(), block.rows()); + Chunk chunk = planSquashing.flush(); + if (chunk.hasChunkInfo()) + chunk = applySquashing.add(std::move(chunk)); + finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } void SquashingTransform::work() @@ -55,7 +59,9 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) - : ISimpleTransform(header, header, false), squashing(min_block_size_rows, min_block_size_bytes) + : ISimpleTransform(header, header, false) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) + , applySquashing(header) { } @@ -63,16 +69,18 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - if (auto block = squashing.add(getInputPort().getHeader().cloneWithColumns(chunk.detachColumns()))) - chunk.setColumns(block.getColumns(), block.rows()); + Chunk planned_chunk = planSquashing.add(std::move(chunk)); + if (planned_chunk.hasChunkInfo()) + chunk = applySquashing.add(std::move(planned_chunk)); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - auto block = squashing.add({}); - chunk.setColumns(block.getColumns(), block.rows()); + chunk = planSquashing.flush(); + if (chunk.hasChunkInfo()) + chunk = applySquashing.add(std::move(chunk)); } } diff --git 
a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index c5b727ac6ec..b5b3c6616d2 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -24,7 +24,8 @@ protected: void onFinish() override; private: - Squashing squashing; + PlanSquashing planSquashing; + ApplySquashing applySquashing; Chunk cur_chunk; Chunk finish_chunk; }; @@ -43,7 +44,8 @@ protected: IProcessor::Status prepare() override; private: - Squashing squashing; + PlanSquashing planSquashing; + ApplySquashing applySquashing; bool finished = false; }; From e6f83386665d1c129e5b94c82cdd643215f198c6 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 24 May 2024 20:38:45 +0000 Subject: [PATCH 045/139] memory optimizations and removed unused parts --- src/Interpreters/Squashing.cpp | 69 ++++++++----------- src/Interpreters/Squashing.h | 20 +++--- .../Transforms/PlanSquashingTransform.cpp | 5 +- .../Transforms/SquashingTransform.cpp | 4 +- src/Server/TCPHandler.cpp | 2 - src/Storages/buildQueryTreeForShard.cpp | 1 - 6 files changed, 47 insertions(+), 54 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 46e21635a30..12dcac7eb96 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -135,11 +135,6 @@ ApplySquashing::ApplySquashing(Block header_) } Chunk ApplySquashing::add(Chunk && input_chunk) -{ - return addImpl(std::move(input_chunk)); -} - -Chunk ApplySquashing::addImpl(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) return Chunk(); @@ -147,40 +142,38 @@ Chunk ApplySquashing::addImpl(Chunk && input_chunk) const auto *info = getInfoFromChunk(input_chunk); append(info->chunks); - Block to_return; - std::swap(to_return, accumulated_block); - return Chunk(to_return.getColumns(), to_return.rows()); + return std::move(accumulated_chunk); } -void ApplySquashing::append(const std::vector & input_chunks) +void ApplySquashing::append(std::vector & input_chunks) { - std::vector mutable_columns; + accumulated_chunk = {}; + std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) rows += chunk.getNumRows(); - for (const auto & input_chunk : input_chunks) + for (auto & input_chunk : input_chunks) { - if (!accumulated_block) + Columns columns = input_chunk.detachColumns(); + if (mutable_columns.empty()) { - for (size_t i = 0; i < input_chunks[0].getNumColumns(); ++i) - { // We can put this part of code out of the cycle, but it will consume more memory - ColumnWithTypeAndName col = ColumnWithTypeAndName(input_chunks[0].getColumns()[i],header.getDataTypes()[i], header.getNames()[i]); - mutable_columns.push_back(IColumn::mutate(col.column)); + for (size_t i = 0; i < columns.size(); ++i) + { + mutable_columns.push_back(IColumn::mutate(columns[i])); mutable_columns[i]->reserve(rows); - accumulated_block.insert(col); } continue; } - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) + for (size_t i = 0, size = mutable_columns.size(); i < size; ++i) { - const auto source_column = input_chunk.getColumns()[i]; + const auto source_column = columns[i]; mutable_columns[i]->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = mutable_columns[i]->cloneFinalized(); } } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); } const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk) @@ -206,12 +199,7 @@ Chunk PlanSquashing::flush() return 
convertToChunk(std::move(chunks_to_merge_vec)); } -Chunk PlanSquashing::add(Chunk && input_chunk) -{ - return addImpl(std::move(input_chunk)); -} - -Chunk PlanSquashing::addImpl(Chunk && input_chunk) +Chunk PlanSquashing::add(Chunk & input_chunk) { if (!input_chunk) return {}; @@ -231,27 +219,31 @@ Chunk PlanSquashing::addImpl(Chunk && input_chunk) /// Return accumulated data (maybe it has small size) and place new block to accumulated data. Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Accumulated block is already enough. - if (isEnoughSize(chunks_to_merge_vec)) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { /// Return accumulated data and place new block to accumulated data. Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); chunks_to_merge_vec.clear(); + changeCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); chunks_to_merge_vec.push_back(std::move(input_chunk)); return res_chunk; } /// Pushing data into accumulating vector + expandCurrentSize(input_chunk.getNumRows(), input_chunk.bytes()); chunks_to_merge_vec.push_back(std::move(input_chunk)); /// If accumulated data is big enough, we send it - if (isEnoughSize(chunks_to_merge_vec)) + if (isEnoughSize(accumulated_size.rows, accumulated_size.bytes)) { Chunk res_chunk = convertToChunk(std::move(chunks_to_merge_vec)); + changeCurrentSize(0, 0); chunks_to_merge_vec.clear(); return res_chunk; } @@ -264,26 +256,23 @@ Chunk PlanSquashing::convertToChunk(std::vector && chunks) return {}; auto info = std::make_shared(); - for (auto &chunk : chunks) - info->chunks.push_back(std::move(chunk)); + info->chunks = std::move(chunks); chunks.clear(); return Chunk(header.cloneEmptyColumns(), 0, info); } -bool PlanSquashing::isEnoughSize(const std::vector & chunks) +void PlanSquashing::expandCurrentSize(size_t rows, size_t bytes) { - size_t rows = 0; - size_t bytes = 0; + accumulated_size.rows += rows; + accumulated_size.bytes += bytes; +} - for (const Chunk & chunk : chunks) - { - rows += chunk.getNumRows(); - bytes += chunk.bytes(); - } - - return isEnoughSize(rows, bytes); +void PlanSquashing::changeCurrentSize(size_t rows, size_t bytes) +{ + accumulated_size.rows = rows; + accumulated_size.bytes = bytes; } bool PlanSquashing::isEnoughSize(size_t rows, size_t bytes) const diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index d116ff1eddd..0e844c4912b 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -61,14 +61,12 @@ public: Chunk add(Chunk && input_chunk); private: - Block accumulated_block; + Chunk accumulated_chunk; const Block header; - Chunk addImpl(Chunk && chunk); - const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - void append(const std::vector & input_chunks); + void append(std::vector & input_chunks); bool isEnoughSize(const Block & block); bool isEnoughSize(size_t rows, size_t bytes) const; @@ -79,7 +77,7 @@ class PlanSquashing public: PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); - Chunk add(Chunk && input_chunk); + Chunk add(Chunk & input_chunk); Chunk flush(); bool isDataLeft() { @@ -87,15 +85,21 @@ public: } private: + struct CurrentSize + { + size_t rows = 0; + size_t bytes = 0; + }; + std::vector chunks_to_merge_vec = {}; size_t min_block_size_rows; size_t min_block_size_bytes; const Block 
header; + CurrentSize accumulated_size; - Chunk addImpl(Chunk && input_chunk); - - bool isEnoughSize(const std::vector & chunks); + void expandCurrentSize(size_t rows, size_t bytes); + void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; Chunk convertToChunk(std::vector && chunks); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 2cb0a19ecdb..1384f760d48 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -90,6 +90,7 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() { planning_status = PlanningStatus::FLUSH; flushChunk(); + return Status::Ready; } planning_status = PlanningStatus::PUSH; return Status::Ready; @@ -133,7 +134,7 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() void PlanSquashingTransform::transform(Chunk & chunk_) { - Chunk res_chunk = balance.add(std::move(chunk_)); + Chunk res_chunk = balance.add(chunk_); std::swap(res_chunk, chunk_); } @@ -157,6 +158,8 @@ IProcessor::Status PlanSquashingTransform::sendOrFlush() { if (planning_status == PlanningStatus::PUSH) planning_status = PlanningStatus::READ_IF_CAN; + else + planning_status = PlanningStatus::FINISH; output.push(std::move(chunk)); return Status::Ready; diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index a516811bf45..67358316d48 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -19,7 +19,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = planSquashing.add(chunk); if (planned_chunk.hasChunkInfo()) cur_chunk = applySquashing.add(std::move(planned_chunk)); } @@ -69,7 +69,7 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = planSquashing.add(chunk); if (planned_chunk.hasChunkInfo()) chunk = applySquashing.add(std::move(planned_chunk)); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 3f8e43d81dd..9f14facdf8f 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -1603,7 +1602,6 @@ void TCPHandler::sendHello() nonce.emplace(thread_local_rng()); writeIntBinary(nonce.value(), *out); } - out->next(); } diff --git a/src/Storages/buildQueryTreeForShard.cpp b/src/Storages/buildQueryTreeForShard.cpp index 8ebb2173f6c..0f7d65fc7c9 100644 --- a/src/Storages/buildQueryTreeForShard.cpp +++ b/src/Storages/buildQueryTreeForShard.cpp @@ -23,7 +23,6 @@ #include #include - namespace DB { From b08ecfe6c0ee5b842f30999257daa3ae89ba0916 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Sat, 25 May 2024 10:40:51 +0000 Subject: [PATCH 046/139] fix for nullable columns --- src/Interpreters/Squashing.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 12dcac7eb96..e2abcd00be3 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -160,7 +160,10 @@ void ApplySquashing::append(std::vector & input_chunks) { for (size_t i = 0; i < columns.size(); ++i) { - mutable_columns.push_back(IColumn::mutate(columns[i])); + if 
(columns[i]->isNullable()) + mutable_columns.push_back(IColumn::mutate(columns[i])); + else + mutable_columns.push_back(columns[i]->assumeMutable()); mutable_columns[i]->reserve(rows); } continue; From 01a16fd8e2d7ba65ee9feddcfb1cf7609be02947 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 27 May 2024 11:33:01 +0000 Subject: [PATCH 047/139] squashing refactoring --- src/Interpreters/Squashing.cpp | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index e2abcd00be3..855bf32abe9 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -153,27 +153,24 @@ void ApplySquashing::append(std::vector & input_chunks) for (const Chunk & chunk : input_chunks) rows += chunk.getNumRows(); - for (auto & input_chunk : input_chunks) { - Columns columns = input_chunk.detachColumns(); - if (mutable_columns.empty()) + auto & first_chunk = input_chunks[0]; + Columns columns = first_chunk.detachColumns(); + for (size_t i = 0; i < columns.size(); ++i) { - for (size_t i = 0; i < columns.size(); ++i) - { - if (columns[i]->isNullable()) - mutable_columns.push_back(IColumn::mutate(columns[i])); - else - mutable_columns.push_back(columns[i]->assumeMutable()); - mutable_columns[i]->reserve(rows); - } - continue; + mutable_columns.push_back(IColumn::mutate(std::move(columns[i]))); + mutable_columns[i]->reserve(rows); } + } - for (size_t i = 0, size = mutable_columns.size(); i < size; ++i) + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above + { + Columns columns = input_chunks[i].detachColumns(); + for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) { - const auto source_column = columns[i]; + const auto source_column = columns[j]; - mutable_columns[i]->insertRangeFrom(*source_column, 0, source_column->size()); + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); } } accumulated_chunk.setColumns(std::move(mutable_columns), rows); From 00b07bba146848cfecf8a4f59c78161d24aa3566 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 27 May 2024 16:23:01 +0000 Subject: [PATCH 048/139] remove squashing --- src/Interpreters/Squashing.cpp | 127 +----------------- src/Interpreters/Squashing.h | 33 +---- .../Transforms/PlanSquashingTransform.cpp | 4 +- .../Transforms/SquashingTransform.cpp | 8 +- src/Server/TCPHandler.cpp | 23 +++- src/Storages/MergeTree/MutateTask.cpp | 28 +++- 6 files changed, 56 insertions(+), 167 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 855bf32abe9..3872c2ba6b9 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,6 +1,7 @@ #include #include #include +#include "Columns/IColumn.h" namespace DB @@ -11,124 +12,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) -{ -} - -Block Squashing::add(Block && input_block) -{ - return addImpl(std::move(input_block)); -} - -Block Squashing::add(const Block & input_block) -{ - return addImpl(input_block); -} - -/* - * To minimize copying, accept two types of argument: const reference for output - * stream, and rvalue reference for input stream, and decide whether to copy - * inside this function. This allows us not to copy Block unless we absolutely - * have to. 
- */ -template -Block Squashing::addImpl(ReferenceType input_block) -{ - /// End of input stream. - if (!input_block) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Just read block is already enough. - if (isEnoughSize(input_block)) - { - /// If no accumulated data, return just read block. - if (!accumulated_block) - { - return std::move(input_block); - } - - /// Return accumulated data (maybe it has small size) and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Accumulated block is already enough. - if (isEnoughSize(accumulated_block)) - { - /// Return accumulated data and place new block to accumulated data. - Block to_return = std::move(input_block); - std::swap(to_return, accumulated_block); - return to_return; - } - - append(std::move(input_block)); - if (isEnoughSize(accumulated_block)) - { - Block to_return; - std::swap(to_return, accumulated_block); - return to_return; - } - - /// Squashed block is not ready. - return {}; -} - - -template -void Squashing::append(ReferenceType input_block) -{ - if (!accumulated_block) - { - accumulated_block = std::move(input_block); - return; - } - - assert(blocksHaveEqualStructure(input_block, accumulated_block)); - - for (size_t i = 0, size = accumulated_block.columns(); i < size; ++i) - { - const auto source_column = input_block.getByPosition(i).column; - - auto mutable_column = IColumn::mutate(std::move(accumulated_block.getByPosition(i).column)); - mutable_column->insertRangeFrom(*source_column, 0, source_column->size()); - accumulated_block.getByPosition(i).column = std::move(mutable_column); - } -} - - -bool Squashing::isEnoughSize(const Block & block) -{ - size_t rows = 0; - size_t bytes = 0; - - for (const auto & [column, type, name] : block) - { - if (!rows) - rows = column->size(); - else if (rows != column->size()) - throw Exception(ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH, "Sizes of columns doesn't match"); - - bytes += column->byteSize(); - } - - return isEnoughSize(rows, bytes); -} - - -bool Squashing::isEnoughSize(size_t rows, size_t bytes) const -{ - return (!min_block_size_rows && !min_block_size_bytes) - || (min_block_size_rows && rows >= min_block_size_rows) - || (min_block_size_bytes && bytes >= min_block_size_bytes); -} - ApplySquashing::ApplySquashing(Block header_) : header(std::move(header_)) { @@ -187,10 +70,9 @@ const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk) return agg_info; } -PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) +PlanSquashing::PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(std::move(header_)) { } @@ -199,7 +81,7 @@ Chunk PlanSquashing::flush() return convertToChunk(std::move(chunks_to_merge_vec)); } -Chunk PlanSquashing::add(Chunk & input_chunk) +Chunk PlanSquashing::add(Chunk && input_chunk) { if (!input_chunk) return {}; @@ -260,7 +142,8 @@ Chunk PlanSquashing::convertToChunk(std::vector && chunks) chunks.clear(); - return Chunk(header.cloneEmptyColumns(), 0, info); + Columns cols = {}; + return Chunk(cols, 0, info); } void PlanSquashing::expandCurrentSize(size_t rows, size_t bytes) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 0e844c4912b..d9d430c1835 100644 --- a/src/Interpreters/Squashing.h +++ 
b/src/Interpreters/Squashing.h @@ -25,33 +25,6 @@ struct ChunksToSquash : public ChunkInfo * * Order of data is kept. */ -class Squashing -{ -public: - /// Conditions on rows and bytes are OR-ed. If one of them is zero, then corresponding condition is ignored. - Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); - - /** Add next block and possibly returns squashed block. - * At end, you need to pass empty block. As the result for last (empty) block, you will get last Result with ready = true. - */ - Block add(Block && block); - Block add(const Block & block); - -private: - size_t min_block_size_rows; - size_t min_block_size_bytes; - - Block accumulated_block; - - template - Block addImpl(ReferenceType block); - - template - void append(ReferenceType block); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; class ApplySquashing { @@ -75,9 +48,9 @@ private: class PlanSquashing { public: - PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); - Chunk add(Chunk & input_chunk); + Chunk add(Chunk && input_chunk); Chunk flush(); bool isDataLeft() { @@ -95,7 +68,7 @@ private: size_t min_block_size_rows; size_t min_block_size_bytes; - const Block header; + // const Block header; CurrentSize accumulated_size; void expandCurrentSize(size_t rows, size_t bytes); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 1384f760d48..96f41e37d2f 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(min_block_size_rows, min_block_size_bytes) { } @@ -134,7 +134,7 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() void PlanSquashingTransform::transform(Chunk & chunk_) { - Chunk res_chunk = balance.add(chunk_); + Chunk res_chunk = balance.add(std::move(chunk_)); std::swap(res_chunk, chunk_); } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 67358316d48..6f7c877b2f3 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,14 +12,14 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , planSquashing(header, min_block_size_rows, min_block_size_bytes) + , planSquashing(min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = planSquashing.add(chunk); + Chunk planned_chunk = planSquashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) cur_chunk = applySquashing.add(std::move(planned_chunk)); } @@ -60,7 +60,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, 
header, false) - , planSquashing(header, min_block_size_rows, min_block_size_bytes) + , planSquashing(min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } @@ -69,7 +69,7 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = planSquashing.add(chunk); + Chunk planned_chunk = planSquashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) chunk = applySquashing.add(std::move(planned_chunk)); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 9f14facdf8f..476c4dd372b 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -885,13 +885,21 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + PlanSquashing plan_squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + ApplySquashing apply_squashing(state.input_header); while (readDataNext()) { - auto result = squashing.add(std::move(state.block_for_insert)); - if (result) + auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = apply_squashing.add(std::move(planned_chunk)); + if (result_chunk) { + ColumnsWithTypeAndName cols; + for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + auto result = Block(cols); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -900,7 +908,14 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto result = squashing.add({}); + auto planned_chunk = plan_squashing.flush(); + Chunk result_chunk; + if (planned_chunk.hasChunkInfo()) + result_chunk = apply_squashing.add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8052ee8f630..f7a4651f6fd 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -28,6 +28,7 @@ #include #include #include +#include namespace ProfileEvents @@ -1266,7 +1267,8 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squashes; + std::vector projection_squash_plannings; + std::vector projection_squashes; const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; @@ -1285,7 +1287,8 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
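
Note: as in the INSERT path, the split below is planner plus applier per projection: the planner only records which chunks belong together (a cheap Chunk carrying ChunksToSquash info), and the applier performs the actual column merge. Condensed from the hunks around this point, the per-block flow is roughly:

    // For each mutated block and each projection i:
    Block block_to_squash = projection.calculate(cur_block, ctx->context);
    Chunk planned = projection_squash_plannings[i].add({block_to_squash.getColumns(), block_to_squash.rows()});
    if (planned.hasChunkInfo())  // planner says enough rows/bytes are buffered
    {
        Chunk squashed = projection_squashes[i].add(std::move(planned));
        // Rebuild a Block from the squashed columns plus the header's types and
        // names, then write it via MergeTreeDataWriter::writeTempProjectionPart.
    }
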
- projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squash_plannings.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header); } existing_rows_count = 0; @@ -1313,7 +1316,15 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() Block projection_block; { ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - projection_block = projection_squashes[i].add(projection.calculate(cur_block, ctx->context)); + Block to_plan = projection.calculate(cur_block, ctx->context); + Chunk planned_chunk = projection_squash_plannings[i].add({to_plan.getColumns(), to_plan.rows()}); + Chunk projection_chunk; + if (planned_chunk.hasChunkInfo()) + projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + projection_block = Block(cols); } if (projection_block) @@ -1337,8 +1348,15 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; - auto & projection_squash = projection_squashes[i]; - auto projection_block = projection_squash.add({}); + auto & projection_squash_plan = projection_squash_plannings[i]; + auto planned_chunk = projection_squash_plan.flush(); + Chunk projection_chunk; + if (planned_chunk.hasChunkInfo()) + projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + auto projection_block = Block(cols); if (projection_block) { auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( From d0d5b6d0cbbc9841f6bce59ef2feee4cc00b1b1f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 27 May 2024 16:30:46 +0000 Subject: [PATCH 049/139] fix style --- src/Interpreters/Squashing.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 3872c2ba6b9..82d80114a85 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -8,7 +8,6 @@ namespace DB { namespace ErrorCodes { - extern const int SIZES_OF_COLUMNS_DOESNT_MATCH; extern const int LOGICAL_ERROR; } From 78e161ff15b5399aa18141b5cf896353a2fc9e00 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 27 May 2024 19:02:17 +0000 Subject: [PATCH 050/139] fixes (added header to planner) --- src/Interpreters/Squashing.cpp | 7 ++- src/Interpreters/Squashing.h | 4 +- .../Transforms/PlanSquashingTransform.cpp | 2 +- .../Transforms/SquashingTransform.cpp | 4 +- src/Server/TCPHandler.cpp | 6 +-- src/Storages/MergeTree/MutateTask.cpp | 44 +++++++++---------- 6 files changed, 31 insertions(+), 36 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 82d80114a85..9ecd92f732c 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -1,7 +1,6 @@ #include #include #include -#include "Columns/IColumn.h" namespace DB @@ -69,9 +68,10 @@ const ChunksToSquash* 
ApplySquashing::getInfoFromChunk(const Chunk & chunk) return agg_info; } -PlanSquashing::PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) +PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) + , header(std::move(header_)) { } @@ -141,8 +141,7 @@ Chunk PlanSquashing::convertToChunk(std::vector && chunks) chunks.clear(); - Columns cols = {}; - return Chunk(cols, 0, info); + return Chunk(header.cloneEmptyColumns(), 0, info); } void PlanSquashing::expandCurrentSize(size_t rows, size_t bytes) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index d9d430c1835..a2928e0eeb6 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -48,7 +48,7 @@ private: class PlanSquashing { public: - PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Chunk add(Chunk && input_chunk); Chunk flush(); @@ -68,7 +68,7 @@ private: size_t min_block_size_rows; size_t min_block_size_bytes; - // const Block header; + const Block header; CurrentSize accumulated_size; void expandCurrentSize(size_t rows, size_t bytes); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 96f41e37d2f..7945bd97e04 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 6f7c877b2f3..a516811bf45 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , planSquashing(min_block_size_rows, min_block_size_bytes) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } @@ -60,7 +60,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , planSquashing(min_block_size_rows, min_block_size_bytes) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 476c4dd372b..dfe2d909b43 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -885,17 +885,15 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - PlanSquashing plan_squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + PlanSquashing 
plan_squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); ApplySquashing apply_squashing(state.input_header); while (readDataNext()) { auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - Chunk result_chunk; if (planned_chunk.hasChunkInfo()) - result_chunk = apply_squashing.add(std::move(planned_chunk)); - if (result_chunk) { + Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f7a4651f6fd..5e14d4c5b38 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1267,7 +1267,7 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squash_plannings; + std::vector projection_squash_plannings; std::vector projection_squashes; const ProjectionsDescription & projections; @@ -1282,12 +1282,15 @@ private: void PartMergerWriter::prepare() { + projection_squash_plannings.reserve(ctx->projections_to_build.size()); + projection_squashes.reserve(ctx->projections_to_build.size()); const auto & settings = ctx->context->getSettingsRef(); for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { + PlanSquashing plan_squashing(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squash_plannings.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squash_plannings.push_back(&plan_squashing); projection_squashes.emplace_back(ctx->updated_header); } @@ -1313,24 +1316,21 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; - Block projection_block; + Chunk planned_chunk; { ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - Block to_plan = projection.calculate(cur_block, ctx->context); - Chunk planned_chunk = projection_squash_plannings[i].add({to_plan.getColumns(), to_plan.rows()}); - Chunk projection_chunk; - if (planned_chunk.hasChunkInfo()) - projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + Block block_to_squash = projection.calculate(cur_block, ctx->context); + planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()}); + } + + if (planned_chunk.hasChunkInfo()) + { + Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); - projection_block = Block(cols); - } - - if (projection_block) - { auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); @@ -1349,18 +1349,16 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squash_plannings[i]; - auto planned_chunk = projection_squash_plan.flush(); - Chunk projection_chunk; + auto planned_chunk = projection_squash_plan->flush(); if (planned_chunk.hasChunkInfo()) - projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); - auto projection_block = Block(cols); - if (projection_block) { + Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); From 01183902a667d94d71ed9faabeffdc60cdcf95cd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 28 May 2024 12:07:20 +0000 Subject: [PATCH 051/139] try to fix a segfault --- src/Server/TCPHandler.cpp | 10 ++++++---- 
src/Storages/MergeTree/MutateTask.cpp | 12 ++++++------ 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index dfe2d909b43..d0e9dc5f3ee 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -895,8 +895,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro { Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + if (result_chunk.hasColumns()) + for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); auto result = Block(cols); return PushResult { @@ -911,8 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + if (result_chunk.hasColumns()) + for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 5e14d4c5b38..0e272fc8eb9 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1282,8 +1282,6 @@ private: void PartMergerWriter::prepare() { - projection_squash_plannings.reserve(ctx->projections_to_build.size()); - projection_squashes.reserve(ctx->projections_to_build.size()); const auto & settings = ctx->context->getSettingsRef(); for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) @@ -1327,8 +1325,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + if (projection_chunk.hasColumns()) + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1354,8 +1353,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + if 
(projection_chunk.hasColumns()) + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); From 45f6c19c9df5c3f62b1ed4933321053ef6f77c91 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 28 May 2024 15:36:19 +0000 Subject: [PATCH 052/139] attempt #2 --- src/Storages/MergeTree/MutateTask.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0e272fc8eb9..8c4e0c6e654 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1315,11 +1315,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() const auto & projection = *ctx->projections_to_build[i]; Chunk planned_chunk; - { - ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - Block block_to_squash = projection.calculate(cur_block, ctx->context); - planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()}); - } + ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); + Block block_to_squash = projection.calculate(cur_block, ctx->context); + planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) { @@ -1327,7 +1325,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ColumnsWithTypeAndName cols; if (projection_chunk.hasColumns()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], block_to_squash.getDataTypes()[j], block_to_squash.getNames()[j])); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); From f46a7d64a0163e0cf9140eb0e56c88f2cc6471bb Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 28 May 2024 17:00:35 +0000 Subject: [PATCH 053/139] fix segfault in TCPHandler --- src/Server/TCPHandler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d0e9dc5f3ee..b95face57e1 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -897,7 +897,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro ColumnsWithTypeAndName cols; if (result_chunk.hasColumns()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); return PushResult { @@ -914,7 +914,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro ColumnsWithTypeAndName cols; if (result_chunk.hasColumns()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - 
cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } From 513de6ce19867dc10fedf5c9820363b84655a9f1 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 28 May 2024 17:59:44 +0000 Subject: [PATCH 054/139] using of header from applySquashing --- src/Interpreters/Squashing.h | 2 +- src/Storages/MergeTree/MutateTask.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index a2928e0eeb6..05259bbc0c3 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -32,10 +32,10 @@ public: explicit ApplySquashing(Block header_); Chunk add(Chunk && input_chunk); + const Block header; private: Chunk accumulated_chunk; - const Block header; const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 8c4e0c6e654..0a3a217d943 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1353,7 +1353,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ColumnsWithTypeAndName cols; if (projection_chunk.hasColumns()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); From f0e9d6b459cfee4331861d4f0e3c92c1e9d67c72 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 14:30:48 +0000 Subject: [PATCH 055/139] revert changes in mv --- .../Transforms/buildPushingToViewsChain.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ff1be9323f5..177d45650dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,13 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); From f51a145437df6f173d67e5fc7f1259c1e0154a98 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 15:00:22 +0000 Subject: [PATCH 056/139] fixes for segfault --- src/Server/TCPHandler.cpp | 8 ++++---- src/Storages/MergeTree/MutateTask.cpp | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b95face57e1..af184940c7e 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -895,9 +895,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro { Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns()) + if (result_chunk.hasColumns() && apply_squashing.header) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); auto result = Block(cols); return PushResult { @@ -912,9 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns()) + if (result_chunk.hasColumns() && apply_squashing.header) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0a3a217d943..3469b609f6b 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1323,9 +1323,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) + if (projection_chunk.hasColumns() && projection_squashes[i].header) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], block_to_squash.getDataTypes()[j], block_to_squash.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); @@ -1351,7 +1351,7 @@ bool 
PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) + if (projection_chunk.hasColumns() && projection_squashes[i].header) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); From d351c05243cc42dd05b3a4edf90dfe2044786e9a Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 16:13:21 +0000 Subject: [PATCH 057/139] reset + try to fix mv and mutations --- .../Transforms/buildPushingToViewsChain.cpp | 15 +++++++++------ src/Server/TCPHandler.cpp | 6 +++--- src/Storages/MergeTree/MutateTask.cpp | 4 ++-- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ff1be9323f5..177d45650dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,13 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index b95face57e1..3cbaffe857a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -895,7 +895,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro { Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns()) + if (result_chunk.hasColumns() && state.block_for_insert) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); @@ -912,9 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns()) + if (result_chunk.hasColumns() && apply_squashing.header) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0a3a217d943..af36b7bb3e8 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1323,7 +1323,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) + if (projection_chunk.hasColumns() && block_to_squash) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], block_to_squash.getDataTypes()[j], block_to_squash.getNames()[j])); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( @@ -1351,7 +1351,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) + if (projection_chunk.hasColumns() && projection_squashes[i].header) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); From d86580ef049fc402d48808e3c125a61f824ed40f Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 20:37:49 +0000 Subject: [PATCH 058/139] try to fix segfaults --- src/Interpreters/Squashing.h | 11 ++++++++++- src/Server/TCPHandler.cpp | 4 ++-- src/Storages/MergeTree/MutateTask.cpp | 5 +++-- 3 files changed, 15 insertions(+), 5 deletions(-) mode change 100644 => 100755 src/Interpreters/Squashing.h mode change 100644 => 100755 src/Server/TCPHandler.cpp mode change 100644 => 100755 
src/Storages/MergeTree/MutateTask.cpp diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h old mode 100644 new mode 100755 index 05259bbc0c3..84e67e5d4c1 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -32,10 +32,19 @@ public: explicit ApplySquashing(Block header_); Chunk add(Chunk && input_chunk); - const Block header; + + void setHeader(Block header_) + { + header = header_; + } + Block getHeader() + { + return header; + } private: Chunk accumulated_chunk; + Block header; const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp old mode 100644 new mode 100755 index 3cbaffe857a..77f84dba6e4 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -912,9 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns() && apply_squashing.header) + if (result_chunk.hasColumns() && apply_squashing.getHeader()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.getHeader().getDataTypes()[j], apply_squashing.getHeader().getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp old mode 100644 new mode 100755 index af36b7bb3e8..ff1d7c0b7c2 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1331,6 +1331,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); + projection_squashes[i].setHeader(block_to_squash); } } @@ -1351,9 +1352,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns() && projection_squashes[i].header) + if (projection_chunk.hasColumns() && projection_squashes[i].getHeader()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].getHeader().getDataTypes()[j], projection_squashes[i].getHeader().getNames()[j])); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); From aa52e9036ef1aef21c037bb2d8f3722f4cd24de3 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 23:29:19 +0000 Subject: [PATCH 059/139] reset last commit --- src/Interpreters/Squashing.h | 11 ++++++++++- src/Server/TCPHandler.cpp | 4 ++-- src/Storages/MergeTree/MutateTask.cpp | 5 +++-- 3 files changed, 15 insertions(+), 5 deletions(-) mode change 100644 => 100755 src/Interpreters/Squashing.h mode change 100644 => 100755 src/Server/TCPHandler.cpp mode change 100644 => 100755 
src/Storages/MergeTree/MutateTask.cpp diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h old mode 100644 new mode 100755 index 05259bbc0c3..84e67e5d4c1 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -32,10 +32,19 @@ public: explicit ApplySquashing(Block header_); Chunk add(Chunk && input_chunk); - const Block header; + + void setHeader(Block header_) + { + header = header_; + } + Block getHeader() + { + return header; + } private: Chunk accumulated_chunk; + Block header; const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp old mode 100644 new mode 100755 index 3cbaffe857a..77f84dba6e4 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -912,9 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns() && apply_squashing.header) + if (result_chunk.hasColumns() && apply_squashing.getHeader()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.getHeader().getDataTypes()[j], apply_squashing.getHeader().getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp old mode 100644 new mode 100755 index af36b7bb3e8..ff1d7c0b7c2 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1331,6 +1331,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); + projection_squashes[i].setHeader(block_to_squash); } } @@ -1351,9 +1352,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns() && projection_squashes[i].header) + if (projection_chunk.hasColumns() && projection_squashes[i].getHeader()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].getHeader().getDataTypes()[j], projection_squashes[i].getHeader().getNames()[j])); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); From b160548aafc07e0db47ece097943cf3e61422c4c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Wed, 29 May 2024 23:39:16 +0000 Subject: [PATCH 060/139] change the chmod back --- src/Interpreters/Squashing.h | 0 src/Server/TCPHandler.cpp | 0 src/Storages/MergeTree/MutateTask.cpp | 0 3 files changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 src/Interpreters/Squashing.h mode change 100755 => 100644 src/Server/TCPHandler.cpp mode change 100755 => 100644 
src/Storages/MergeTree/MutateTask.cpp diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h old mode 100755 new mode 100644 diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp old mode 100755 new mode 100644 diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp old mode 100755 new mode 100644 From 3e0947c759f5b9a70add338681cfcb660388e2a8 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 00:16:35 +0000 Subject: [PATCH 061/139] try to remove if in mv --- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 177d45650dd..ff1be9323f5 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,16 +367,13 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) - { - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - } + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); From cadf9d466664ceb693dbebb31a3f3df57af84c8b Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 10:41:36 +0000 Subject: [PATCH 062/139] Revert "try to remove if in mv" This reverts commit 3e0947c759f5b9a70add338681cfcb660388e2a8. 
--- .../Transforms/buildPushingToViewsChain.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ff1be9323f5..177d45650dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,13 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); From 6c6bf069e211c17182d6b54d0afdaff48f932bfe Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 12:07:18 +0000 Subject: [PATCH 063/139] remove moving of header --- src/Interpreters/Squashing.cpp | 4 ++-- src/Storages/MergeTree/MutateTask.cpp | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 9ecd92f732c..47add495421 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } ApplySquashing::ApplySquashing(Block header_) - : header(std::move(header_)) + : header(header_) { } @@ -71,7 +71,7 @@ const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk) PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) - , header(std::move(header_)) + , header(header_) { } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index ff1d7c0b7c2..2269b16b443 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1314,10 +1314,9 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; - Chunk planned_chunk; ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()}); + Chunk planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) { From 0579fc9436528221d88ffc02f23e42de7ad4dc81 Mon Sep 17 00:00:00 2001 
From: yariks5s Date: Thu, 30 May 2024 14:48:14 +0000 Subject: [PATCH 064/139] remove moving from planSquashing --- src/Interpreters/Squashing.cpp | 2 +- src/Interpreters/Squashing.h | 2 +- src/Processors/Transforms/PlanSquashingTransform.cpp | 2 +- src/Processors/Transforms/SquashingTransform.cpp | 4 ++-- src/Server/TCPHandler.cpp | 3 ++- src/Storages/MergeTree/MutateTask.cpp | 5 +++-- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 47add495421..6706399a3d2 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -80,7 +80,7 @@ Chunk PlanSquashing::flush() return convertToChunk(std::move(chunks_to_merge_vec)); } -Chunk PlanSquashing::add(Chunk && input_chunk) +Chunk PlanSquashing::add(Chunk & input_chunk) { if (!input_chunk) return {}; diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 84e67e5d4c1..802e77847e9 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -59,7 +59,7 @@ class PlanSquashing public: explicit PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); - Chunk add(Chunk && input_chunk); + Chunk add(Chunk & input_chunk); Chunk flush(); bool isDataLeft() { diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 7945bd97e04..1384f760d48 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -134,7 +134,7 @@ IProcessor::Status PlanSquashingTransform::waitForDataIn() void PlanSquashingTransform::transform(Chunk & chunk_) { - Chunk res_chunk = balance.add(std::move(chunk_)); + Chunk res_chunk = balance.add(chunk_); std::swap(res_chunk, chunk_); } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index a516811bf45..67358316d48 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -19,7 +19,7 @@ SquashingTransform::SquashingTransform( void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = planSquashing.add(chunk); if (planned_chunk.hasChunkInfo()) cur_chunk = applySquashing.add(std::move(planned_chunk)); } @@ -69,7 +69,7 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = planSquashing.add(chunk); if (planned_chunk.hasChunkInfo()) chunk = applySquashing.add(std::move(planned_chunk)); } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 77f84dba6e4..6973808078c 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,7 +890,8 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + Chunk input_chunk = {state.block_for_insert.getColumns(), state.block_for_insert.rows()}; + auto planned_chunk = plan_squashing.add(input_chunk); if (planned_chunk.hasChunkInfo()) { Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 2269b16b443..5267143bf65 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ 
b/src/Storages/MergeTree/MutateTask.cpp
@@ -1314,9 +1314,10 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections()
 {
 const auto & projection = *ctx->projections_to_build[i];
- ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds);
+ ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); // Not clear why it is needed here
 Block block_to_squash = projection.calculate(cur_block, ctx->context);
- Chunk planned_chunk = projection_squash_plannings[i]->add({block_to_squash.getColumns(), block_to_squash.rows()});
+ Chunk input_chunk = {block_to_squash.getColumns(), block_to_squash.rows()};
+ Chunk planned_chunk = projection_squash_plannings[i]->add(input_chunk);
 if (planned_chunk.hasChunkInfo())
 {

From 826bec2575b8ccd3800b0e04be5422c000367dc2 Mon Sep 17 00:00:00 2001
From: yariks5s
Date: Thu, 30 May 2024 16:33:48 +0000
Subject: [PATCH 065/139] added move constructor and removed unused parts

---
 src/Interpreters/Squashing.cpp | 7 ++-
 src/Interpreters/Squashing.h | 8 ++--
 .../Transforms/PlanSquashingTransform.cpp | 2 +-
 .../Transforms/SquashingTransform.cpp | 4 +-
 .../Transforms/buildPushingToViewsChain.cpp | 15 ++++---
 src/Server/TCPHandler.cpp | 16 +++----
 src/Storages/MergeTree/MutateTask.cpp | 43 ++++++++-----------
 7 files changed, 47 insertions(+), 48 deletions(-)

diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp
index 82d80114a85..a15abf968d2 100644
--- a/src/Interpreters/Squashing.cpp
+++ b/src/Interpreters/Squashing.cpp
@@ -1,7 +1,6 @@
 #include
 #include
 #include
-#include "Columns/IColumn.h"

 namespace DB
@@ -69,9 +68,10 @@ const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk)
 return agg_info;
 }

-PlanSquashing::PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_)
+PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_)
 : min_block_size_rows(min_block_size_rows_)
 , min_block_size_bytes(min_block_size_bytes_)
+ , header(header_)
 {
 }

@@ -141,8 +141,7 @@ Chunk PlanSquashing::convertToChunk(std::vector && chunks)
 chunks.clear();

- Columns cols = {};
- return Chunk(cols, 0, info);
+ return Chunk(header.cloneEmptyColumns(), 0, info);
 }

 void PlanSquashing::expandCurrentSize(size_t rows, size_t bytes)
diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h
index d9d430c1835..77191e63050 100644
--- a/src/Interpreters/Squashing.h
+++ b/src/Interpreters/Squashing.h
@@ -33,9 +33,10 @@ public:
 Chunk add(Chunk && input_chunk);

+ Block header;
+
 private:
 Chunk accumulated_chunk;
- const Block header;

 const ChunksToSquash * getInfoFromChunk(const Chunk & chunk);

@@ -48,7 +49,8 @@ private:
 class PlanSquashing
 {
 public:
- PlanSquashing(size_t min_block_size_rows_, size_t min_block_size_bytes_);
+ explicit PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_);
+ PlanSquashing(PlanSquashing && other) = default;

 Chunk add(Chunk && input_chunk);
 Chunk flush();

@@ -68,7 +70,7 @@ private:
 size_t min_block_size_rows;
 size_t min_block_size_bytes;

- // const Block header;
+ const Block header;
 CurrentSize accumulated_size;

 void expandCurrentSize(size_t rows, size_t bytes);
diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp
index 96f41e37d2f..7945bd97e04 100644
--- a/src/Processors/Transforms/PlanSquashingTransform.cpp
+++ b/src/Processors/Transforms/PlanSquashingTransform.cpp
@@ -11,7 +11,7 @@
namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 6f7c877b2f3..a516811bf45 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , planSquashing(min_block_size_rows, min_block_size_bytes) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } @@ -60,7 +60,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , planSquashing(min_block_size_rows, min_block_size_bytes) + , planSquashing(header, min_block_size_rows, min_block_size_bytes) , applySquashing(header) { } diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ff1be9323f5..177d45650dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,13 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 476c4dd372b..06f20fef613 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -885,20 +885,19 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - PlanSquashing plan_squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + PlanSquashing plan_squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); ApplySquashing apply_squashing(state.input_header); while (readDataNext()) { auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); - Chunk result_chunk; if (planned_chunk.hasChunkInfo()) - result_chunk = apply_squashing.add(std::move(planned_chunk)); - if (result_chunk) { + Chunk result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + if (result_chunk.hasColumns() && state.block_for_insert) + for (size_t j = 0; j < result_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); return PushResult { @@ -913,8 +912,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = apply_squashing.add(std::move(planned_chunk)); ColumnsWithTypeAndName cols; - for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + if (result_chunk.hasColumns()) + for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index f7a4651f6fd..c24a643c374 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squash_plannings.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squash_plannings.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); projection_squashes.emplace_back(ctx->updated_header); } @@ -1313,24 +1313,20 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { const auto & projection = *ctx->projections_to_build[i]; - Block projection_block; - { - ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); - Block to_plan = projection.calculate(cur_block, ctx->context); - Chunk planned_chunk = projection_squash_plannings[i].add({to_plan.getColumns(), to_plan.rows()}); - Chunk projection_chunk; - if (planned_chunk.hasChunkInfo()) - projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); - projection_block = Block(cols); - } + ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); + Block block_to_squash = projection.calculate(cur_block, ctx->context); + projection_squashes[i].header = block_to_squash; + Chunk planned_chunk = projection_squash_plannings[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); - if (projection_block) + if (planned_chunk.hasChunkInfo()) { + Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + if (projection_chunk.hasColumns()) + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], block_to_squash.getDataTypes()[j], block_to_squash.getNames()[j])); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); @@ -1350,17 +1346,16 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() const auto & projection = *ctx->projections_to_build[i]; auto & projection_squash_plan = projection_squash_plannings[i]; auto planned_chunk = projection_squash_plan.flush(); - Chunk projection_chunk; if (planned_chunk.hasChunkInfo()) - projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], ctx->updated_header.getDataTypes()[j], ctx->updated_header.getNames()[j])); - auto projection_block = Block(cols); - if (projection_block) { + Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + ColumnsWithTypeAndName cols; + if (projection_chunk.hasColumns()) + for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) + cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); + auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, projection_block, projection, 
ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); From 5a09dce95d0c1be55e9283dcf49b8fb5592f923d Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 18:06:11 +0000 Subject: [PATCH 066/139] adding header in TCPHandler --- src/Server/TCPHandler.cpp | 6 +++++- src/Storages/MergeTree/MutateTask.cpp | 3 ++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 06f20fef613..070073f414d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,6 +890,8 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { + if (!apply_squashing.header) + apply_squashing.header = state.block_for_insert; auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { @@ -899,6 +901,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro for (size_t j = 0; j < result_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); + apply_squashing.header = Block(state.block_for_insert); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -914,7 +917,8 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro ColumnsWithTypeAndName cols; if (result_chunk.hasColumns()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.input_header.getDataTypes()[j], state.input_header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); + auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0d1fc46ec76..e5285a970dd 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1316,7 +1316,8 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].header = block_to_squash; + if (!projection_squashes[i].header) + projection_squashes[i].header = block_to_squash; Chunk planned_chunk = projection_squash_plannings[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) From 84c8c4ca482805a762ba03152cd8f8e412cec1ca Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 18:28:24 +0000 Subject: [PATCH 067/139] try to assign header unconditionally --- .../Transforms/buildPushingToViewsChain.cpp | 15 ++++++--------- src/Server/TCPHandler.cpp | 3 +-- src/Storages/MergeTree/MutateTask.cpp | 3 +-- 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index 177d45650dd..ff1be9323f5 100644 --- 
a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,16 +367,13 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - if (interpreter.shouldAddSquashingFroStorage(inner_table)) - { - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); - } + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 070073f414d..1dd99796754 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -890,8 +890,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro while (readDataNext()) { - if (!apply_squashing.header) - apply_squashing.header = state.block_for_insert; + apply_squashing.header = state.block_for_insert; auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index e5285a970dd..0d1fc46ec76 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1316,8 +1316,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - if (!projection_squashes[i].header) - projection_squashes[i].header = block_to_squash; + projection_squashes[i].header = block_to_squash; Chunk planned_chunk = projection_squash_plannings[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) From 43cb255394a4d93a1b4703f01d56ac2144c3881c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 30 May 2024 18:46:56 +0000 Subject: [PATCH 068/139] [TEMP] Change refence for 2290_async_queries_in_query_log --- .../0_stateless/02790_async_queries_in_query_log.reference | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference index aa18817f4e6..af8244c06b8 100644 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.reference +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.reference @@ -101,6 +101,13 @@ table: async_insert_landing partition_id: all rows: 3 +Row 2: +────── +database: default +table: async_insert_target +partition_id: all +rows: 3 + 
system.query_log Row 1: ────── From c857099c3d19cce02e3ff80f3bf3faa7656eac28 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Fri, 31 May 2024 13:42:16 +0000 Subject: [PATCH 069/139] un-flaky test --- tests/queries/0_stateless/02790_async_queries_in_query_log.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh index 1ff97031acb..a74519a7608 100755 --- a/tests/queries/0_stateless/02790_async_queries_in_query_log.sh +++ b/tests/queries/0_stateless/02790_async_queries_in_query_log.sh @@ -65,6 +65,7 @@ function print_flush_query_logs() WHERE event_date >= yesterday() AND query_id = (SELECT flush_query_id FROM system.asynchronous_insert_log WHERE event_date >= yesterday() AND query_id = '$1') + ORDER BY table FORMAT Vertical" } From ac480084a5faf6fa029bff492c09989ac9573388 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 3 Jun 2024 19:40:41 +0000 Subject: [PATCH 070/139] try to add check in mv --- .../Transforms/buildPushingToViewsChain.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Processors/Transforms/buildPushingToViewsChain.cpp b/src/Processors/Transforms/buildPushingToViewsChain.cpp index ff1be9323f5..177d45650dd 100644 --- a/src/Processors/Transforms/buildPushingToViewsChain.cpp +++ b/src/Processors/Transforms/buildPushingToViewsChain.cpp @@ -367,13 +367,16 @@ std::optional generateViewChain( bool check_access = !materialized_view->hasInnerTable() && materialized_view->getInMemoryMetadataPtr()->sql_security_type; out = interpreter.buildChain(inner_table, inner_metadata_snapshot, insert_columns, thread_status_holder, view_counter_ms, check_access); - bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); - const auto & settings = insert_context->getSettingsRef(); + if (interpreter.shouldAddSquashingFroStorage(inner_table)) + { + bool table_prefers_large_blocks = inner_table->prefersLargeBlocks(); + const auto & settings = insert_context->getSettingsRef(); - out.addSource(std::make_shared( - out.getInputHeader(), - table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, - table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL)); + out.addSource(std::make_shared( + out.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? 
settings.min_insert_block_size_bytes : 0ULL)); + } auto counting = std::make_shared(out.getInputHeader(), current_thread, insert_context->getQuota()); counting->setProcessListElement(insert_context->getProcessListElement()); From e6f99266f60a46ec5ae56e55ecff2d753ac49919 Mon Sep 17 00:00:00 2001 From: Lee sungju Date: Tue, 4 Jun 2024 14:59:22 +0900 Subject: [PATCH 071/139] fix typo Fix typos in named collection samples --- docs/en/engines/table-engines/integrations/iceberg.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/iceberg.md b/docs/en/engines/table-engines/integrations/iceberg.md index 9d6395f73ac..21fdbc0b1a5 100644 --- a/docs/en/engines/table-engines/integrations/iceberg.md +++ b/docs/en/engines/table-engines/integrations/iceberg.md @@ -37,7 +37,7 @@ Using named collections: http://test.s3.amazonaws.com/clickhouse-bucket/ - test + test test From 9920c3d17f9f64e24b46addd746c4bdcc49e6972 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 6 Jun 2024 21:19:28 +0000 Subject: [PATCH 072/139] Add uniform snowflakeID conversion functions --- docs/en/operations/settings/settings.md | 6 + .../sql-reference/functions/uuid-functions.md | 169 +++++++++++++- src/Core/Settings.h | 1 + src/Core/SettingsChangesHistory.h | 1 + src/Functions/dateTimeToSnowflakeID.cpp | 181 +++++++++++++++ src/Functions/generateSnowflakeID.cpp | 2 +- src/Functions/snowflake.cpp | 99 +++++--- src/Functions/snowflakeIDToDateTime.cpp | 217 ++++++++++++++++++ .../0_stateless/00515_enhanced_time_zones.sql | 2 + .../0_stateless/01942_dateTimeToSnowflake.sql | 4 + .../01942_dateTimeToSnowflakeID.reference | 29 +++ .../01942_dateTimeToSnowflakeID.sql | 71 ++++++ .../01942_snowflakeIDToDateTime.reference | 27 +++ .../01942_snowflakeIDToDateTime.sql | 86 +++++++ .../0_stateless/01942_snowflakeToDateTime.sql | 5 + .../aspell-ignore/en/aspell-dict.txt | 3 + 16 files changed, 869 insertions(+), 34 deletions(-) create mode 100644 src/Functions/dateTimeToSnowflakeID.cpp create mode 100644 src/Functions/snowflakeIDToDateTime.cpp create mode 100644 tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference create mode 100644 tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql create mode 100644 tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference create mode 100644 tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index ffaf53085c4..a0ee2ef0399 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -5398,6 +5398,12 @@ When set to `false` than all attempts are made with identical timeouts. Default value: `true`. +## uniform_snowflake_conversion_functions {#uniform_snowflake_conversion_functions} + +Controls if functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` are enabled (if `true`), or functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` (if `false`). + +Default value: `true` + ## allow_experimental_variant_type {#allow_experimental_variant_type} Allows creation of experimental [Variant](../../sql-reference/data-types/variant.md). 
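The functions gated by this setting are documented next, including the Snowflake ID bit layout: 1 top zero bit, a 41-bit millisecond timestamp, a 10-bit machine id, and a 12-bit counter. As a standalone illustration of that layout — a sketch with hypothetical helper names, not code from this patch — extracting the fields is just shifts and masks:

```cpp
#include <cassert>
#include <cstdint>

// Field widths follow the layout in the docs below; kTimeShift mirrors the
// `time_shift = 22` constant used by the new conversion functions.
constexpr int kTimeShift = 22;
constexpr uint64_t kMachineIdMask = (1ULL << 10) - 1; // 10-bit machine id
constexpr uint64_t kCounterMask   = (1ULL << 12) - 1; // 12-bit counter

uint64_t timestampMs(uint64_t id, uint64_t epoch = 0) { return (id >> kTimeShift) + epoch; }
uint64_t machineId(uint64_t id) { return (id >> 12) & kMachineIdMask; }
uint64_t counter(uint64_t id) { return id & kCounterMask; }

int main()
{
    // ID taken from the snowflakeIDToDateTime example in the docs below,
    // which corresponds to 2024-06-06 10:59:58 UTC.
    const uint64_t id = 7204436857747984384ULL;
    assert(timestampMs(id) / 1000 == 1717671598); // Unix seconds of that timestamp
    assert(counter(id) == 0);                     // first ID of its millisecond
    return 0;
}
```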
diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md index 0323ae728a9..24557db4ee9 100644 --- a/docs/en/sql-reference/functions/uuid-functions.md +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -543,12 +543,17 @@ serverUUID() Generates a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID). -The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. +The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function `generateSnowflakeID` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries. +:::note +The generated Snowflake IDs are based on the UNIX epoch 1970-01-01. +While no standard or recommendation exists for the epoch of Snowflake IDs, implementations in other systems may use a different epoch, e.g. Twitter/X (2010-11-04) or Mastodon (2015-01-01). +::: + ``` 0 1 2 3 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 @@ -605,6 +610,11 @@ SELECT generateSnowflakeID(1), generateSnowflakeID(2); ## snowflakeToDateTime +:::warning +This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +The function will be removed at some point in future. +::: + Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime](../data-types/datetime.md) format. **Syntax** @@ -641,6 +651,11 @@ Result: ## snowflakeToDateTime64 +:::warning +This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +The function will be removed at some point in future. +::: + Extracts the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) in [DateTime64](../data-types/datetime64.md) format. **Syntax** @@ -677,6 +692,11 @@ Result: ## dateTimeToSnowflake +:::warning +This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +The function will be removed at some point in future. +::: + Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. **Syntax** @@ -711,6 +731,11 @@ Result: ## dateTime64ToSnowflake +:::warning +This function is deprecated and can only be used if setting [uniform_snowflake_conversion_functions](../../operations/settings/settings.md#uniform_snowflake_conversion_functions) is disabled. +The function will be removed at some point in future. +::: + Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. 
**Syntax** @@ -743,6 +768,148 @@ Result: └─────────────────────────────┘ ``` +## snowflakeIDToDateTime + +Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md). + +**Syntax** + +``` sql +snowflakeIDToDateTime(value[, epoch[, time_zone]]) +``` + +**Arguments** + +- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value. + +**Example** + +Query: + +```sql +SELECT snowflakeIDToDateTime(7204436857747984384) AS res +``` + +Result: + +``` +┌─────────────────res─┐ +│ 2024-06-06 10:59:58 │ +└─────────────────────┘ +``` + +## snowflakeIDToDateTime64 + +Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md). + +**Syntax** + +``` sql +snowflakeIDToDateTime64(value[, epoch[, time_zone]]) +``` + +**Arguments** + +- `value` — Snowflake ID. [UInt64](../data-types/int-uint.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). +- `time_zone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md). + +**Returned value** + +- The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision. + +**Example** + +Query: + +```sql +SELECT snowflakeIDToDateTime64(7204436857747984384) AS res +``` + +Result: + +``` +┌─────────────────res─┐ +│ 2024-06-06 10:59:58 │ +└─────────────────────┘ +``` + +## dateTimeToSnowflakeID + +Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. + +**Syntax** + +``` sql +dateTimeToSnowflakeID(value[, epoch]) +``` + +**Arguments** + +- `value` — Date with time. [DateTime](../data-types/datetime.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). + +**Returned value** + +- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time. + +**Example** + +Query: + +```sql +SELECT toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt, dateTimeToSnowflakeID(dt) AS res; +``` + +Result: + +``` +┌──────────────────dt─┬─────────────────res─┐ +│ 2021-08-15 18:57:56 │ 6832626392367104000 │ +└─────────────────────┴─────────────────────┘ +``` + +## dateTime64ToSnowflakeID + +Convert a [DateTime64](../data-types/datetime64.md) to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time. 
+ +**Syntax** + +``` sql +dateTime64ToSnowflakeID(value[, epoch]) +``` + +**Arguments** + +- `value` — Date with time. [DateTime64](../data-types/datetime64.md). +- `epoch` - Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md). + +**Returned value** + +- Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time. + +**Example** + +Query: + +```sql +SELECT toDateTime('2021-08-15 18:57:56.493', 3, 'Asia/Shanghai') AS dt, dateTime64ToSnowflakeID(dt) AS res; +``` + +Result: + +``` +┌──────────────────────dt─┬─────────────────res─┐ +│ 2021-08-15 18:57:56.493 │ 6832626394434895872 │ +└─────────────────────────┴─────────────────────┘ +``` + ## See also - [dictGetUUID](../functions/ext-dict-functions.md#ext_dict_functions-other) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 27ce54c03a7..2f85fb71e6f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -928,6 +928,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ + M(Bool, uniform_snowflake_conversion_functions, true, "Enable functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
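The example values in the new documentation and tests can be reproduced by hand. Below is a minimal sketch of the conversion arithmetic — `firstSnowflakeID` is a hypothetical name, not this patch's API — checked against the constants that appear in the test references further down:

```cpp
#include <cassert>
#include <cstdint>

constexpr int kTimeShift = 22;

// dateTimeToSnowflakeID semantics as documented: the first Snowflake ID of a
// given millisecond, i.e. machine id and counter both zero.
uint64_t firstSnowflakeID(uint64_t unix_ms, uint64_t epoch = 0)
{
    return (unix_ms - epoch) << kTimeShift;
}

int main()
{
    const uint64_t ms = 1629053876ULL * 1000;        // 2021-08-15 18:57:56 UTC
    const uint64_t twitter_epoch = 1288834974657ULL; // 2010-11-04, per the docs
    assert(firstSnowflakeID(ms) == 6832747188322304000ULL);                // epoch 1970-01-01
    assert(firstSnowflakeID(ms, twitter_epoch) == 1426981498778550272ULL); // Twitter epoch
    return 0;
}
```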
diff --git a/src/Core/SettingsChangesHistory.h b/src/Core/SettingsChangesHistory.h index 814c08c5705..cda036c22ea 100644 --- a/src/Core/SettingsChangesHistory.h +++ b/src/Core/SettingsChangesHistory.h @@ -97,6 +97,7 @@ static const std::map +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int UNKNOWN_FUNCTION; +} + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr int time_shift = 22; + +} + +class FunctionDateTimeToSnowflakeID : public IFunction +{ +private: + const bool uniform_snowflake_conversion_functions; + +public: + static constexpr auto name = "dateTimeToSnowflakeID"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionDateTimeToSnowflakeID(ContextPtr context) + : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime), nullptr, "DateTime"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (!uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); + + const auto & col_src = *arguments[0].column; + + size_t epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + const auto & src_data = typeid_cast(col_src).getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (static_cast(src_data[i]) * 1000 - epoch) << time_shift; + return col_res; + } +}; + + +class FunctionDateTime64ToSnowflakeID : public IFunction +{ +private: + const bool uniform_snowflake_conversion_functions; + +public: + static constexpr auto name = "dateTime64ToSnowflakeID"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionDateTime64ToSnowflakeID(ContextPtr context) + : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + 
FunctionArgumentDescriptors args{ + {"value", static_cast(&isDateTime64), nullptr, "DateTime64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (!uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); + + const auto & col_src = *arguments[0].column; + const auto & src_data = typeid_cast(col_src).getData(); + + size_t epoch = 0; + if (arguments.size() == 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnUInt64::create(input_rows_count); + auto & res_data = col_res->getData(); + + /// timestamps in snowflake-ids are millisecond-based, convert input to milliseconds + UInt32 src_scale = getDecimalScale(*arguments[0].type); + Int64 multiplier_msec = DecimalUtils::scaleMultiplier(3); + Int64 multiplier_src = DecimalUtils::scaleMultiplier(src_scale); + auto factor = multiplier_msec / static_cast(multiplier_src); + + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(src_data[i] * factor - epoch) << time_shift; + + return col_res; + } +}; + +REGISTER_FUNCTION(DateTimeToSnowflakeID) +{ + { + FunctionDocumentation::Description description = R"(Converts a [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.)"; + FunctionDocumentation::Syntax syntax = "dateTimeToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime](../data-types/datetime.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTimeToSnowflakeID(toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai'))", "6832626392367104000"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Converts a [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the giving time.)"; + FunctionDocumentation::Syntax syntax = "dateTime64ToSnowflakeID(value[, epoch])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Date with time. [DateTime64](../data-types/datetime.md)."}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. 
[UInt*](../data-types/int-uint.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "Input value converted to [UInt64](../data-types/int-uint.md) as the first Snowflake ID at that time."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT dateTime64ToSnowflakeID(toDateTime64('2021-08-15 18:57:56', 3, 'Asia/Shanghai'))", "6832626394434895872"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/src/Functions/generateSnowflakeID.cpp b/src/Functions/generateSnowflakeID.cpp index f1e47ea1158..8ac010deafc 100644 --- a/src/Functions/generateSnowflakeID.cpp +++ b/src/Functions/generateSnowflakeID.cpp @@ -207,7 +207,7 @@ public: REGISTER_FUNCTION(GenerateSnowflakeID) { - FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds 41 (+ 1 top zero bit) bits, followed by machine id (10 bits), a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; + FunctionDocumentation::Description description = R"(Generates a Snowflake ID. The generated Snowflake ID contains the current Unix timestamp in milliseconds (41 + 1 top zero bits), followed by a machine id (10 bits), and a counter (12 bits) to distinguish IDs within a millisecond. For any given timestamp (unix_ts_ms), the counter starts at 0 and is incremented by 1 for each new Snowflake ID until the timestamp changes. In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to 0. Function generateSnowflakeID guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries.)"; FunctionDocumentation::Syntax syntax = "generateSnowflakeID([expression])"; FunctionDocumentation::Arguments arguments = {{"expression", "The expression is used to bypass common subexpression elimination if the function is called multiple times in a query but otherwise ignored. Optional."}}; FunctionDocumentation::ReturnedValue returned_value = "A value of type UInt64"; diff --git a/src/Functions/snowflake.cpp b/src/Functions/snowflake.cpp index 4a2d502a31a..801727e9eb9 100644 --- a/src/Functions/snowflake.cpp +++ b/src/Functions/snowflake.cpp @@ -11,11 +11,17 @@ #include +/// ------------------------------------------------------------------------------------------------------------------------------ +/// The functions in this file are deprecated and should be removed in favor of functions 'snowflakeIDToDateTime[64]' and +/// 'dateTime[64]ToSnowflakeID' by summer 2025. Please also mark setting `uniform_snowflake_conversion_functions` as obsolete then. 
+/// ------------------------------------------------------------------------------------------------------------------------------ + namespace DB { namespace ErrorCodes { + extern const int DEPRECATED_FUNCTION; extern const int ILLEGAL_TYPE_OF_ARGUMENT; } @@ -34,10 +40,19 @@ constexpr int time_shift = 22; class FunctionDateTimeToSnowflake : public IFunction { private: - const char * name; + const bool uniform_snowflake_conversion_functions; public: - explicit FunctionDateTimeToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTimeToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTimeToSnowflake(ContextPtr context) + : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -56,6 +71,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -73,13 +91,20 @@ public: class FunctionSnowflakeToDateTime : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool uniform_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -107,6 +132,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -138,10 +166,19 @@ public: class FunctionDateTime64ToSnowflake : public IFunction { private: - const char * name; + const bool uniform_snowflake_conversion_functions; public: - explicit FunctionDateTime64ToSnowflake(const char * name_) : name(name_) { } + static constexpr auto name = "dateTime64ToSnowflake"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionDateTime64ToSnowflake(ContextPtr context) + : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + {} String getName() const override { return name; } size_t getNumberOfArguments() const override { return 1; } @@ -160,6 +197,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const 
override { + if (uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -185,13 +225,20 @@ public: class FunctionSnowflakeToDateTime64 : public IFunction { private: - const char * name; const bool allow_nonconst_timezone_arguments; + const bool uniform_snowflake_conversion_functions; public: - explicit FunctionSnowflakeToDateTime64(const char * name_, ContextPtr context) - : name(name_) - , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + static constexpr auto name = "snowflakeToDateTime64"; + + static FunctionPtr create(ContextPtr context) + { + return std::make_shared(context); + } + + explicit FunctionSnowflakeToDateTime64(ContextPtr context) + : allow_nonconst_timezone_arguments(context->getSettingsRef().allow_nonconst_timezone_arguments) + , uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) {} String getName() const override { return name; } @@ -219,6 +266,9 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { + if (uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::DEPRECATED_FUNCTION, "Function {} is deprecated, to enable it disable setting 'uniform_snowflake_conversion_functions'", getName()); + const auto & src = arguments[0]; const auto & src_column = *src.column; @@ -246,27 +296,12 @@ public: } -REGISTER_FUNCTION(DateTimeToSnowflake) +REGISTER_FUNCTION(LegacySnowflakeConversion) { - factory.registerFunction("dateTimeToSnowflake", - [](ContextPtr){ return std::make_shared("dateTimeToSnowflake"); }); -} - -REGISTER_FUNCTION(DateTime64ToSnowflake) -{ - factory.registerFunction("dateTime64ToSnowflake", - [](ContextPtr){ return std::make_shared("dateTime64ToSnowflake"); }); -} - -REGISTER_FUNCTION(SnowflakeToDateTime) -{ - factory.registerFunction("snowflakeToDateTime", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime", context); }); -} -REGISTER_FUNCTION(SnowflakeToDateTime64) -{ - factory.registerFunction("snowflakeToDateTime64", - [](ContextPtr context){ return std::make_shared("snowflakeToDateTime64", context); }); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); } } diff --git a/src/Functions/snowflakeIDToDateTime.cpp b/src/Functions/snowflakeIDToDateTime.cpp new file mode 100644 index 00000000000..abaf09b165b --- /dev/null +++ b/src/Functions/snowflakeIDToDateTime.cpp @@ -0,0 +1,217 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; + extern const int UNKNOWN_FUNCTION; +} + +namespace +{ + +/// See generateSnowflakeID.cpp +constexpr int time_shift = 22; + +} + +class FunctionSnowflakeIDToDateTime : public IFunction +{ +private: + const bool uniform_snowflake_conversion_functions; + const bool allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionSnowflakeIDToDateTime(ContextPtr context) + : 
uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { + FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (!uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); + + const auto & col_src = *arguments[0].column; + + size_t epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime::create(input_rows_count); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_data[i] >> time_shift) + epoch) / 1000); + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = static_cast(((src_val >> time_shift) + epoch) / 1000); + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + } +}; + + +class FunctionSnowflakeIDToDateTime64 : public IFunction +{ +private: + const bool uniform_snowflake_conversion_functions; + const bool allow_nonconst_timezone_arguments; + +public: + static constexpr auto name = "snowflakeIDToDateTime64"; + + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionSnowflakeIDToDateTime64(ContextPtr context) + : uniform_snowflake_conversion_functions(context->getSettingsRef().uniform_snowflake_conversion_functions) + , allow_nonconst_timezone_arguments(context->getSettings().allow_nonconst_timezone_arguments) + {} + + String getName() const override { return name; } + size_t getNumberOfArguments() const override { return 0; } + bool isVariadic() const override { return true; } + bool useDefaultImplementationForConstants() const override { return true; } + bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; } + + DataTypePtr getReturnTypeImpl(const ColumnsWithTypeAndName & arguments) const override + { 
+ FunctionArgumentDescriptors args{ + {"value", static_cast(&isUInt64), nullptr, "UInt64"} + }; + FunctionArgumentDescriptors optional_args{ + {"epoch", static_cast(&isNativeUInt), isColumnConst, "UInt*"}, + {"time_zone", static_cast(&isString), nullptr, "String"} + }; + validateFunctionArgumentTypes(*this, arguments, args, optional_args); + + String timezone; + if (arguments.size() == 3) + timezone = extractTimeZoneNameFromFunctionArguments(arguments, 2, 0, allow_nonconst_timezone_arguments); + + return std::make_shared(3, timezone); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + if (!uniform_snowflake_conversion_functions) + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "To use function {}, setting 'uniform_snowflake_conversion_functions' must be enabled", getName()); + + const auto & col_src = *arguments[0].column; + + size_t epoch = 0; + if (arguments.size() >= 2 && input_rows_count != 0) + { + const auto & col_epoch = *arguments[1].column; + epoch = col_epoch.getUInt(0); + } + + auto col_res = ColumnDateTime64::create(input_rows_count, 3); + auto & res_data = col_res->getData(); + + if (const auto * col_src_non_const = typeid_cast(&col_src)) + { + const auto & src_data = col_src_non_const->getData(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (src_data[i] >> time_shift) + epoch; + } + else if (const auto * col_src_const = typeid_cast(&col_src)) + { + UInt64 src_val = col_src_const->getValue(); + for (size_t i = 0; i < input_rows_count; ++i) + res_data[i] = (src_val >> time_shift) + epoch; + } + else + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal argument for function {}", name); + + return col_res; + + } +}; + +REGISTER_FUNCTION(SnowflakeIDToDateTime) +{ + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime](../data-types/datetime.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. [UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime](../data-types/datetime.md) value."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } + + { + FunctionDocumentation::Description description = R"(Returns the timestamp component of a [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as a value of type [DateTime64](../data-types/datetime64.md).)"; + FunctionDocumentation::Syntax syntax = "snowflakeIDToDateTime64(value[, epoch[, time_zone]])"; + FunctionDocumentation::Arguments arguments = { + {"value", "Snowflake ID. 
[UInt64](../data-types/int-uint.md)"}, + {"epoch", "Epoch of the Snowflake ID in milliseconds since 1970-01-01. Defaults to 0 (1970-01-01). For the Twitter/X epoch (2015-01-01), provide 1288834974657. Optional. [UInt*](../data-types/int-uint.md)"}, + {"time_zone", "[Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../data-types/string.md)"} + }; + FunctionDocumentation::ReturnedValue returned_value = "The timestamp component of `value` as a [DateTime64](../data-types/datetime64.md) with scale = 3, i.e. millisecond precision."; + FunctionDocumentation::Examples examples = {{"simple", "SELECT snowflakeIDToDateTime64(7204436857747984384)", "2024-06-06 10:59:58"}}; + FunctionDocumentation::Categories categories = {"Snowflake ID"}; + + factory.registerFunction({description, syntax, arguments, returned_value, examples, categories}); + } +} + +} diff --git a/tests/queries/0_stateless/00515_enhanced_time_zones.sql b/tests/queries/0_stateless/00515_enhanced_time_zones.sql index 837b0b4be20..e39b618b670 100644 --- a/tests/queries/0_stateless/00515_enhanced_time_zones.sql +++ b/tests/queries/0_stateless/00515_enhanced_time_zones.sql @@ -1,3 +1,5 @@ +SET uniform_snowflake_conversion_functions = 0; + SELECT addMonths(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 1, 'Asia/Kolkata'); SELECT addMonths(toDateTime('2017-11-05 10:37:47', 'Asia/Kolkata'), 1); SELECT addMonths(toTimeZone(toDateTime('2017-11-05 08:07:47', 'Asia/Istanbul'), 'Asia/Kolkata'), 1); diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql index 1090179bb67..0386717c933 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflake.sql @@ -1,3 +1,4 @@ +SET uniform_snowflake_conversion_functions = 0; -- Force-disable uniform snowflake conversion functions (in case this is randomized in CI) SET session_timezone = 'Africa/Juba'; -- Error cases @@ -10,6 +11,9 @@ SELECT dateTime64ToSnowflake('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT dateTimeToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT dateTime64ToSnowflake('abc', 123); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflake(now()) SETTINGS uniform_snowflake_conversion_functions = 1; -- { serverError DEPRECATED_FUNCTION } +SELECT dateTime64ToSnowflake(now64()) SETTINGS uniform_snowflake_conversion_functions = 1; -- { serverError DEPRECATED_FUNCTION } + SELECT '-- const / non-const inputs'; WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference new file mode 100644 index 00000000000..ab4e6770123 --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference @@ -0,0 +1,29 @@ +-- Negative tests +-- Return type +UInt64 +UInt64 +-- Standard and twitter epoch +Row 1: +────── +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.492 +dateTimeToSnowflakeID(dt): 6832747188322304000 +dateTime64ToSnowflakeID(dt64): 6832747190385901568 +dateTimeToSnowflakeID(dt, twitter_epoch): 1426981498778550272 +dateTime64ToSnowflakeID(dt64, twitter_epoch): 1426981500842147840 +-- Different DateTime64 scales +Row 1: +────── +dateTime64ToSnowflakeID(dt64_0): 6832747188322304000 
+dateTime64ToSnowflakeID(dt64_1): 6832747190000025600 +dateTime64ToSnowflakeID(dt64_2): 6832747190377512960 +dateTime64ToSnowflakeID(dt64_3): 6832747190385901568 +dateTime64ToSnowflakeID(dt64_4): 6832747190385901568 +-- Idempotency +Row 1: +────── +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0)), dt64_0): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1)), dt64_1): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2)), dt64_2): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3)), dt64_3): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)), dt64_4): 0 diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql new file mode 100644 index 00000000000..d4ea1d7efd0 --- /dev/null +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql @@ -0,0 +1,71 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces +SET uniform_snowflake_conversion_functions = 1; -- Force-enable uniform snowflake conversion functions (in case this is randomized in CI) + +SELECT '-- Negative tests'; +SELECT dateTimeToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTimeToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID('invalid_dt'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTimeToSnowflakeID(now(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTime64ToSnowflakeID(now64(), 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT dateTimeToSnowflakeID(now(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT dateTime64ToSnowflakeID(now64(), 42, 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT dateTimeToSnowflakeID(now()) SETTINGS uniform_snowflake_conversion_functions = 0; -- { serverError UNKNOWN_FUNCTION } +SELECT dateTime64ToSnowflakeID(now64()) SETTINGS uniform_snowflake_conversion_functions = 0; -- { serverError UNKNOWN_FUNCTION } + +SELECT '-- Return type'; +SELECT toTypeName(dateTimeToSnowflakeID(now())); +SELECT toTypeName(dateTime64ToSnowflakeID(now64())); + +SELECT '-- Standard and twitter epoch'; + +WITH + toDateTime('2021-08-15 18:57:56') AS dt, + toDateTime64('2021-08-15 18:57:56.492', 3) AS dt64, + 1288834974657 AS twitter_epoch +SELECT + dt, + dt64, + dateTimeToSnowflakeID(dt), + dateTime64ToSnowflakeID(dt64), + dateTimeToSnowflakeID(dt, twitter_epoch), + dateTime64ToSnowflakeID(dt64, twitter_epoch) +FORMAT + Vertical; + +SELECT '-- Different DateTime64 scales'; + +WITH + toDateTime64('2021-08-15 18:57:56.492', 0, 'UTC') AS dt64_0, + toDateTime64('2021-08-15 18:57:56.492', 1, 'UTC') AS dt64_1, + toDateTime64('2021-08-15 18:57:56.492', 2, 'UTC') AS dt64_2, + toDateTime64('2021-08-15 18:57:56.492', 3, 'UTC') AS dt64_3, + toDateTime64('2021-08-15 18:57:56.492', 4, 'UTC') AS dt64_4 +SELECT + dateTime64ToSnowflakeID(dt64_0), + dateTime64ToSnowflakeID(dt64_1), + dateTime64ToSnowflakeID(dt64_2), + dateTime64ToSnowflakeID(dt64_3), + dateTime64ToSnowflakeID(dt64_4) +Format + Vertical; + +SELECT '-- Idempotency'; + + -- DateTime64-to-SnowflakeID-to-DateTime64 is idempotent if the scale is <=3 (millisecond precision) +WITH + now64(0) AS dt64_0, + now64(1) AS dt64_1, + now64(2) AS dt64_2, + 
now64(3) AS dt64_3, + now64(4) AS dt64_4 +SELECT + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0)) == dt64_0, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1)) == dt64_1, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2)) == dt64_2, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3)) == dt64_3, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)) == dt64_4 +FORMAT + Vertical; diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference new file mode 100644 index 00000000000..9ed8c1dd3e5 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.reference @@ -0,0 +1,27 @@ +-- Negative tests +-- Return type +DateTime +DateTime64(3) +-- Non-const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +Row 1: +────── +sf: 1426981498778550272 +dt: 2021-08-15 18:57:56 +dt64: 2021-08-15 18:57:56.000 +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 18:59:58 +dt64: 2024-06-06 18:59:58.851 +-- Const path +Row 1: +────── +sf: 7204436857747984384 +dt: 2024-06-06 10:59:58 +dt64: 2024-06-06 10:59:58.851 +-- Can be combined with generateSnowflakeID diff --git a/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql new file mode 100644 index 00000000000..b0e244ef814 --- /dev/null +++ b/tests/queries/0_stateless/01942_snowflakeIDToDateTime.sql @@ -0,0 +1,86 @@ +SET session_timezone = 'UTC'; -- disable timezone randomization +SET allow_experimental_analyzer = 1; -- The old path formats the result with different whitespaces +SET uniform_snowflake_conversion_functions = 1; -- Force-enable uniform snowflake conversion functions (in case this is randomized in CI) + +SELECT '-- Negative tests'; +SELECT snowflakeIDToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64('invalid_snowflake'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 'invalid_epoch'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime64(123::UInt64, materialize(42)); -- {serverError ILLEGAL_COLUMN} +SELECT snowflakeIDToDateTime(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 42); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeIDToDateTime(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} +SELECT snowflakeIDToDateTime64(123::UInt64, 42, 'UTC', 'too_many_args'); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} + +SELECT snowflakeIDToDateTime(123::UInt64) SETTINGS uniform_snowflake_conversion_functions = 0; -- { serverError UNKNOWN_FUNCTION } +SELECT snowflakeIDToDateTime64(123::UInt64) SETTINGS uniform_snowflake_conversion_functions = 0; -- { serverError UNKNOWN_FUNCTION } + +SELECT '-- Return type'; +SELECT toTypeName(snowflakeIDToDateTime(123::UInt64)); +SELECT toTypeName(snowflakeIDToDateTime64(123::UInt64)); + +SELECT '-- Non-const path'; +-- Two const arguments are mapped to two non-const arguments 
('getDefaultImplementationForConstants'), the non-const path is taken + +WITH + 7204436857747984384 AS sf +SELECT + sf, + snowflakeIDToDateTime(sf) as dt, + snowflakeIDToDateTime64(sf) as dt64 +FORMAT + Vertical; + +-- With Twitter Snowflake ID and Twitter epoch +WITH + 1426981498778550272 AS sf, + 1288834974657 AS epoch +SELECT + sf, + snowflakeIDToDateTime(sf, epoch) as dt, + snowflakeIDToDateTime64(sf, epoch) as dt64 +FORMAT + Vertical; + +-- non-default timezone +WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + 'Asia/Shanghai' AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical; + +SELECT '-- Const path'; + +-- The const path can only be tested by const snowflake + const epoch + non-const time-zone. The latter requires a special setting. +WITH + 7204436857747984384 AS sf, + 0 AS epoch, -- default epoch + materialize('Asia/Shanghai') AS tz +SELECT + sf, + snowflakeIDToDateTime(sf, epoch, tz) as dt, + snowflakeIDToDateTime64(sf, epoch, tz) as dt64 +FORMAT + Vertical +SETTINGS + allow_nonconst_timezone_arguments = 1; + + +SELECT '-- Can be combined with generateSnowflakeID'; + +WITH + generateSnowflakeID() AS snowflake +SELECT + snowflakeIDToDateTime(snowflake), + snowflakeIDToDateTime64(snowflake) +FORMAT + Null; diff --git a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql index f1a50dd370d..1729a50ae44 100644 --- a/tests/queries/0_stateless/01942_snowflakeToDateTime.sql +++ b/tests/queries/0_stateless/01942_snowflakeToDateTime.sql @@ -1,3 +1,5 @@ +SET uniform_snowflake_conversion_functions = 0; -- Force-disable uniform snowflake conversion functions (in case this is randomized in CI) + -- -- Error cases SELECT snowflakeToDateTime(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} SELECT snowflakeToDateTime64(); -- {serverError NUMBER_OF_ARGUMENTS_DOESNT_MATCH} @@ -8,6 +10,9 @@ SELECT snowflakeToDateTime64('abc'); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} SELECT snowflakeToDateTime64('abc', 123); -- {serverError ILLEGAL_TYPE_OF_ARGUMENT} +SELECT snowflakeToDateTime(123::Int64) SETTINGS uniform_snowflake_conversion_functions = 1; -- { serverError DEPRECATED_FUNCTION } +SELECT snowflakeToDateTime64(123::Int64) SETTINGS uniform_snowflake_conversion_functions = 1; -- { serverError DEPRECATED_FUNCTION } + SELECT 'const column'; WITH CAST(1426860704886947840 AS Int64) AS i64, diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 49f43615c7e..0025214762e 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -960,6 +960,7 @@ ToGeoBoundary ToIPv ToParent ToSnowflake +ToSnowflakeID ToString ToUnicode Toolset @@ -1453,6 +1454,7 @@ datatypes dateName dateTime dateTimeToSnowflake +dateTimeToSnowflakeID datetime datetimes dayofyear @@ -2468,6 +2470,7 @@ skewpop skewsamp skippingerrors sleepEachRow +snowflakeIDToDateTime snowflakeToDateTime socketcache soundex From a12fec7f41203049ea6a454d6ac8832499ef7958 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 7 Jun 2024 14:22:23 +0000 Subject: [PATCH 073/139] Try to fix flaky test --- .../0_stateless/01942_dateTimeToSnowflakeID.reference | 10 +++++----- .../0_stateless/01942_dateTimeToSnowflakeID.sql | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) 
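Two things make the idempotency test flaky: CI randomizes the session timezone, so the round-tripped DateTime64 must be pinned to 'UTC', and snowflake timestamps carry only millisecond precision, so inputs with scale > 3 cannot round-trip (the follow-up commit drops that assertion). A sketch of the scale handling — plain integer scaling standing in for the multiplier factor computed in dateTimeToSnowflakeID.cpp — shows where the scale-4 digit is lost:

```cpp
#include <cassert>
#include <cstdint>

constexpr int kTimeShift = 22;

// DateTime64 stores 10^scale ticks per second; snowflake timestamps are fixed
// at millisecond precision, so sub-millisecond digits are dropped.
uint64_t dt64ToSnowflakeID(int64_t value, uint32_t scale)
{
    for (uint32_t s = scale; s > 3; --s) value /= 10; // truncate below milliseconds
    for (uint32_t s = scale; s < 3; ++s) value *= 10; // pad up to milliseconds
    return static_cast<uint64_t>(value) << kTimeShift;
}

int main()
{
    const int64_t v3 = 1699701071123LL;  // 2023-11-11 11:11:11.123  (scale 3)
    const int64_t v4 = 16997010711231LL; // 2023-11-11 11:11:11.1231 (scale 4)
    assert((dt64ToSnowflakeID(v3, 3) >> kTimeShift) == 1699701071123ULL); // exact round trip
    assert((dt64ToSnowflakeID(v4, 4) >> kTimeShift) == 1699701071123ULL); // trailing 1 lost
    return 0;
}
```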
diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference index ab4e6770123..aaf5a2e3543 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference @@ -22,8 +22,8 @@ dateTime64ToSnowflakeID(dt64_4): 6832747190385901568 -- Idempotency Row 1: ────── -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0)), dt64_0): 1 -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1)), dt64_1): 1 -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2)), dt64_2): 1 -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3)), dt64_3): 1 -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)), dt64_4): 0 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC'), dt64_0): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC'), dt64_1): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC'), dt64_2): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC'), dt64_3): 1 +equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4), 0, 'UTC'), dt64_4): 0 diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql index d4ea1d7efd0..ae8f7376697 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql @@ -62,10 +62,10 @@ WITH now64(3) AS dt64_3, now64(4) AS dt64_4 SELECT - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0)) == dt64_0, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1)) == dt64_1, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2)) == dt64_2, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3)) == dt64_3, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)) == dt64_4 + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC') == dt64_0, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC') == dt64_1, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC') == dt64_2, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC') == dt64_3, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4), 0, 'UTC') == dt64_4 FORMAT Vertical; From 9a7f5d1e2071255451224e16ea80eb69e8ee4658 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Fri, 7 Jun 2024 20:17:48 +0000 Subject: [PATCH 074/139] Try to fix flaky test, pt. 
II --- .../01942_dateTimeToSnowflakeID.reference | 5 ++++- .../0_stateless/01942_dateTimeToSnowflakeID.sql | 15 +++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference index aaf5a2e3543..5dcd0c9dfcd 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.reference @@ -26,4 +26,7 @@ equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC'), dt64_ equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC'), dt64_1): 1 equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC'), dt64_2): 1 equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC'), dt64_3): 1 -equals(snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4), 0, 'UTC'), dt64_4): 0 +Row 1: +────── +dt64_4: 2023-11-11 11:11:11.1231 +snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)): 2023-11-11 11:11:11.123 diff --git a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql index ae8f7376697..33bac8aaa35 100644 --- a/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql +++ b/tests/queries/0_stateless/01942_dateTimeToSnowflakeID.sql @@ -59,13 +59,20 @@ WITH now64(0) AS dt64_0, now64(1) AS dt64_1, now64(2) AS dt64_2, - now64(3) AS dt64_3, - now64(4) AS dt64_4 + now64(3) AS dt64_3 SELECT snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_0), 0, 'UTC') == dt64_0, snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_1), 0, 'UTC') == dt64_1, snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_2), 0, 'UTC') == dt64_2, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC') == dt64_3, - snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4), 0, 'UTC') == dt64_4 + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_3), 0, 'UTC') == dt64_3 +FORMAT + Vertical; + +-- not idempotent +WITH + toDateTime64('2023-11-11 11:11:11.1231', 4, 'UTC') AS dt64_4 +SELECT + dt64_4, + snowflakeIDToDateTime64(dateTime64ToSnowflakeID(dt64_4)) FORMAT + Vertical; From ab79addee492172440724df16de1c6a417ccd04f Mon Sep 17 00:00:00 2001 From: serxa Date: Sun, 9 Jun 2024 12:55:31 +0000 Subject: [PATCH 075/139] use 1MB HTTP buffers to avoid frequent send syscalls --- base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h index c87719b63a4..3178306363c 100644 --- a/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h +++ b/base/poco/Net/include/Poco/Net/HTTPBasicStreamBuf.h @@ -26,7 +26,7 @@ namespace Poco { namespace Net { - constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 8 * 1024; + constexpr size_t HTTP_DEFAULT_BUFFER_SIZE = 1024 * 1024; typedef Poco::BasicBufferedStreamBuf> HTTPBasicStreamBuf; From 68e1d8701c973def035f0b9ea17f6ca3d224d73c Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 10 Jun 2024 18:09:07 +0000 Subject: [PATCH 076/139] fixes due to review --- src/Interpreters/InterpreterInsertQuery.cpp | 10 +- src/Interpreters/Squashing.cpp | 119 +++++++++--------- src/Interpreters/Squashing.h | 38 ++---- .../Transforms/ApplySquashingTransform.h | 10 +- .../Transforms/PlanSquashingTransform.cpp | 62 +++------ .../Transforms/PlanSquashingTransform.h | 7 +- .../Transforms/SquashingTransform.cpp | 22 ++++
.../Transforms/SquashingTransform.h | 6 +- src/Server/TCPHandler.cpp | 17 ++- src/Storages/MergeTree/MutateTask.cpp | 16 ++- src/Storages/ProjectionsDescription.cpp | 2 +- 11 files changed, 128 insertions(+), 181 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index e632886778f..d735fb8a55c 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -632,7 +632,10 @@ BlockIO InterpreterInsertQuery::execute() pipeline.addSimpleTransform([&](const Block & in_header) -> ProcessorPtr { - return std::make_shared(in_header); + return std::make_shared( + in_header, + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); }); } @@ -685,7 +688,10 @@ BlockIO InterpreterInsertQuery::execute() { bool table_prefers_large_blocks = table->prefersLargeBlocks(); - auto squashing = std::make_shared(chain.getInputHeader()); + auto squashing = std::make_shared( + chain.getInputHeader(), + table_prefers_large_blocks ? settings.min_insert_block_size_rows : settings.max_block_size, + table_prefers_large_blocks ? settings.min_insert_block_size_bytes : 0ULL); chain.addSource(std::move(squashing)); diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 47add495421..a05c5853ce3 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,77 +10,30 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -ApplySquashing::ApplySquashing(Block header_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) : header(header_) + , min_block_size_rows(min_block_size_rows_) + , min_block_size_bytes(min_block_size_bytes_) { } -Chunk ApplySquashing::add(Chunk && input_chunk) +Chunk Squashing::flush() +{ + return convertToChunk(std::move(chunks_to_merge_vec)); +} + +Chunk Squashing::squash(Chunk && input_chunk) { if (!input_chunk.hasChunkInfo()) return Chunk(); const auto *info = getInfoFromChunk(input_chunk); - append(info->chunks); + squash(info->chunks); return std::move(accumulated_chunk); } -void ApplySquashing::append(std::vector & input_chunks) -{ - accumulated_chunk = {}; - std::vector mutable_columns = {}; - size_t rows = 0; - for (const Chunk & chunk : input_chunks) - rows += chunk.getNumRows(); - - { - auto & first_chunk = input_chunks[0]; - Columns columns = first_chunk.detachColumns(); - for (size_t i = 0; i < columns.size(); ++i) - { - mutable_columns.push_back(IColumn::mutate(std::move(columns[i]))); - mutable_columns[i]->reserve(rows); - } - } - - for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above - { - Columns columns = input_chunks[i].detachColumns(); - for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) - { - const auto source_column = columns[j]; - - mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); - } - } - accumulated_chunk.setColumns(std::move(mutable_columns), rows); -} - -const ChunksToSquash* ApplySquashing::getInfoFromChunk(const Chunk & chunk) -{ - const auto& info = chunk.getChunkInfo(); - const auto * agg_info = typeid_cast(info.get()); - - if (!agg_info) - throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); - - return agg_info; -} - -PlanSquashing::PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : 
min_block_size_rows(min_block_size_rows_) - , min_block_size_bytes(min_block_size_bytes_) - , header(header_) -{ -} - -Chunk PlanSquashing::flush() -{ - return convertToChunk(std::move(chunks_to_merge_vec)); -} - -Chunk PlanSquashing::add(Chunk && input_chunk) +Chunk Squashing::add(Chunk && input_chunk) { if (!input_chunk) return {}; @@ -131,7 +84,7 @@ Chunk PlanSquashing::add(Chunk && input_chunk) return {}; } -Chunk PlanSquashing::convertToChunk(std::vector && chunks) +Chunk Squashing::convertToChunk(std::vector && chunks) const { if (chunks.empty()) return {}; @@ -144,19 +97,61 @@ Chunk PlanSquashing::convertToChunk(std::vector && chunks) return Chunk(header.cloneEmptyColumns(), 0, info); } -void PlanSquashing::expandCurrentSize(size_t rows, size_t bytes) +void Squashing::squash(std::vector & input_chunks) +{ + accumulated_chunk = {}; + std::vector mutable_columns = {}; + size_t rows = 0; + for (const Chunk & chunk : input_chunks) + rows += chunk.getNumRows(); + + { + auto & first_chunk = input_chunks[0]; + Columns columns = first_chunk.detachColumns(); + for (size_t i = 0; i < columns.size(); ++i) + { + mutable_columns.push_back(IColumn::mutate(std::move(columns[i]))); + mutable_columns[i]->reserve(rows); + } + } + + for (size_t i = 1; i < input_chunks.size(); ++i) // We've already processed the first chunk above + { + Columns columns = input_chunks[i].detachColumns(); + for (size_t j = 0, size = mutable_columns.size(); j < size; ++j) + { + const auto source_column = columns[j]; + + mutable_columns[j]->insertRangeFrom(*source_column, 0, source_column->size()); + } + } + accumulated_chunk.setColumns(std::move(mutable_columns), rows); +} + +const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) +{ + const auto& info = chunk.getChunkInfo(); + const auto * agg_info = typeid_cast(info.get()); + + if (!agg_info) + throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no ChunksToSquash in ChunkInfoPtr"); + + return agg_info; +} + +void Squashing::expandCurrentSize(size_t rows, size_t bytes) { accumulated_size.rows += rows; accumulated_size.bytes += bytes; } -void PlanSquashing::changeCurrentSize(size_t rows, size_t bytes) +void Squashing::changeCurrentSize(size_t rows, size_t bytes) { accumulated_size.rows = rows; accumulated_size.bytes = bytes; } -bool PlanSquashing::isEnoughSize(size_t rows, size_t bytes) const +bool Squashing::isEnoughSize(size_t rows, size_t bytes) const { return (!min_block_size_rows && !min_block_size_bytes) || (min_block_size_rows && rows >= min_block_size_rows) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 77191e63050..760b7d7475f 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -26,39 +26,23 @@ struct ChunksToSquash : public ChunkInfo * Order of data is kept. 
*/ -class ApplySquashing +class Squashing { public: - explicit ApplySquashing(Block header_); - - Chunk add(Chunk && input_chunk); - - Block header; - -private: - Chunk accumulated_chunk; - - const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - - void append(std::vector & input_chunks); - - bool isEnoughSize(const Block & block); - bool isEnoughSize(size_t rows, size_t bytes) const; -}; - -class PlanSquashing -{ -public: - explicit PlanSquashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); - PlanSquashing(PlanSquashing && other) = default; + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); + Chunk squash(Chunk && input_chunk); Chunk flush(); + bool isDataLeft() { return !chunks_to_merge_vec.empty(); } + Block header; + private: struct CurrentSize { @@ -70,14 +54,18 @@ private: size_t min_block_size_rows; size_t min_block_size_bytes; - const Block header; CurrentSize accumulated_size; + Chunk accumulated_chunk; + + const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + + void squash(std::vector & input_chunks); void expandCurrentSize(size_t rows, size_t bytes); void changeCurrentSize(size_t rows, size_t bytes); bool isEnoughSize(size_t rows, size_t bytes) const; - Chunk convertToChunk(std::vector && chunks); + Chunk convertToChunk(std::vector && chunks) const; }; } diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index e63691fcc6a..7bf1f32340b 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -9,9 +9,9 @@ namespace DB class ApplySquashingTransform : public ExceptionKeepingTransform { public: - explicit ApplySquashingTransform(const Block & header) + explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -37,7 +37,7 @@ public: protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = squashing.add(std::move(chunk))) + if (auto res_chunk = squashing.squash(std::move(chunk))) cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } @@ -50,12 +50,12 @@ protected: } void onFinish() override { - auto chunk = squashing.add({}); + auto chunk = squashing.squash({}); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } private: - ApplySquashing squashing; + Squashing squashing; Chunk cur_chunk; Chunk finish_chunk; }; diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 7945bd97e04..f8d5143493f 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), balance(header, min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -29,9 +29,6 @@ IProcessor::Status PlanSquashingTransform::prepare() case READ_IF_CAN: 
status = prepareConsume(); break; - case WAIT_IN: - planning_status = PlanningStatus::READ_IF_CAN; - return Status::NeedData; case PUSH: return sendOrFlush(); case FLUSH: @@ -64,17 +61,21 @@ void PlanSquashingTransform::init() IProcessor::Status PlanSquashingTransform::prepareConsume() { - bool inputs_have_no_data = true, all_finished = true; + bool all_finished = true; for (auto & input : inputs) { if (!input.isFinished()) all_finished = false; + else + { + input.setNeeded(); + continue; + } if (input.hasData()) { - inputs_have_no_data = false; chunk = input.pull(); - transform(chunk); + chunk = transform(std::move(chunk)); if (chunk.hasChunkInfo()) { @@ -86,62 +87,27 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() if (all_finished) /// If all inputs are closed, we check if we have data in balancing { - if (balance.isDataLeft()) /// If we have data in balancing, we process this data + if (squashing.isDataLeft()) /// If we have data in balancing, we process this data { planning_status = PlanningStatus::FLUSH; flushChunk(); return Status::Ready; } - planning_status = PlanningStatus::PUSH; - return Status::Ready; - } - - if (inputs_have_no_data) - planning_status = PlanningStatus::WAIT_IN; - - return Status::Ready; -} - -IProcessor::Status PlanSquashingTransform::waitForDataIn() -{ - bool all_finished = true; - bool inputs_have_no_data = true; - for (auto & input : inputs) - { - if (input.isFinished()) - continue; - - all_finished = false; - - if (input.hasData()) - inputs_have_no_data = false; - - } - if (all_finished) - { - planning_status = PlanningStatus::READ_IF_CAN; - return Status::Ready; - } - - if (!inputs_have_no_data) - { - planning_status = PlanningStatus::READ_IF_CAN; + planning_status = PlanningStatus::FINISH; return Status::Ready; } return Status::NeedData; } -void PlanSquashingTransform::transform(Chunk & chunk_) +Chunk PlanSquashingTransform::transform(Chunk && chunk_) { - Chunk res_chunk = balance.add(std::move(chunk_)); - std::swap(res_chunk, chunk_); + return squashing.add(std::move(chunk_)); } -void PlanSquashingTransform::flushChunk() +Chunk PlanSquashingTransform::flushChunk() { - Chunk res_chunk = balance.flush(); - std::swap(res_chunk, chunk); + return squashing.flush(); } IProcessor::Status PlanSquashingTransform::sendOrFlush() diff --git a/src/Processors/Transforms/PlanSquashingTransform.h b/src/Processors/Transforms/PlanSquashingTransform.h index 7afc942a7f2..a9152d9dbe9 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.h +++ b/src/Processors/Transforms/PlanSquashingTransform.h @@ -8,7 +8,6 @@ enum PlanningStatus { INIT, READ_IF_CAN, - WAIT_IN, PUSH, FLUSH, FINISH @@ -36,12 +35,12 @@ public: Status waitForDataIn(); Status finish(); - void transform(Chunk & chunk); - void flushChunk(); + Chunk transform(Chunk && chunk); + Chunk flushChunk(); private: Chunk chunk; - PlanSquashing balance; + Squashing squashing; PlanningStatus planning_status = PlanningStatus::INIT; }; } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index a516811bf45..c1f8a9f2513 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,16 +12,15 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , planSquashing(header, min_block_size_rows, min_block_size_bytes) - , applySquashing(header) + 
, squashing(header, min_block_size_rows, min_block_size_bytes) { } void SquashingTransform::onConsume(Chunk chunk) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = squashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) - cur_chunk = applySquashing.add(std::move(planned_chunk)); + cur_chunk = squashing.squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -34,9 +33,9 @@ SquashingTransform::GenerateResult SquashingTransform::onGenerate() void SquashingTransform::onFinish() { - Chunk chunk = planSquashing.flush(); + Chunk chunk = squashing.flush(); if (chunk.hasChunkInfo()) - chunk = applySquashing.add(std::move(chunk)); + chunk = squashing.squash(std::move(chunk)); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } @@ -60,8 +59,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , planSquashing(header, min_block_size_rows, min_block_size_bytes) - , applySquashing(header) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -69,18 +67,18 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { if (!finished) { - Chunk planned_chunk = planSquashing.add(std::move(chunk)); + Chunk planned_chunk = squashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) - chunk = applySquashing.add(std::move(planned_chunk)); + chunk = squashing.squash(std::move(planned_chunk)); } else { if (chunk.hasRows()) throw Exception(ErrorCodes::LOGICAL_ERROR, "Chunk expected to be empty, otherwise it will be lost"); - chunk = planSquashing.flush(); + chunk = squashing.flush(); if (chunk.hasChunkInfo()) - chunk = applySquashing.add(std::move(chunk)); + chunk = squashing.squash(std::move(chunk)); } } diff --git a/src/Processors/Transforms/SquashingTransform.h b/src/Processors/Transforms/SquashingTransform.h index b5b3c6616d2..c5b727ac6ec 100644 --- a/src/Processors/Transforms/SquashingTransform.h +++ b/src/Processors/Transforms/SquashingTransform.h @@ -24,8 +24,7 @@ protected: void onFinish() override; private: - PlanSquashing planSquashing; - ApplySquashing applySquashing; + Squashing squashing; Chunk cur_chunk; Chunk finish_chunk; }; @@ -44,8 +43,7 @@ protected: IProcessor::Status prepare() override; private: - PlanSquashing planSquashing; - ApplySquashing applySquashing; + Squashing squashing; bool finished = false; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 1dd99796754..2be4e8d5665 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -885,22 +885,21 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - PlanSquashing plan_squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); - ApplySquashing apply_squashing(state.input_header); + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - apply_squashing.header = state.block_for_insert; - auto planned_chunk = plan_squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); + squashing.header = state.block_for_insert; + auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { - Chunk 
result_chunk = apply_squashing.add(std::move(planned_chunk)); + Chunk result_chunk = squashing.squash(std::move(planned_chunk)); ColumnsWithTypeAndName cols; if (result_chunk.hasColumns() && state.block_for_insert) for (size_t j = 0; j < result_chunk.getNumColumns(); ++j) cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); auto result = Block(cols); - apply_squashing.header = Block(state.block_for_insert); + squashing.header = Block(state.block_for_insert); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -909,14 +908,14 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro } } - auto planned_chunk = plan_squashing.flush(); + auto planned_chunk = squashing.flush(); Chunk result_chunk; if (planned_chunk.hasChunkInfo()) - result_chunk = apply_squashing.add(std::move(planned_chunk)); + result_chunk = squashing.squash(std::move(planned_chunk)); ColumnsWithTypeAndName cols; if (result_chunk.hasColumns()) for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], apply_squashing.header.getDataTypes()[j], apply_squashing.header.getNames()[j])); + cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], squashing.header.getDataTypes()[j], squashing.header.getNames()[j])); auto result = Block(cols); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 0d1fc46ec76..fad195d6a36 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1267,8 +1267,7 @@ private: ProjectionNameToItsBlocks projection_parts; std::move_iterator projection_parts_iterator; - std::vector projection_squash_plannings; - std::vector projection_squashes; + std::vector projection_squashes; const ProjectionsDescription & projections; ExecutableTaskPtr merge_projection_parts_task_ptr; @@ -1286,10 +1285,9 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { - PlanSquashing plan_squashing(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + Squashing squashing(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squash_plannings.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); - projection_squashes.emplace_back(ctx->updated_header); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1317,11 +1315,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); projection_squashes[i].header = block_to_squash; - Chunk planned_chunk = projection_squash_plannings[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); + Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) { - Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + Chunk projection_chunk = projection_squashes[i].squash(std::move(planned_chunk)); ColumnsWithTypeAndName cols; if (projection_chunk.hasColumns()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) @@ -1345,11 +1343,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { const auto & projection = *ctx->projections_to_build[i]; - auto & projection_squash_plan = projection_squash_plannings[i]; + auto & projection_squash_plan = projection_squashes[i]; auto planned_chunk = projection_squash_plan.flush(); if (planned_chunk.hasChunkInfo()) { - Chunk projection_chunk = projection_squashes[i].add(std::move(planned_chunk)); + Chunk projection_chunk = projection_squashes[i].squash(std::move(planned_chunk)); ColumnsWithTypeAndName cols; if (projection_chunk.hasColumns()) for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) diff --git a/src/Storages/ProjectionsDescription.cpp b/src/Storages/ProjectionsDescription.cpp index c88582a8a1a..37ea3f274b6 100644 --- a/src/Storages/ProjectionsDescription.cpp +++ b/src/Storages/ProjectionsDescription.cpp @@ -313,7 +313,7 @@ Block ProjectionDescription::calculate(const Block & block, ContextPtr context) // There should be only one output block after this transformation. 
builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0, 1)); - builder.addTransform(std::make_shared(builder.getHeader())); + builder.addTransform(std::make_shared(builder.getHeader(), block.rows(), 0)); auto pipeline = QueryPipelineBuilder::getPipeline(std::move(builder)); PullingPipelineExecutor executor(pipeline); From 8be3957f104d84ab28051c0e4953bc4d9338bf50 Mon Sep 17 00:00:00 2001 From: Yarik Briukhovetskyi <114298166+yariks5s@users.noreply.github.com> Date: Mon, 10 Jun 2024 20:20:46 +0200 Subject: [PATCH 077/139] style check --- src/Interpreters/Squashing.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index a05c5853ce3..05d99d89b15 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -84,7 +84,7 @@ Chunk Squashing::add(Chunk && input_chunk) return {}; } -Chunk Squashing::convertToChunk(std::vector && chunks) const +Chunk Squashing::convertToChunk(std::vector && chunks) const { if (chunks.empty()) return {}; From d2be2ce70544564e33d33b33fa38db5d84d082fd Mon Sep 17 00:00:00 2001 From: yariks5s Date: Mon, 10 Jun 2024 19:01:48 +0000 Subject: [PATCH 078/139] fixes in planner --- src/Interpreters/Squashing.cpp | 9 ++++----- src/Interpreters/Squashing.h | 3 +-- src/Processors/Transforms/PlanSquashingTransform.cpp | 5 ++--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index a05c5853ce3..588b75d33de 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -28,9 +28,7 @@ Chunk Squashing::squash(Chunk && input_chunk) return Chunk(); const auto *info = getInfoFromChunk(input_chunk); - squash(info->chunks); - - return std::move(accumulated_chunk); + return squash(info->chunks); } Chunk Squashing::add(Chunk && input_chunk) @@ -97,9 +95,9 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const return Chunk(header.cloneEmptyColumns(), 0, info); } -void Squashing::squash(std::vector & input_chunks) +Chunk Squashing::squash(std::vector & input_chunks) { - accumulated_chunk = {}; + Chunk accumulated_chunk; std::vector mutable_columns = {}; size_t rows = 0; for (const Chunk & chunk : input_chunks) @@ -126,6 +124,7 @@ void Squashing::squash(std::vector & input_chunks) } } accumulated_chunk.setColumns(std::move(mutable_columns), rows); + return accumulated_chunk; } const ChunksToSquash* Squashing::getInfoFromChunk(const Chunk & chunk) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 760b7d7475f..15b543be236 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -55,11 +55,10 @@ private: size_t min_block_size_bytes; CurrentSize accumulated_size; - Chunk accumulated_chunk; const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - void squash(std::vector & input_chunks); + Chunk squash(std::vector & input_chunks); void expandCurrentSize(size_t rows, size_t bytes); void changeCurrentSize(size_t rows, size_t bytes); diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index f8d5143493f..5ae605cc51a 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -27,8 +27,7 @@ IProcessor::Status PlanSquashingTransform::prepare() init(); break; case READ_IF_CAN: - status = prepareConsume(); - break; + return prepareConsume(); case PUSH: return sendOrFlush(); case FLUSH: 
@@ -90,7 +89,7 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() if (squashing.isDataLeft()) /// If we have data in balancing, we process this data { planning_status = PlanningStatus::FLUSH; - flushChunk(); + chunk = flushChunk(); return Status::Ready; } planning_status = PlanningStatus::FINISH; From 7b239f110245c53dbfc6d296eea4d975d019867e Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 11 Jun 2024 17:36:49 +0000 Subject: [PATCH 079/139] fixes due to review --- src/Interpreters/Squashing.cpp | 16 ++++++------ src/Interpreters/Squashing.h | 10 +++----- src/Processors/Chunk.h | 2 +- .../Transforms/ApplySquashingTransform.h | 6 ++--- .../Transforms/PlanSquashingTransform.cpp | 2 +- .../Transforms/SquashingTransform.cpp | 12 ++++----- src/Server/TCPHandler.cpp | 22 ++++++---------- src/Storages/MergeTree/MutateTask.cpp | 25 ++++++++----------- 8 files changed, 41 insertions(+), 54 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 2c0e7859d44..7964379a35d 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,9 +10,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : header(header_) - , min_block_size_rows(min_block_size_rows_) +Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) { } @@ -87,12 +86,15 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const if (chunks.empty()) return {}; + auto chunk = Chunk(chunks.back().cloneEmptyColumns(), 0); + auto info = std::make_shared(); info->chunks = std::move(chunks); chunks.clear(); - return Chunk(header.cloneEmptyColumns(), 0, info); + chunk.setChunkInfo(info); + return chunk; } Chunk Squashing::squash(std::vector & input_chunks) @@ -106,10 +108,10 @@ Chunk Squashing::squash(std::vector & input_chunks) { auto & first_chunk = input_chunks[0]; Columns columns = first_chunk.detachColumns(); - for (size_t i = 0; i < columns.size(); ++i) + for (auto & column : columns) { - mutable_columns.push_back(IColumn::mutate(std::move(columns[i]))); - mutable_columns[i]->reserve(rows); + mutable_columns.push_back(IColumn::mutate(std::move(column))); + mutable_columns.back()->reserve(rows); } } diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index 15b543be236..ea991d6dc85 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -29,11 +29,11 @@ struct ChunksToSquash : public ChunkInfo class Squashing { public: - explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); - Chunk squash(Chunk && input_chunk); + static Chunk squash(Chunk && input_chunk); Chunk flush(); bool isDataLeft() @@ -41,8 +41,6 @@ public: return !chunks_to_merge_vec.empty(); } - Block header; - private: struct CurrentSize { @@ -56,9 +54,9 @@ private: CurrentSize accumulated_size; - const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); + static const ChunksToSquash * getInfoFromChunk(const Chunk & chunk); - Chunk squash(std::vector & input_chunks); + static Chunk squash(std::vector & input_chunks); void expandCurrentSize(size_t rows, size_t bytes); void changeCurrentSize(size_t rows, size_t bytes); diff --git 
a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 4f753798eaa..3c0952f7aed 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -89,7 +89,7 @@ public: UInt64 getNumColumns() const { return columns.size(); } bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } - bool empty() const { return !hasRows() && !hasColumns(); } + bool empty() const { return !hasRows() && !hasColumns() && !hasChunkInfo(); } explicit operator bool() const { return !empty(); } void addColumn(ColumnPtr column); diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 7bf1f32340b..2de614b8dbd 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } @@ -37,7 +37,7 @@ public: protected: void onConsume(Chunk chunk) override { - if (auto res_chunk = squashing.squash(std::move(chunk))) + if (auto res_chunk = DB::Squashing::squash(std::move(chunk))) cur_chunk.setColumns(res_chunk.getColumns(), res_chunk.getNumRows()); } @@ -50,7 +50,7 @@ protected: } void onFinish() override { - auto chunk = squashing.squash({}); + auto chunk = Chunk(); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 5ae605cc51a..71e4ee15ecc 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index c1f8a9f2513..34c5b1a7202 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } @@ -20,7 +20,7 @@ void SquashingTransform::onConsume(Chunk chunk) { Chunk planned_chunk = squashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) - cur_chunk = squashing.squash(std::move(planned_chunk)); + cur_chunk = DB::Squashing::squash(std::move(planned_chunk)); } SquashingTransform::GenerateResult SquashingTransform::onGenerate() @@ -35,7 +35,7 @@ void SquashingTransform::onFinish() { Chunk chunk = squashing.flush(); if (chunk.hasChunkInfo()) - chunk = squashing.squash(std::move(chunk)); + chunk = 
DB::Squashing::squash(std::move(chunk)); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } @@ -59,7 +59,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } @@ -69,7 +69,7 @@ void SimpleSquashingTransform::transform(Chunk & chunk) { Chunk planned_chunk = squashing.add(std::move(chunk)); if (planned_chunk.hasChunkInfo()) - chunk = squashing.squash(std::move(planned_chunk)); + chunk = DB::Squashing::squash(std::move(planned_chunk)); } else { @@ -78,7 +78,7 @@ void SimpleSquashingTransform::transform(Chunk & chunk) chunk = squashing.flush(); if (chunk.hasChunkInfo()) - chunk = squashing.squash(std::move(chunk)); + chunk = DB::Squashing::squash(std::move(chunk)); } } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index cfabc9cef34..940fa94e182 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -886,21 +886,17 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Block header; while (readDataNext()) { - squashing.header = state.block_for_insert; + header = state.block_for_insert.cloneWithoutColumns(); auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { - Chunk result_chunk = squashing.squash(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns() && state.block_for_insert) - for (size_t j = 0; j < result_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], state.block_for_insert.getDataTypes()[j], state.block_for_insert.getNames()[j])); - auto result = Block(cols); - squashing.header = Block(state.block_for_insert); + Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = state.block_for_insert.cloneWithColumns(result_chunk.getColumns()); return PushResult { .status = PushResult::TOO_MUCH_DATA, @@ -912,13 +908,9 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro auto planned_chunk = squashing.flush(); Chunk result_chunk; if (planned_chunk.hasChunkInfo()) - result_chunk = squashing.squash(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - if (result_chunk.hasColumns()) - for (size_t j = 0; j < result_chunk.getNumColumns(); ++ j) - cols.push_back(ColumnWithTypeAndName(result_chunk.getColumns()[j], squashing.header.getDataTypes()[j], squashing.header.getNames()[j])); + result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = Block(cols); + auto result = header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 1d0c24c0f08..6fcc8f4ad92 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1286,9 +1286,8 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = 
ctx->projections_to_build.size(); i < size; ++i) { - Squashing squashing(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); // We split the materialization into multiple stages similar to the process of INSERT SELECT query. - projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1298,6 +1297,7 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; + Block header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,18 +1315,16 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].header = block_to_squash; + header = block_to_squash.cloneWithoutColumns(); Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) { - Chunk projection_chunk = projection_squashes[i].squash(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], block_to_squash.getDataTypes()[j], block_to_squash.getNames()[j])); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + + auto result = block_to_squash.cloneWithColumns(projection_chunk.getColumns()); auto tmp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); tmp_part.finalize(); tmp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(tmp_part.part)); @@ -1348,14 +1346,11 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() auto planned_chunk = projection_squash_plan.flush(); if (planned_chunk.hasChunkInfo()) { - Chunk projection_chunk = projection_squashes[i].squash(std::move(planned_chunk)); - ColumnsWithTypeAndName cols; - if (projection_chunk.hasColumns()) - for (size_t j = 0; j < projection_chunk.getNumColumns(); ++j) - cols.push_back(ColumnWithTypeAndName(projection_chunk.getColumns()[j], projection_squashes[i].header.getDataTypes()[j], projection_squashes[i].header.getNames()[j])); + Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); + auto result = header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( - *ctx->data, ctx->log, Block(cols), projection, ctx->new_data_part.get(), ++block_num); + *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); temp_part.part->getDataPartStorage().commitTransaction(); projection_parts[projection.name].emplace_back(std::move(temp_part.part)); From a54d6793578f0dbe14e2d33b5d64fcc931d1c0b1 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 11 Jun 2024 18:46:09 +0000 Subject: [PATCH 080/139] roll back some changes --- 
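Note: this rolls back the header-free Squashing from the review fixes; the header returns as a constructor argument and public member. Roughly, the call-site pattern being restored — a sketch condensed from the TCPHandler hunk below, not a complete program:

    Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size);
    squashing.header = state.block_for_insert;   // the header travels with the squashing state again
    auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()});
    if (planned_chunk.hasChunkInfo())
    {
        Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk));
        auto result = squashing.header.cloneWithColumns(result_chunk.getColumns());
    }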
src/Interpreters/Squashing.cpp | 10 ++++------ src/Interpreters/Squashing.h | 3 ++- src/Processors/Transforms/ApplySquashingTransform.h | 4 ++-- src/Processors/Transforms/PlanSquashingTransform.cpp | 2 +- src/Processors/Transforms/SquashingTransform.cpp | 4 ++-- src/Server/TCPHandler.cpp | 8 +++----- src/Storages/MergeTree/MutateTask.cpp | 7 +++---- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 7964379a35d..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,8 +10,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) { } @@ -86,15 +87,12 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const if (chunks.empty()) return {}; - auto chunk = Chunk(chunks.back().cloneEmptyColumns(), 0); - auto info = std::make_shared(); info->chunks = std::move(chunks); chunks.clear(); - chunk.setChunkInfo(info); - return chunk; + return Chunk(header.cloneEmptyColumns(), 0, info); } Chunk Squashing::squash(std::vector & input_chunks) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index ea991d6dc85..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -29,7 +29,7 @@ struct ChunksToSquash : public ChunkInfo class Squashing { public: - explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); @@ -41,6 +41,7 @@ public: return !chunks_to_merge_vec.empty(); } + Block header; private: struct CurrentSize { diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 2de614b8dbd..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -50,7 +50,7 @@ protected: } void onFinish() override { - auto chunk = Chunk(); + auto chunk = DB::Squashing::squash({}); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 71e4ee15ecc..5ae605cc51a 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), 
OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34c5b1a7202..34b733cde5e 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -59,7 +59,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 940fa94e182..f9b24139d86 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -886,12 +885,11 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); - Block header; + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - header = state.block_for_insert.cloneWithoutColumns(); + squashing.header = state.block_for_insert; auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { @@ -910,7 +908,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6fcc8f4ad92..4ea0e9ece4f 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,7 +1314,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - header = block_to_squash.cloneWithoutColumns(); + projection_squashes[i].header = block_to_squash; Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) @@ -1348,7 +1347,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From 66a714e3ee2488883a800692b4d02e89b272b668 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 11 Jun 2024 22:45:02 +0000 Subject: [PATCH 081/139] Revert "roll back some changes" This reverts commit a54d6793578f0dbe14e2d33b5d64fcc931d1c0b1. 
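In effect this returns to the header-free constructor, with each call site keeping the header itself — a sketch condensed from the TCPHandler hunk below, not a complete program:

    Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size);
    Block header = state.block_for_insert.cloneWithoutColumns();   // the call site remembers the block structure
    auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()});
    if (planned_chunk.hasChunkInfo())
    {
        Chunk result_chunk = DB::Squashing::squash(std::move(planned_chunk));
        auto result = header.cloneWithColumns(result_chunk.getColumns());
    }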
--- src/Interpreters/Squashing.cpp | 10 ++++++---- src/Interpreters/Squashing.h | 3 +-- src/Processors/Transforms/ApplySquashingTransform.h | 4 ++-- src/Processors/Transforms/PlanSquashingTransform.cpp | 2 +- src/Processors/Transforms/SquashingTransform.cpp | 4 ++-- src/Server/TCPHandler.cpp | 8 +++++--- src/Storages/MergeTree/MutateTask.cpp | 7 ++++--- 7 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index f8b6a6542cc..7964379a35d 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,9 +10,8 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) - : header(header_) - , min_block_size_rows(min_block_size_rows_) +Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) + : min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) { } @@ -87,12 +86,15 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const if (chunks.empty()) return {}; + auto chunk = Chunk(chunks.back().cloneEmptyColumns(), 0); + auto info = std::make_shared(); info->chunks = std::move(chunks); chunks.clear(); - return Chunk(header.cloneEmptyColumns(), 0, info); + chunk.setChunkInfo(info); + return chunk; } Chunk Squashing::squash(std::vector & input_chunks) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index d76cca60e41..ea991d6dc85 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -29,7 +29,7 @@ struct ChunksToSquash : public ChunkInfo class Squashing { public: - explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); @@ -41,7 +41,6 @@ public: return !chunks_to_merge_vec.empty(); } - Block header; private: struct CurrentSize { diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 965a084bb13..2de614b8dbd 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } @@ -50,7 +50,7 @@ protected: } void onFinish() override { - auto chunk = DB::Squashing::squash({}); + auto chunk = Chunk(); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 5ae605cc51a..71e4ee15ecc 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), 
OutputPorts(num_ports, header)), squashing(min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34b733cde5e..34c5b1a7202 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } @@ -59,7 +59,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(header, min_block_size_rows, min_block_size_bytes) + , squashing(min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index f9b24139d86..940fa94e182 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -885,11 +886,12 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); + Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); + Block header; while (readDataNext()) { - squashing.header = state.block_for_insert; + header = state.block_for_insert.cloneWithoutColumns(); auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { @@ -908,7 +910,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); + auto result = header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 4ea0e9ece4f..6fcc8f4ad92 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1297,6 +1297,7 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; + Block header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1314,7 +1315,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - projection_squashes[i].header = block_to_squash; + header = block_to_squash.cloneWithoutColumns(); Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) @@ -1347,7 +1348,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); + auto result = header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From 9923d38227e81216aef3c4323dd68becdf0ff0d3 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 11 Jun 2024 22:45:48 +0000 Subject: [PATCH 082/139] revert changes for empty chunk --- src/Processors/Chunk.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Processors/Chunk.h b/src/Processors/Chunk.h index 3c0952f7aed..4f753798eaa 100644 --- a/src/Processors/Chunk.h +++ b/src/Processors/Chunk.h @@ -89,7 +89,7 @@ public: UInt64 getNumColumns() const { return columns.size(); } bool hasRows() const { return num_rows > 0; } bool hasColumns() const { return !columns.empty(); } - bool empty() const { return !hasRows() && !hasColumns() && !hasChunkInfo(); } + bool empty() const { return !hasRows() && !hasColumns(); } explicit operator bool() const { return !empty(); } void addColumn(ColumnPtr column); From 4a72b36f287a4588b5e608a49b3ae4c824c8e8de Mon Sep 17 00:00:00 2001 From: yariks5s Date: Tue, 11 Jun 2024 23:26:23 +0000 Subject: [PATCH 083/139] Revert "Revert "roll back some changes"" This reverts commit 66a714e3ee2488883a800692b4d02e89b272b668. 
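For context: this switches back to the header-owning Squashing, so call sites can drop
the local header bookkeeping again. Roughly the same loop as in the reverted variant,
under the same assumptions (pullNextBlock() and consume() remain hypothetical):

    // Planning + squashing when Squashing owns the header (sketch).
    Squashing squashing(input_header, min_block_size_rows, min_block_size_bytes);

    while (Block block = pullNextBlock())
    {
        squashing.header = block;                      // keep the stored structure current
        Chunk planned = squashing.add({block.getColumns(), block.rows()});
        if (planned.hasChunkInfo())
        {
            Chunk squashed = DB::Squashing::squash(std::move(planned));
            consume(squashing.header.cloneWithColumns(squashed.getColumns()));
        }
    }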
--- src/Interpreters/Squashing.cpp | 10 ++++------ src/Interpreters/Squashing.h | 3 ++- src/Processors/Transforms/ApplySquashingTransform.h | 4 ++-- src/Processors/Transforms/PlanSquashingTransform.cpp | 2 +- src/Processors/Transforms/SquashingTransform.cpp | 4 ++-- src/Server/TCPHandler.cpp | 8 +++----- src/Storages/MergeTree/MutateTask.cpp | 7 +++---- 7 files changed, 17 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/Squashing.cpp b/src/Interpreters/Squashing.cpp index 7964379a35d..f8b6a6542cc 100644 --- a/src/Interpreters/Squashing.cpp +++ b/src/Interpreters/Squashing.cpp @@ -10,8 +10,9 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } -Squashing::Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_) - : min_block_size_rows(min_block_size_rows_) +Squashing::Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_) + : header(header_) + , min_block_size_rows(min_block_size_rows_) , min_block_size_bytes(min_block_size_bytes_) { } @@ -86,15 +87,12 @@ Chunk Squashing::convertToChunk(std::vector && chunks) const if (chunks.empty()) return {}; - auto chunk = Chunk(chunks.back().cloneEmptyColumns(), 0); - auto info = std::make_shared(); info->chunks = std::move(chunks); chunks.clear(); - chunk.setChunkInfo(info); - return chunk; + return Chunk(header.cloneEmptyColumns(), 0, info); } Chunk Squashing::squash(std::vector & input_chunks) diff --git a/src/Interpreters/Squashing.h b/src/Interpreters/Squashing.h index ea991d6dc85..d76cca60e41 100644 --- a/src/Interpreters/Squashing.h +++ b/src/Interpreters/Squashing.h @@ -29,7 +29,7 @@ struct ChunksToSquash : public ChunkInfo class Squashing { public: - explicit Squashing(size_t min_block_size_rows_, size_t min_block_size_bytes_); + explicit Squashing(Block header_, size_t min_block_size_rows_, size_t min_block_size_bytes_); Squashing(Squashing && other) = default; Chunk add(Chunk && input_chunk); @@ -41,6 +41,7 @@ public: return !chunks_to_merge_vec.empty(); } + Block header; private: struct CurrentSize { diff --git a/src/Processors/Transforms/ApplySquashingTransform.h b/src/Processors/Transforms/ApplySquashingTransform.h index 2de614b8dbd..965a084bb13 100644 --- a/src/Processors/Transforms/ApplySquashingTransform.h +++ b/src/Processors/Transforms/ApplySquashingTransform.h @@ -11,7 +11,7 @@ class ApplySquashingTransform : public ExceptionKeepingTransform public: explicit ApplySquashingTransform(const Block & header, const size_t min_block_size_rows, const size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -50,7 +50,7 @@ protected: } void onFinish() override { - auto chunk = Chunk(); + auto chunk = DB::Squashing::squash({}); finish_chunk.setColumns(chunk.getColumns(), chunk.getNumRows()); } diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 71e4ee15ecc..5ae605cc51a 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -11,7 +11,7 @@ namespace ErrorCodes } PlanSquashingTransform::PlanSquashingTransform(const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes, size_t num_ports) - : IProcessor(InputPorts(num_ports, header), OutputPorts(num_ports, header)), squashing(min_block_size_rows, min_block_size_bytes) + : IProcessor(InputPorts(num_ports, header), 
OutputPorts(num_ports, header)), squashing(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Processors/Transforms/SquashingTransform.cpp b/src/Processors/Transforms/SquashingTransform.cpp index 34c5b1a7202..34b733cde5e 100644 --- a/src/Processors/Transforms/SquashingTransform.cpp +++ b/src/Processors/Transforms/SquashingTransform.cpp @@ -12,7 +12,7 @@ extern const int LOGICAL_ERROR; SquashingTransform::SquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ExceptionKeepingTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } @@ -59,7 +59,7 @@ void SquashingTransform::work() SimpleSquashingTransform::SimpleSquashingTransform( const Block & header, size_t min_block_size_rows, size_t min_block_size_bytes) : ISimpleTransform(header, header, false) - , squashing(min_block_size_rows, min_block_size_bytes) + , squashing(header, min_block_size_rows, min_block_size_bytes) { } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 940fa94e182..f9b24139d86 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -886,12 +885,11 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro using PushResult = AsynchronousInsertQueue::PushResult; startInsertQuery(); - Squashing squashing(0, query_context->getSettingsRef().async_insert_max_data_size); - Block header; + Squashing squashing(state.input_header, 0, query_context->getSettingsRef().async_insert_max_data_size); while (readDataNext()) { - header = state.block_for_insert.cloneWithoutColumns(); + squashing.header = state.block_for_insert; auto planned_chunk = squashing.add({state.block_for_insert.getColumns(), state.block_for_insert.rows()}); if (planned_chunk.hasChunkInfo()) { @@ -910,7 +908,7 @@ AsynchronousInsertQueue::PushResult TCPHandler::processAsyncInsertQuery(Asynchro if (planned_chunk.hasChunkInfo()) result_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(result_chunk.getColumns()); + auto result = squashing.header.cloneWithColumns(result_chunk.getColumns()); return insert_queue.pushQueryWithBlock(state.parsed_query, std::move(result), query_context); } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index 6fcc8f4ad92..4ea0e9ece4f 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -1287,7 +1287,7 @@ void PartMergerWriter::prepare() for (size_t i = 0, size = ctx->projections_to_build.size(); i < size; ++i) { // We split the materialization into multiple stages similar to the process of INSERT SELECT query. 
- projection_squashes.emplace_back(settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); + projection_squashes.emplace_back(ctx->updated_header, settings.min_insert_block_size_rows, settings.min_insert_block_size_bytes); } existing_rows_count = 0; @@ -1297,7 +1297,6 @@ void PartMergerWriter::prepare() bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Block cur_block; - Block header; if (MutationHelpers::checkOperationIsNotCanceled(*ctx->merges_blocker, ctx->mutate_entry) && ctx->mutating_executor->pull(cur_block)) { if (ctx->minmax_idx) @@ -1315,7 +1314,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() ProfileEventTimeIncrement watch(ProfileEvents::MutateTaskProjectionsCalculationMicroseconds); Block block_to_squash = projection.calculate(cur_block, ctx->context); - header = block_to_squash.cloneWithoutColumns(); + projection_squashes[i].header = block_to_squash; Chunk planned_chunk = projection_squashes[i].add({block_to_squash.getColumns(), block_to_squash.rows()}); if (planned_chunk.hasChunkInfo()) @@ -1348,7 +1347,7 @@ bool PartMergerWriter::mutateOriginalPartAndPrepareProjections() { Chunk projection_chunk = DB::Squashing::squash(std::move(planned_chunk)); - auto result = header.cloneWithColumns(projection_chunk.getColumns()); + auto result = projection_squash_plan.header.cloneWithColumns(projection_chunk.getColumns()); auto temp_part = MergeTreeDataWriter::writeTempProjectionPart( *ctx->data, ctx->log, result, projection, ctx->new_data_part.get(), ++block_num); temp_part.finalize(); From c5159bc3aa409448f6663718607909c34b63fb71 Mon Sep 17 00:00:00 2001 From: yariks5s Date: Thu, 13 Jun 2024 14:52:34 +0000 Subject: [PATCH 084/139] fix setNeeded() in PlanSquashing --- src/Processors/Transforms/PlanSquashingTransform.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Processors/Transforms/PlanSquashingTransform.cpp b/src/Processors/Transforms/PlanSquashingTransform.cpp index 5ae605cc51a..8598a29278a 100644 --- a/src/Processors/Transforms/PlanSquashingTransform.cpp +++ b/src/Processors/Transforms/PlanSquashingTransform.cpp @@ -64,12 +64,12 @@ IProcessor::Status PlanSquashingTransform::prepareConsume() for (auto & input : inputs) { if (!input.isFinished()) - all_finished = false; - else { + all_finished = false; input.setNeeded(); - continue; } + else + continue; if (input.hasData()) { From ff6d1d09d60126922a8786f2bab10d28fa923db6 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 13 Jun 2024 14:23:45 +0200 Subject: [PATCH 085/139] Fix possible crash for hedged requests Previously, it was possible for hedged requests to continue choosing replica even after the query had been cancelled (RemoteQueryExecutor::tryCancel()), and not only this does not make sense, but could also lead to a crash, due to use-after-free of current_thread (ThreadStatus), since fiber had been created on a different thread (thread for query pipeline), but will be destroyed from another thread (that calls QueryPipeline dtor), and the query pipeline's thread could be already destroyed by that time (especially under threads pressure). 
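Roughly, the broken ordering is (illustrative timeline, not actual code):

    Thread A (query pipeline thread):
        creates the fiber for the hedged connections
        -> the fiber captures current_thread (the ThreadStatus of A)
    Query cancellation:
        RemoteQueryExecutor::tryCancel() runs, but HedgedConnectionsFactory
        can still keep choosing replicas on pending epoll events
    Thread B (the thread that runs the QueryPipeline dtor):
        destroys the fiber
        -> fiber teardown touches the captured ThreadStatus of A, which may
           already be destroyed => use-after-free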
v0: IConnection::cancelAsync() v2: remove it, since the query is sent in a deferred manner for hedged requests, so that said that modifying HedgedConnections::sendCancel() should be enough Signed-off-by: Azat Khuzhin --- src/Client/HedgedConnections.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/Client/HedgedConnections.cpp b/src/Client/HedgedConnections.cpp index fb4d9a6bdcc..8c993f906e0 100644 --- a/src/Client/HedgedConnections.cpp +++ b/src/Client/HedgedConnections.cpp @@ -255,6 +255,17 @@ void HedgedConnections::sendCancel() if (!sent_query || cancelled) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot cancel. Either no query sent or already cancelled."); + /// All hedged connections should be stopped, since otherwise before the + /// HedgedConnectionsFactory will be destroyed (that will happen from + /// QueryPipeline dtor) they could still do some work. + /// And not only this does not make sense, but it also could lead to + /// use-after-free of the current_thread, since the thread from which they + /// had been created differs from the thread where the dtor of + /// QueryPipeline will be called and the initial thread could be already + /// destroyed (especially when the system is under pressure). + if (hedged_connections_factory.hasEventsInProcess()) + hedged_connections_factory.stopChoosingReplicas(); + cancelled = true; for (auto & offset_status : offset_states) From fb32a99578b57cf185f6e868879aaf2ff218419d Mon Sep 17 00:00:00 2001 From: kssenii Date: Thu, 13 Jun 2024 19:13:13 +0200 Subject: [PATCH 086/139] Initialize global trace collector for Poco::ThreadPool --- base/poco/Foundation/CMakeLists.txt | 1 + .../poco/Foundation/include/Poco/ThreadPool.h | 20 ++++- base/poco/Foundation/src/ThreadPool.cpp | 75 ++++++++++++------- programs/server/Server.cpp | 18 +++-- src/Server/HTTPHandler.cpp | 1 - src/Server/InterserverIOHTTPHandler.cpp | 2 - src/Server/KeeperTCPHandler.cpp | 1 - src/Server/MySQLHandler.cpp | 2 - src/Server/PostgreSQLHandler.cpp | 2 - src/Server/TCPHandler.cpp | 1 - src/Server/TCPHandler.h | 1 - 11 files changed, 81 insertions(+), 43 deletions(-) diff --git a/base/poco/Foundation/CMakeLists.txt b/base/poco/Foundation/CMakeLists.txt index dfb41a33fb1..324a0170bdd 100644 --- a/base/poco/Foundation/CMakeLists.txt +++ b/base/poco/Foundation/CMakeLists.txt @@ -213,6 +213,7 @@ target_compile_definitions (_poco_foundation ) target_include_directories (_poco_foundation SYSTEM PUBLIC "include") +target_link_libraries (_poco_foundation PRIVATE clickhouse_common_io) target_link_libraries (_poco_foundation PRIVATE diff --git a/base/poco/Foundation/include/Poco/ThreadPool.h b/base/poco/Foundation/include/Poco/ThreadPool.h index b9506cc5b7f..e2187bfeb66 100644 --- a/base/poco/Foundation/include/Poco/ThreadPool.h +++ b/base/poco/Foundation/include/Poco/ThreadPool.h @@ -48,7 +48,13 @@ class Foundation_API ThreadPool /// from the pool. { public: - ThreadPool(int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE); + explicit ThreadPool( + int minCapacity = 2, + int maxCapacity = 16, + int idleTime = 60, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t global_profiler_real_time_period_ns_ = 0, + size_t global_profiler_cpu_time_period_ns_ = 0); /// Creates a thread pool with minCapacity threads. /// If required, up to maxCapacity threads are created /// a NoThreadAvailableException exception is thrown. @@ -56,8 +62,14 @@ public: /// and more than minCapacity threads are running, the thread /// is killed. 
Threads are created with given stack size. - ThreadPool( - const std::string & name, int minCapacity = 2, int maxCapacity = 16, int idleTime = 60, int stackSize = POCO_THREAD_STACK_SIZE); + explicit ThreadPool( + const std::string & name, + int minCapacity = 2, + int maxCapacity = 16, + int idleTime = 60, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t global_profiler_real_time_period_ns_ = 0, + size_t global_profiler_cpu_time_period_ns_ = 0); /// Creates a thread pool with the given name and minCapacity threads. /// If required, up to maxCapacity threads are created /// a NoThreadAvailableException exception is thrown. @@ -171,6 +183,8 @@ private: int _serial; int _age; int _stackSize; + size_t _globalProfilerRealTimePeriodNs; + size_t _globalProfilerCPUTimePeriodNs; ThreadVec _threads; mutable FastMutex _mutex; }; diff --git a/base/poco/Foundation/src/ThreadPool.cpp b/base/poco/Foundation/src/ThreadPool.cpp index 6335ee82b47..f57c81e4128 100644 --- a/base/poco/Foundation/src/ThreadPool.cpp +++ b/base/poco/Foundation/src/ThreadPool.cpp @@ -20,6 +20,7 @@ #include "Poco/ErrorHandler.h" #include #include +#include namespace Poco { @@ -28,7 +29,11 @@ namespace Poco { class PooledThread: public Runnable { public: - PooledThread(const std::string& name, int stackSize = POCO_THREAD_STACK_SIZE); + explicit PooledThread( + const std::string& name, + int stackSize = POCO_THREAD_STACK_SIZE, + size_t globalProfilerRealTimePeriodNs_ = 0, + size_t globalProfilerCPUTimePeriodNs_ = 0); ~PooledThread(); void start(); @@ -51,16 +56,24 @@ private: Event _targetCompleted; Event _started; FastMutex _mutex; + size_t _globalProfilerRealTimePeriodNs; + size_t _globalProfilerCPUTimePeriodNs; }; -PooledThread::PooledThread(const std::string& name, int stackSize): - _idle(true), - _idleTime(0), - _pTarget(0), - _name(name), +PooledThread::PooledThread( + const std::string& name, + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : + _idle(true), + _idleTime(0), + _pTarget(0), + _name(name), _thread(name), - _targetCompleted(false) + _targetCompleted(false), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + _globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert_dbg (stackSize >= 0); _thread.setStackSize(stackSize); @@ -83,7 +96,7 @@ void PooledThread::start() void PooledThread::start(Thread::Priority priority, Runnable& target) { FastMutex::ScopedLock lock(_mutex); - + poco_assert (_pTarget == 0); _pTarget = ⌖ @@ -109,7 +122,7 @@ void PooledThread::start(Thread::Priority priority, Runnable& target, const std: } _thread.setName(fullName); _thread.setPriority(priority); - + poco_assert (_pTarget == 0); _pTarget = ⌖ @@ -145,7 +158,7 @@ void PooledThread::join() void PooledThread::activate() { FastMutex::ScopedLock lock(_mutex); - + poco_assert (_idle); _idle = false; _targetCompleted.reset(); @@ -155,7 +168,7 @@ void PooledThread::activate() void PooledThread::release() { const long JOIN_TIMEOUT = 10000; - + _mutex.lock(); _pTarget = 0; _mutex.unlock(); @@ -174,6 +187,10 @@ void PooledThread::release() void PooledThread::run() { + DB::ThreadStatus thread_status; + if (unlikely(_globalProfilerRealTimePeriodNs != 0 || _globalProfilerCPUTimePeriodNs != 0)) + thread_status.initGlobalProfiler(_globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs); + _started.set(); for (;;) { @@ -220,13 +237,17 @@ void PooledThread::run() ThreadPool::ThreadPool(int minCapacity, int maxCapacity, int idleTime, - int stackSize): - 
_minCapacity(minCapacity), - _maxCapacity(maxCapacity), + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : + _minCapacity(minCapacity), + _maxCapacity(maxCapacity), _idleTime(idleTime), _serial(0), _age(0), - _stackSize(stackSize) + _stackSize(stackSize), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + _globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0); @@ -243,14 +264,18 @@ ThreadPool::ThreadPool(const std::string& name, int minCapacity, int maxCapacity, int idleTime, - int stackSize): + int stackSize, + size_t globalProfilerRealTimePeriodNs_, + size_t globalProfilerCPUTimePeriodNs_) : _name(name), - _minCapacity(minCapacity), - _maxCapacity(maxCapacity), + _minCapacity(minCapacity), + _maxCapacity(maxCapacity), _idleTime(idleTime), _serial(0), _age(0), - _stackSize(stackSize) + _stackSize(stackSize), + _globalProfilerRealTimePeriodNs(globalProfilerRealTimePeriodNs_), + _globalProfilerCPUTimePeriodNs(globalProfilerCPUTimePeriodNs_) { poco_assert (minCapacity >= 1 && maxCapacity >= minCapacity && idleTime > 0); @@ -393,15 +418,15 @@ void ThreadPool::housekeep() ThreadVec activeThreads; idleThreads.reserve(_threads.size()); activeThreads.reserve(_threads.size()); - + for (ThreadVec::iterator it = _threads.begin(); it != _threads.end(); ++it) { if ((*it)->idle()) { if ((*it)->idleTime() < _idleTime) idleThreads.push_back(*it); - else - expiredThreads.push_back(*it); + else + expiredThreads.push_back(*it); } else activeThreads.push_back(*it); } @@ -463,7 +488,7 @@ PooledThread* ThreadPool::createThread() { std::ostringstream name; name << _name << "[#" << ++_serial << "]"; - return new PooledThread(name.str(), _stackSize); + return new PooledThread(name.str(), _stackSize, _globalProfilerRealTimePeriodNs, _globalProfilerCPUTimePeriodNs); } @@ -481,7 +506,7 @@ public: ThreadPool* pool() { FastMutex::ScopedLock lock(_mutex); - + if (!_pPool) { _pPool = new ThreadPool("default"); @@ -490,7 +515,7 @@ public: } return _pPool; } - + private: ThreadPool* _pPool; FastMutex _mutex; diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index c3a5be706b4..8a5f6173c49 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -721,11 +722,6 @@ try CurrentMetrics::set(CurrentMetrics::Revision, ClickHouseRevision::getVersionRevision()); CurrentMetrics::set(CurrentMetrics::VersionInteger, ClickHouseRevision::getVersionInteger()); - Poco::ThreadPool server_pool(3, server_settings.max_connections); - std::mutex servers_lock; - std::vector servers; - std::vector servers_to_start_before_tables; - /** Context contains all that query execution is dependent: * settings, available functions, data types, aggregate functions, databases, ... 
*/ @@ -823,6 +819,18 @@ try total_memory_tracker.setSampleMaxAllocationSize(server_settings.total_memory_profiler_sample_max_allocation_size); } + Poco::ThreadPool server_pool( + /* minCapacity */3, + /* maxCapacity */server_settings.max_connections, + /* idleTime */60, + /* stackSize */POCO_THREAD_STACK_SIZE, + server_settings.global_profiler_real_time_period_ns, + server_settings.global_profiler_cpu_time_period_ns); + + std::mutex servers_lock; + std::vector servers; + std::vector servers_to_start_before_tables; + /// Wait for all threads to avoid possible use-after-free (for example logging objects can be already destroyed). SCOPE_EXIT({ Stopwatch watch; diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 02d0959ff50..d6afa571e71 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -1060,7 +1060,6 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest void HTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("HTTPHandler"); - ThreadStatus thread_status; session = std::make_unique(server.context(), ClientInfo::Interface::HTTP, request.isSecure()); SCOPE_EXIT({ session.reset(); }); diff --git a/src/Server/InterserverIOHTTPHandler.cpp b/src/Server/InterserverIOHTTPHandler.cpp index 0d79aaa227b..45c28babe3a 100644 --- a/src/Server/InterserverIOHTTPHandler.cpp +++ b/src/Server/InterserverIOHTTPHandler.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include #include @@ -81,7 +80,6 @@ void InterserverIOHTTPHandler::processQuery(HTTPServerRequest & request, HTTPSer void InterserverIOHTTPHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) { setThreadName("IntersrvHandler"); - ThreadStatus thread_status; /// In order to work keep-alive. 
if (request.getVersion() == HTTPServerRequest::HTTP_1_1)
diff --git a/src/Server/KeeperTCPHandler.cpp b/src/Server/KeeperTCPHandler.cpp
index 6709cd298e5..4612e2e9fa8 100644
--- a/src/Server/KeeperTCPHandler.cpp
+++ b/src/Server/KeeperTCPHandler.cpp
@@ -309,7 +309,6 @@ Poco::Timespan KeeperTCPHandler::receiveHandshake(int32_t handshake_length, bool
 void KeeperTCPHandler::runImpl()
 {
     setThreadName("KeeperHandler");
-    ThreadStatus thread_status;
     socket().setReceiveTimeout(receive_timeout);
     socket().setSendTimeout(send_timeout);
diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp
index 6456f6d24ff..9471509ad4b 100644
--- a/src/Server/MySQLHandler.cpp
+++ b/src/Server/MySQLHandler.cpp
@@ -24,7 +24,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -199,7 +198,6 @@ MySQLHandler::~MySQLHandler() = default;
 void MySQLHandler::run()
 {
     setThreadName("MySQLHandler");
-    ThreadStatus thread_status;
     session = std::make_unique(server.context(), ClientInfo::Interface::MYSQL);
     SCOPE_EXIT({ session.reset(); });
diff --git a/src/Server/PostgreSQLHandler.cpp b/src/Server/PostgreSQLHandler.cpp
index 473d681ddb2..8ba8421e6f0 100644
--- a/src/Server/PostgreSQLHandler.cpp
+++ b/src/Server/PostgreSQLHandler.cpp
@@ -10,7 +10,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -59,7 +58,6 @@ void PostgreSQLHandler::changeIO(Poco::Net::StreamSocket & socket)
 void PostgreSQLHandler::run()
 {
     setThreadName("PostgresHandler");
-    ThreadStatus thread_status;
     session = std::make_unique(server.context(), ClientInfo::Interface::POSTGRESQL);
     SCOPE_EXIT({ session.reset(); });
diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp
index e3a820340ad..b60339e9fd8 100644
--- a/src/Server/TCPHandler.cpp
+++ b/src/Server/TCPHandler.cpp
@@ -246,7 +246,6 @@ TCPHandler::~TCPHandler()
 void TCPHandler::runImpl()
 {
     setThreadName("TCPHandler");
-    ThreadStatus thread_status;

     extractConnectionSettingsFromContext(server.context());

diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h
index 191617f1905..75e36836b63 100644
--- a/src/Server/TCPHandler.h
+++ b/src/Server/TCPHandler.h
@@ -7,7 +7,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include

From bd0da139afe4e7b8397e831694f71c5f13b42bd1 Mon Sep 17 00:00:00 2001
From: Xu Jia
Date: Thu, 30 May 2024 16:51:36 +0800
Subject: [PATCH 087/139] Add 2 server settings to limit the number of
 tables/databases: max_table_num_to_throw, max_database_num_to_throw

---
 .../settings.md                               |  16 ++
 src/Common/ErrorCodes.cpp                     |   2 +
 src/Core/ServerSettings.h                     |   2 +
 src/Databases/DatabaseLazy.cpp                |  10 +-
 src/Interpreters/InterpreterCreateQuery.cpp   |  29 ++++
 ...abase_and_table_count_limitation_reference |  20 +++
 ...database_and_table_count_limitation_result |  20 +++
 ...56_database_and_table_count_limitation_sql |  21 +++
 ..._and_table_count_limitation_test.reference |   1 +
 ...atabase_and_table_count_limitation_test.sh | 154 ++++++++++++++++++
 10 files changed, 273 insertions(+), 2 deletions(-)
 create mode 100644 tests/queries/0_stateless/31156_database_and_table_count_limitation_reference
 create mode 100644 tests/queries/0_stateless/31156_database_and_table_count_limitation_result
 create mode 100644 tests/queries/0_stateless/31156_database_and_table_count_limitation_sql
 create mode 100644 tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference
 create mode 100755 tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh

diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index f50dae0f1a2..6c4a9fa39d6 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -591,6 +591,22 @@ Default value: 100000
 <max_part_num_to_warn>400</max_part_num_to_warn>
 ```

+## max\_table\_num\_to\_throw {#max-table-num-to-throw}
+If the number of tables is greater than this value, the server will throw an exception. 0 means no limitation. Views, remote tables, dictionaries, and system tables are not counted; only tables in the Atomic/Ordinary/Replicated/Lazy database engines are counted. Default value: 0
+
+**Example**
+```xml
+<max_table_num_to_throw>400</max_table_num_to_throw>
+```
+
+## max\_database\_num\_to\_throw {#max-database-num-to-throw}
+If the number of databases is greater than this value, the server will throw an exception. 0 means no limitation.
+Default value: 0
+
+**Example**
+```xml
+<max_database_num_to_throw>400</max_database_num_to_throw>
+```

 ## max_temporary_data_on_disk_size

diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp
index 29993ed33e4..b1b8e2367a4 100644
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@@ -602,6 +602,8 @@
     M(721, DEPRECATED_FUNCTION) \
     M(722, ASYNC_LOAD_WAIT_FAILED) \
     M(723, PARQUET_EXCEPTION) \
+    M(724, TOO_MANY_TABLES) \
+    M(725, TOO_MANY_DATABASES) \
     \
     M(900, DISTRIBUTED_CACHE_ERROR) \
     M(901, CANNOT_USE_DISTRIBUTED_CACHE) \
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 309becdd78f..8261db6ce8e 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -102,6 +102,8 @@ namespace DB
     M(UInt64, max_dictionary_num_to_warn, 1000lu, "If the number of dictionaries is greater than this value, the server will create a warning that will displayed to user.", 0) \
     M(UInt64, max_database_num_to_warn, 1000lu, "If the number of databases is greater than this value, the server will create a warning that will displayed to user.", 0) \
     M(UInt64, max_part_num_to_warn, 100000lu, "If the number of parts is greater than this value, the server will create a warning that will displayed to user.", 0) \
+    M(UInt64, max_table_num_to_throw, 0lu, "If the number of tables is greater than this value, the server will throw an exception. 0 means no limitation. Views, remote tables, dictionaries, and system tables are not counted; only tables in the Atomic/Ordinary/Replicated/Lazy database engines are counted.", 0) \
+    M(UInt64, max_database_num_to_throw, 0lu, "If the number of databases is greater than this value, the server will throw an exception. 0 means no limitation.", 0) \
     M(UInt64, concurrent_threads_soft_limit_num, 0, "Sets how many concurrent thread can be allocated before applying CPU pressure. 
Zero means unlimited.", 0) \ M(UInt64, concurrent_threads_soft_limit_ratio_to_cores, 0, "Same as concurrent_threads_soft_limit_num, but with ratio to cores.", 0) \ \ diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index e72834eddbe..feb7113e6f9 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -186,7 +186,10 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n throw Exception(ErrorCodes::TABLE_ALREADY_EXISTS, "Table {}.{} already exists.", backQuote(database_name), backQuote(table_name)); it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + + if (table->isView() == false && table->isRemote() == false + && table->isDictionary() == false && table->isSystemStorage() == false) + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -202,7 +205,10 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta if (it->second.expiration_iterator != cache_expiration_queue.end()) cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + + if (res->isView() == false && res->isRemote() == false + && res->isDictionary() == false && res->isSystemStorage() == false) + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; } diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 66936dc25d7..b0351a004a4 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -88,6 +88,11 @@ #include #include +namespace CurrentMetrics +{ + extern const Metric AttachedTable; +} + namespace DB { @@ -113,6 +118,8 @@ namespace ErrorCodes extern const int UNKNOWN_STORAGE; extern const int SYNTAX_ERROR; extern const int SUPPORT_IS_DISABLED; + extern const int TOO_MANY_TABLES; + extern const int TOO_MANY_DATABASES; } namespace fs = std::filesystem; @@ -138,6 +145,18 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } + if (auto max_db = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; max_db > 0) + { + size_t db_count = DatabaseCatalog::instance().getDatabases().size(); + // there's an invisiable system database _temporary_and_external_tables, so we need to subtract 1 + if (db_count > 0) + db_count--; + if (db_count >= max_db) + throw Exception(ErrorCodes::TOO_MANY_DATABASES, + "Too many databases, max: {}, now: {}. " + "See setting max_database_num_to_throw.", max_db, db_count); + } + /// Will write file with database metadata, if needed. String database_name_escaped = escapeForFileName(database_name); fs::path metadata_path = fs::weakly_canonical(getContext()->getPath()); @@ -1544,6 +1563,16 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } } + if (UInt64 max_table = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; max_table > 0) + { + UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); + if (table_count >= max_table) + throw Exception(ErrorCodes::TOO_MANY_TABLES, + "Too many tables in the system. Current is {}, limit is {}. 
" + "See setting 'max_table_num_to_throw'.", + table_count, max_table); + } + database->createTable(getContext(), create.getTable(), res, query_ptr); /// Move table data to the proper place. Wo do not move data earlier to avoid situations diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_reference b/tests/queries/0_stateless/31156_database_and_table_count_limitation_reference new file mode 100644 index 00000000000..e0d61993b11 --- /dev/null +++ b/tests/queries/0_stateless/31156_database_and_table_count_limitation_reference @@ -0,0 +1,20 @@ +INFORMATION_SCHEMA +db1 +db2 +db3 +db4 +db5 +db6 +default +information_schema +system +table1 +table10 +table2 +table3 +table4 +table5 +table6 +table7 +table8 +table9 diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_result b/tests/queries/0_stateless/31156_database_and_table_count_limitation_result new file mode 100644 index 00000000000..e0d61993b11 --- /dev/null +++ b/tests/queries/0_stateless/31156_database_and_table_count_limitation_result @@ -0,0 +1,20 @@ +INFORMATION_SCHEMA +db1 +db2 +db3 +db4 +db5 +db6 +default +information_schema +system +table1 +table10 +table2 +table3 +table4 +table5 +table6 +table7 +table8 +table9 diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_sql b/tests/queries/0_stateless/31156_database_and_table_count_limitation_sql new file mode 100644 index 00000000000..127153956f9 --- /dev/null +++ b/tests/queries/0_stateless/31156_database_and_table_count_limitation_sql @@ -0,0 +1,21 @@ +create database db1; +create database db2; +create database db3; +create database db4; +create database db5; +create database db6; +create database db7; +create database db8; +select name from system.databases; +create table db1.table1 (x UInt64) engine = Memory; +create table db1.table2 (x UInt64) engine = Memory; +create table db1.table3 (x UInt64) engine = Memory; +create table db1.table4 (x UInt64) engine = Memory; +create table db1.table5 (x UInt64) engine = Memory; +create table db1.table6 (x UInt64) engine = Memory; +create table db1.table7 (x UInt64) engine = Memory; +create table db1.table8 (x UInt64) engine = Memory; +create table db1.table9 (x UInt64) engine = Memory; +create table db1.table10 (x UInt64) engine = Memory; +create table db1.table11 (x UInt64) engine = Memory; +select name from system.tables where database != 'system' and database != 'information_schema' and database != 'INFORMATION_SCHEMA'; \ No newline at end of file diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference b/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference new file mode 100644 index 00000000000..386f3d2fd86 --- /dev/null +++ b/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference @@ -0,0 +1 @@ +Test passed diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh b/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh new file mode 100755 index 00000000000..365fad5e99d --- /dev/null +++ b/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh @@ -0,0 +1,154 @@ +#!/usr/bin/env bash + +#Tags: shared, no-parallel + +# Get the current script directory +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +BASE="$CUR_DIR/$(basename "${BASH_SOURCE[0]}" .sh)" + +# Load shell_config.sh +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +# remove --database=$CLICKHOUSE_DATABASE from $CLICKHOUSE_CLIENT +CLICKHOUSE_CLIENT=$(echo $CLICKHOUSE_CLIENT | sed "s/--database=$CLICKHOUSE_DATABASE//") + +# Create a temporary directory +TEMP_DIR=$(mktemp -d "$CUR_DIR/$(basename "$BASE").XXXXXX") + +# Generate config.xml +CONFIG_FILE="$TEMP_DIR/config.xml" +cat > "$CONFIG_FILE" < + + information + $TEMP_DIR/clickhouse-server.log + $TEMP_DIR/clickhouse-server.err.log + + 10 + 10 + + + + + users.xml + + + + +EOL + +echo " + + + + + + + + ::/0 + + default + default + + + + + + " > $TEMP_DIR/users.xml + +# Function to start the server +function start_server() { + local server_opts=( + "--config-file=$CONFIG_FILE" + "--" + "--tcp_port=0" + "--shutdown_wait_unfinished=0" + "--listen_host=127.1" + "--path=$TEMP_DIR" + ) + CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" > /dev/null 2>&1 & + local pid=$! + + echo "$pid" +} + +# Function to get the server port +function get_server_port() { + local pid=$1 && shift + local port='' + while [[ -z $port ]]; do + port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p "$pid" 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') + sleep 0.5 + done + echo "$port" +} + +# Function to wait for the server port to be available +function wait_server_port() { + local port=$1 && shift + local i=0 retries=30 + while ! $CLICKHOUSE_CLIENT --host 127.1 --port "$port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do + sleep 0.5 + ((i++)) + done + if ! $CLICKHOUSE_CLIENT --host 127.1 --port "$port" --format Null -q 'select 1'; then + echo "Cannot wait until server will start accepting connections on port $port" >&2 + exit 1 + fi +} + +# Function to stop the server +function stop_server() { + if [[ -n "$SERVER_PID" ]]; then + kill -9 "$SERVER_PID" + fi +} + +# Function to clean up +function cleanup() { + stop_server + rm -rf "$TEMP_DIR" +} + +trap cleanup EXIT + +# Start the server and get the port +SERVER_PID=$(start_server) +PORT=$(get_server_port "$SERVER_PID") + +# Wait for the server to start +wait_server_port "$PORT" + +# check result +sql_file="$CUR_DIR/31156_database_and_table_count_limitation_sql" +result_file="$CUR_DIR/31156_database_and_table_count_limitation_result" +reference_file="$CUR_DIR/31156_database_and_table_count_limitation_reference" + +$CLICKHOUSE_CLIENT --host 127.1 --port "$PORT" --multiquery --ignore-error --queries-file=$sql_file 2>/dev/null > "$result_file" + +# Compare the result with the reference, if not same, print the diff +if ! diff -u "$reference_file" "$result_file"; then + echo "Test failed" + exit 1 +fi + +# check errors in error log +log_file="$TEMP_DIR/clickhouse-server.err.log" +database_error=$(grep -c " executeQuery: Code: 725. DB::Exception: Too many databases" $log_file) +table_error=$(grep -c " executeQuery: Code: 724. DB::Exception: Too many tables" $log_file) +#database_error should be 2 +if [ $database_error -ne 2 ]; then + echo "database_error should be 2, but now is $database_error. Tried to create 8 db, 6 should be created and 2 should fail" + echo "Limitation is 10 databases, 4 exist by default: default, system, information_schema, INFORMATION_SCHEMA" + exit 1 +fi + +#table_error should be 1 +if [ $table_error -ne 1 ]; then + echo "table_error should be 1, but now -s $table_error. 
Tried to create 11 tables, 10 should be created and 1 should fail" + echo "Limitation is 10 tables" + exit 1 +fi + +echo "Test passed" From 25e927ad38c040d00a7d31a00551c4adcc0609e5 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 4 Jun 2024 14:32:45 +0800 Subject: [PATCH 088/139] do not count systemStorage in AttachedTable metric --- src/Databases/DatabaseLazy.cpp | 8 ++------ src/Databases/DatabasesCommon.cpp | 8 ++++++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Databases/DatabaseLazy.cpp b/src/Databases/DatabaseLazy.cpp index feb7113e6f9..233db07cd68 100644 --- a/src/Databases/DatabaseLazy.cpp +++ b/src/Databases/DatabaseLazy.cpp @@ -187,9 +187,7 @@ void DatabaseLazy::attachTable(ContextPtr /* context_ */, const String & table_n it->second.expiration_iterator = cache_expiration_queue.emplace(cache_expiration_queue.end(), current_time, table_name); - if (table->isView() == false && table->isRemote() == false - && table->isDictionary() == false && table->isSystemStorage() == false) - CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::add(CurrentMetrics::AttachedTable, 1); } StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & table_name) @@ -206,9 +204,7 @@ StoragePtr DatabaseLazy::detachTable(ContextPtr /* context */, const String & ta cache_expiration_queue.erase(it->second.expiration_iterator); tables_cache.erase(it); - if (res->isView() == false && res->isRemote() == false - && res->isDictionary() == false && res->isSystemStorage() == false) - CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); + CurrentMetrics::sub(CurrentMetrics::AttachedTable, 1); } return res; } diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index fd38a31da5c..8c509a1dd53 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -260,7 +260,9 @@ StoragePtr DatabaseWithOwnTablesBase::detachTableUnlocked(const String & table_n res = it->second; tables.erase(it); res->is_detached = true; - CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); + + if (res->isSystemStorage() == false) + CurrentMetrics::sub(getAttachedCounterForStorage(res), 1); auto table_id = res->getStorageID(); if (table_id.hasUUID()) @@ -301,7 +303,9 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// It is important to reset is_detached here since in case of RENAME in /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - CurrentMetrics::add(getAttachedCounterForStorage(table), 1); + + if (res->isSystemStorage() == false) + CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } void DatabaseWithOwnTablesBase::shutdown() From 0cdcc0a704a83f65f577cd3fceb9fae0b50ec7b7 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 4 Jun 2024 15:11:32 +0800 Subject: [PATCH 089/139] modify DatabaseCommon.cpp compile errors; rename testcases, remove test result file --- src/Databases/DatabasesCommon.cpp | 2 +- ...base_and_table_count_limitation_reference} | 0 ...5_database_and_table_count_limitation_sql} | 0 ...and_table_count_limitation_test.reference} | 0 ...tabase_and_table_count_limitation_test.sh} | 6 +++--- ...database_and_table_count_limitation_result | 20 ------------------- 6 files changed, 4 insertions(+), 24 deletions(-) rename tests/queries/0_stateless/{31156_database_and_table_count_limitation_reference => 03165_database_and_table_count_limitation_reference} (100%) rename tests/queries/0_stateless/{31156_database_and_table_count_limitation_sql => 03165_database_and_table_count_limitation_sql} (100%) rename tests/queries/0_stateless/{31156_database_and_table_count_limitation_test.reference => 03165_database_and_table_count_limitation_test.reference} (100%) rename tests/queries/0_stateless/{31156_database_and_table_count_limitation_test.sh => 03165_database_and_table_count_limitation_test.sh} (95%) delete mode 100644 tests/queries/0_stateless/31156_database_and_table_count_limitation_result diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 8c509a1dd53..c590486d10e 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -304,7 +304,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - if (res->isSystemStorage() == false) + if (table->isSystemStorage() == false) CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_reference b/tests/queries/0_stateless/03165_database_and_table_count_limitation_reference similarity index 100% rename from tests/queries/0_stateless/31156_database_and_table_count_limitation_reference rename to tests/queries/0_stateless/03165_database_and_table_count_limitation_reference diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_sql b/tests/queries/0_stateless/03165_database_and_table_count_limitation_sql similarity index 100% rename from tests/queries/0_stateless/31156_database_and_table_count_limitation_sql rename to tests/queries/0_stateless/03165_database_and_table_count_limitation_sql diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference similarity index 100% rename from tests/queries/0_stateless/31156_database_and_table_count_limitation_test.reference rename to tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh similarity index 95% rename from tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh rename to tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh index 365fad5e99d..f665c7ce4bf 100755 --- a/tests/queries/0_stateless/31156_database_and_table_count_limitation_test.sh +++ b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh @@ -121,9 +121,9 @@ PORT=$(get_server_port "$SERVER_PID") wait_server_port "$PORT" # check result -sql_file="$CUR_DIR/31156_database_and_table_count_limitation_sql" -result_file="$CUR_DIR/31156_database_and_table_count_limitation_result" -reference_file="$CUR_DIR/31156_database_and_table_count_limitation_reference" +sql_file="$CUR_DIR/03165_database_and_table_count_limitation_sql" +result_file="$CUR_DIR/03165_database_and_table_count_limitation_result" +reference_file="$CUR_DIR/03165_database_and_table_count_limitation_reference" $CLICKHOUSE_CLIENT --host 127.1 --port "$PORT" --multiquery --ignore-error --queries-file=$sql_file 2>/dev/null > "$result_file" diff --git a/tests/queries/0_stateless/31156_database_and_table_count_limitation_result b/tests/queries/0_stateless/31156_database_and_table_count_limitation_result deleted file mode 100644 index e0d61993b11..00000000000 --- a/tests/queries/0_stateless/31156_database_and_table_count_limitation_result +++ /dev/null @@ -1,20 +0,0 @@ -INFORMATION_SCHEMA -db1 -db2 -db3 -db4 -db5 -db6 -default -information_schema -system -table1 -table10 -table2 -table3 -table4 -table5 -table6 -table7 -table8 -table9 From 06239e115663062508c46cc32590416e5943515d Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 4 Jun 2024 16:42:14 +0800 Subject: [PATCH 090/139] fix typo --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index b0351a004a4..30d9ddb93f5 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -148,7 +148,7 @@ BlockIO 
InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (auto max_db = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; max_db > 0) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - // there's an invisiable system database _temporary_and_external_tables, so we need to subtract 1 + // there's an invisible system database _temporary_and_external_tables, so we need to subtract 1 if (db_count > 0) db_count--; if (db_count >= max_db) From 446cc0542bdc46af03205740daa004800ee343b9 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 5 Jun 2024 17:17:29 +0800 Subject: [PATCH 091/139] add a space to trigger CI after fixed a CI bug --- .../03165_database_and_table_count_limitation_test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh index f665c7ce4bf..440173393d0 100755 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh +++ b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh @@ -144,7 +144,7 @@ if [ $database_error -ne 2 ]; then exit 1 fi -#table_error should be 1 +# table_error should be 1 if [ $table_error -ne 1 ]; then echo "table_error should be 1, but now -s $table_error. Tried to create 11 tables, 10 should be created and 1 should fail" echo "Limitation is 10 tables" From a2fd206a4dd729321dc318e4edb921bc379dbe13 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Mon, 10 Jun 2024 16:45:38 +0800 Subject: [PATCH 092/139] adjust database count checking DatabaseCatalog::TEMPORARY_DATABASE explicitly, change 'yandex' into 'clickhouse' in config.xml --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- .../03165_database_and_table_count_limitation_test.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 30d9ddb93f5..1904dd58d9f 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -149,7 +149,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); // there's an invisible system database _temporary_and_external_tables, so we need to subtract 1 - if (db_count > 0) + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(DatabaseCatalog::TEMPORARY_DATABASE)) db_count--; if (db_count >= max_db) throw Exception(ErrorCodes::TOO_MANY_DATABASES, diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh index 440173393d0..c0b0639dec4 100755 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh +++ b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh @@ -19,7 +19,7 @@ TEMP_DIR=$(mktemp -d "$CUR_DIR/$(basename "$BASE").XXXXXX") # Generate config.xml CONFIG_FILE="$TEMP_DIR/config.xml" cat > "$CONFIG_FILE" < + information $TEMP_DIR/clickhouse-server.log @@ -35,7 +35,7 @@ cat > "$CONFIG_FILE" < - + EOL echo " From 5ff65a1bf546403883aecd824e9ceb2302eea040 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 10 Jun 2024 20:46:13 +0000 Subject: [PATCH 093/139] write integration tests --- .../test_table_db_num_limit/config/config.xml | 5 + .../test_table_db_num_limit/test.py | 34 ++++ 
...abase_and_table_count_limitation_reference | 20 --- ...65_database_and_table_count_limitation_sql | 21 --- ..._and_table_count_limitation_test.reference | 1 - ...atabase_and_table_count_limitation_test.sh | 154 ------------------ 6 files changed, 39 insertions(+), 196 deletions(-) create mode 100644 tests/integration/test_table_db_num_limit/config/config.xml create mode 100644 tests/integration/test_table_db_num_limit/test.py delete mode 100644 tests/queries/0_stateless/03165_database_and_table_count_limitation_reference delete mode 100644 tests/queries/0_stateless/03165_database_and_table_count_limitation_sql delete mode 100644 tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference delete mode 100755 tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh diff --git a/tests/integration/test_table_db_num_limit/config/config.xml b/tests/integration/test_table_db_num_limit/config/config.xml new file mode 100644 index 00000000000..9a573b158fe --- /dev/null +++ b/tests/integration/test_table_db_num_limit/config/config.xml @@ -0,0 +1,5 @@ + + 10 + 10 + + diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py new file mode 100644 index 00000000000..d5bd2228799 --- /dev/null +++ b/tests/integration/test_table_db_num_limit/test.py @@ -0,0 +1,34 @@ +import pytest +from helpers.client import QueryRuntimeException +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) + +node1 = cluster.add_instance( + "node1", main_configs=["config/config.xml"], with_zookeeper=True +) + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + + yield cluster + + finally: + cluster.shutdown() + +def test_table_db_limit(started_cluster): + for i in range(6): + node1.query("create database db{}".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create database db_exp".format(i)) + + assert "TOO_MANY_DATABASES" in str(exp_info) + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + with pytest.raises(QueryRuntimeException) as exp_info: + node1.query("create table default.tx (a Int32) Engine = Log") + assert "TOO_MANY_TABLES" in str(exp_info) diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_reference b/tests/queries/0_stateless/03165_database_and_table_count_limitation_reference deleted file mode 100644 index e0d61993b11..00000000000 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_reference +++ /dev/null @@ -1,20 +0,0 @@ -INFORMATION_SCHEMA -db1 -db2 -db3 -db4 -db5 -db6 -default -information_schema -system -table1 -table10 -table2 -table3 -table4 -table5 -table6 -table7 -table8 -table9 diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_sql b/tests/queries/0_stateless/03165_database_and_table_count_limitation_sql deleted file mode 100644 index 127153956f9..00000000000 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_sql +++ /dev/null @@ -1,21 +0,0 @@ -create database db1; -create database db2; -create database db3; -create database db4; -create database db5; -create database db6; -create database db7; -create database db8; -select name from system.databases; -create table db1.table1 (x UInt64) engine = Memory; -create table db1.table2 (x UInt64) engine = Memory; -create table db1.table3 (x UInt64) engine = Memory; -create table db1.table4 (x UInt64) engine = Memory; 
-create table db1.table5 (x UInt64) engine = Memory; -create table db1.table6 (x UInt64) engine = Memory; -create table db1.table7 (x UInt64) engine = Memory; -create table db1.table8 (x UInt64) engine = Memory; -create table db1.table9 (x UInt64) engine = Memory; -create table db1.table10 (x UInt64) engine = Memory; -create table db1.table11 (x UInt64) engine = Memory; -select name from system.tables where database != 'system' and database != 'information_schema' and database != 'INFORMATION_SCHEMA'; \ No newline at end of file diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference deleted file mode 100644 index 386f3d2fd86..00000000000 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.reference +++ /dev/null @@ -1 +0,0 @@ -Test passed diff --git a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh b/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh deleted file mode 100755 index c0b0639dec4..00000000000 --- a/tests/queries/0_stateless/03165_database_and_table_count_limitation_test.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/usr/bin/env bash - -#Tags: shared, no-parallel - -# Get the current script directory -CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) -BASE="$CUR_DIR/$(basename "${BASH_SOURCE[0]}" .sh)" - -# Load shell_config.sh -# shellcheck source=../shell_config.sh -. "$CUR_DIR"/../shell_config.sh - -# remove --database=$CLICKHOUSE_DATABASE from $CLICKHOUSE_CLIENT -CLICKHOUSE_CLIENT=$(echo $CLICKHOUSE_CLIENT | sed "s/--database=$CLICKHOUSE_DATABASE//") - -# Create a temporary directory -TEMP_DIR=$(mktemp -d "$CUR_DIR/$(basename "$BASE").XXXXXX") - -# Generate config.xml -CONFIG_FILE="$TEMP_DIR/config.xml" -cat > "$CONFIG_FILE" < - - information - $TEMP_DIR/clickhouse-server.log - $TEMP_DIR/clickhouse-server.err.log - - 10 - 10 - - - - - users.xml - - - - -EOL - -echo " - - - - - - - - ::/0 - - default - default - - - - - - " > $TEMP_DIR/users.xml - -# Function to start the server -function start_server() { - local server_opts=( - "--config-file=$CONFIG_FILE" - "--" - "--tcp_port=0" - "--shutdown_wait_unfinished=0" - "--listen_host=127.1" - "--path=$TEMP_DIR" - ) - CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" > /dev/null 2>&1 & - local pid=$! - - echo "$pid" -} - -# Function to get the server port -function get_server_port() { - local pid=$1 && shift - local port='' - while [[ -z $port ]]; do - port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p "$pid" 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') - sleep 0.5 - done - echo "$port" -} - -# Function to wait for the server port to be available -function wait_server_port() { - local port=$1 && shift - local i=0 retries=30 - while ! $CLICKHOUSE_CLIENT --host 127.1 --port "$port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do - sleep 0.5 - ((i++)) - done - if ! 
$CLICKHOUSE_CLIENT --host 127.1 --port "$port" --format Null -q 'select 1'; then - echo "Cannot wait until server will start accepting connections on port $port" >&2 - exit 1 - fi -} - -# Function to stop the server -function stop_server() { - if [[ -n "$SERVER_PID" ]]; then - kill -9 "$SERVER_PID" - fi -} - -# Function to clean up -function cleanup() { - stop_server - rm -rf "$TEMP_DIR" -} - -trap cleanup EXIT - -# Start the server and get the port -SERVER_PID=$(start_server) -PORT=$(get_server_port "$SERVER_PID") - -# Wait for the server to start -wait_server_port "$PORT" - -# check result -sql_file="$CUR_DIR/03165_database_and_table_count_limitation_sql" -result_file="$CUR_DIR/03165_database_and_table_count_limitation_result" -reference_file="$CUR_DIR/03165_database_and_table_count_limitation_reference" - -$CLICKHOUSE_CLIENT --host 127.1 --port "$PORT" --multiquery --ignore-error --queries-file=$sql_file 2>/dev/null > "$result_file" - -# Compare the result with the reference, if not same, print the diff -if ! diff -u "$reference_file" "$result_file"; then - echo "Test failed" - exit 1 -fi - -# check errors in error log -log_file="$TEMP_DIR/clickhouse-server.err.log" -database_error=$(grep -c " executeQuery: Code: 725. DB::Exception: Too many databases" $log_file) -table_error=$(grep -c " executeQuery: Code: 724. DB::Exception: Too many tables" $log_file) -#database_error should be 2 -if [ $database_error -ne 2 ]; then - echo "database_error should be 2, but now is $database_error. Tried to create 8 db, 6 should be created and 2 should fail" - echo "Limitation is 10 databases, 4 exist by default: default, system, information_schema, INFORMATION_SCHEMA" - exit 1 -fi - -# table_error should be 1 -if [ $table_error -ne 1 ]; then - echo "table_error should be 1, but now -s $table_error. 
Tried to create 11 tables, 10 should be created and 1 should fail" - echo "Limitation is 10 tables" - exit 1 -fi - -echo "Test passed" From f073a456dc02446bc71a2b892ab10eb258418459 Mon Sep 17 00:00:00 2001 From: Han Fei Date: Mon, 10 Jun 2024 21:02:34 +0000 Subject: [PATCH 094/139] fix style --- tests/integration/test_table_db_num_limit/__init__.py | 0 tests/integration/test_table_db_num_limit/test.py | 2 ++ 2 files changed, 2 insertions(+) create mode 100644 tests/integration/test_table_db_num_limit/__init__.py diff --git a/tests/integration/test_table_db_num_limit/__init__.py b/tests/integration/test_table_db_num_limit/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index d5bd2228799..71bb494e04e 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -8,6 +8,7 @@ node1 = cluster.add_instance( "node1", main_configs=["config/config.xml"], with_zookeeper=True ) + @pytest.fixture(scope="module") def started_cluster(): try: @@ -18,6 +19,7 @@ def started_cluster(): finally: cluster.shutdown() + def test_table_db_limit(started_cluster): for i in range(6): node1.query("create database db{}".format(i)) From 76d6bc37eb7b3b90be0bb7b6a509f6e3e32655db Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Wed, 12 Jun 2024 21:51:32 +0800 Subject: [PATCH 095/139] adjust code based on review comment --- src/Interpreters/DatabaseCatalog.h | 1 + src/Interpreters/InterpreterCreateQuery.cpp | 38 +++++++++++++------ .../test_table_db_num_limit/test.py | 2 +- 3 files changed, 28 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 37125d9900c..ac9c01e5dd7 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -129,6 +129,7 @@ public: static constexpr const char * SYSTEM_DATABASE = "system"; static constexpr const char * INFORMATION_SCHEMA = "information_schema"; static constexpr const char * INFORMATION_SCHEMA_UPPERCASE = "INFORMATION_SCHEMA"; + static constexpr const char * DEFAULT_DATABASE = "default"; /// Returns true if a passed name is one of the predefined databases' names. 
static bool isPredefinedDatabase(std::string_view database_name); diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 1904dd58d9f..697e8136ee1 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -145,16 +145,29 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) throw Exception(ErrorCodes::DATABASE_ALREADY_EXISTS, "Database {} already exists.", database_name); } - if (auto max_db = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; max_db > 0) + auto db_num_limit = getContext()->getGlobalContext()->getServerSettings().max_database_num_to_throw; + if (db_num_limit > 0) { size_t db_count = DatabaseCatalog::instance().getDatabases().size(); - // there's an invisible system database _temporary_and_external_tables, so we need to subtract 1 - if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(DatabaseCatalog::TEMPORARY_DATABASE)) - db_count--; - if (db_count >= max_db) + std::vector system_databases = { + DatabaseCatalog::TEMPORARY_DATABASE, + DatabaseCatalog::SYSTEM_DATABASE, + DatabaseCatalog::INFORMATION_SCHEMA, + DatabaseCatalog::INFORMATION_SCHEMA_UPPERCASE, + DatabaseCatalog::DEFAULT_DATABASE + }; + + for (const auto & system_database : system_databases) + { + if (db_count > 0 && DatabaseCatalog::instance().isDatabaseExist(system_database)) + db_count--; + } + + if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, - "Too many databases, max: {}, now: {}. " - "See setting max_database_num_to_throw.", max_db, db_count); + "Too many databases in the Clickhouse. " + "The limit (setting 'max_database_num_to_throw') is set to {}, currnt number of databases is {}", + db_num_limit, db_count); } /// Will write file with database metadata, if needed. @@ -1563,14 +1576,15 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } } - if (UInt64 max_table = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; max_table > 0) + UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; + if (table_num_limit > 0) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); - if (table_count >= max_table) + if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, - "Too many tables in the system. Current is {}, limit is {}. " - "See setting 'max_table_num_to_throw'.", - table_count, max_table); + "Too many tables in the Clickhouse. 
" + "The limit (setting 'max_table_num_to_throw') is set to {}, currnt number of tables is {}", + table_num_limit, table_count); } database->createTable(getContext(), create.getTable(), res, query_ptr); diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index 71bb494e04e..bde9a956d70 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -21,7 +21,7 @@ def started_cluster(): def test_table_db_limit(started_cluster): - for i in range(6): + for i in range(10): node1.query("create database db{}".format(i)) with pytest.raises(QueryRuntimeException) as exp_info: From 555e166ed6d2cb214e42e9502982c07c2baa91b2 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Thu, 13 Jun 2024 08:38:19 +0800 Subject: [PATCH 096/139] fix type --- src/Interpreters/InterpreterCreateQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 697e8136ee1..c33f97dc80d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -166,7 +166,7 @@ BlockIO InterpreterCreateQuery::createDatabase(ASTCreateQuery & create) if (db_count >= db_num_limit) throw Exception(ErrorCodes::TOO_MANY_DATABASES, "Too many databases in the Clickhouse. " - "The limit (setting 'max_database_num_to_throw') is set to {}, currnt number of databases is {}", + "The limit (setting 'max_database_num_to_throw') is set to {}, current number of databases is {}", db_num_limit, db_count); } @@ -1583,7 +1583,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, if (table_count >= table_num_limit) throw Exception(ErrorCodes::TOO_MANY_TABLES, "Too many tables in the Clickhouse. " - "The limit (setting 'max_table_num_to_throw') is set to {}, currnt number of tables is {}", + "The limit (setting 'max_table_num_to_throw') is set to {}, current number of tables is {}", table_num_limit, table_count); } From d44d5254056c5266235c0b94b0622d298e57e917 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Thu, 13 Jun 2024 22:38:47 +0800 Subject: [PATCH 097/139] trigger CI From 08b481af3d8d59088216df5bc182871ef73284ff Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Fri, 14 Jun 2024 09:44:08 +0800 Subject: [PATCH 098/139] trigger CI From 840df04f9064874d207fc73b71ee8db829989425 Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 14 Jun 2024 10:08:50 +0200 Subject: [PATCH 099/139] Restart CI From d0f2014f139de87dfe22e2e660cd3693ca997137 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Fri, 14 Jun 2024 21:20:25 +0800 Subject: [PATCH 100/139] not count syste tables --- src/Databases/DatabasesCommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index c590486d10e..6426123bb4f 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -304,7 +304,7 @@ void DatabaseWithOwnTablesBase::attachTableUnlocked(const String & table_name, c /// non-Atomic database the is_detached is set to true before RENAME. 
table->is_detached = false; - if (table->isSystemStorage() == false) + if (table->isSystemStorage() == false && table_id.database_name != DatabaseCatalog::SYSTEM_DATABASE) CurrentMetrics::add(getAttachedCounterForStorage(table), 1); } From da186811f037dd637ba6850aaac40858c8897fde Mon Sep 17 00:00:00 2001 From: Han Fei Date: Fri, 14 Jun 2024 15:28:21 +0200 Subject: [PATCH 101/139] add more tests --- tests/integration/test_table_db_num_limit/test.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index bde9a956d70..f2080ec4738 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ b/tests/integration/test_table_db_num_limit/test.py @@ -28,6 +28,13 @@ def test_table_db_limit(started_cluster): node1.query("create database db_exp".format(i)) assert "TOO_MANY_DATABASES" in str(exp_info) + + for i in range(10): + node1.query("create table t{} (a Int32) Engine = Log".format(i)) + + node1.query("system flush logs"); + for i in range(10): + node1.query("drop table t{}".format(i)) for i in range(10): node1.query("create table t{} (a Int32) Engine = Log".format(i)) From 5f3535309be83cebab9fae967d6b4ab739aca9dc Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 14 Jun 2024 15:38:09 +0200 Subject: [PATCH 102/139] Add a test --- .../test_keeper_profiler/__init__.py | 0 .../configs/keeper_config1.xml | 44 +++++++++++++ .../configs/keeper_config2.xml | 44 +++++++++++++ .../configs/keeper_config3.xml | 44 +++++++++++++ .../integration/test_keeper_profiler/test.py | 65 +++++++++++++++++++ 5 files changed, 197 insertions(+) create mode 100644 tests/integration/test_keeper_profiler/__init__.py create mode 100644 tests/integration/test_keeper_profiler/configs/keeper_config1.xml create mode 100644 tests/integration/test_keeper_profiler/configs/keeper_config2.xml create mode 100644 tests/integration/test_keeper_profiler/configs/keeper_config3.xml create mode 100644 tests/integration/test_keeper_profiler/test.py diff --git a/tests/integration/test_keeper_profiler/__init__.py b/tests/integration/test_keeper_profiler/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_keeper_profiler/configs/keeper_config1.xml b/tests/integration/test_keeper_profiler/configs/keeper_config1.xml new file mode 100644 index 00000000000..b541090bd98 --- /dev/null +++ b/tests/integration/test_keeper_profiler/configs/keeper_config1.xml @@ -0,0 +1,44 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 1 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 50 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 1000000000 + 1000000000 + diff --git a/tests/integration/test_keeper_profiler/configs/keeper_config2.xml b/tests/integration/test_keeper_profiler/configs/keeper_config2.xml new file mode 100644 index 00000000000..9ad0a772860 --- /dev/null +++ b/tests/integration/test_keeper_profiler/configs/keeper_config2.xml @@ -0,0 +1,44 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 2 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 1000000000 + 1000000000 + diff --git a/tests/integration/test_keeper_profiler/configs/keeper_config3.xml 
b/tests/integration/test_keeper_profiler/configs/keeper_config3.xml new file mode 100644 index 00000000000..ff31f7cdbf4 --- /dev/null +++ b/tests/integration/test_keeper_profiler/configs/keeper_config3.xml @@ -0,0 +1,44 @@ + + + + http://minio1:9001/snapshots/ + minio + minio123 + + 9181 + 3 + /var/lib/clickhouse/coordination/log + /var/lib/clickhouse/coordination/snapshots + * + + + 5000 + 10000 + 5000 + 75 + trace + + + + + 1 + node1 + 9234 + + + 2 + node2 + 9234 + true + + + 3 + node3 + 9234 + true + + + + 1000000000 + 1000000000 + diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py new file mode 100644 index 00000000000..bb24f13d9c2 --- /dev/null +++ b/tests/integration/test_keeper_profiler/test.py @@ -0,0 +1,65 @@ +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.keeper_utils import KeeperClient, KeeperException + + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node1", + main_configs=["configs/keeper_config1.xml"], + stay_alive=True, +) +node2 = cluster.add_instance( + "node2", + main_configs=["configs/keeper_config2.xml"], + stay_alive=True, + with_minio=True, +) +node3 = cluster.add_instance( + "node3", + main_configs=["configs/keeper_config3.xml"], + stay_alive=True, + with_minio=True, +) + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def test_profiler(started_cluster): + node = cluster.instances["node1"] + + node.query( + "CREATE TABLE t (key UInt32, value String) Engine = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') ORDER BY key" + ) + + node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + + node.query("system flush logs") + assert int(node.query("exists system.trace_log")) + + assert 1 < int( + node.query( + """ +set allow_introspection_functions=1; +system flush logs; +select cnt from ( + select count() as cnt, formatReadableSize(sum(size)), + arrayStringConcat( + arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') as trace +from system.trace_log where trace_type = ‘Real’ and trace ilike '%KeeperTCPHandler%' group by trace); + """ + ) + ) From c856dc8c66dd95d4557d1e95b07e60d8b4ff5e00 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Fri, 14 Jun 2024 13:45:00 +0000 Subject: [PATCH 103/139] Automatic style fix --- tests/integration/test_keeper_profiler/test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index bb24f13d9c2..796d7798747 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -24,6 +24,7 @@ node3 = cluster.add_instance( with_minio=True, ) + @pytest.fixture(scope="module", autouse=True) def started_cluster(): try: From ffb9a67a11720a71b833fd2c6f1ef6333d70b018 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Sat, 15 Jun 2024 16:28:56 +0800 Subject: [PATCH 104/139] fix the bug that In operator did not perform the correct type conversion --- src/Analyzer/SetUtils.cpp | 44 
++++++++++++++----- src/Interpreters/ActionsVisitor.cpp | 34 ++++++++++---- src/Interpreters/convertFieldToType.cpp | 4 +- src/Interpreters/convertFieldToType.h | 2 +- .../0_stateless/00137_in_constants.reference | 1 + .../0_stateless/00137_in_constants.sql | 1 + 6 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index ceda264b5a6..9a267bfa149 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -54,8 +54,9 @@ size_t getCompoundTypeDepth(const IDataType & type) } template -Block createBlockFromCollection(const Collection & collection, const DataTypes & block_types, bool transform_null_in) +Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & block_types, bool transform_null_in) { + assert(collection.size() == value_types.size()); size_t columns_size = block_types.size(); MutableColumns columns(columns_size); for (size_t i = 0; i < columns_size; ++i) @@ -65,19 +66,24 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & } Row tuple_values; + size_t value_types_index = 0; for (const auto & value : collection) { if (columns_size == 1) { - auto field = convertFieldToTypeStrict(value, *block_types[0]); - if (!field) + DataTypePtr data_type = value_types[value_types_index]; + auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); + if (!field) { + value_types_index += 1; continue; + } bool need_insert_null = transform_null_in && block_types[0]->isNullable(); if (!field->isNull() || need_insert_null) columns[0]->insert(*field); - + + value_types_index += 1; continue; } @@ -87,6 +93,9 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & value.getTypeName()); const auto & tuple = value.template get(); + DataTypePtr value_type = value_types[value_types_index]; + DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); + size_t tuple_size = tuple.size(); if (tuple_size != columns_size) @@ -101,7 +110,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *block_types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *block_types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -114,6 +123,8 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes & if (i == tuple_size) for (i = 0; i < tuple_size; ++i) columns[i]->insert(tuple_values[i]); + + value_types_index += 1; } Block res; @@ -149,7 +160,8 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. 
Array array{value}; - result_block = createBlockFromCollection(array, set_element_types, transform_null_in); + DataTypes value_types{value_type}; + result_block = createBlockFromCollection(array, value_types, set_element_types, transform_null_in); } else if (lhs_type_depth + 1 == rhs_type_depth) { @@ -157,10 +169,22 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const WhichDataType rhs_which_type(value_type); - if (rhs_which_type.isArray()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); - else if (rhs_which_type.isTuple()) - result_block = createBlockFromCollection(value.get(), set_element_types, transform_null_in); + if (rhs_which_type.isArray()) { + const DataTypeArray* value_array_type = typeid_cast(value_type.get()); + size_t value_array_size = value.get().size(); + DataTypes value_types; + value_types.reserve(value_array_size); + + for(size_t i = 0; i < value_array_size; ++i) { + value_types.push_back(value_array_type->getNestedType()); + } + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } + else if (rhs_which_type.isTuple()) { + const DataTypeTuple* value_tuple_type = typeid_cast(value_type.get()); + DataTypes value_types = value_tuple_type->getElements(); + result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); + } else throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Unsupported type at the right-side of IN. Expected Array or Tuple. Actual {}", diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0bdd4c089f1..081e7e4fc2c 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -102,7 +102,7 @@ static size_t getTypeDepth(const DataTypePtr & type) /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. /// We can not include values that don't represent any possible value from the type of filtered column to the set. 
template -static Block createBlockFromCollection(const Collection & collection, const DataTypes & types, bool transform_null_in) +static Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & types, bool transform_null_in) { size_t columns_num = types.size(); MutableColumns columns(columns_num); @@ -113,14 +113,16 @@ static Block createBlockFromCollection(const Collection & collection, const Data } Row tuple_values; + size_t value_type_index = 0; for (const auto & value : collection) { if (columns_num == 1) { - auto field = convertFieldToTypeStrict(value, *types[0]); + auto field = convertFieldToTypeStrict(value, *value_types[value_type_index], *types[0]); bool need_insert_null = transform_null_in && types[0]->isNullable(); if (field && (!field->isNull() || need_insert_null)) columns[0]->insert(*field); + value_type_index += 1; } else { @@ -130,18 +132,20 @@ static Block createBlockFromCollection(const Collection & collection, const Data const auto & tuple = value.template get(); size_t tuple_size = tuple.size(); - if (tuple_size != columns_num) throw Exception(ErrorCodes::INCORRECT_ELEMENT_OF_SET, "Incorrect size of tuple in set: {} instead of {}", tuple_size, columns_num); if (tuple_values.empty()) tuple_values.resize(tuple_size); + + DataTypePtr value_type = value_types[value_type_index]; + DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t i = 0; for (; i < tuple_size; ++i) { - auto converted_field = convertFieldToTypeStrict(tuple[i], *types[i]); + auto converted_field = convertFieldToTypeStrict(tuple[i], *tuple_value_type[i], *types[i]); if (!converted_field) break; tuple_values[i] = std::move(*converted_field); @@ -154,6 +158,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (i == tuple_size) for (i = 0; i < tuple_size; ++i) columns[i]->insert(tuple_values[i]); + value_type_index += 1; } } @@ -317,16 +322,27 @@ Block createBlockForSet( if (left_type_depth == right_type_depth) { Array array{right_arg_value}; - block = createBlockFromCollection(array, set_element_types, tranform_null_in); + DataTypes data_types{right_arg_type}; + block = createBlockFromCollection(array, data_types, set_element_types, tranform_null_in); } /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. 
else if (left_type_depth + 1 == right_type_depth) { auto type_index = right_arg_type->getTypeId(); - if (type_index == TypeIndex::Tuple) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); - else if (type_index == TypeIndex::Array) - block = createBlockFromCollection(right_arg_value.get(), set_element_types, tranform_null_in); + if (type_index == TypeIndex::Tuple) { + DataTypes data_types = typeid_cast(right_arg_type.get())->getElements(); + block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); + } + else if (type_index == TypeIndex::Array) { + const auto* right_arg_array_type = typeid_cast(right_arg_type.get()); + size_t right_arg_array_size = right_arg_value.get().size(); + DataTypes data_types; + data_types.reserve(right_arg_array_size); + for(size_t i = 0; i < right_arg_array_size; ++i) { + data_types.push_back(right_arg_array_type->getNestedType()); + } + block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); + } else throw_unsupported_type(right_arg_type); } diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 9363e3d83eb..57d4e18010b 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -615,9 +615,9 @@ static bool decimalEqualsFloat(Field field, Float64 float_value) return decimal_to_float == float_value; } -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type) +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType& from_type, const IDataType & to_type) { - Field result_value = convertFieldToType(from_value, to_type); + Field result_value = convertFieldToType(from_value, to_type, &from_type); if (Field::isDecimal(from_value.getType()) && Field::isDecimal(result_value.getType())) { diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 7f49ea5479d..044b0c9b1ce 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. 
-std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & to_type); +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType& from_type, const IDataType & to_type); } diff --git a/tests/queries/0_stateless/00137_in_constants.reference b/tests/queries/0_stateless/00137_in_constants.reference index 379885fb1ab..94607ffa924 100644 --- a/tests/queries/0_stateless/00137_in_constants.reference +++ b/tests/queries/0_stateless/00137_in_constants.reference @@ -13,6 +13,7 @@ 1 1 1 +1 0 0 0 diff --git a/tests/queries/0_stateless/00137_in_constants.sql b/tests/queries/0_stateless/00137_in_constants.sql index 297acc4ef26..bc365523be1 100644 --- a/tests/queries/0_stateless/00137_in_constants.sql +++ b/tests/queries/0_stateless/00137_in_constants.sql @@ -13,6 +13,7 @@ SELECT 'Hello' IN (SELECT 'Hello'); SELECT materialize('Hello') IN (SELECT 'Hello'); SELECT 'Hello' IN (SELECT materialize('Hello')); SELECT materialize('Hello') IN (SELECT materialize('Hello')); +SELECT toDate('2020-01-01') IN (toDateTime('2020-01-01', 'UTC')); SELECT 2 IN (SELECT 1); SELECT materialize(2) IN (SELECT 1); From 38f01bd831aa36efb76b43bf2f53357cd10ef92b Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Sun, 16 Jun 2024 14:31:49 +0800 Subject: [PATCH 105/139] update code style --- src/Analyzer/SetUtils.cpp | 18 +++++++++++------- src/Interpreters/ActionsVisitor.cpp | 13 ++++++++----- src/Interpreters/convertFieldToType.cpp | 2 +- src/Interpreters/convertFieldToType.h | 2 +- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index 9a267bfa149..71297169b5c 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -74,7 +74,8 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& { DataTypePtr data_type = value_types[value_types_index]; auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); - if (!field) { + if (!field) + { value_types_index += 1; continue; } @@ -94,7 +95,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& const auto & tuple = value.template get(); DataTypePtr value_type = value_types[value_types_index]; - DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); + DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t tuple_size = tuple.size(); @@ -169,19 +170,22 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const WhichDataType rhs_which_type(value_type); - if (rhs_which_type.isArray()) { - const DataTypeArray* value_array_type = typeid_cast(value_type.get()); + if (rhs_which_type.isArray()) + { + const DataTypeArray * value_array_type = typeid_cast(value_type.get()); size_t value_array_size = value.get().size(); DataTypes value_types; value_types.reserve(value_array_size); - for(size_t i = 0; i < value_array_size; ++i) { + for (size_t i = 0; i < value_array_size; ++i) + { value_types.push_back(value_array_type->getNestedType()); } result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); } - else if (rhs_which_type.isTuple()) { - const DataTypeTuple* value_tuple_type = typeid_cast(value_type.get()); + else if (rhs_which_type.isTuple()) + { + const DataTypeTuple * value_tuple_type = typeid_cast(value_type.get()); DataTypes value_types = value_tuple_type->getElements(); result_block = createBlockFromCollection(value.get(), value_types, set_element_types, 
transform_null_in); } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 081e7e4fc2c..83142718073 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -102,7 +102,7 @@ static size_t getTypeDepth(const DataTypePtr & type) /// 33.33 in the set is converted to 33.3, but it is not equal to 33.3 in the column, so the result should still be empty. /// We can not include values that don't represent any possible value from the type of filtered column to the set. template -static Block createBlockFromCollection(const Collection & collection, const DataTypes& value_types, const DataTypes & types, bool transform_null_in) +static Block createBlockFromCollection(const Collection & collection, const DataTypes & value_types, const DataTypes & types, bool transform_null_in) { size_t columns_num = types.size(); MutableColumns columns(columns_num); @@ -140,7 +140,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data tuple_values.resize(tuple_size); DataTypePtr value_type = value_types[value_type_index]; - DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); + DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t i = 0; for (; i < tuple_size; ++i) @@ -329,16 +329,19 @@ Block createBlockForSet( else if (left_type_depth + 1 == right_type_depth) { auto type_index = right_arg_type->getTypeId(); - if (type_index == TypeIndex::Tuple) { + if (type_index == TypeIndex::Tuple) + { DataTypes data_types = typeid_cast(right_arg_type.get())->getElements(); block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); } - else if (type_index == TypeIndex::Array) { + else if (type_index == TypeIndex::Array) + { const auto* right_arg_array_type = typeid_cast(right_arg_type.get()); size_t right_arg_array_size = right_arg_value.get().size(); DataTypes data_types; data_types.reserve(right_arg_array_size); - for(size_t i = 0; i < right_arg_array_size; ++i) { + for(size_t i = 0; i < right_arg_array_size; ++i) + { data_types.push_back(right_arg_array_type->getNestedType()); } block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); diff --git a/src/Interpreters/convertFieldToType.cpp b/src/Interpreters/convertFieldToType.cpp index 57d4e18010b..184c263dbdb 100644 --- a/src/Interpreters/convertFieldToType.cpp +++ b/src/Interpreters/convertFieldToType.cpp @@ -615,7 +615,7 @@ static bool decimalEqualsFloat(Field field, Float64 float_value) return decimal_to_float == float_value; } -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType& from_type, const IDataType & to_type) +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type) { Field result_value = convertFieldToType(from_value, to_type, &from_type); diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index 044b0c9b1ce..cb7903f587a 100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. 
-std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType& from_type, const IDataType & to_type); +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type); } From 1594f84daf18ed4c810c5b4023968194474ef618 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 16 Jun 2024 11:36:12 +0200 Subject: [PATCH 106/139] Fix test --- tests/integration/test_keeper_profiler/test.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index 796d7798747..c0a70829107 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -42,10 +42,8 @@ def test_profiler(started_cluster): "CREATE TABLE t (key UInt32, value String) Engine = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') ORDER BY key" ) - node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") - node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") - node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") - node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") + for _ in range(50): + node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") node.query("system flush logs") assert int(node.query("exists system.trace_log")) From 2ecc53787ea2229dcd09c2c80b5054d952669be3 Mon Sep 17 00:00:00 2001 From: kssenii Date: Sun, 16 Jun 2024 14:30:09 +0200 Subject: [PATCH 107/139] Ping CI From dc48eac7c8252fe96aa8c50d82bfe0f2782d3b41 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Sun, 16 Jun 2024 18:13:36 +0200 Subject: [PATCH 108/139] Update test.py --- tests/integration/test_keeper_profiler/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index c0a70829107..01c3680f89d 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -42,7 +42,7 @@ def test_profiler(started_cluster): "CREATE TABLE t (key UInt32, value String) Engine = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') ORDER BY key" ) - for _ in range(50): + for _ in range(100): node.query("INSERT INTO t SELECT number, toString(number) from numbers(100)") node.query("system flush logs") From 27a0815bcc696b1aaf2c5756f56a254f2e7d7169 Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Sun, 16 Jun 2024 15:15:19 +0800 Subject: [PATCH 109/139] fix failed code style check --- src/Analyzer/SetUtils.cpp | 14 ++++++-------- src/Interpreters/ActionsVisitor.cpp | 10 +++++----- src/Interpreters/convertFieldToType.h | 2 +- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index 71297169b5c..9d898aea6ae 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -72,7 +72,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& { if (columns_size == 1) { - DataTypePtr data_type = value_types[value_types_index]; + const DataTypePtr & data_type = value_types[value_types_index]; auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); if (!field) { @@ -83,7 +83,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& bool need_insert_null = transform_null_in && 
block_types[0]->isNullable(); if (!field->isNull() || need_insert_null) columns[0]->insert(*field); - + value_types_index += 1; continue; } @@ -94,8 +94,8 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& value.getTypeName()); const auto & tuple = value.template get(); - DataTypePtr value_type = value_types[value_types_index]; - DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); + const DataTypePtr & value_type = value_types[value_types_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t tuple_size = tuple.size(); @@ -124,7 +124,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& if (i == tuple_size) for (i = 0; i < tuple_size; ++i) columns[i]->insert(tuple_values[i]); - + value_types_index += 1; } @@ -159,7 +159,6 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (lhs_type_depth == rhs_type_depth) { /// 1 in 1; (1, 2) in (1, 2); identity(tuple(tuple(tuple(1)))) in tuple(tuple(tuple(1))); etc. - Array array{value}; DataTypes value_types{value_type}; result_block = createBlockFromCollection(array, value_types, set_element_types, transform_null_in); @@ -167,7 +166,6 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const else if (lhs_type_depth + 1 == rhs_type_depth) { /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)) - WhichDataType rhs_which_type(value_type); if (rhs_which_type.isArray()) @@ -186,7 +184,7 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const else if (rhs_which_type.isTuple()) { const DataTypeTuple * value_tuple_type = typeid_cast(value_type.get()); - DataTypes value_types = value_tuple_type->getElements(); + const DataTypes & value_types = value_tuple_type->getElements(); result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); } else diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 83142718073..e5a433b4bcd 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -138,9 +138,9 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (tuple_values.empty()) tuple_values.resize(tuple_size); - - DataTypePtr value_type = value_types[value_type_index]; - DataTypes tuple_value_type = typeid_cast(value_type.get())->getElements(); + + const DataTypePtr & value_type = value_types[value_type_index]; + const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t i = 0; for (; i < tuple_size; ++i) @@ -331,7 +331,7 @@ Block createBlockForSet( auto type_index = right_arg_type->getTypeId(); if (type_index == TypeIndex::Tuple) { - DataTypes data_types = typeid_cast(right_arg_type.get())->getElements(); + const DataTypes & data_types = typeid_cast(right_arg_type.get())->getElements(); block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); } else if (type_index == TypeIndex::Array) @@ -340,7 +340,7 @@ Block createBlockForSet( size_t right_arg_array_size = right_arg_value.get().size(); DataTypes data_types; data_types.reserve(right_arg_array_size); - for(size_t i = 0; i < right_arg_array_size; ++i) + for (size_t i = 0; i < right_arg_array_size; ++i) { data_types.push_back(right_arg_array_type->getNestedType()); } diff --git a/src/Interpreters/convertFieldToType.h b/src/Interpreters/convertFieldToType.h index cb7903f587a..4aa09f8619e 
100644 --- a/src/Interpreters/convertFieldToType.h +++ b/src/Interpreters/convertFieldToType.h @@ -22,6 +22,6 @@ Field convertFieldToTypeOrThrow(const Field & from_value, const IDataType & to_t /// Applies stricter rules than convertFieldToType, doesn't allow loss of precision converting to Decimal. /// Returns `Field` if the conversion was successful and the result is equal to the original value, otherwise returns nullopt. -std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type); +std::optional convertFieldToTypeStrict(const Field & from_value, const IDataType & from_type, const IDataType & to_type); } From 7bed33012db0047db31e302c42193887138281f4 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Mon, 17 Jun 2024 07:51:58 +0200 Subject: [PATCH 110/139] Fix bad code, but not error in system.session_log --- src/Access/SettingsProfilesInfo.cpp | 32 +++++++++------------------- src/Access/SettingsProfilesInfo.h | 6 +++++- src/Interpreters/Session.cpp | 8 +++---- src/Interpreters/Session.h | 3 +-- src/Interpreters/SessionLog.cpp | 2 +- src/Parsers/Kusto/KQL_ReleaseNote.md | 1 - 6 files changed, 21 insertions(+), 31 deletions(-) diff --git a/src/Access/SettingsProfilesInfo.cpp b/src/Access/SettingsProfilesInfo.cpp index d8b52ecf5e4..a5eacbe1b6e 100644 --- a/src/Access/SettingsProfilesInfo.cpp +++ b/src/Access/SettingsProfilesInfo.cpp @@ -15,22 +15,8 @@ namespace ErrorCodes bool operator==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs) { - if (lhs.settings != rhs.settings) - return false; - - if (lhs.constraints != rhs.constraints) - return false; - - if (lhs.profiles != rhs.profiles) - return false; - - if (lhs.profiles_with_implicit != rhs.profiles_with_implicit) - return false; - - if (lhs.names_of_profiles != rhs.names_of_profiles) - return false; - - return true; + return std::tie(lhs.settings, lhs.constraints, lhs.profiles, lhs.profiles_with_implicit, lhs.names_of_profiles) + == std::tie(rhs.settings, rhs.constraints, rhs.profiles, rhs.profiles_with_implicit, rhs.names_of_profiles); } std::shared_ptr @@ -66,18 +52,20 @@ Strings SettingsProfilesInfo::getProfileNames() const { Strings result; result.reserve(profiles.size()); - for (const auto & profile_id : profiles) + for (const UUID & profile_uuid : profiles) { - const auto p = names_of_profiles.find(profile_id); - if (p != names_of_profiles.end()) - result.push_back(p->second); + const auto names_it = names_of_profiles.find(profile_uuid); + if (names_it != names_of_profiles.end()) + { + result.push_back(names_it->second); + } else { - if (const auto name = access_control.tryReadName(profile_id)) + if (const auto name = access_control.tryReadName(profile_uuid)) // We could've updated cache here, but it is a very rare case, so don't bother. result.push_back(*name); else - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_id)); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to get profile name for {}", toString(profile_uuid)); } } diff --git a/src/Access/SettingsProfilesInfo.h b/src/Access/SettingsProfilesInfo.h index ec289a5ec0a..bc1b01f47d0 100644 --- a/src/Access/SettingsProfilesInfo.h +++ b/src/Access/SettingsProfilesInfo.h @@ -29,7 +29,11 @@ struct SettingsProfilesInfo /// Names of all the profiles in `profiles`. 
std::unordered_map names_of_profiles; - explicit SettingsProfilesInfo(const AccessControl & access_control_) : constraints(access_control_), access_control(access_control_) {} + explicit SettingsProfilesInfo(const AccessControl & access_control_) + : constraints(access_control_), access_control(access_control_) + { + } + std::shared_ptr getConstraintsAndProfileIDs( const std::shared_ptr & previous = nullptr) const; diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 396562189e0..9dd686290db 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -532,7 +532,7 @@ ContextMutablePtr Session::makeSessionContext() session_context->checkSettingsConstraints(settings_from_auth_server, SettingSource::QUERY); session_context->applySettingsChanges(settings_from_auth_server); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -596,7 +596,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_name_, std: { session_name_ }, max_sessions_for_user); - recordLoginSucess(session_context); + recordLoginSuccess(session_context); return session_context; } @@ -672,13 +672,13 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t user = query_context->getUser(); /// Interserver does not create session context - recordLoginSucess(query_context); + recordLoginSuccess(query_context); return query_context; } -void Session::recordLoginSucess(ContextPtr login_context) const +void Session::recordLoginSuccess(ContextPtr login_context) const { if (notified_session_log_about_login) return; diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index 14f6f806acd..fc41c78e666 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -102,8 +102,7 @@ public: private: std::shared_ptr getSessionLog() const; ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const; - void recordLoginSucess(ContextPtr login_context) const; - + void recordLoginSuccess(ContextPtr login_context) const; mutable bool notified_session_log_about_login = false; const UUID auth_id; diff --git a/src/Interpreters/SessionLog.cpp b/src/Interpreters/SessionLog.cpp index adb94cae0c2..dd6af8b2a19 100644 --- a/src/Interpreters/SessionLog.cpp +++ b/src/Interpreters/SessionLog.cpp @@ -214,7 +214,7 @@ void SessionLog::addLoginSuccess(const UUID & auth_id, const ClientInfo & client_info, const UserPtr & login_user) { - DB::SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); + SessionLogElement log_entry(auth_id, SESSION_LOGIN_SUCCESS); log_entry.client_info = client_info; if (login_user) diff --git a/src/Parsers/Kusto/KQL_ReleaseNote.md b/src/Parsers/Kusto/KQL_ReleaseNote.md index bea1a627129..fa60ce77835 100644 --- a/src/Parsers/Kusto/KQL_ReleaseNote.md +++ b/src/Parsers/Kusto/KQL_ReleaseNote.md @@ -993,4 +993,3 @@ Please note that the functions listed below only take constant parameters for no - dcount() - dcountif() - bin - \ No newline at end of file From bab5359448c7c4ccf81727304713e8e8fd9cf1a4 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:24:07 +0200 Subject: [PATCH 111/139] Update test.py --- tests/integration/test_keeper_profiler/test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index 01c3680f89d..f9a90b9033e 100644 --- 
a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -37,6 +37,8 @@ def started_cluster(): def test_profiler(started_cluster): node = cluster.instances["node1"] + if node1.is_built_with_sanitizer(): + return node.query( "CREATE TABLE t (key UInt32, value String) Engine = ReplicatedMergeTree('/clickhouse-tables/test1', 'r1') ORDER BY key" From 263b22d89cb8d74bf874c235c122dd1f9e91089a Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Mon, 17 Jun 2024 13:24:44 +0200 Subject: [PATCH 112/139] Update test.py --- tests/integration/test_keeper_profiler/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index f9a90b9033e..28dae69bd5f 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -37,7 +37,7 @@ def started_cluster(): def test_profiler(started_cluster): node = cluster.instances["node1"] - if node1.is_built_with_sanitizer(): + if node.is_built_with_sanitizer(): return node.query( From e3818a97944bb10d56646b4145696a60a7aa0edd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 15:55:42 +0000 Subject: [PATCH 113/139] Re-enable session caching --- base/poco/NetSSL_OpenSSL/src/SSLManager.cpp | 41 +++++++++---------- .../01393_benchmark_secure_port.sh | 2 +- .../0_stateless/01683_text_log_deadlock.sh | 2 +- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp index 7f6cc9abcb2..d404aed4d13 100644 --- a/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp +++ b/base/poco/NetSSL_OpenSSL/src/SSLManager.cpp @@ -330,27 +330,26 @@ void SSLManager::initDefaultContext(bool server) else _ptrDefaultClientContext->disableProtocols(disabledProtocols); - /// Temporarily disabled during the transition from boringssl to OpenSSL due to tsan issues. 
- /// bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); - /// if (server) - /// { - /// std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); - /// _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); - /// if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) - /// { - /// int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); - /// _ptrDefaultServerContext->setSessionCacheSize(cacheSize); - /// } - /// if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) - /// { - /// int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); - /// _ptrDefaultServerContext->setSessionTimeout(timeout); - /// } - /// } - /// else - /// { - /// _ptrDefaultClientContext->enableSessionCache(cacheSessions); - /// } + bool cacheSessions = config.getBool(prefix + CFG_CACHE_SESSIONS, false); + if (server) + { + std::string sessionIdContext = config.getString(prefix + CFG_SESSION_ID_CONTEXT, config.getString("application.name", "")); + _ptrDefaultServerContext->enableSessionCache(cacheSessions, sessionIdContext); + if (config.hasProperty(prefix + CFG_SESSION_CACHE_SIZE)) + { + int cacheSize = config.getInt(prefix + CFG_SESSION_CACHE_SIZE); + _ptrDefaultServerContext->setSessionCacheSize(cacheSize); + } + if (config.hasProperty(prefix + CFG_SESSION_TIMEOUT)) + { + int timeout = config.getInt(prefix + CFG_SESSION_TIMEOUT); + _ptrDefaultServerContext->setSessionTimeout(timeout); + } + } + else + { + _ptrDefaultClientContext->enableSessionCache(cacheSessions); + } bool extendedVerification = config.getBool(prefix + CFG_EXTENDED_VERIFICATION, false); if (server) _ptrDefaultServerContext->enableExtendedCertificateVerification(extendedVerification); diff --git a/tests/queries/0_stateless/01393_benchmark_secure_port.sh b/tests/queries/0_stateless/01393_benchmark_secure_port.sh index 7954e439977..c1874a07977 100755 --- a/tests/queries/0_stateless/01393_benchmark_secure_port.sh +++ b/tests/queries/0_stateless/01393_benchmark_secure_port.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-tsan, no-asan +# Tags: no-fasttest, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh index 1aced61cb42..e838ab87c1d 100755 --- a/tests/queries/0_stateless/01683_text_log_deadlock.sh +++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: deadlock, no-tsan, no-asan +# Tags: deadlock, no-asan CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh From e484390a0e27528c0baa2e0f99a3bd5143184117 Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Tue, 11 Jun 2024 23:24:21 +0200 Subject: [PATCH 114/139] Remove no-asan tag from tests --- tests/queries/0_stateless/01393_benchmark_secure_port.sh | 2 +- tests/queries/0_stateless/01683_text_log_deadlock.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01393_benchmark_secure_port.sh b/tests/queries/0_stateless/01393_benchmark_secure_port.sh index c1874a07977..f75577e6ddf 100755 --- a/tests/queries/0_stateless/01393_benchmark_secure_port.sh +++ b/tests/queries/0_stateless/01393_benchmark_secure_port.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: no-fasttest, no-asan +# Tags: no-fasttest CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck 
source=../shell_config.sh

diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh
index e838ab87c1d..af7f348a6a2 100755
--- a/tests/queries/0_stateless/01683_text_log_deadlock.sh
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-# Tags: deadlock, no-asan
+# Tags: deadlock

 CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh

From fc4249b0e1a365a17c497179c2c412fdf7798733 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 12 Jun 2024 09:54:07 +0000
Subject: [PATCH 115/139] Reduce runtime of 01683_text_log_deadlock

- some runs of this test in CI timed out, others succeeded

- reducing the test size is not a great strategy compared to reproducing
  the slowness locally. I could not make the test run on my system
  (clickhouse-benchmark did not connect to the server via a secure
  connection; interestingly, a secure connection did work for
  clickhouse-client).

- this commit should unblock CI and uncover more interesting bugs caused
  by re-enabling session caching

---
 tests/queries/0_stateless/01683_text_log_deadlock.reference | 2 +-
 tests/queries/0_stateless/01683_text_log_deadlock.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.reference b/tests/queries/0_stateless/01683_text_log_deadlock.reference
index 4cf61460252..3805f2a95e9 100644
--- a/tests/queries/0_stateless/01683_text_log_deadlock.reference
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.reference
@@ -1 +1 @@
-queries: 25000
+queries: 5000

diff --git a/tests/queries/0_stateless/01683_text_log_deadlock.sh b/tests/queries/0_stateless/01683_text_log_deadlock.sh
index af7f348a6a2..6b3bcc58868 100755
--- a/tests/queries/0_stateless/01683_text_log_deadlock.sh
+++ b/tests/queries/0_stateless/01683_text_log_deadlock.sh
@@ -5,4 +5,4 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 # shellcheck source=../shell_config.sh
 . "$CURDIR"/../shell_config.sh

-$CLICKHOUSE_BENCHMARK --secure -i 25000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 25000'
+$CLICKHOUSE_BENCHMARK --secure -i 5000 -c 32 --query 'SELECT 1' 2>&1 | grep -oF 'queries: 5000'

From 186bd0cc3d21f124e12a19b7d4874111d2597a8e Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 17 Jun 2024 12:20:54 +0000
Subject: [PATCH 116/139] Temporary fix for tsan issue openssl#24629

---
 contrib/openssl | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/openssl b/contrib/openssl
index 67c0b63e578..e0d6ae2bf93 160000
--- a/contrib/openssl
+++ b/contrib/openssl
@@ -1 +1 @@
-Subproject commit 67c0b63e578e4c751ac9edf490f5a96124fff8dc
+Subproject commit e0d6ae2bf93cf6dc26bb86aa39992bc6a410869a

From b0b2c3fea99007b9f22ff34c3b1d5b9b51cad4b4 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 17 Jun 2024 13:19:59 +0000
Subject: [PATCH 117/139] Minor clarification of setting docs

---
 docs/en/operations/settings/settings.md | 2 +-
 src/Core/Settings.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 74d81548e98..59dd92f0fcd 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -5420,7 +5420,7 @@ Default value: `true`.
## uniform_snowflake_conversion_functions {#uniform_snowflake_conversion_functions} -Controls if functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` are enabled (if `true`), or functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` (if `false`). +If set to `true`, then functions `snowflakeIDToDateTime`, `snowflakeIDToDateTime64`, `dateTimeToSnowflakeID`, and `dateTime64ToSnowflakeID` are enabled, and functions `snowflakeToDateTime`, `snowflakeToDateTime64`, `dateTimeToSnowflake`, and `dateTime64ToSnowflake` are disabled (and vice versa if set to `false`). Default value: `true` diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 064faa228ae..182fe67f135 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -930,7 +930,7 @@ class IColumn; M(Int64, prefer_warmed_unmerged_parts_seconds, 0, "Only available in ClickHouse Cloud. If a merged part is less than this many seconds old and is not pre-warmed (see cache_populated_by_fetch), but all its source parts are available and pre-warmed, SELECT queries will read from those parts instead. Only for ReplicatedMergeTree. Note that this only checks whether CacheWarmer processed the part; if the part was fetched into cache by something else, it'll still be considered cold until CacheWarmer gets to it; if it was warmed, then evicted from cache, it'll still be considered warm.", 0) \ M(Bool, iceberg_engine_ignore_schema_evolution, false, "Ignore schema evolution in Iceberg table engine and read all data using latest schema saved on table creation. Note that it can lead to incorrect result", 0) \ M(Bool, allow_deprecated_error_prone_window_functions, false, "Allow usage of deprecated error prone window functions (neighbor, runningAccumulate, runningDifferenceStartingWithFirstValue, runningDifference)", 0) \ - M(Bool, uniform_snowflake_conversion_functions, true, "Enable functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID.", 0) \ + M(Bool, uniform_snowflake_conversion_functions, true, "Enables functions snowflakeIDToDateTime[64] and dateTime[64]ToSnowflakeID while disabling functions snowflakeToDateTime[64] and dateTime[64]ToSnowflake.", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS, move obsolete settings to OBSOLETE_SETTINGS and obsolete format settings to OBSOLETE_FORMAT_SETTINGS. 
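To make the clarified setting description above concrete, here is a minimal sketch of the toggle in practice. The function and setting names are taken from the patch itself; the returned IDs and the exact error raised for a disabled function are not shown, and a server with default settings is assumed:

```sql
-- Default (uniform_snowflake_conversion_functions = true): the unified
-- snowflake-ID functions are available.
SELECT dateTimeToSnowflakeID(now());

-- The legacy functions are disabled in that mode; flipping the setting for a
-- single query re-enables them (and disables the unified ones).
SELECT dateTimeToSnowflake(now())
SETTINGS uniform_snowflake_conversion_functions = 0;
```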
From cd1475a5dfcf5f45d222b17942c2fe95b80606f0 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 17 Jun 2024 17:59:17 +0200 Subject: [PATCH 118/139] Debug why test failed on aarch64 --- .../integration/test_keeper_profiler/test.py | 38 +++++++++++++++++-- 1 file changed, 34 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index 28dae69bd5f..f5095221286 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -50,9 +50,8 @@ def test_profiler(started_cluster): node.query("system flush logs") assert int(node.query("exists system.trace_log")) - assert 1 < int( - node.query( - """ + result = node.query( + """ set allow_introspection_functions=1; system flush logs; select cnt from ( @@ -62,5 +61,36 @@ select cnt from ( '\n') as trace from system.trace_log where trace_type = 'Real' and trace ilike '%KeeperTCPHandler%' group by trace); """ - ) ) + + if len(result) == 0: + assert 0 < int( + node.query( + """ + set allow_introspection_functions=1; + system flush logs; + select cnt from ( + select count() as cnt, formatReadableSize(sum(size)), + arrayStringConcat( + arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') as trace + from system.trace_log where trace_type = 'Real' group by trace); + """ + ) + ) + result = node.query( + """ + set allow_introspection_functions=1; + system flush logs; + select * from ( + select count() as cnt, formatReadableSize(sum(size)), + arrayStringConcat( + arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), + '\n') as trace + from system.trace_log where trace_type = 'Real' group by trace); + """ + ) + print(result) + assert False + + assert 1 < int(result) From e310533930c6857e6ed08bca1ba4cc7e56240cd8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 17 Jun 2024 18:16:05 +0200 Subject: [PATCH 119/139] Forbid QUALIFY clause in the old analyzer --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +++ .../0_stateless/03173_forbid_qualify.reference | 3 +++ tests/queries/0_stateless/03173_forbid_qualify.sql | 11 +++++++++++ 3 files changed, 17 insertions(+) create mode 100644 tests/queries/0_stateless/03173_forbid_qualify.reference create mode 100644 tests/queries/0_stateless/03173_forbid_qualify.sql diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8e072779b53..38ffd40b6cd 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1474,6 +1474,9 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional

(source_header); diff --git a/tests/queries/0_stateless/03173_forbid_qualify.reference b/tests/queries/0_stateless/03173_forbid_qualify.reference new file mode 100644 index 00000000000..c2f595d8c4b --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.reference @@ -0,0 +1,3 @@ +100 +49 +100 diff --git a/tests/queries/0_stateless/03173_forbid_qualify.sql b/tests/queries/0_stateless/03173_forbid_qualify.sql new file mode 100644 index 00000000000..59f0153cd36 --- /dev/null +++ b/tests/queries/0_stateless/03173_forbid_qualify.sql @@ -0,0 +1,11 @@ +drop table if exists default.test_qualify; +create table default.test_qualify (number Int64) ENGINE = MergeTree ORDER BY (number); + +insert into default.test_qualify SELECT * FROM numbers(100); + +select count() from default.test_qualify; -- 100 +select * from default.test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 1; -- 49 +select * from default.test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 0; -- { serverError NOT_IMPLEMENTED } + +delete from default.test_qualify where number in (select number from default.test_qualify qualify row_number() over (order by number) = 50); -- { serverError UNFINISHED } +select count() from default.test_qualify; -- 100 From 8d072ade18b61b6eafef0899f5ce551030b6662d Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 17 Jun 2024 22:35:49 +0200 Subject: [PATCH 120/139] Update test --- tests/integration/test_keeper_profiler/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index f5095221286..98738890ad8 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -69,7 +69,7 @@ from system.trace_log where trace_type = 'Real' and trace ilike '%KeeperTCPH """ set allow_introspection_functions=1; system flush logs; - select cnt from ( + select sum(cnt) from ( select count() as cnt, formatReadableSize(sum(size)), arrayStringConcat( arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), From fb110827f82746964fc91ef73d45719244bbcad8 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 14 Jun 2024 19:16:37 +0200 Subject: [PATCH 121/139] save server data for failed stateless tests --- docker/test/stateless/run.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index f94621ba092..4434a5338a7 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -254,7 +254,7 @@ function run_tests() set +e clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \ - --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ + --no-drop-if-fail --test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \ | ts '%Y-%m-%d %H:%M:%S' \ | tee -a test_output/test_result.txt set -e @@ -379,6 +379,10 @@ fi tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||: +rm -rf /var/lib/clickhouse/data/system/*/ +tar -chf /test_output/store.tar /var/lib/clickhouse/store ||: +tar -chf /test_output/metadata.tar /var/lib/clickhouse/metadata/*.sql ||: + if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then rg -Fa "<Fatal>" /var/log/clickhouse-server/clickhouse-server1.log ||: rg -Fa "<Fatal>" 
/var/log/clickhouse-server/clickhouse-server2.log ||: From 42dd981fe41dc22857f65e2f21b6d03893b04b4a Mon Sep 17 00:00:00 2001 From: chloro <13125187405@163.com> Date: Tue, 18 Jun 2024 08:14:14 +0800 Subject: [PATCH 122/139] fix code review --- src/Analyzer/SetUtils.cpp | 26 +++++++++---------------- src/Interpreters/ActionsVisitor.cpp | 30 ++++++++++++----------------- 2 files changed, 21 insertions(+), 35 deletions(-) diff --git a/src/Analyzer/SetUtils.cpp b/src/Analyzer/SetUtils.cpp index 9d898aea6ae..0ecb3545225 100644 --- a/src/Analyzer/SetUtils.cpp +++ b/src/Analyzer/SetUtils.cpp @@ -9,6 +9,8 @@ #include #include +#include + namespace DB { @@ -66,17 +68,16 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& } Row tuple_values; - size_t value_types_index = 0; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto & value = collection[collection_index]; if (columns_size == 1) { - const DataTypePtr & data_type = value_types[value_types_index]; + const DataTypePtr & data_type = value_types[collection_index]; auto field = convertFieldToTypeStrict(value, *data_type, *block_types[0]); if (!field) { - value_types_index += 1; continue; } @@ -84,7 +85,6 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& if (!field->isNull() || need_insert_null) columns[0]->insert(*field); - value_types_index += 1; continue; } @@ -94,7 +94,7 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& value.getTypeName()); const auto & tuple = value.template get(); - const DataTypePtr & value_type = value_types[value_types_index]; + const DataTypePtr & value_type = value_types[collection_index]; const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t tuple_size = tuple.size(); @@ -124,8 +124,6 @@ Block createBlockFromCollection(const Collection & collection, const DataTypes& if (i == tuple_size) for (i = 0; i < tuple_size; ++i) columns[i]->insert(tuple_values[i]); - - value_types_index += 1; } Block res; @@ -170,20 +168,14 @@ Block getSetElementsForConstantValue(const DataTypePtr & expression_type, const if (rhs_which_type.isArray()) { - const DataTypeArray * value_array_type = typeid_cast(value_type.get()); + const DataTypeArray * value_array_type = assert_cast(value_type.get()); size_t value_array_size = value.get().size(); - DataTypes value_types; - value_types.reserve(value_array_size); - - for (size_t i = 0; i < value_array_size; ++i) - { - value_types.push_back(value_array_type->getNestedType()); - } + DataTypes value_types(value_array_size, value_array_type->getNestedType()); result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); } else if (rhs_which_type.isTuple()) { - const DataTypeTuple * value_tuple_type = typeid_cast(value_type.get()); + const DataTypeTuple * value_tuple_type = assert_cast(value_type.get()); const DataTypes & value_types = value_tuple_type->getElements(); result_block = createBlockFromCollection(value.get(), value_types, set_element_types, transform_null_in); } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index e5a433b4bcd..3f4afff56e8 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -113,16 +114,15 @@ static Block createBlockFromCollection(const Collection & collection, 
const Data } Row tuple_values; - size_t value_type_index = 0; - for (const auto & value : collection) + for (size_t collection_index = 0; collection_index < collection.size(); ++collection_index) { + const auto& value = collection[collection_index]; if (columns_num == 1) { - auto field = convertFieldToTypeStrict(value, *value_types[value_type_index], *types[0]); + auto field = convertFieldToTypeStrict(value, *value_types[collection_index], *types[0]); bool need_insert_null = transform_null_in && types[0]->isNullable(); if (field && (!field->isNull() || need_insert_null)) columns[0]->insert(*field); - value_type_index += 1; } else { @@ -139,7 +139,7 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (tuple_values.empty()) tuple_values.resize(tuple_size); - const DataTypePtr & value_type = value_types[value_type_index]; + const DataTypePtr & value_type = value_types[collection_index]; const DataTypes & tuple_value_type = typeid_cast(value_type.get())->getElements(); size_t i = 0; @@ -158,7 +158,6 @@ static Block createBlockFromCollection(const Collection & collection, const Data if (i == tuple_size) for (i = 0; i < tuple_size; ++i) columns[i]->insert(tuple_values[i]); - value_type_index += 1; } } @@ -322,8 +321,8 @@ Block createBlockForSet( if (left_type_depth == right_type_depth) { Array array{right_arg_value}; - DataTypes data_types{right_arg_type}; - block = createBlockFromCollection(array, data_types, set_element_types, tranform_null_in); + DataTypes value_types{right_arg_type}; + block = createBlockFromCollection(array, value_types, set_element_types, tranform_null_in); } /// 1 in (1, 2); (1, 2) in ((1, 2), (3, 4)); etc. else if (left_type_depth + 1 == right_type_depth) @@ -331,20 +330,15 @@ Block createBlockForSet( auto type_index = right_arg_type->getTypeId(); if (type_index == TypeIndex::Tuple) { - const DataTypes & data_types = typeid_cast(right_arg_type.get())->getElements(); - block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); + const DataTypes & value_types = assert_cast(right_arg_type.get())->getElements(); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); } else if (type_index == TypeIndex::Array) { - const auto* right_arg_array_type = typeid_cast(right_arg_type.get()); + const auto* right_arg_array_type = assert_cast(right_arg_type.get()); size_t right_arg_array_size = right_arg_value.get().size(); - DataTypes data_types; - data_types.reserve(right_arg_array_size); - for (size_t i = 0; i < right_arg_array_size; ++i) - { - data_types.push_back(right_arg_array_type->getNestedType()); - } - block = createBlockFromCollection(right_arg_value.get(), data_types, set_element_types, tranform_null_in); + DataTypes value_types(right_arg_array_size, right_arg_array_type->getNestedType()); + block = createBlockFromCollection(right_arg_value.get(), value_types, set_element_types, tranform_null_in); } else throw_unsupported_type(right_arg_type); From f2a162a4a784af00fc788084eab8bf6763f06f73 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 18 Jun 2024 11:09:13 +0800 Subject: [PATCH 123/139] fix style error --- tests/integration/test_table_db_num_limit/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_table_db_num_limit/test.py b/tests/integration/test_table_db_num_limit/test.py index f2080ec4738..aa8030b077c 100644 --- a/tests/integration/test_table_db_num_limit/test.py +++ 
b/tests/integration/test_table_db_num_limit/test.py @@ -32,7 +32,7 @@ def test_table_db_limit(started_cluster): for i in range(10): node1.query("create table t{} (a Int32) Engine = Log".format(i)) - node1.query("system flush logs"); + node1.query("system flush logs") for i in range(10): node1.query("drop table t{}".format(i)) for i in range(10): From d8379bfba2aeac91f3e578c182daddee9ea70353 Mon Sep 17 00:00:00 2001 From: Xu Jia Date: Tue, 18 Jun 2024 11:48:04 +0800 Subject: [PATCH 124/139] fix the error system flush logs hang --- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index c33f97dc80d..9230cba2a29 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -1577,7 +1577,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create, } UInt64 table_num_limit = getContext()->getGlobalContext()->getServerSettings().max_table_num_to_throw; - if (table_num_limit > 0) + if (table_num_limit > 0 && create.getDatabase() != DatabaseCatalog::SYSTEM_DATABASE) { UInt64 table_count = CurrentMetrics::get(CurrentMetrics::AttachedTable); if (table_count >= table_num_limit) From a12ebf05b8d8ce3ff1fa5fe913c4ff26d0ff2bf2 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Fri, 14 Jun 2024 15:12:31 +0800 Subject: [PATCH 125/139] add function edit distance utf8 --- .../functions/string-functions.md | 26 ++++ src/Functions/FunctionsStringDistance.cpp | 120 +++++++++++------- .../02884_string_distance_function.reference | 31 +++++ .../02884_string_distance_function.sql | 3 +- .../aspell-ignore/en/aspell-dict.txt | 2 + 5 files changed, 135 insertions(+), 47 deletions(-) diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md index c535b82d710..a258456345e 100644 --- a/docs/en/sql-reference/functions/string-functions.md +++ b/docs/en/sql-reference/functions/string-functions.md @@ -2178,6 +2178,32 @@ Result: Alias: levenshteinDistance +## editDistanceUTF8 + +Calculates the [edit distance](https://en.wikipedia.org/wiki/Edit_distance) between two UTF8 strings. + +**Syntax** + +```sql +editDistanceUTF8(string1, string2) +``` + +**Examples** + +``` sql +SELECT editDistanceUTF8('我是谁', '我是我'); +``` + +Result: + +``` text +┌─editDistanceUTF8('我是谁', '我是我')──┐ +│ 1 │ +└─────────────────────────────────────┘ +``` + +Alias: levenshteinDistanceUTF8 + ## damerauLevenshteinDistance Calculates the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau%E2%80%93Levenshtein_distance) between two byte strings. 
diff --git a/src/Functions/FunctionsStringDistance.cpp b/src/Functions/FunctionsStringDistance.cpp index 6cb23bbea9f..48f4aaf4e09 100644 --- a/src/Functions/FunctionsStringDistance.cpp +++ b/src/Functions/FunctionsStringDistance.cpp @@ -113,6 +113,36 @@ struct ByteHammingDistanceImpl } }; +void parseUTF8String(const char * __restrict data, size_t size, std::function utf8_consumer, std::function ascii_consumer = nullptr) +{ + const char * end = data + size; + while (data < end) + { + size_t len = UTF8::seqLength(*data); + if (len == 1) + { + if (ascii_consumer) + ascii_consumer(static_cast(*data)); + else + utf8_consumer(static_cast(*data)); + ++data; + } + else + { + auto code_point = UTF8::convertUTF8ToCodePoint(data, end - data); + if (code_point.has_value()) + { + utf8_consumer(code_point.value()); + data += len; + } + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(data, end - data)); + } + } + } +} + template struct ByteJaccardIndexImpl { @@ -138,57 +168,28 @@ struct ByteJaccardIndexImpl haystack_set.fill(0); needle_set.fill(0); - while (haystack < haystack_end) + if constexpr (is_utf8) { - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*haystack); - - if (len == 1) + parseUTF8String( + haystack, + haystack_size, + [&](UInt32 data) { haystack_utf8_set.insert(data); }, + [&](unsigned char data) { haystack_set[data] = 1; }); + parseUTF8String( + needle, needle_size, [&](UInt32 data) { needle_utf8_set.insert(data); }, [&](unsigned char data) { needle_set[data] = 1; }); + } + else + { + while (haystack < haystack_end) { haystack_set[static_cast(*haystack)] = 1; ++haystack; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(haystack, haystack_end - haystack); - if (code_point.has_value()) - { - haystack_utf8_set.insert(code_point.value()); - haystack += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(haystack, haystack_end - haystack)); - } - } - } - - while (needle < needle_end) - { - - size_t len = 1; - if constexpr (is_utf8) - len = UTF8::seqLength(*needle); - - if (len == 1) + while (needle < needle_end) { needle_set[static_cast(*needle)] = 1; ++needle; } - else - { - auto code_point = UTF8::convertUTF8ToCodePoint(needle, needle_end - needle); - if (code_point.has_value()) - { - needle_utf8_set.insert(code_point.value()); - needle += len; - } - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Illegal UTF-8 sequence, while processing '{}'", StringRef(needle, needle_end - needle)); - } - } } UInt8 intersection = 0; @@ -226,6 +227,7 @@ struct ByteJaccardIndexImpl static constexpr size_t max_string_size = 1u << 16; +template struct ByteEditDistanceImpl { using ResultType = UInt64; @@ -242,6 +244,16 @@ struct ByteEditDistanceImpl ErrorCodes::TOO_LARGE_STRING_SIZE, "The string size is too big for function editDistance, should be at most {}", max_string_size); + PaddedPODArray haystack_utf8; + PaddedPODArray needle_utf8; + if constexpr (is_utf8) + { + parseUTF8String(haystack, haystack_size, [&](UInt32 data) { haystack_utf8.push_back(data); }); + parseUTF8String(needle, needle_size, [&](UInt32 data) { needle_utf8.push_back(data); }); + haystack_size = haystack_utf8.size(); + needle_size = needle_utf8.size(); + } + PaddedPODArray distances0(haystack_size + 1, 0); PaddedPODArray distances1(haystack_size + 1, 0); @@ -261,9 +273,16 @@ struct ByteEditDistanceImpl insertion = distances1[pos_haystack] + 1; substitution = 
distances0[pos_haystack]; - if (*(needle + pos_needle) != *(haystack + pos_haystack)) - substitution += 1; - + if constexpr (is_utf8) + { + if (needle_utf8[pos_needle] != haystack_utf8[pos_haystack]) + substitution += 1; + } + else + { + if (*(needle + pos_needle) != *(haystack + pos_haystack)) + substitution += 1; + } distances1[pos_haystack + 1] = std::min(deletion, std::min(substitution, insertion)); } distances0.swap(distances1); @@ -457,7 +476,12 @@ struct NameEditDistance { static constexpr auto name = "editDistance"; }; -using FunctionEditDistance = FunctionsStringSimilarity, NameEditDistance>; +using FunctionEditDistance = FunctionsStringSimilarity>, NameEditDistance>; +struct NameEditDistanceUTF8 +{ + static constexpr auto name = "editDistanceUTF8"; +}; +using FunctionEditDistanceUTF8 = FunctionsStringSimilarity>, NameEditDistanceUTF8>; struct NameDamerauLevenshteinDistance { @@ -499,6 +523,10 @@ REGISTER_FUNCTION(StringDistance) FunctionDocumentation{.description = R"(Calculates the edit distance between two byte-strings.)"}); factory.registerAlias("levenshteinDistance", NameEditDistance::name); + factory.registerFunction( + FunctionDocumentation{.description = R"(Calculates the edit distance between two UTF8 strings.)"}); + factory.registerAlias("levenshteinDistanceUTF8", NameEditDistanceUTF8::name); + factory.registerFunction( FunctionDocumentation{.description = R"(Calculates the Damerau-Levenshtein distance two between two byte-string.)"}); diff --git a/tests/queries/0_stateless/02884_string_distance_function.reference b/tests/queries/0_stateless/02884_string_distance_function.reference index e8cce2017d9..71b15bc8753 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.reference +++ b/tests/queries/0_stateless/02884_string_distance_function.reference @@ -13,53 +13,84 @@ clickhouse mouse 6 -- non-const arguments byteHammingDistance 0 byteHammingDistance abc 3 +byteHammingDistance Jerry 我是谁 9 byteHammingDistance abc 3 byteHammingDistance abc ab 1 byteHammingDistance abc abc 0 byteHammingDistance abc bc 3 byteHammingDistance clickhouse mouse 10 +byteHammingDistance 我是谁 Tom 9 +byteHammingDistance 我是谁 我是我 3 editDistance 0 editDistance abc 3 +editDistance Jerry 我是谁 9 editDistance abc 3 editDistance abc ab 1 editDistance abc abc 0 editDistance abc bc 1 editDistance clickhouse mouse 6 +editDistance 我是谁 Tom 9 +editDistance 我是谁 我是我 3 +editDistanceUTF8 0 +editDistanceUTF8 abc 3 +editDistanceUTF8 Jerry 我是谁 5 +editDistanceUTF8 abc 3 +editDistanceUTF8 abc ab 1 +editDistanceUTF8 abc abc 0 +editDistanceUTF8 abc bc 1 +editDistanceUTF8 clickhouse mouse 6 +editDistanceUTF8 我是谁 Tom 3 +editDistanceUTF8 我是谁 我是我 1 damerauLevenshteinDistance 0 damerauLevenshteinDistance abc 3 +damerauLevenshteinDistance Jerry 我是谁 9 damerauLevenshteinDistance abc 3 damerauLevenshteinDistance abc ab 1 damerauLevenshteinDistance abc abc 0 damerauLevenshteinDistance abc bc 1 damerauLevenshteinDistance clickhouse mouse 6 +damerauLevenshteinDistance 我是谁 Tom 9 +damerauLevenshteinDistance 我是谁 我是我 3 stringJaccardIndex 0 stringJaccardIndex abc 0 +stringJaccardIndex Jerry 我是谁 0 stringJaccardIndex abc 0 stringJaccardIndex abc ab 0.6666666666666666 stringJaccardIndex abc abc 1 stringJaccardIndex abc bc 0.6666666666666666 stringJaccardIndex clickhouse mouse 0.4 +stringJaccardIndex 我是谁 Tom 0 +stringJaccardIndex 我是谁 我是我 0.625 stringJaccardIndexUTF8 0 stringJaccardIndexUTF8 abc 0 +stringJaccardIndexUTF8 Jerry 我是谁 0 stringJaccardIndexUTF8 abc 0 stringJaccardIndexUTF8 abc ab 0.6666666666666666 stringJaccardIndexUTF8 
abc abc 1 stringJaccardIndexUTF8 abc bc 0.6666666666666666 stringJaccardIndexUTF8 clickhouse mouse 0.4 +stringJaccardIndexUTF8 我是谁 Tom 0 +stringJaccardIndexUTF8 我是谁 我是我 0.6666666666666666 jaroSimilarity 0 jaroSimilarity abc 3 +jaroSimilarity Jerry 我是谁 0 jaroSimilarity abc 3 jaroSimilarity abc ab 0.8888888888888888 jaroSimilarity abc abc 1 jaroSimilarity abc bc 0 jaroSimilarity clickhouse mouse 0 +jaroSimilarity 我是谁 Tom 0 +jaroSimilarity 我是谁 我是我 0.7777777777777777 jaroWinklerSimilarity 0 jaroWinklerSimilarity abc 3 +jaroWinklerSimilarity Jerry 我是谁 0 jaroWinklerSimilarity abc 3 jaroWinklerSimilarity abc ab 0.9111111111111111 jaroWinklerSimilarity abc abc 1 jaroWinklerSimilarity abc bc 0 jaroWinklerSimilarity clickhouse mouse 0 +jaroWinklerSimilarity 我是谁 Tom 0 +jaroWinklerSimilarity 我是谁 我是我 0.8666666666666666 -- Special UTF-8 tests 0.4 0 diff --git a/tests/queries/0_stateless/02884_string_distance_function.sql b/tests/queries/0_stateless/02884_string_distance_function.sql index fddbf41f0e5..482996e1448 100644 --- a/tests/queries/0_stateless/02884_string_distance_function.sql +++ b/tests/queries/0_stateless/02884_string_distance_function.sql @@ -26,11 +26,12 @@ CREATE TABLE t ) ENGINE = MergeTree ORDER BY s1; -- actual test cases -INSERT INTO t VALUES ('', '') ('abc', '') ('', 'abc') ('abc', 'abc') ('abc', 'ab') ('abc', 'bc') ('clickhouse', 'mouse'); +INSERT INTO t VALUES ('', '') ('abc', '') ('', 'abc') ('abc', 'abc') ('abc', 'ab') ('abc', 'bc') ('clickhouse', 'mouse') ('我是谁', 'Tom') ('Jerry', '我是谁') ('我是谁', '我是我'); SELECT '-- non-const arguments'; SELECT 'byteHammingDistance', s1, s2, byteHammingDistance(s1, s2) FROM t ORDER BY ALL; SELECT 'editDistance', s1, s2, editDistance(s1, s2) FROM t ORDER BY ALL; +SELECT 'editDistanceUTF8', s1, s2, editDistanceUTF8(s1, s2) FROM t ORDER BY ALL; SELECT 'damerauLevenshteinDistance', s1, s2, damerauLevenshteinDistance(s1, s2) FROM t ORDER BY ALL; SELECT 'stringJaccardIndex', s1, s2, stringJaccardIndex(s1, s2) FROM t ORDER BY ALL; SELECT 'stringJaccardIndexUTF8', s1, s2, stringJaccardIndexUTF8(s1, s2) FROM t ORDER BY ALL; diff --git a/utils/check-style/aspell-ignore/en/aspell-dict.txt b/utils/check-style/aspell-ignore/en/aspell-dict.txt index 64ff3e8e2cb..c8fc6754502 100644 --- a/utils/check-style/aspell-ignore/en/aspell-dict.txt +++ b/utils/check-style/aspell-ignore/en/aspell-dict.txt @@ -1541,6 +1541,7 @@ dumpColumnStructure durations ecto editDistance +editDistanceUTF embeddings emptyArray emptyArrayDate @@ -1898,6 +1899,7 @@ lessOrEquals lessorequals levenshtein levenshteinDistance +levenshteinDistanceUTF lexicographically lgamma libFuzzer From 72e3fdc8cae727e925d0628c5eb5e1f25f9bf578 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 18 Jun 2024 09:53:37 +0200 Subject: [PATCH 126/139] Use test database --- .../queries/0_stateless/03173_forbid_qualify.sql | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/03173_forbid_qualify.sql b/tests/queries/0_stateless/03173_forbid_qualify.sql index 59f0153cd36..d8cb2bad2ea 100644 --- a/tests/queries/0_stateless/03173_forbid_qualify.sql +++ b/tests/queries/0_stateless/03173_forbid_qualify.sql @@ -1,11 +1,11 @@ -drop table if exists default.test_qualify; -create table default.test_qualify (number Int64) ENGINE = MergeTree ORDER BY (number); +drop table if exists test_qualify; +create table test_qualify (number Int64) ENGINE = MergeTree ORDER BY (number); -insert into default.test_qualify SELECT * FROM numbers(100); +insert into test_qualify SELECT * 
FROM numbers(100); -select count() from default.test_qualify; -- 100 -select * from default.test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 1; -- 49 -select * from default.test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 0; -- { serverError NOT_IMPLEMENTED } +select count() from test_qualify; -- 100 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 1; -- 49 +select * from test_qualify qualify row_number() over (order by number) = 50 SETTINGS allow_experimental_analyzer = 0; -- { serverError NOT_IMPLEMENTED } -delete from default.test_qualify where number in (select number from default.test_qualify qualify row_number() over (order by number) = 50); -- { serverError UNFINISHED } -select count() from default.test_qualify; -- 100 +delete from test_qualify where number in (select number from test_qualify qualify row_number() over (order by number) = 50); -- { serverError UNFINISHED } +select count() from test_qualify; -- 100 From 5701e3e48bb00d9b3f13ee064fe6401ac1e22833 Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 18 Jun 2024 10:43:41 +0200 Subject: [PATCH 127/139] Fix test --- tests/integration/test_keeper_profiler/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index 98738890ad8..69aaaeb61c8 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -59,7 +59,7 @@ select cnt from ( arrayStringConcat( arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace -from system.trace_log where trace_type = 'Real' and trace ilike '%KeeperTCPHandler%' group by trace); +from system.trace_log where trace_type = 'Real' and (trace ilike '%KeeperTCPHandler%' or trace ilike '%KeeperDispatcher%') group by trace); """ ) From 446e28d51b124d6652b5502ba3728668a2f8dde9 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 18 Jun 2024 09:29:52 +0000 Subject: [PATCH 128/139] fix test --- tests/integration/test_checking_s3_blobs_paranoid/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_checking_s3_blobs_paranoid/test.py b/tests/integration/test_checking_s3_blobs_paranoid/test.py index 476f7c61b28..2471c93458b 100644 --- a/tests/integration/test_checking_s3_blobs_paranoid/test.py +++ b/tests/integration/test_checking_s3_blobs_paranoid/test.py @@ -300,7 +300,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): LIMIT 1000000 SETTINGS s3_max_single_part_upload_size=100, - s3_min_upload_part_size=1000000, + s3_min_upload_part_size=100000, s3_check_objects_after_upload=0 """, query_id=insert_query_id, @@ -311,7 +311,7 @@ def test_when_s3_broken_pipe_at_upload_is_retried(cluster, broken_s3): ) assert create_multipart == 1 - assert upload_parts == 7 + assert upload_parts == 69 assert s3_errors == 3 broken_s3.setup_at_part_upload( From 121f45c8dcc19c1971c47b1f1ad54bd16b119776 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 18 Jun 2024 10:19:52 +0200 Subject: [PATCH 129/139] Fix alignment of Distinct combinator --- .../Combinators/AggregateFunctionDistinct.h | 5 +++++ .../03173_distinct_combinator_alignment.reference | 0 .../0_stateless/03173_distinct_combinator_alignment.sql | 1 + 3 files changed, 6 insertions(+)
create mode 100644 tests/queries/0_stateless/03173_distinct_combinator_alignment.reference create mode 100644 tests/queries/0_stateless/03173_distinct_combinator_alignment.sql diff --git a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h index 4338dcff5c0..f532858b3d8 100644 --- a/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/Combinators/AggregateFunctionDistinct.h @@ -228,6 +228,11 @@ public: return prefix_size + nested_func->sizeOfData(); } + size_t alignOfData() const override + { + return std::max(alignof(Data), nested_func->alignOfData()); + } + void create(AggregateDataPtr __restrict place) const override { new (place) Data; diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference b/tests/queries/0_stateless/03173_distinct_combinator_alignment.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql new file mode 100644 index 00000000000..4a066be5086 --- /dev/null +++ b/tests/queries/0_stateless/03173_distinct_combinator_alignment.sql @@ -0,0 +1 @@ +SELECT toTypeName(topKDistinctState(toNullable(10))(toString(number)) IGNORE NULLS) FROM numbers(100) GROUP BY tuple((map((materialize(toNullable(1)), 2), 4, (3, 4), 5), 3)), map((1, 2), 4, (3, 4), toNullable(5)) WITH CUBE WITH TOTALS FORMAT Null From ed38f370c327698cf660b17d8c09369970b1eee7 Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 18 Jun 2024 13:17:42 +0200 Subject: [PATCH 130/139] CI: Add Non-blocking (Woolen wolfdog) CI mode --- .github/PULL_REQUEST_TEMPLATE.md | 2 +- .github/workflows/pull_request.yml | 3 ++- tests/ci/ci.py | 10 +++++++--- tests/ci/ci_config.py | 4 +++- tests/ci/ci_definitions.py | 1 + tests/ci/ci_settings.py | 4 ++++ tests/ci/test_ci_config.py | 31 ++++++++++++++++++++++++++++++ tests/ci/test_ci_options.py | 3 +++ 8 files changed, 52 insertions(+), 6 deletions(-) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 74a6f95dbb3..d9f9e9d6c8b 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -49,7 +49,6 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Allow: Integration Tests - [ ] Allow: Performance tests - [ ] Allow: All Builds -- [ ] Allow: All NOT Required Checks - [ ] Allow: batch 1, 2 for multi-batch jobs - [ ] Allow: batch 3, 4, 5, 6 for multi-batch jobs --- @@ -60,6 +59,7 @@ At a minimum, the following information should be added (but add more as needed) - [ ] Exclude: All with aarch64, release, debug --- - [ ] Do not test +- [ ] Woolen Wolfdog - [ ] Upload binaries for special builds - [ ] Disable merge-commit - [ ] Disable CI cache diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 70b71da8fa5..b19d246e1d0 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -126,8 +126,9 @@ jobs: with: stage: Builds_2 data: ${{ needs.RunConfig.outputs.data }} + # stage for running non-required checks without being blocked by required checks (Test_1) if the corresponding setting is selected Tests_2: - needs: [RunConfig, Builds_2] + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }} uses: ./.github/workflows/reusable_test_stage.yml
with: diff --git a/tests/ci/ci.py b/tests/ci/ci.py index 135a4c91c56..4e34e6b6135 100644 --- a/tests/ci/ci.py +++ b/tests/ci/ci.py @@ -462,7 +462,9 @@ def _configure_jobs( return ci_cache -def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, Any]]: +def _generate_ci_stage_config( + jobs_data: Dict[str, Any], non_blocking_mode: bool = False +) -> Dict[str, Dict[str, Any]]: """ populates GH Actions' workflow with real jobs "Builds_1": [{"job_name": NAME, "runner_type": RUNNER_TYPE}] @@ -472,7 +474,7 @@ def _generate_ci_stage_config(jobs_data: Dict[str, Any]) -> Dict[str, Dict[str, result = {} # type: Dict[str, Any] stages_to_do = [] for job in jobs_data: - stage_type = CI.get_job_ci_stage(job) + stage_type = CI.get_job_ci_stage(job, non_blocking_ci=non_blocking_mode) if stage_type == CI.WorkflowStages.NA: continue if stage_type not in result: @@ -1007,7 +1009,9 @@ def main() -> int: result["docs"] = ci_cache.job_digests[CI.JobNames.DOCS_CHECK] result["ci_settings"] = ci_settings.as_dict() if not args.skip_jobs: - result["stages_data"] = _generate_ci_stage_config(ci_cache.jobs_to_do) + result["stages_data"] = _generate_ci_stage_config( + ci_cache.jobs_to_do, ci_settings.woolen_wolfdog + ) result["jobs_data"] = { "jobs_to_do": list(ci_cache.jobs_to_do), "jobs_to_skip": ci_cache.jobs_to_skip, diff --git a/tests/ci/ci_config.py b/tests/ci/ci_config.py index 60c6a60af1a..bef43083a35 100644 --- a/tests/ci/ci_config.py +++ b/tests/ci/ci_config.py @@ -545,7 +545,7 @@ class CI: return None @classmethod - def get_job_ci_stage(cls, job_name: str) -> str: + def get_job_ci_stage(cls, job_name: str, non_blocking_ci: bool = False) -> str: if job_name in [ JobNames.STYLE_CHECK, JobNames.FAST_TEST, @@ -572,6 +572,8 @@ class CI: else: stage_type = WorkflowStages.TESTS_3 assert stage_type, f"BUG [{job_name}]" + if non_blocking_ci and stage_type == WorkflowStages.TESTS_3: + stage_type = WorkflowStages.TESTS_2 return stage_type @classmethod diff --git a/tests/ci/ci_definitions.py b/tests/ci/ci_definitions.py index fdd5dc7a671..94555158811 100644 --- a/tests/ci/ci_definitions.py +++ b/tests/ci/ci_definitions.py @@ -46,6 +46,7 @@ class Tags(metaclass=WithIter): """ DO_NOT_TEST_LABEL = "do_not_test" + WOOLEN_WOLFDOG_LABEL = "woolen_wolfdog" NO_MERGE_COMMIT = "no_merge_commit" NO_CI_CACHE = "no_ci_cache" # to upload all binaries from build jobs diff --git a/tests/ci/ci_settings.py b/tests/ci/ci_settings.py index a36fcf953ae..7b2dd12c310 100644 --- a/tests/ci/ci_settings.py +++ b/tests/ci/ci_settings.py @@ -29,6 +29,7 @@ class CiSettings: no_ci_cache: bool = False upload_all: bool = False no_merge_commit: bool = False + woolen_wolfdog: bool = False def as_dict(self) -> Dict[str, Any]: return asdict(self) @@ -108,6 +109,9 @@ class CiSettings: elif match == CI.Tags.NO_MERGE_COMMIT: res.no_merge_commit = True print("NOTE: Merge Commit will be disabled") + elif match == CI.Tags.WOOLEN_WOLFDOG_LABEL: + res.woolen_wolfdog = True + print("NOTE: Woolen Wolfdog mode enabled") elif match.startswith("batch_"): batches = [] try: diff --git a/tests/ci/test_ci_config.py b/tests/ci/test_ci_config.py index 7a51a65b5d5..47247b91858 100644 --- a/tests/ci/test_ci_config.py +++ b/tests/ci/test_ci_config.py @@ -201,6 +201,37 @@ class TestCIConfig(unittest.TestCase): msg=f"Stage for [{job}] is not correct", ) + def test_job_stage_config_non_blocking(self): + """ + check runner is provided w/o exception + """ + # check stages + for job in CI.JobNames: + if job in CI.BuildNames: + self.assertTrue( + 
CI.get_job_ci_stage(job) + in (CI.WorkflowStages.BUILDS_1, CI.WorkflowStages.BUILDS_2) + ) + else: + if job in ( + CI.JobNames.STYLE_CHECK, + CI.JobNames.FAST_TEST, + CI.JobNames.JEPSEN_SERVER, + CI.JobNames.JEPSEN_KEEPER, + CI.JobNames.BUILD_CHECK, + ): + self.assertEqual( + CI.get_job_ci_stage(job), + CI.WorkflowStages.NA, + msg=f"Stage for [{job}] is not correct", + ) + else: + self.assertTrue( + CI.get_job_ci_stage(job, non_blocking_ci=True) + in (CI.WorkflowStages.TESTS_1, CI.WorkflowStages.TESTS_2), + msg=f"Stage for [{job}] is not correct", + ) + def test_build_jobs_configs(self): """ check build jobs have non-None build_config attribute diff --git a/tests/ci/test_ci_options.py b/tests/ci/test_ci_options.py index fc21c7dda4e..ee256f73abc 100644 --- a/tests/ci/test_ci_options.py +++ b/tests/ci/test_ci_options.py @@ -19,6 +19,7 @@ _TEST_BODY_1 = """ #### CI options: - [ ] do not test (only style check) +- [x] Woolen Wolfdog CI - [x] disable merge-commit (no merge from master before tests) - [ ] disable CI cache (job reuse) @@ -148,6 +149,7 @@ class TestCIOptions(unittest.TestCase): self.assertFalse(ci_options.do_not_test) self.assertFalse(ci_options.no_ci_cache) self.assertTrue(ci_options.no_merge_commit) + self.assertTrue(ci_options.woolen_wolfdog) self.assertEqual(ci_options.ci_sets, ["ci_set_non_required"]) self.assertCountEqual(ci_options.include_keywords, ["foo", "foo_bar"]) self.assertCountEqual(ci_options.exclude_keywords, ["foo", "foo_bar"]) @@ -157,6 +159,7 @@ class TestCIOptions(unittest.TestCase): ci_options = CiSettings.create_from_pr_message( _TEST_BODY_2, update_from_api=False ) + self.assertFalse(ci_options.woolen_wolfdog) self.assertCountEqual( ci_options.include_keywords, ["integration", "foo_bar", "stateless", "azure"], From ec855651f3dff3ac370d4cafcb917b9b4dbc2c84 Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Tue, 18 Jun 2024 13:54:45 +0000 Subject: [PATCH 131/139] Fix compatibility release check --- tests/ci/compatibility_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index e7fee827320..bb0c717160e 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -196,7 +196,7 @@ def main(): # See https://sourceware.org/glibc/wiki/Glibc%20Timeline max_glibc_version = "" - if "amd64" in check_name: + if "amd64" in check_name or "release" in check_name: max_glibc_version = "2.4" elif "aarch64" in check_name: max_glibc_version = "2.18" # because of build with newer sysroot? 
From ece3efe09cc584f3ac0320e83e3829684153c1d9 Mon Sep 17 00:00:00 2001 From: Kseniia Sumarokova <54203879+kssenii@users.noreply.github.com> Date: Tue, 18 Jun 2024 16:33:59 +0200 Subject: [PATCH 132/139] Update test.py --- tests/integration/test_keeper_profiler/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_profiler/test.py b/tests/integration/test_keeper_profiler/test.py index 69aaaeb61c8..848929df086 100644 --- a/tests/integration/test_keeper_profiler/test.py +++ b/tests/integration/test_keeper_profiler/test.py @@ -59,7 +59,7 @@ select cnt from ( arrayStringConcat( arrayMap(x, y -> concat(x, ': ', y), arrayMap(x -> addressToLine(x), trace), arrayMap(x -> demangle(addressToSymbol(x)), trace)), '\n') as trace -from system.trace_log where trace_type = 'Real' and (trace ilike '%KeeperTCPHandler%' or trace ilike '%KeeperDispatcher%') group by trace); +from system.trace_log where trace_type = 'Real' and (trace ilike '%KeeperTCPHandler%' or trace ilike '%KeeperDispatcher%') group by trace order by cnt desc) limit 1; """ ) From ca0da7a481325157b1436d1a997078db7ab2fa77 Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Wed, 29 May 2024 16:09:03 +0000 Subject: [PATCH 133/139] Add query privileges information to the query log. Introduced two additional columns in the system.query_log: used_privileges and missing_privileges. Used_privileges is populated with the privileges that were checked during query execution, and missing_privileges contains required privileges that are missing. --- src/Access/CachedAccessChecking.cpp | 4 +- src/Access/CachedAccessChecking.h | 7 +- src/Access/ContextAccess.cpp | 168 +++++++------ src/Access/ContextAccess.h | 228 +++++++++++++----- .../Access/InterpreterGrantQuery.cpp | 8 +- src/Interpreters/Context.cpp | 19 +- src/Interpreters/Context.h | 29 ++- src/Interpreters/QueryLog.cpp | 7 + src/Interpreters/QueryLog.h | 2 + src/Interpreters/Session.cpp | 2 +- src/Interpreters/executeQuery.cpp | 15 ++ src/Storages/System/StorageSystemColumns.cpp | 2 +- ...8_query_log_privileges_not_empty.reference | 5 + .../03168_query_log_privileges_not_empty.sh | 32 +++ 14 files changed, 377 insertions(+), 151 deletions(-) create mode 100644 tests/queries/0_stateless/03168_query_log_privileges_not_empty.reference create mode 100755 tests/queries/0_stateless/03168_query_log_privileges_not_empty.sh diff --git a/src/Access/CachedAccessChecking.cpp b/src/Access/CachedAccessChecking.cpp index aa8ef6073d3..0d629e7b77a 100644 --- a/src/Access/CachedAccessChecking.cpp +++ b/src/Access/CachedAccessChecking.cpp @@ -4,12 +4,12 @@ namespace DB { -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_) : CachedAccessChecking(access_, AccessRightsElement{access_flags_}) { } -CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) +CachedAccessChecking::CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_) : access(access_), element(element_) { } diff --git a/src/Access/CachedAccessChecking.h b/src/Access/CachedAccessChecking.h index e87c28dd823..aaeea6ceddc 100644 --- a/src/Access/CachedAccessChecking.h +++ b/src/Access/CachedAccessChecking.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include @@ -13,14 +14,14 @@ class ContextAccess; class CachedAccessChecking { public: - CachedAccessChecking(const
std::shared_ptr & access_, AccessFlags access_flags_); - CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); + CachedAccessChecking(const std::shared_ptr & access_, AccessFlags access_flags_); + CachedAccessChecking(const std::shared_ptr & access_, const AccessRightsElement & element_); ~CachedAccessChecking(); bool checkAccess(bool throw_if_denied = true); private: - const std::shared_ptr access; + const std::shared_ptr access; const AccessRightsElement element; bool checked = false; bool result = false; diff --git a/src/Access/ContextAccess.cpp b/src/Access/ContextAccess.cpp index 28a825de6cf..a2807ecc5ea 100644 --- a/src/Access/ContextAccess.cpp +++ b/src/Access/ContextAccess.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -271,7 +272,7 @@ namespace std::shared_ptr ContextAccess::fromContext(const ContextPtr & context) { - return context->getAccess(); + return ContextAccessWrapper::fromContext(context)->getAccess(); } @@ -560,7 +561,7 @@ std::shared_ptr ContextAccess::getAccessRightsWithImplicit() template -bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... args) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... args) const { if (user_was_dropped) { @@ -573,8 +574,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg if (params.full_access) return true; - auto access_granted = [] + auto access_granted = [&] { + if constexpr (throw_if_denied) + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), true); return true; }; @@ -583,7 +586,10 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg FmtArgs && ...fmt_args [[maybe_unused]]) { if constexpr (throw_if_denied) + { + context->addQueryPrivilegesInfo(AccessRightsElement{flags, args...}.toStringWithoutOptions(), false); throw Exception(error_code, std::move(fmt_string), getUserName(), std::forward(fmt_args)...); + } return false; }; @@ -686,102 +692,102 @@ bool ContextAccess::checkAccessImplHelper(AccessFlags flags, const Args &... arg } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const { - return checkAccessImplHelper(flags); + return checkAccessImplHelper(context, flags); } template -bool ContextAccess::checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... args) const { - return checkAccessImplHelper(flags, database.empty() ? params.current_database : database, args...); + return checkAccessImplHelper(context, flags, database.empty() ? 
params.current_database : database, args...); } template -bool ContextAccess::checkAccessImplHelper(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const { assert(!element.grant_option || grant_option); if (element.isGlobalWithParameter()) { if (element.any_parameter) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else - return checkAccessImpl(element.access_flags, element.parameter); + return checkAccessImpl(context, element.access_flags, element.parameter); } else if (element.any_database) - return checkAccessImpl(element.access_flags); + return checkAccessImpl(context, element.access_flags); else if (element.any_table) - return checkAccessImpl(element.access_flags, element.database); + return checkAccessImpl(context, element.access_flags, element.database); else if (element.any_column) - return checkAccessImpl(element.access_flags, element.database, element.table); + return checkAccessImpl(context, element.access_flags, element.database, element.table); else - return checkAccessImpl(element.access_flags, element.database, element.table, element.columns); + return checkAccessImpl(context, element.access_flags, element.database, element.table, element.columns); } template -bool ContextAccess::checkAccessImpl(const AccessRightsElement & element) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const { if constexpr (grant_option) { - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } else { if (element.grant_option) - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); else - return checkAccessImplHelper(element); + return checkAccessImplHelper(context, element); } } template -bool ContextAccess::checkAccessImpl(const AccessRightsElements & elements) const +bool ContextAccess::checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const { for (const auto & element : elements) - if (!checkAccessImpl(element)) + if (!checkAccessImpl(context, element)) return false; return true; } -bool ContextAccess::isGranted(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::isGranted(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::isGranted(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags) 
const { return checkAccessImpl(context, flags); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::isGranted(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags) const { return checkAccessImpl(flags); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(flags, database); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(flags, database, table); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(flags, database, table, column); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(flags, database, table, columns); } -bool ContextAccess::hasGrantOption(const AccessRightsElement & element) const { return checkAccessImpl(element); } -bool ContextAccess::hasGrantOption(const AccessRightsElements & elements) const { return checkAccessImpl(elements); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const { return checkAccessImpl(context, flags); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { return checkAccessImpl(context, flags, database); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { return checkAccessImpl(context, flags, database, table); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return checkAccessImpl(context, flags, database, table, column); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, 
const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return checkAccessImpl(context, flags, database, table, columns); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { return checkAccessImpl(context, element); } +bool ContextAccess::hasGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { return checkAccessImpl(context, elements); } -void ContextAccess::checkAccess(const AccessFlags & flags) const { checkAccessImpl(flags); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database) const { checkAccessImpl(flags, database); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(flags, database, table); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(flags, database, table, column); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(flags, database, table, columns); } -void ContextAccess::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); } -void ContextAccess::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl(context, flags); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { checkAccessImpl(context, flags, database); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl(context, flags, database, table); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl(context, flags, database, table, column); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl(context, flags, database, table, columns); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl(context, element); } +void ContextAccess::checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl(context, elements); } -void ContextAccess::checkGrantOption(const AccessFlags & flags) const { checkAccessImpl(flags); } -void 
-void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database) const { checkAccessImpl<true, true>(flags, database); }
-void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl<true, true>(flags, database, table); }
-void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl<true, true>(flags, database, table, column); }
-void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { checkAccessImpl<true, true>(flags, database, table, columns); }
-void ContextAccess::checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl<true, true>(flags, database, table, columns); }
-void ContextAccess::checkGrantOption(const AccessRightsElement & element) const { checkAccessImpl<true, true>(element); }
-void ContextAccess::checkGrantOption(const AccessRightsElements & elements) const { checkAccessImpl<true, true>(elements); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const { checkAccessImpl<true, true>(context, flags); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const { checkAccessImpl<true, true>(context, flags, database); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const { checkAccessImpl<true, true>(context, flags, database, table); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { checkAccessImpl<true, true>(context, flags, database, table, column); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { checkAccessImpl<true, true>(context, flags, database, table, columns); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { checkAccessImpl<true, true>(context, flags, database, table, columns); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const { checkAccessImpl<true, true>(context, element); }
+void ContextAccess::checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const { checkAccessImpl<true, true>(context, elements); }

 template <bool throw_if_denied, typename Container, typename GetNameFunction>
-bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const
+bool ContextAccess::checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const
 {
     auto show_error = []<typename... FmtArgs>(int error_code [[maybe_unused]],
                                               FormatStringHelper<FmtArgs...> fmt_string [[maybe_unused]],
@@ -804,7 +810,7 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
     if (!std::size(role_ids))
         return true;

-    if (isGranted(AccessType::ROLE_ADMIN))
+    if (isGranted(context, AccessType::ROLE_ADMIN))
         return true;

     auto info = getRolesInfo();
@@ -840,54 +846,54 @@ bool ContextAccess::checkAdminOptionImplHelper(const Container & role_ids, const
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const UUID & role_id) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, to_array(role_id), [this](const UUID & id, size_t) { return access_control->tryReadName(id); });
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const String & role_name) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional<String>{role_name}; });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, to_array(role_id), [&role_name](const UUID &, size_t) { return std::optional<String>{role_name}; });
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional<String>{}; });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, to_array(role_id), [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional<String>{}; });
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const std::vector<UUID> & role_ids) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, role_ids, [this](const UUID & id, size_t) { return access_control->tryReadName(id); });
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional<String>{names_of_roles[i]}; });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, role_ids, [&names_of_roles](const UUID &, size_t i) { return std::optional<String>{names_of_roles[i]}; });
 }

 template <bool throw_if_denied>
-bool ContextAccess::checkAdminOptionImpl(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const
+bool ContextAccess::checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const
 {
-    return checkAdminOptionImplHelper<throw_if_denied>(role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional<String>{}; });
+    return checkAdminOptionImplHelper<throw_if_denied>(context, role_ids, [&names_of_roles](const UUID & id, size_t) { auto it = names_of_roles.find(id); return (it != names_of_roles.end()) ? it->second : std::optional<String>{}; });
 }

-bool ContextAccess::hasAdminOption(const UUID & role_id) const { return checkAdminOptionImpl<false>(role_id); }
-bool ContextAccess::hasAdminOption(const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl<false>(role_id, role_name); }
-bool ContextAccess::hasAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { return checkAdminOptionImpl<false>(role_id, names_of_roles); }
-bool ContextAccess::hasAdminOption(const std::vector<UUID> & role_ids) const { return checkAdminOptionImpl<false>(role_ids); }
-bool ContextAccess::hasAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl<false>(role_ids, names_of_roles); }
-bool ContextAccess::hasAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { return checkAdminOptionImpl<false>(role_ids, names_of_roles); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id) const { return checkAdminOptionImpl<false>(context, role_id); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { return checkAdminOptionImpl<false>(context, role_id, role_name); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { return checkAdminOptionImpl<false>(context, role_id, names_of_roles); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids) const { return checkAdminOptionImpl<false>(context, role_ids); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { return checkAdminOptionImpl<false>(context, role_ids, names_of_roles); }
+bool ContextAccess::hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { return checkAdminOptionImpl<false>(context, role_ids, names_of_roles); }

-void ContextAccess::checkAdminOption(const UUID & role_id) const { checkAdminOptionImpl<true>(role_id); }
-void ContextAccess::checkAdminOption(const UUID & role_id, const String & role_name) const { checkAdminOptionImpl<true>(role_id, role_name); }
-void ContextAccess::checkAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { checkAdminOptionImpl<true>(role_id, names_of_roles); }
-void ContextAccess::checkAdminOption(const std::vector<UUID> & role_ids) const { checkAdminOptionImpl<true>(role_ids); }
-void ContextAccess::checkAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl<true>(role_ids, names_of_roles); }
-void ContextAccess::checkAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { checkAdminOptionImpl<true>(role_ids, names_of_roles); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id) const { checkAdminOptionImpl<true>(context, role_id); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const { checkAdminOptionImpl<true>(context, role_id, role_name); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { checkAdminOptionImpl<true>(context, role_id, names_of_roles); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids) const { checkAdminOptionImpl<true>(context, role_ids); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { checkAdminOptionImpl<true>(context, role_ids, names_of_roles); }
+void ContextAccess::checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { checkAdminOptionImpl<true>(context, role_ids, names_of_roles); }

 void ContextAccess::checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const
@@ -919,4 +925,10 @@ void ContextAccess::checkGranteesAreAllowed(const std::vector<UUID> & grantee_ids) const
     }
 }

+std::shared_ptr<const ContextAccessWrapper> ContextAccessWrapper::fromContext(const ContextPtr & context)
+{
+    return context->getAccess();
+}
+
+
 }
diff --git a/src/Access/ContextAccess.h b/src/Access/ContextAccess.h
index 237c423d261..465932af1d3 100644
--- a/src/Access/ContextAccess.h
+++ b/src/Access/ContextAccess.h
@@ -4,9 +4,12 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -71,59 +74,59 @@ public:

     /// Checks if a specified access is granted, and throws an exception if not.
     /// Empty database means the current database.
-    void checkAccess(const AccessFlags & flags) const;
-    void checkAccess(const AccessFlags & flags, std::string_view database) const;
-    void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const;
-    void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
-    void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
-    void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
-    void checkAccess(const AccessRightsElement & element) const;
-    void checkAccess(const AccessRightsElements & elements) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
+    void checkAccess(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
+    void checkAccess(const ContextPtr & context, const AccessRightsElement & element) const;
+    void checkAccess(const ContextPtr & context, const AccessRightsElements & elements) const;

-    void checkGrantOption(const AccessFlags & flags) const;
-    void checkGrantOption(const AccessFlags & flags, std::string_view database) const;
-    void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const;
-    void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
-    void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
-    void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
-    void checkGrantOption(const AccessRightsElement & element) const;
-    void checkGrantOption(const AccessRightsElements & elements) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
+    void checkGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
+    void checkGrantOption(const ContextPtr & context, const AccessRightsElement & element) const;
+    void checkGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const;

     /// Checks if a specified access is granted, and returns false if not.
     /// Empty database means the current database.
-    bool isGranted(const AccessFlags & flags) const;
-    bool isGranted(const AccessFlags & flags, std::string_view database) const;
-    bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const;
-    bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
-    bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
-    bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
-    bool isGranted(const AccessRightsElement & element) const;
-    bool isGranted(const AccessRightsElements & elements) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
+    bool isGranted(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
+    bool isGranted(const ContextPtr & context, const AccessRightsElement & element) const;
+    bool isGranted(const ContextPtr & context, const AccessRightsElements & elements) const;

-    bool hasGrantOption(const AccessFlags & flags) const;
-    bool hasGrantOption(const AccessFlags & flags, std::string_view database) const;
-    bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const;
-    bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
-    bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
-    bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
-    bool hasGrantOption(const AccessRightsElement & element) const;
-    bool hasGrantOption(const AccessRightsElements & elements) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessRightsElement & element) const;
+    bool hasGrantOption(const ContextPtr & context, const AccessRightsElements & elements) const;

     /// Checks if a specified role is granted with admin option, and throws an exception if not.
-    void checkAdminOption(const UUID & role_id) const;
-    void checkAdminOption(const UUID & role_id, const String & role_name) const;
-    void checkAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;
-    void checkAdminOption(const std::vector<UUID> & role_ids) const;
-    void checkAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;
-    void checkAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;
+    void checkAdminOption(const ContextPtr & context, const UUID & role_id) const;
+    void checkAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const;
+    void checkAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;
+    void checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids) const;
+    void checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;
+    void checkAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;

     /// Checks if a specified role is granted with admin option, and returns false if not.
-    bool hasAdminOption(const UUID & role_id) const;
-    bool hasAdminOption(const UUID & role_id, const String & role_name) const;
-    bool hasAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;
-    bool hasAdminOption(const std::vector<UUID> & role_ids) const;
-    bool hasAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;
-    bool hasAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;
+    bool hasAdminOption(const ContextPtr & context, const UUID & role_id) const;
+    bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const String & role_name) const;
+    bool hasAdminOption(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;
+    bool hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids) const;
+    bool hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;
+    bool hasAdminOption(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;

     /// Checks if a grantee is allowed for the current user, throws an exception if not.
     void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const;
@@ -142,43 +145,43 @@ private:
     void calculateAccessRights() const TSA_REQUIRES(mutex);

     template <bool throw_if_denied, bool grant_option>
-    bool checkAccessImpl(const AccessFlags & flags) const;
+    bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags) const;

     template <bool throw_if_denied, bool grant_option, typename... Args>
-    bool checkAccessImpl(const AccessFlags & flags, std::string_view database, const Args &... args) const;
+    bool checkAccessImpl(const ContextPtr & context, const AccessFlags & flags, std::string_view database, const Args &... args) const;

     template <bool throw_if_denied, bool grant_option>
-    bool checkAccessImpl(const AccessRightsElement & element) const;
+    bool checkAccessImpl(const ContextPtr & context, const AccessRightsElement & element) const;

     template <bool throw_if_denied, bool grant_option>
-    bool checkAccessImpl(const AccessRightsElements & elements) const;
+    bool checkAccessImpl(const ContextPtr & context, const AccessRightsElements & elements) const;

     template <bool throw_if_denied, bool grant_option, typename... Args>
-    bool checkAccessImplHelper(AccessFlags flags, const Args &... args) const;
+    bool checkAccessImplHelper(const ContextPtr & context, AccessFlags flags, const Args &... args) const;

     template <bool throw_if_denied, bool grant_option>
-    bool checkAccessImplHelper(const AccessRightsElement & element) const;
+    bool checkAccessImplHelper(const ContextPtr & context, const AccessRightsElement & element) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const UUID & role_id) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const UUID & role_id, const String & role_name) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const String & role_name) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const std::vector<UUID> & role_ids) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids, const Strings & names_of_roles) const;

     template <bool throw_if_denied>
-    bool checkAdminOptionImpl(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;
+    bool checkAdminOptionImpl(const ContextPtr & context, const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const;

     template <bool throw_if_denied, typename Container, typename GetNameFunction>
-    bool checkAdminOptionImplHelper(const Container & role_ids, const GetNameFunction & get_name_function) const;
+    bool checkAdminOptionImplHelper(const ContextPtr & context, const Container & role_ids, const GetNameFunction & get_name_function) const;

     const AccessControl * access_control = nullptr;
     const Params params;
@@ -203,4 +206,115 @@ private:
     mutable std::shared_ptr<const SettingsProfilesInfo> enabled_settings TSA_GUARDED_BY(mutex);
 };

+/// This wrapper was added to be able to pass the current context to the access
+/// without the need to change the signature and all calls to the ContextAccess itself.
+/// Right now a context is used to store privileges that are checked for a query,
+/// and might be useful for something else in the future as well.
+class ContextAccessWrapper : public std::enable_shared_from_this<ContextAccessWrapper>
+{
+public:
+    using ContextAccessPtr = std::shared_ptr<const ContextAccess>;
+
+    ContextAccessWrapper(const ContextAccessPtr & access_, const ContextPtr & context_): access(access_), context(context_) {}
+    ~ContextAccessWrapper() = default;
+
+    static std::shared_ptr<const ContextAccessWrapper> fromContext(const ContextPtr & context);
+
+    const ContextAccess::Params & getParams() const { return access->getParams(); }
+
+    const ContextAccessPtr & getAccess() const { return access; }
+
+    /// Returns the current user. Throws if user is nullptr.
+    ALWAYS_INLINE UserPtr getUser() const { return access->getUser(); }
+    /// Same as above, but can return nullptr.
+    ALWAYS_INLINE UserPtr tryGetUser() const { return access->tryGetUser(); }
+    ALWAYS_INLINE String getUserName() const { return access->getUserName(); }
+    ALWAYS_INLINE std::optional<UUID> getUserID() const { return access->getUserID(); }
+
+    /// Returns information about current and enabled roles.
+    ALWAYS_INLINE std::shared_ptr<const EnabledRolesInfo> getRolesInfo() const { return access->getRolesInfo(); }
+
+    /// Returns the row policy filter for a specified table.
+    /// The function returns nullptr if there is no filter to apply.
+    ALWAYS_INLINE RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const { return access->getRowPolicyFilter(database, table_name, filter_type); }
+
+    /// Returns the quota to track resource consumption.
+    ALWAYS_INLINE std::shared_ptr<const EnabledQuota> getQuota() const { return access->getQuota(); }
+    ALWAYS_INLINE std::optional<QuotaUsage> getQuotaUsage() const { return access->getQuotaUsage(); }
+
+    /// Returns the default settings, i.e. the settings which should be applied on user's login.
+    ALWAYS_INLINE SettingsChanges getDefaultSettings() const { return access->getDefaultSettings(); }
+    ALWAYS_INLINE std::shared_ptr<const SettingsProfilesInfo> getDefaultProfileInfo() const { return access->getDefaultProfileInfo(); }
+
+    /// Returns the current access rights.
+    ALWAYS_INLINE std::shared_ptr<const AccessRights> getAccessRights() const { return access->getAccessRights(); }
+    ALWAYS_INLINE std::shared_ptr<const AccessRights> getAccessRightsWithImplicit() const { return access->getAccessRightsWithImplicit(); }
+
+    /// Checks if a specified access is granted, and throws an exception if not.
+    /// Empty database means the current database.
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags) const { access->checkAccess(context, flags); }
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database) const { access->checkAccess(context, flags, database); }
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkAccess(context, flags, database, table); }
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { access->checkAccess(context, flags, database, table, column); }
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { access->checkAccess(context, flags, database, table, columns); }
+    ALWAYS_INLINE void checkAccess(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkAccess(context, flags, database, table, columns); }
+    ALWAYS_INLINE void checkAccess(const AccessRightsElement & element) const { access->checkAccess(context, element); }
+    ALWAYS_INLINE void checkAccess(const AccessRightsElements & elements) const { access->checkAccess(context, elements); }
+
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags) const { access->checkGrantOption(context, flags); }
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database) const { access->checkGrantOption(context, flags, database); }
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { access->checkGrantOption(context, flags, database, table); }
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { access->checkGrantOption(context, flags, database, table, column); }
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { access->checkGrantOption(context, flags, database, table, columns); }
+    ALWAYS_INLINE void checkGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { access->checkGrantOption(context, flags, database, table, columns); }
+    ALWAYS_INLINE void checkGrantOption(const AccessRightsElement & element) const { access->checkGrantOption(context, element); }
+    ALWAYS_INLINE void checkGrantOption(const AccessRightsElements & elements) const { access->checkGrantOption(context, elements); }
+
+    /// Checks if a specified access is granted, and returns false if not.
+    /// Empty database means the current database.
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags) const { return access->isGranted(context, flags); }
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database) const { return access->isGranted(context, flags, database); }
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->isGranted(context, flags, database, table); }
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->isGranted(context, flags, database, table, column); }
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { return access->isGranted(context, flags, database, table, columns); }
+    ALWAYS_INLINE bool isGranted(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->isGranted(context, flags, database, table, columns); }
+    ALWAYS_INLINE bool isGranted(const AccessRightsElement & element) const { return access->isGranted(context, element); }
+    ALWAYS_INLINE bool isGranted(const AccessRightsElements & elements) const { return access->isGranted(context, elements); }
+
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags) const { return access->hasGrantOption(context, flags); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database) const { return access->hasGrantOption(context, flags, database); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table) const { return access->hasGrantOption(context, flags, database, table); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, std::string_view column) const { return access->hasGrantOption(context, flags, database, table, column); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const std::vector<std::string_view> & columns) const { return access->hasGrantOption(context, flags, database, table, columns); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessFlags & flags, std::string_view database, std::string_view table, const Strings & columns) const { return access->hasGrantOption(context, flags, database, table, columns); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessRightsElement & element) const { return access->hasGrantOption(context, element); }
+    ALWAYS_INLINE bool hasGrantOption(const AccessRightsElements & elements) const { return access->hasGrantOption(context, elements); }
+
+    /// Checks if a specified role is granted with admin option, and throws an exception if not.
+    ALWAYS_INLINE void checkAdminOption(const UUID & role_id) const { access->checkAdminOption(context, role_id); }
+    ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const String & role_name) const { access->checkAdminOption(context, role_id, role_name); }
+    ALWAYS_INLINE void checkAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { access->checkAdminOption(context, role_id, names_of_roles); }
+    ALWAYS_INLINE void checkAdminOption(const std::vector<UUID> & role_ids) const { access->checkAdminOption(context, role_ids); }
+    ALWAYS_INLINE void checkAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); }
+    ALWAYS_INLINE void checkAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { access->checkAdminOption(context, role_ids, names_of_roles); }
+
+    /// Checks if a specified role is granted with admin option, and returns false if not.
+    ALWAYS_INLINE bool hasAdminOption(const UUID & role_id) const { return access->hasAdminOption(context, role_id); }
+    ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const String & role_name) const { return access->hasAdminOption(context, role_id, role_name); }
+    ALWAYS_INLINE bool hasAdminOption(const UUID & role_id, const std::unordered_map<UUID, String> & names_of_roles) const { return access->hasAdminOption(context, role_id, names_of_roles); }
+    ALWAYS_INLINE bool hasAdminOption(const std::vector<UUID> & role_ids) const { return access->hasAdminOption(context, role_ids); }
+    ALWAYS_INLINE bool hasAdminOption(const std::vector<UUID> & role_ids, const Strings & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); }
+    ALWAYS_INLINE bool hasAdminOption(const std::vector<UUID> & role_ids, const std::unordered_map<UUID, String> & names_of_roles) const { return access->hasAdminOption(context, role_ids, names_of_roles); }
+
+    /// Checks if a grantee is allowed for the current user, throws an exception if not.
+    ALWAYS_INLINE void checkGranteeIsAllowed(const UUID & grantee_id, const IAccessEntity & grantee) const { access->checkGranteeIsAllowed(grantee_id, grantee); }
+    /// Checks if grantees are allowed for the current user, throws an exception if not.
+    ALWAYS_INLINE void checkGranteesAreAllowed(const std::vector<UUID> & grantee_ids) const { access->checkGranteesAreAllowed(grantee_ids); }
+
+private:
+    ContextAccessPtr access;
+    ContextPtr context;
+};
+
+
 }
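For orientation, a minimal sketch of how a call site is meant to use this wrapper. The function name and the concrete privilege are hypothetical (not taken from this patch); only `getAccess()` returning the wrapper and the overload shapes come from the diffs here:

    // Hypothetical caller. The wrapper keeps the ContextPtr it was created from,
    // so every check it forwards to ContextAccess also carries that context and
    // can be recorded in the query's privileges info.
    void executeSomeQuery(const ContextPtr & context)
    {
        std::shared_ptr<const ContextAccessWrapper> access = context->getAccess();

        // Throws ACCESS_DENIED when not granted; existing call sites keep their old shape.
        access->checkAccess(AccessType::SELECT, "default", "d_03168_query_log", Strings{"a", "b"});

        // Non-throwing variant of the same check.
        if (!access->isGranted(AccessType::SELECT, "default", "d_03168_query_log"))
            return;
    }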
diff --git a/src/Interpreters/Access/InterpreterGrantQuery.cpp b/src/Interpreters/Access/InterpreterGrantQuery.cpp
index 6a46ac9c330..ac3b549a576 100644
--- a/src/Interpreters/Access/InterpreterGrantQuery.cpp
+++ b/src/Interpreters/Access/InterpreterGrantQuery.cpp
@@ -118,7 +118,7 @@ namespace
     /// Checks if the current user has enough access rights granted with grant option to grant or revoke specified access rights.
     void checkGrantOption(
         const AccessControl & access_control,
-        const ContextAccess & current_user_access,
+        const ContextAccessWrapper & current_user_access,
         const std::vector<UUID> & grantees_from_query,
         bool & need_check_grantees_are_allowed,
         const AccessRightsElements & elements_to_grant,
@@ -200,7 +200,7 @@ namespace
     /// Checks if the current user has enough roles granted with admin option to grant or revoke specified roles.
     void checkAdminOption(
         const AccessControl & access_control,
-        const ContextAccess & current_user_access,
+        const ContextAccessWrapper & current_user_access,
         const std::vector<UUID> & grantees_from_query,
         bool & need_check_grantees_are_allowed,
         const std::vector<UUID> & roles_to_grant,
@@ -277,7 +277,7 @@ namespace
     /// This function is less accurate than checkAdminOption() because it cannot use any information about
     /// granted roles the grantees currently have (due to those grantees are located on multiple nodes,
     /// we just don't have the full information about them).
-    void checkAdminOptionForExecutingOnCluster(const ContextAccess & current_user_access,
+    void checkAdminOptionForExecutingOnCluster(const ContextAccessWrapper & current_user_access,
         const std::vector<UUID> roles_to_grant,
         const RolesOrUsersSet & roles_to_revoke)
     {
@@ -376,7 +376,7 @@ namespace
     /// Calculates all available rights to grant with current user intersection.
     void calculateCurrentGrantRightsWithIntersection(
         AccessRights & rights,
-        std::shared_ptr<const ContextAccess> current_user_access,
+        std::shared_ptr<const ContextAccessWrapper> current_user_access,
         const AccessRightsElements & elements_to_grant)
     {
         AccessRightsElements current_user_grantable_elements;
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 4b0ebc008ea..2807807b294 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -835,6 +835,7 @@ ContextMutablePtr Context::createGlobal(ContextSharedPart * shared_part)
     auto res = std::shared_ptr<Context>(new Context);
     res->shared = shared_part;
     res->query_access_info = std::make_shared<QueryAccessInfo>();
+    res->query_privileges_info = std::make_shared<QueryPrivilegesInfo>();
     return res;
 }
@@ -1427,7 +1428,7 @@ void Context::checkAccess(const AccessFlags & flags, const StorageID & table_id,
 void Context::checkAccess(const AccessRightsElement & element) const { checkAccessImpl(element); }
 void Context::checkAccess(const AccessRightsElements & elements) const { checkAccessImpl(elements); }

-std::shared_ptr<const ContextAccess> Context::getAccess() const
+std::shared_ptr<const ContextAccessWrapper> Context::getAccess() const
 {
     /// A helper function to collect parameters for calculating access rights, called with Context::getLocalSharedLock() acquired.
     auto get_params = [this]()
@@ -1444,14 +1445,14 @@ std::shared_ptr<const ContextAccess> Context::getAccess() const
     {
         SharedLockGuard lock(mutex);
         if (access && !need_recalculate_access)
-            return access; /// No need to recalculate access rights.
+            return std::make_shared<const ContextAccessWrapper>(access, shared_from_this()); /// No need to recalculate access rights.

         params.emplace(get_params());

         if (access && (access->getParams() == *params))
         {
             need_recalculate_access = false;
-            return access; /// No need to recalculate access rights.
+            return std::make_shared<const ContextAccessWrapper>(access, shared_from_this()); /// No need to recalculate access rights.
         }
     }
@@ -1471,7 +1472,7 @@ std::shared_ptr<const ContextAccess> Context::getAccess() const
         }
     }

-    return res;
+    return std::make_shared<const ContextAccessWrapper>(res, shared_from_this());
 }

 RowPolicyFilterPtr Context::getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const
@@ -1857,6 +1858,15 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String
     }
 }

+void Context::addQueryPrivilegesInfo(const String & privilege, bool granted) const
+{
+    std::lock_guard lock(query_privileges_info->mutex);
+    if (granted)
+        query_privileges_info->used_privileges.emplace(privilege);
+    else
+        query_privileges_info->missing_privileges.emplace(privilege);
+}
+
 static bool findIdentifier(const ASTFunction * function)
 {
     if (!function || !function->arguments)
@@ -2538,6 +2548,7 @@ void Context::makeQueryContext()
     local_read_query_throttler.reset();
     local_write_query_throttler.reset();
     backups_query_throttler.reset();
+    query_privileges_info = std::make_shared<QueryPrivilegesInfo>(*query_privileges_info);
 }

 void Context::makeQueryContextForMerge(const MergeTreeSettings & merge_tree_settings)
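The hunks above add the per-query storage and the recording entry point, but not the code that calls it; presumably the `checkAccessImplHelper` overloads invoke it once per checked privilege. A self-contained model of the mechanism, simplified for illustration (only the names mirror the diff; everything else is an assumption):

    #include <iostream>
    #include <memory>
    #include <mutex>
    #include <string>
    #include <unordered_set>

    // Mirrors the QueryPrivilegesInfo struct from Context.h: two sets guarded
    // by a mutex, copied when a query context is forked from its parent.
    struct QueryPrivilegesInfo
    {
        std::unordered_set<std::string> used_privileges;
        std::unordered_set<std::string> missing_privileges;
        mutable std::mutex mutex;
    };

    struct Context
    {
        std::shared_ptr<QueryPrivilegesInfo> query_privileges_info = std::make_shared<QueryPrivilegesInfo>();

        // Mirrors Context::addQueryPrivilegesInfo: a granted check lands in
        // used_privileges, a denied one in missing_privileges.
        void addQueryPrivilegesInfo(const std::string & privilege, bool granted) const
        {
            std::lock_guard<std::mutex> lock(query_privileges_info->mutex);
            if (granted)
                query_privileges_info->used_privileges.emplace(privilege);
            else
                query_privileges_info->missing_privileges.emplace(privilege);
        }
    };

    int main()
    {
        Context ctx;
        ctx.addQueryPrivilegesInfo("SELECT(a, b) ON default.d_03168_query_log", true);
        ctx.addQueryPrivilegesInfo("INSERT ON default.d_03168_query_log", false);
        std::cout << "used: " << ctx.query_privileges_info->used_privileges.size()
                  << ", missing: " << ctx.query_privileges_info->missing_privileges.size() << '\n';
    }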
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index f3073ccc09c..b3ade94ccdc 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -50,6 +50,7 @@ class ASTSelectQuery;

 struct ContextSharedPart;
 class ContextAccess;
+class ContextAccessWrapper;
 struct User;
 using UserPtr = std::shared_ptr<const User>;
 struct SettingsProfilesInfo;
@@ -403,9 +404,31 @@ public:
         mutable std::mutex mutex;
     };

+    struct QueryPrivilegesInfo
+    {
+        QueryPrivilegesInfo() = default;
+
+        QueryPrivilegesInfo(const QueryPrivilegesInfo & rhs)
+        {
+            std::lock_guard lock(rhs.mutex);
+            used_privileges = rhs.used_privileges;
+            missing_privileges = rhs.missing_privileges;
+        }
+
+        QueryPrivilegesInfo(QueryPrivilegesInfo && rhs) = delete;
+
+        std::unordered_set<String> used_privileges TSA_GUARDED_BY(mutex);
+        std::unordered_set<String> missing_privileges TSA_GUARDED_BY(mutex);
+
+        mutable std::mutex mutex;
+    };
+
+    using QueryPrivilegesInfoPtr = std::shared_ptr<QueryPrivilegesInfo>;
+
 protected:
     /// Needs to be changed while having const context in factories methods
     mutable QueryFactoriesInfo query_factories_info;
+    QueryPrivilegesInfoPtr query_privileges_info;

     /// Query metrics for reading data asynchronously with IAsynchronousReader.
     mutable std::shared_ptr<AsyncReadCounters> async_read_counters;
@@ -612,7 +635,7 @@ public:
     void checkAccess(const AccessRightsElement & element) const;
     void checkAccess(const AccessRightsElements & elements) const;

-    std::shared_ptr<const ContextAccess> getAccess() const;
+    std::shared_ptr<const ContextAccessWrapper> getAccess() const;

     RowPolicyFilterPtr getRowPolicyFilter(const String & database, const String & table_name, RowPolicyFilterType filter_type) const;
@@ -741,6 +764,10 @@ public:
     QueryFactoriesInfo getQueryFactoriesInfo() const;
     void addQueryFactoriesInfo(QueryLogFactories factory_type, const String & created_object) const;

+    const QueryPrivilegesInfo & getQueryPrivilegesInfo() const { return *getQueryPrivilegesInfoPtr(); }
+    QueryPrivilegesInfoPtr getQueryPrivilegesInfoPtr() const { return query_privileges_info; }
+    void addQueryPrivilegesInfo(const String & privilege, bool granted) const;
+
     /// For table functions s3/file/url/hdfs/input we can use structure from
     /// insertion table depending on select expression.
     StoragePtr executeTableFunction(const ASTPtr & table_expression, const ASTSelectQuery * select_query_hint = nullptr);
diff --git a/src/Interpreters/QueryLog.cpp b/src/Interpreters/QueryLog.cpp
index 92f8ddae141..e63a2ae31aa 100644
--- a/src/Interpreters/QueryLog.cpp
+++ b/src/Interpreters/QueryLog.cpp
@@ -136,6 +136,9 @@ ColumnsDescription QueryLogElement::getColumnsDescription()
         {"used_row_policies", array_low_cardinality_string, "The list of row policies names that were used during query execution."},

+        {"used_privileges", array_low_cardinality_string, "Privileges which were successfully checked during query execution."},
+        {"missing_privileges", array_low_cardinality_string, "Privileges that are missing during query execution."},
+
         {"transaction_id", getTransactionIDDataType(), "The identifier of the transaction in scope of which this query was executed."},

         {"query_cache_usage", std::move(query_cache_usage_datatype), "Usage of the query cache during query execution. Values: 'Unknown' = Status unknown, 'None' = The query result was neither written into nor read from the query cache, 'Write' = The query result was written into the query cache, 'Read' = The query result was read from the query cache."},
@@ -267,6 +270,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
     auto & column_storage_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
     auto & column_table_function_factory_objects = typeid_cast<ColumnArray &>(*columns[i++]);
     auto & column_row_policies_names = typeid_cast<ColumnArray &>(*columns[i++]);
+    auto & column_used_privileges = typeid_cast<ColumnArray &>(*columns[i++]);
+    auto & column_missing_privileges = typeid_cast<ColumnArray &>(*columns[i++]);

     auto fill_column = [](const auto & data, ColumnArray & column)
     {
@@ -290,6 +295,8 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
         fill_column(used_storages, column_storage_factory_objects);
         fill_column(used_table_functions, column_table_function_factory_objects);
         fill_column(used_row_policies, column_row_policies_names);
+        fill_column(used_privileges, column_used_privileges);
+        fill_column(missing_privileges, column_missing_privileges);
     }

     columns[i++]->insert(Tuple{tid.start_csn, tid.local_tid, tid.host_id});
diff --git a/src/Interpreters/QueryLog.h b/src/Interpreters/QueryLog.h
index 5072d220160..bbaa7179757 100644
--- a/src/Interpreters/QueryLog.h
+++ b/src/Interpreters/QueryLog.h
@@ -81,6 +81,8 @@ struct QueryLogElement
     std::unordered_set<String> used_storages;
     std::unordered_set<String> used_table_functions;
     std::set<String> used_row_policies;
+    std::unordered_set<String> used_privileges;
+    std::unordered_set<String> missing_privileges;

     Int32 exception_code{}; // because ErrorCodes are int
     String exception;
diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp
index 396562189e0..d73593b7f1c 100644
--- a/src/Interpreters/Session.cpp
+++ b/src/Interpreters/Session.cpp
@@ -694,7 +694,7 @@ void Session::recordLoginSucess(ContextPtr login_context) const
         session_log->addLoginSuccess(auth_id,
                                      named_session ? named_session->key.second : "",
                                      settings,
-                                     access,
+                                     access->getAccess(),
                                      getClientInfo(),
                                      user);
 }
diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp
index 9c5436517ab..9f33cbf1c27 100644
--- a/src/Interpreters/executeQuery.cpp
+++ b/src/Interpreters/executeQuery.cpp
@@ -44,6 +44,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
@@ -221,6 +222,17 @@ static void logException(ContextPtr context, QueryLogElement & elem, bool log_er
     LOG_INFO(getLogger("executeQuery"), message);
 }

+static void
+addPrivilegesInfoToQueryLogElement(QueryLogElement & element, const ContextPtr context_ptr)
+{
+    const auto & privileges_info = context_ptr->getQueryPrivilegesInfo();
+    {
+        std::lock_guard lock(privileges_info.mutex);
+        element.used_privileges = privileges_info.used_privileges;
+        element.missing_privileges = privileges_info.missing_privileges;
+    }
+}
+
 static void
 addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo & info, const ASTPtr query_ast, const ContextPtr context_ptr)
 {
@@ -286,6 +298,7 @@ addStatusInfoToQueryLogElement(QueryLogElement & element, const QueryStatusInfo
     }

     element.async_read_counters = context_ptr->getAsyncReadCounters();
+    addPrivilegesInfoToQueryLogElement(element, context_ptr);
 }

@@ -601,6 +614,8 @@ void logExceptionBeforeStart(
         elem.formatted_query = queryToString(ast);
     }

+    addPrivilegesInfoToQueryLogElement(elem, context);
+
     // We don't calculate databases, tables and columns when the query isn't able to start
     elem.exception_code = getCurrentExceptionCode();
diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp
index 49da1eba9ec..8dd8d3b6154 100644
--- a/src/Storages/System/StorageSystemColumns.cpp
+++ b/src/Storages/System/StorageSystemColumns.cpp
@@ -298,7 +298,7 @@ private:
     ClientInfo::Interface client_info_interface;
     size_t db_table_num = 0;
     size_t total_tables;
-    std::shared_ptr<const ContextAccess> access;
+    std::shared_ptr<const ContextAccessWrapper> access;
     bool need_to_check_access_for_tables;
     String query_id;
     std::chrono::milliseconds lock_acquire_timeout;
"$CUR_DIR"/../shell_config.sh + +user_name="u_03168_query_log" +table_name="default.d_03168_query_log" +test_query="select a, b from ${table_name}" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop user if exists ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create user ${user_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop table if exists ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "create table ${table_name} (a UInt64, b UInt64) order by a" + +${CLICKHOUSE_CLIENT_BINARY} --query "insert into table ${table_name} values (3168, 8613)" + +error="$(${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" 2>&1 >/dev/null)" +echo "${error}" | grep -Fc "ACCESS_DENIED" + +${CLICKHOUSE_CLIENT_BINARY} --query "grant select(a, b) on ${table_name} to ${user_name}" + +${CLICKHOUSE_CLIENT_BINARY} --user ${user_name} --query "${test_query}" + +${CLICKHOUSE_CLIENT_BINARY} --query "system flush logs" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'ExceptionBeforeStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryStart' and current_database = currentDatabase() order by event_time desc limit 1" +${CLICKHOUSE_CLIENT_BINARY} --query "select used_privileges, missing_privileges from system.query_log where query = '${test_query}' and type = 'QueryFinish' and current_database = currentDatabase() order by event_time desc limit 1" + +${CLICKHOUSE_CLIENT_BINARY} --query "drop table ${table_name}" +${CLICKHOUSE_CLIENT_BINARY} --query "drop user ${user_name}" From 13fcfd9784b824fda8dacab9dd608b2916846ca6 Mon Sep 17 00:00:00 2001 From: Alex Katsman Date: Wed, 29 May 2024 16:20:10 +0000 Subject: [PATCH 134/139] Add docs for privileges columns to the system.query_log --- docs/en/operations/system-tables/query_log.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/operations/system-tables/query_log.md b/docs/en/operations/system-tables/query_log.md index 75b855966a3..47094eec3f0 100644 --- a/docs/en/operations/system-tables/query_log.md +++ b/docs/en/operations/system-tables/query_log.md @@ -113,6 +113,8 @@ Columns: - `used_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `functions`, which were used during query execution. - `used_storages` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `storages`, which were used during query execution. - `used_table_functions` ([Array(String)](../../sql-reference/data-types/array.md)) — Canonical names of `table functions`, which were used during query execution. +- `used_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges which were successfully checked during query execution. +- `missing_privileges` ([Array(String)](../../sql-reference/data-types/array.md)) - Privileges that are missing during query execution. - `query_cache_usage` ([Enum8](../../sql-reference/data-types/enum.md)) — Usage of the [query cache](../query-cache.md) during query execution. Values: - `'Unknown'` = Status unknown. - `'None'` = The query result was neither written into nor read from the query cache. 
@@ -194,6 +196,8 @@ used_formats: [] used_functions: [] used_storages: [] used_table_functions: [] +used_privileges: [] +missing_privileges: [] query_cache_usage: None ``` From 11e844c953380bbfda3b61dcd2dfda60b809f5e2 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 18 Jun 2024 19:12:58 +0000 Subject: [PATCH 135/139] fix tidy build --- src/Common/Scheduler/ISchedulerNode.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/Common/Scheduler/ISchedulerNode.h b/src/Common/Scheduler/ISchedulerNode.h index 3f699c881fb..81b491b0eda 100644 --- a/src/Common/Scheduler/ISchedulerNode.h +++ b/src/Common/Scheduler/ISchedulerNode.h @@ -406,13 +406,19 @@ private: void processQueue(std::unique_lock && lock) { if (events.empty()) - return processActivation(std::move(lock)); + { + processActivation(std::move(lock)); + return; + } if (activations.empty()) - return processEvent(std::move(lock)); + { + processEvent(std::move(lock)); + return; + } if (activations.front().activation_event_id < events.front().event_id) - return processActivation(std::move(lock)); + processActivation(std::move(lock)); else - return processEvent(std::move(lock)); + processEvent(std::move(lock)); } void processActivation(std::unique_lock && lock) From 5014cd20fd204c1a34c1fc981553525f2d1c816b Mon Sep 17 00:00:00 2001 From: Max K Date: Tue, 18 Jun 2024 21:59:59 +0200 Subject: [PATCH 136/139] CI: Master workflow dependencies fix --- .github/workflows/master.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 88bc50a729d..384bf6825f9 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -104,10 +104,9 @@ jobs: with: stage: Tests_2 data: ${{ needs.RunConfig.outputs.data }} - # stage for jobs that do not prohibit merge Tests_3: # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there. - needs: [RunConfig, Builds_1, Builds_2] + needs: [RunConfig, Builds_1] if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }} uses: ./.github/workflows/reusable_test_stage.yml with: From 100916440bcc43df3c1b5ce2b6694ab29a5c8f3a Mon Sep 17 00:00:00 2001 From: Tim MacDonald Date: Wed, 19 Jun 2024 10:34:55 +1000 Subject: [PATCH 137/139] Typo --- docs/en/sql-reference/data-types/datetime.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md index ac9a72c2641..250e766f2b7 100644 --- a/docs/en/sql-reference/data-types/datetime.md +++ b/docs/en/sql-reference/data-types/datetime.md @@ -137,7 +137,7 @@ If the time transition (due to daylight saving time or for other reasons) was pe Non-monotonic calendar dates. For example, in Happy Valley - Goose Bay, the time was transitioned one hour backwards at 00:01:00 7 Nov 2010 (one minute after midnight). So after 6th Nov has ended, people observed a whole one minute of 7th Nov, then time was changed back to 23:01 6th Nov and after another 59 minutes the 7th Nov started again. ClickHouse does not (yet) support this kind of fun. During these days the results of time processing functions may be slightly incorrect. -Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. If you are working in antarctic station, please don't afraid to use ClickHouse. 
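A note on the tidy fix above: `return f();` where `f()` returns void is legal C++, but clang-tidy flags it (most likely the readability-avoid-return-with-void-value check), hence the mechanical rewrite into a call followed by a bare return. A standalone illustration of the before/after shapes (the helper below is hypothetical):

    #include <iostream>

    void processActivation() { std::cout << "activation\n"; }

    // Shape the patch removes: a `return` carrying a void value.
    void processQueueBefore(bool events_empty)
    {
        if (events_empty)
            return processActivation(); // flagged by clang-tidy
    }

    // Shape the patch introduces: call, then a bare `return;`.
    void processQueueAfter(bool events_empty)
    {
        if (events_empty)
        {
            processActivation();
            return;
        }
    }

    int main()
    {
        processQueueBefore(true);
        processQueueAfter(true);
    }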

From 5014cd20fd204c1a34c1fc981553525f2d1c816b Mon Sep 17 00:00:00 2001
From: Max K
Date: Tue, 18 Jun 2024 21:59:59 +0200
Subject: [PATCH 136/139] CI: Master workflow dependencies fix

---
 .github/workflows/master.yml | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 88bc50a729d..384bf6825f9 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -104,10 +104,9 @@ jobs:
     with:
       stage: Tests_2
       data: ${{ needs.RunConfig.outputs.data }}
-  # stage for jobs that do not prohibit merge
   Tests_3:
     # Test_3 should not wait for Test_1/Test_2 and should not be blocked by them on master branch since all jobs need to run there.
-    needs: [RunConfig, Builds_1, Builds_2]
+    needs: [RunConfig, Builds_1]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
     uses: ./.github/workflows/reusable_test_stage.yml
     with:

From 100916440bcc43df3c1b5ce2b6694ab29a5c8f3a Mon Sep 17 00:00:00 2001
From: Tim MacDonald
Date: Wed, 19 Jun 2024 10:34:55 +1000
Subject: [PATCH 137/139] Typo

---
 docs/en/sql-reference/data-types/datetime.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md
index ac9a72c2641..250e766f2b7 100644
--- a/docs/en/sql-reference/data-types/datetime.md
+++ b/docs/en/sql-reference/data-types/datetime.md
@@ -137,7 +137,7 @@ If the time transition (due to daylight saving time or for other reasons) was pe

 Non-monotonic calendar dates. For example, in Happy Valley - Goose Bay, the time was transitioned one hour backwards at 00:01:00 7 Nov 2010 (one minute after midnight). So after 6th Nov has ended, people observed a whole one minute of 7th Nov, then time was changed back to 23:01 6th Nov and after another 59 minutes the 7th Nov started again. ClickHouse does not (yet) support this kind of fun. During these days the results of time processing functions may be slightly incorrect.

-Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. If you are working in antarctic station, please don't afraid to use ClickHouse. Just make sure you set timezone to UTC or be aware of inaccuracies.
+Similar issue exists for Casey Antarctic station in year 2010. They changed time three hours back at 5 Mar, 02:00. If you are working in antarctic station, please don't be afraid to use ClickHouse. Just make sure you set timezone to UTC or be aware of inaccuracies.

 Time shifts for multiple days. Some pacific islands changed their timezone offset from UTC+14 to UTC-12. That's alright but some inaccuracies may present if you do calculations with their timezone for historical time points at the days of conversion.

From 619333b356a7328c5404be6ff483632219970913 Mon Sep 17 00:00:00 2001
From: allegrinisante <155471925+allegrinisante@users.noreply.github.com>
Date: Wed, 19 Jun 2024 11:44:38 +0200
Subject: [PATCH 138/139] Mode value = 'unordered' may lead to confusion

According to the documentation, the default mode was 'ordered' before
version 24.6. Starting from version 24.6, there is no default value for
mode. Using mode = 'unordered' can be confusing.

---
 docs/en/engines/table-engines/integrations/s3queue.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/en/engines/table-engines/integrations/s3queue.md b/docs/en/engines/table-engines/integrations/s3queue.md
index 0958680dc56..97ca79501a7 100644
--- a/docs/en/engines/table-engines/integrations/s3queue.md
+++ b/docs/en/engines/table-engines/integrations/s3queue.md
@@ -13,7 +13,7 @@ This engine provides integration with [Amazon S3](https://aws.amazon.com/s3/) ec
 CREATE TABLE s3_queue_engine_table (name String, value UInt32)
     ENGINE = S3Queue(path, [NOSIGN, | aws_access_key_id, aws_secret_access_key,] format, [compression])
     [SETTINGS]
-    [mode = 'unordered',]
+    [mode = '',]
     [after_processing = 'keep',]
     [keeper_path = '',]
     [s3queue_loading_retries = 0,]

From 9d79ec3979b12e03b5540968696b8fe2340e77d1 Mon Sep 17 00:00:00 2001
From: Max K
Date: Wed, 19 Jun 2024 12:21:26 +0200
Subject: [PATCH 139/139] CI: PR workflow dependencies fix

---
 .github/workflows/pull_request.yml | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml
index 70b71da8fa5..c9e7ae4450d 100644
--- a/.github/workflows/pull_request.yml
+++ b/.github/workflows/pull_request.yml
@@ -126,6 +126,8 @@ jobs:
     with:
       stage: Builds_2
      data: ${{ needs.RunConfig.outputs.data }}
+
+  # stages for jobs that do not prohibit merge
   Tests_2:
     needs: [RunConfig, Builds_2]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_2') }}
@@ -133,9 +135,8 @@ jobs:
     with:
       stage: Tests_2
       data: ${{ needs.RunConfig.outputs.data }}
-  # stage for jobs that do not prohibit merge
   Tests_3:
-    needs: [RunConfig, Builds_1, Tests_1, Builds_2, Tests_2]
+    needs: [RunConfig, Builds_1, Tests_1]
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).stages_data.stages_to_do, 'Tests_3') }}
     uses: ./.github/workflows/reusable_test_stage.yml
     with:
@@ -156,7 +157,8 @@ jobs:

   CheckReadyForMerge:
     if: ${{ !cancelled() && needs.StyleCheck.result == 'success' }}
-    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1, Tests_2]
+    # Test_2 or Test_3 must not have jobs required for Mergeable check
+    needs: [RunConfig, BuildDockers, StyleCheck, FastTest, Builds_1, Builds_2, Builds_Report, Tests_1]
     runs-on: [self-hosted, style-checker-aarch64]
     steps:
       - name: Check out repository code
@@ -195,8 +197,7 @@ jobs:
     concurrency:
       group: jepsen
     if: ${{ !failure() && !cancelled() && contains(fromJson(needs.RunConfig.outputs.data).jobs_data.jobs_to_do, 'ClickHouse Keeper Jepsen') }}
-    # jepsen needs binary_release build which is in Builds_2
-    needs: [RunConfig, Builds_2]
+    needs: [RunConfig, Builds_1]
     uses: ./.github/workflows/reusable_test.yml
     with:
       test_name: ClickHouse Keeper Jepsen