From 4739b87732449af62b8b7dff26bc8103075d859d Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Thu, 14 May 2020 05:14:50 +0300
Subject: [PATCH 001/318] Add -Distinct combinator

---
 .../AggregateFunctionDistinct.cpp             |  53 +++++++++
 .../AggregateFunctionDistinct.h               | 108 ++++++++++++++++++
 .../registerAggregateFunctions.cpp            |   1 +
 .../registerAggregateFunctions.h              |   1 +
 4 files changed, 163 insertions(+)
 create mode 100644 src/AggregateFunctions/AggregateFunctionDistinct.cpp
 create mode 100644 src/AggregateFunctions/AggregateFunctionDistinct.h

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
new file mode 100644
index 00000000000..d477a04568f
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
@@ -0,0 +1,53 @@
+#include <AggregateFunctions/AggregateFunctionCombinatorFactory.h>
+#include <AggregateFunctions/AggregateFunctionDistinct.h>
+#include <Common/typeid_cast.h>
+#include "registerAggregateFunctions.h"
+
+namespace DB
+{
+
+    namespace ErrorCodes
+    {
+        extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    }
+
+    class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator
+    {
+    public:
+        String getName() const override { return "Distinct"; }
+
+        DataTypes transformArguments(const DataTypes & arguments) const override
+        {
+            if (arguments.empty())
+                throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix",
+                                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+//            return DataTypes(arguments.begin(), std::prev(arguments.end()));
+            DataTypes nested_arguments;
+            for (const auto & type : arguments)
+            {
+                nested_arguments.push_back(type);
+//                if (const DataTypeArray * array = typeid_cast<const DataTypeArray *>(type.get()))
+//                    nested_arguments.push_back(array->getNestedType());
+//                else
+//                    throw Exception("Illegal type " + type->getName() + " of argument"
+//                        " for aggregate function with " + getName() + " suffix. Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+            }
+
+            return nested_arguments;
+        }
+
+        AggregateFunctionPtr transformAggregateFunction(
+            const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+        {
+            return std::make_shared<AggregateFunctionDistinct>(nested_function, arguments);
+        }
+    };
+
+    void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory)
+    {
+        factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorDistinct>());
+    }
+
+}
diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
new file mode 100644
index 00000000000..160e113d23b
--- /dev/null
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <AggregateFunctions/IAggregateFunction.h>
+#include <Columns/IColumn.h>
+#include <Common/HashTable/HashSet.h>
+#include <Common/SipHash.h>
+#include <DataTypes/IDataType.h>
+
+#include <mutex>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+/** Adaptor for aggregate functions.
+ * Adding -Distinct suffix to aggregate function
+**/
+
+class AggregateFunctionDistinct final : public IAggregateFunctionHelper<AggregateFunctionDistinct> {
+private:
+    mutable std::mutex mutex;
+    AggregateFunctionPtr nested_func;
+    mutable HashSet<
+        UInt128,
+        UInt128TrivialHash,
+        HashTableGrower<3>,
+        HashTableAllocatorWithStackMemory<sizeof(UInt128) * (1 << 3)>> storage;
+
+public:
+    AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments)
+    : IAggregateFunctionHelper<AggregateFunctionDistinct>(arguments, {})
+    , nested_func(nested)
+    {
+        if (arguments.empty())
+            throw Exception("Aggregate function " + getName() + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+    }
+
+    String getName() const override {
+        return nested_func->getName() + "Distinct";
+    }
+
+    DataTypePtr getReturnType() const override {
+        return nested_func->getReturnType();
+    }
+
+    void create(AggregateDataPtr place) const override
+    {
+        nested_func->create(place);
+    }
+
+    void destroy(AggregateDataPtr place) const noexcept override {
+        nested_func->destroy(place);
+    }
+
+    size_t sizeOfData() const override
+    {
+        return nested_func->sizeOfData();
+    }
+
+    size_t alignOfData() const override
+    {
+        return nested_func->alignOfData();
+    }
+
+    bool hasTrivialDestructor() const override {
+        return nested_func->hasTrivialDestructor();
+    }
+
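+    /// Each row's argument values are hashed into a single 128-bit key, and only rows
+    /// whose key has not been seen before are forwarded to the nested function.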
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override {
+        UInt128 key;
+        SipHash hash;
+        columns[0]->updateHashWithValue(row_num, hash);
+        hash.get128(key.low, key.high);
+        {
+            std::lock_guard lock(mutex);
+            if (!storage.insert(key).second) {
+                return;
+            }
+        }
+        nested_func->add(place, columns, row_num, arena);
+    }
+
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override {
+        nested_func->merge(place, rhs, arena);
+    }
+
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override {
+        nested_func->serialize(place, buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override {
+        nested_func->deserialize(place, buf, arena);
+    }
+
+    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override {
+        nested_func->insertResultInto(place, to);
+    }
+
+    bool allocatesMemoryInArena() const override {
+        return nested_func->allocatesMemoryInArena();
+    }
+};
+
+}
diff --git a/src/AggregateFunctions/registerAggregateFunctions.cpp b/src/AggregateFunctions/registerAggregateFunctions.cpp
index a9ab1d4f8ea..a8d0cf6e37c 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.cpp
+++ b/src/AggregateFunctions/registerAggregateFunctions.cpp
@@ -58,6 +58,7 @@ void registerAggregateFunctions()
         registerAggregateFunctionCombinatorNull(factory);
         registerAggregateFunctionCombinatorOrFill(factory);
         registerAggregateFunctionCombinatorResample(factory);
+        registerAggregateFunctionCombinatorDistinct(factory);
     }
 }
diff --git a/src/AggregateFunctions/registerAggregateFunctions.h b/src/AggregateFunctions/registerAggregateFunctions.h
index 88cdf4a504d..981273141f9 100644
--- a/src/AggregateFunctions/registerAggregateFunctions.h
+++ b/src/AggregateFunctions/registerAggregateFunctions.h
@@ -45,6 +45,7 @@ void registerAggregateFunctionCombinatorMerge(AggregateFunctionCombinatorFactory
 void registerAggregateFunctionCombinatorNull(AggregateFunctionCombinatorFactory &);
 void registerAggregateFunctionCombinatorOrFill(AggregateFunctionCombinatorFactory &);
 void registerAggregateFunctionCombinatorResample(AggregateFunctionCombinatorFactory &);
+void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory &);
 
 void registerAggregateFunctions();
 

From 6e2b93e5af00317f9612fbc9535cd6c8e00a5406 Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Thu, 14 May 2020 22:37:53 +0300
Subject: [PATCH 002/318] Stylefix

---
 .../AggregateFunctionDistinct.h               | 40 +++++++++++--------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index 160e113d23b..bab78aa88bf 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -11,24 +11,35 @@
 namespace DB
 {
 
-namespace ErrorCodes
-{
+namespace ErrorCodes {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
 
+struct AggregateFunctionDistinctData {
+    using Key = UInt128;
+
+    HashSet<
+        Key,
+        UInt128TrivialHash,
+        HashTableGrower<3>,
+        HashTableAllocatorWithStackMemory<sizeof(UInt128) * (1 << 3)>
+    > data;
+    std::mutex mutex;
+
+    bool ALWAYS_INLINE TryToInsert(const Key& key) {
+        std::lock_guard lock(mutex);
+        return data.insert(key).second;
+    }
+};
+
 /** Adaptor for aggregate functions.
  * Adding -Distinct suffix to aggregate function
 **/
 
 class AggregateFunctionDistinct final : public IAggregateFunctionHelper<AggregateFunctionDistinct> {
 private:
-    mutable std::mutex mutex;
     AggregateFunctionPtr nested_func;
-    mutable HashSet<
-        UInt128,
-        UInt128TrivialHash,
-        HashTableGrower<3>,
-        HashTableAllocatorWithStackMemory<sizeof(UInt128) * (1 << 3)>> storage;
+    mutable AggregateFunctionDistinctData storage;
 
 public:
     AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments)
     : IAggregateFunctionHelper<AggregateFunctionDistinct>(arguments, {})
@@ -71,17 +82,14 @@ public:
     }
 
     /// Each row's argument values are hashed into a single 128-bit key, and only rows
     /// whose key has not been seen before are forwarded to the nested function.
     void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override {
-        UInt128 key;
         SipHash hash;
         columns[0]->updateHashWithValue(row_num, hash);
+
+        UInt128 key;
         hash.get128(key.low, key.high);
-        {
-            std::lock_guard lock(mutex);
-            if (!storage.insert(key).second) {
-                return;
-            }
-        }
-        nested_func->add(place, columns, row_num, arena);
+
+        if (storage.TryToInsert(key))
+            nested_func->add(place, columns, row_num, arena);
     }
 
     void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override {

From da81c56b5e5a33e1b36c5949775f22c5a78c350f Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Thu, 14 May 2020 22:46:01 +0300
Subject: [PATCH 003/318] Delete extra lines

---
 src/AggregateFunctions/AggregateFunctionDistinct.cpp | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
index d477a04568f..369b4a5f7df 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
@@ -23,16 +23,9 @@ namespace DB
                 throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix",
                                 ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
 
-//            return DataTypes(arguments.begin(), std::prev(arguments.end()));
             DataTypes nested_arguments;
-            for (const auto & type : arguments)
-            {
+            for (const auto & type : arguments) {
                 nested_arguments.push_back(type);
-//                if (const DataTypeArray * array = typeid_cast<const DataTypeArray *>(type.get()))
-//                    nested_arguments.push_back(array->getNestedType());
-//                else
-//                    throw Exception("Illegal type " + type->getName() + " of argument"
-//                        " for aggregate function with " + getName() + " suffix. Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
Must be array.", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return nested_arguments; From 13224c22ab7c7078e4a41457a72bd971792d1dc9 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Fri, 15 May 2020 05:02:57 +0300 Subject: [PATCH 004/318] Stylecheck fix --- .../AggregateFunctionDistinct.cpp | 3 +- .../AggregateFunctionDistinct.h | 36 ++++++++++++------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 369b4a5f7df..b01bd2226c7 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -24,7 +24,8 @@ namespace DB ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); DataTypes nested_arguments; - for (const auto & type : arguments) { + for (const auto & type : arguments) + { nested_arguments.push_back(type); } diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index bab78aa88bf..5580cc3b4df 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -11,11 +11,13 @@ namespace DB { -namespace ErrorCodes { +namespace ErrorCodes +{ extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct AggregateFunctionDistinctData { +struct AggregateFunctionDistinctData +{ using Key = UInt128; HashSet< @@ -36,7 +38,8 @@ struct AggregateFunctionDistinctData { * Adding -Distinct suffix to aggregate function **/ -class AggregateFunctionDistinct final : public IAggregateFunctionHelper { +class AggregateFunctionDistinct final : public IAggregateFunctionHelper +{ private: AggregateFunctionPtr nested_func; mutable AggregateFunctionDistinctData storage; @@ -50,11 +53,13 @@ public: throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } - String getName() const override { + String getName() const override + { return nested_func->getName() + "Distinct"; } - DataTypePtr getReturnType() const override { + DataTypePtr getReturnType() const override + { return nested_func->getReturnType(); } @@ -77,11 +82,13 @@ public: return nested_func->alignOfData(); } - bool hasTrivialDestructor() const override { + bool hasTrivialDestructor() const override + { return nested_func->hasTrivialDestructor(); } - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { SipHash hash; columns[0]->updateHashWithValue(row_num, hash); @@ -92,23 +99,28 @@ public: nested_func->add(place, columns, row_num, arena); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + { nested_func->merge(place, rhs, arena); } - void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { nested_func->serialize(place, buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + { nested_func->deserialize(place, buf, arena); } - void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override { + void 
+        for (size_t i = 0; i < num_arguments; ++i)
+            columns[i]->updateHashWithValue(row_num, hash);
 
         UInt128 key;
         hash.get128(key.low, key.high);

From fa38cf780c0071e1d05e4e79cf0face02628516e Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Sat, 16 May 2020 03:02:55 +0300
Subject: [PATCH 006/318] Add tests for -Distinct combinator

---
 tests/queries/0_stateless/01259_combinator_distinct.reference | 4 ++++
 tests/queries/0_stateless/01259_combinator_distinct.sql       | 4 ++++
 2 files changed, 8 insertions(+)
 create mode 100644 tests/queries/0_stateless/01259_combinator_distinct.reference
 create mode 100644 tests/queries/0_stateless/01259_combinator_distinct.sql

diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference
new file mode 100644
index 00000000000..34d13676466
--- /dev/null
+++ b/tests/queries/0_stateless/01259_combinator_distinct.reference
@@ -0,0 +1,4 @@
+499500
+78
+[0,1,2,3,4,5,6,7,8,9,10,11,12]
+5.669227916063075e-17
diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql
new file mode 100644
index 00000000000..e3c4bb114a3
--- /dev/null
+++ b/tests/queries/0_stateless/01259_combinator_distinct.sql
@@ -0,0 +1,4 @@
+SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers LIMIT 1000);
+SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000);
+SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000);
+SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
\ No newline at end of file

From aeb195950c65f7e4d9ab8ec374c599e8e1466442 Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Sat, 16 May 2020 03:15:44 +0300
Subject: [PATCH 007/318] Checkstyle fix

---
 .../AggregateFunctionDistinct.cpp             | 61 +++++++++----------
 .../AggregateFunctionDistinct.h               |  3 +-
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
index b01bd2226c7..820c2f0f72c 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
@@ -6,42 +6,41 @@
 namespace DB
 {
 
-    namespace ErrorCodes
-    {
-        extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
-        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
-    }
+namespace ErrorCodes
+{
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
 
-    class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator
-    {
-    public:
-        String getName() const override { return "Distinct"; }
+class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator
+{
+public:
+    String getName() const override { return "Distinct"; }
 
-        DataTypes transformArguments(const DataTypes & arguments) const override
+    DataTypes transformArguments(const DataTypes & arguments) const override
+    {
+        if (arguments.empty())
+            throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix",
+                            ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+        DataTypes nested_arguments;
+        for (const auto & type : arguments)
         {
-            if (arguments.empty())
-                throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix",
-                                ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-
-            DataTypes nested_arguments;
-            for (const auto & type : arguments)
-            {
-                nested_arguments.push_back(type);
-            }
-
-            return nested_arguments;
+            nested_arguments.push_back(type);
         }
 
-        AggregateFunctionPtr transformAggregateFunction(
-            const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
-        {
-            return std::make_shared<AggregateFunctionDistinct>(nested_function, arguments);
-        }
-    };
-
-    void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory)
-    {
-        factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorDistinct>());
+        return nested_arguments;
     }
 
+    AggregateFunctionPtr transformAggregateFunction(
+        const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override
+    {
+        return std::make_shared<AggregateFunctionDistinct>(nested_function, arguments);
+    }
+};
+
+void registerAggregateFunctionCombinatorDistinct(AggregateFunctionCombinatorFactory & factory)
+{
+    factory.registerCombinator(std::make_shared<AggregateFunctionCombinatorDistinct>());
+}
+
 }
diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index b87183f15d6..cc4c52ea5ff 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -28,7 +28,8 @@ struct AggregateFunctionDistinctData
     > data;
     std::mutex mutex;
 
-    bool ALWAYS_INLINE TryToInsert(const Key& key) {
+    bool ALWAYS_INLINE TryToInsert(const Key& key)
+    {
         std::lock_guard lock(mutex);
         return data.insert(key).second;
     }

From f4369381c97fca9394bd9f0673a5bb91b0983d29 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Sat, 16 May 2020 18:00:33 +0300
Subject: [PATCH 008/318] Fix build

---
 src/AggregateFunctions/AggregateFunctionDistinct.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index cc4c52ea5ff..e7ccbc62c57 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -117,7 +117,7 @@ public:
         nested_func->deserialize(place, buf, arena);
     }
 
-    void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override
+    void insertResultInto(AggregateDataPtr place, IColumn & to) const override
     {
         nested_func->insertResultInto(place, to);
     }

From 7135b8491c5b66788a6dd8c3e0536e175a78a616 Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Mon, 25 May 2020 15:12:50 +0300
Subject: [PATCH 009/318] Base memory data storage

---
 .../AggregateFunctionDistinct.h               | 52 +++++++++++++------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index e7ccbc62c57..7e86364ab0d 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -25,13 +25,15 @@ struct AggregateFunctionDistinctData
         UInt128TrivialHash,
         HashTableGrower<3>,
         HashTableAllocatorWithStackMemory<sizeof(UInt128) * (1 << 3)>
-    > data;
+    > set;
     std::mutex mutex;
 
-    bool ALWAYS_INLINE TryToInsert(const Key& key)
+    bool ALWAYS_INLINE tryToInsert(const Key& key)
     {
         std::lock_guard lock(mutex);
-        return data.insert(key).second;
+        bool a = set.insert(key).second;
+        if (a) std::cerr << key.high << ' ' << key.low << ' ' << a << std::endl;
+        return a;
     }
 };
 
 /** Adaptor for aggregate functions.
  * Adding -Distinct suffix to aggregate function
 **/
 
-class AggregateFunctionDistinct final : public IAggregateFunctionHelper<AggregateFunctionDistinct>
+class AggregateFunctionDistinct final : public IAggregateFunctionDataHelper<AggregateFunctionDistinctData, AggregateFunctionDistinct>
 {
 private:
     AggregateFunctionPtr nested_func;
     size_t num_arguments;
-    mutable AggregateFunctionDistinctData storage;
+    size_t prefix_size;
+
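+    /// The per-group state is laid out as the distinct-set data followed by the nested
+    /// function's state; the helpers below address the nested part behind the prefix.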
+    AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept
+    {
+        return place + prefix_size;
+    }
+
+    ConstAggregateDataPtr getNestedPlace(ConstAggregateDataPtr place) const noexcept
+    {
+        return place + prefix_size;
+    }
 
 public:
     AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments)
-        : IAggregateFunctionHelper<AggregateFunctionDistinct>(arguments, {})
+        : IAggregateFunctionDataHelper<AggregateFunctionDistinctData, AggregateFunctionDistinct>(arguments, {})
         , nested_func(nested), num_arguments(arguments.size())
     {
+        prefix_size = 640'000'000;
+
         if (arguments.empty())
             throw Exception("Aggregate function " + getName() + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
     }
@@ -67,16 +81,19 @@ public:
 
     void create(AggregateDataPtr place) const override
     {
-        nested_func->create(place);
+        new (place) AggregateFunctionDistinctData;
+        nested_func->create(getNestedPlace(place));
     }
 
-    void destroy(AggregateDataPtr place) const noexcept override {
-        nested_func->destroy(place);
+    void destroy(AggregateDataPtr place) const noexcept override
+    {
+        data(place).~AggregateFunctionDistinctData();
+        nested_func->destroy(getNestedPlace(place));
     }
 
     size_t sizeOfData() const override
     {
-        return nested_func->sizeOfData();
+        return prefix_size + nested_func->sizeOfData();
     }
 
     size_t alignOfData() const override
@@ -92,34 +109,35 @@ public:
     void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
     {
         SipHash hash;
        /// Mix every argument column into the hash, so rows are deduplicated by the whole argument tuple.
-        for (size_t i = 0; i < num_arguments; ++i)
+        for (size_t i = 0; i < num_arguments; ++i) {
             columns[i]->updateHashWithValue(row_num, hash);
+        }
 
         UInt128 key;
         hash.get128(key.low, key.high);
 
-        if (storage.TryToInsert(key))
-            nested_func->add(place, columns, row_num, arena);
+        if (this->data(place).tryToInsert(key))
+            nested_func->add(getNestedPlace(place), columns, row_num, arena);
     }
 
     void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
     {
-        nested_func->merge(place, rhs, arena);
+        nested_func->merge(getNestedPlace(place), rhs, arena);
     }
 
     void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
     {
-        nested_func->serialize(place, buf);
+        nested_func->serialize(getNestedPlace(place), buf);
     }
 
     void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
     {
-        nested_func->deserialize(place, buf, arena);
+        nested_func->deserialize(getNestedPlace(place), buf, arena);
     }
 
     void insertResultInto(AggregateDataPtr place, IColumn & to) const override
     {
-        nested_func->insertResultInto(place, to);
+        nested_func->insertResultInto(getNestedPlace(place), to);
     }
 
     bool allocatesMemoryInArena() const override

From f206d74b63f00b2037a82257291bd721decce8ff Mon Sep 17 00:00:00 2001
From: Sofia Antipushina
Date: Mon, 25 May 2020 17:02:55 +0300
Subject: [PATCH 010/318] fix align of data && add test

---
 src/AggregateFunctions/AggregateFunctionDistinct.h       | 7 ++-----
 .../0_stateless/01259_combinator_distinct.reference      | 1 +
 tests/queries/0_stateless/01259_combinator_distinct.sql  | 1 +
 3 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index 7e86364ab0d..57e17ffb13c 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -30,10 +30,7 @@ struct AggregateFunctionDistinctData
 
     bool ALWAYS_INLINE tryToInsert(const Key& key)
     {
-        std::lock_guard lock(mutex);
-        bool a = set.insert(key).second;
-        if (a) std::cerr << key.high << ' ' << key.low << ' ' << a << std::endl;
-        return a;
+        return set.insert(key).second;
     }
 };
@@ -63,7 +60,7 @@ public:
         : IAggregateFunctionDataHelper<AggregateFunctionDistinctData, AggregateFunctionDistinct>(arguments, {})
         , nested_func(nested), num_arguments(arguments.size())
     {
-        prefix_size = 640'000'000;
+        prefix_size = sizeof(AggregateFunctionDistinctData);
 
         if (arguments.empty())
             throw Exception("Aggregate function " + getName() + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference
index 34d13676466..739d225ad67 100644
--- a/tests/queries/0_stateless/01259_combinator_distinct.reference
+++ b/tests/queries/0_stateless/01259_combinator_distinct.reference
@@ -1,4 +1,5 @@
 499500
 78
 [0,1,2,3,4,5,6,7,8,9,10,11,12]
+[0,1,2,3,4,5,6,7,8,9,10,11,12]
 5.669227916063075e-17
diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql
index e3c4bb114a3..3f07dc443dd 100644
--- a/tests/queries/0_stateless/01259_combinator_distinct.sql
+++ b/tests/queries/0_stateless/01259_combinator_distinct.sql
@@ -1,4 +1,5 @@
 SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers LIMIT 1000);
 SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000);
 SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000);
+SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 1000);
 SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000);
\ No newline at end of file

From 709b4f42c82da439b0b3b2216fd6f56959411dd3 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 27 May 2020 22:11:04 +0300
Subject: [PATCH 011/318] Prototype sending crash reports on segfaults

---
 .gitmodules                                   |   3 +
 CMakeLists.txt                                |   1 +
 base/daemon/BaseDaemon.cpp                    |   8 +-
 base/daemon/CMakeLists.txt                    |   8 +-
 base/daemon/SentryWriter.cpp                  | 107 +++++++++++++
 base/daemon/SentryWriter.h                    |  21 +++
 cmake/find/sentry.cmake                       |  19 +++
 contrib/CMakeLists.txt                        |  14 +-
 contrib/curl-cmake/CMakeLists.txt             |   2 +
 contrib/sentry-native                         |   1 +
 programs/server/Server.cpp                    |   2 +
 src/Common/StackTrace.cpp                     | 146 ++++++++++++------
 src/Common/StackTrace.h                       |  22 ++-
 src/Common/TraceCollector.cpp                 |   2 +-
 .../System/StorageSystemStackTrace.cpp        |   2 +-
 utils/check-style/check-include               |   1 +
 16 files changed, 298 insertions(+), 61 deletions(-)
 create mode 100644 base/daemon/SentryWriter.cpp
 create mode 100644 base/daemon/SentryWriter.h
 create mode 100644 cmake/find/sentry.cmake
 create mode 160000 contrib/sentry-native

diff --git a/.gitmodules b/.gitmodules
index 7f5d1307a6e..daa5d12a62c 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -160,3 +160,6 @@
 [submodule "contrib/fmtlib"]
 	path = contrib/fmtlib
 	url = https://github.com/fmtlib/fmt.git
+[submodule "contrib/sentry-native"]
+	path = contrib/sentry-native
+	url = git@github.com:getsentry/sentry-native.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 53dfd1df1cb..79db4c624ca 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -365,6 +365,7 @@ include (cmake/find/fastops.cmake)
 include (cmake/find/orc.cmake)
 include (cmake/find/avro.cmake)
 include (cmake/find/msgpack.cmake)
+include (cmake/find/sentry.cmake)
 
 find_contrib_lib(cityhash)
 find_contrib_lib(farmhash)
diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 10c7173d5b1..f269c3923e0 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -1,4 +1,5 @@
 #include <daemon/BaseDaemon.h>
+#include <daemon/SentryWriter.h>
 #include
 #include
@@ -222,6 +223,7 @@ public:
         DB::readPODBinary(stack_trace, in);
         DB::readBinary(thread_num, in);
         DB::readBinary(query_id, in);
+        stack_trace.resetFrames();
 
         /// This allows to receive more signals if failure happens inside onFault function.
         /// Example: segfault while symbolizing stack trace.
@@ -247,6 +249,7 @@ private:
         UInt32 thread_num,
         const std::string & query_id) const
     {
+        SentryWriter::onFault(sig, info, context, stack_trace);
         LOG_FATAL(log, "########################################");
 
         {
@@ -272,7 +275,7 @@ private:
             std::stringstream bare_stacktrace;
             bare_stacktrace << "Stack trace:";
             for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
-                bare_stacktrace << ' ' << stack_trace.getFrames()[i];
+                bare_stacktrace << ' ' << stack_trace.getFramePointers()[i];
 
             LOG_FATAL(log, bare_stacktrace.str());
         }
@@ -511,6 +514,8 @@ void debugIncreaseOOMScore() {}
 void BaseDaemon::initialize(Application & self)
 {
     closeFDs();
+    SentryWriter::initialize();
+
     task_manager = std::make_unique<Poco::TaskManager>();
     ServerApplication::initialize(self);
 
@@ -518,7 +523,6 @@
     argsToConfig(argv(), config(), PRIO_APPLICATION - 100);
 
     bool is_daemon = config().getBool("application.runAsDaemon", false);
-
     if (is_daemon)
     {
         /** When creating pid file and looking for config, will search for paths relative to the working path of the program when started.
diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt
index 5d9a37dc75e..46fa4a0fe34 100644
--- a/base/daemon/CMakeLists.txt
+++ b/base/daemon/CMakeLists.txt
@@ -1,7 +1,13 @@
 add_library (daemon
     BaseDaemon.cpp
     GraphiteWriter.cpp
-)
+    SentryWriter.cpp)
 
 target_include_directories (daemon PUBLIC ..)
 target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
+
+if (USE_SENTRY)
+    target_link_libraries (daemon PRIVATE curl)
+    target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
+#    target_include_directories (daemon SYSTEM BEFORE PRIVATE ${SENTRY_INCLUDE_DIR})
+endif ()
\ No newline at end of file
diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
new file mode 100644
index 00000000000..8859adc1c2e
--- /dev/null
+++ b/base/daemon/SentryWriter.cpp
@@ -0,0 +1,107 @@
+#include <daemon/SentryWriter.h>
+
+#include <Common/getFQDNOrHostName.h>
+#if !defined(ARCADIA_BUILD)
+#    include "Common/config_version.h"
+#endif
+
+#include <sentry.h>
+
+namespace {
+    void setExtras() {
+        sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
+        sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));
+        sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER));
+        sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION));
+        sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
+        sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
+        sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
+    }
+}
+
+void SentryWriter::initialize() {
+    sentry_options_t * options = sentry_options_new();
+    sentry_options_set_release(options, VERSION_STRING);
+    sentry_options_set_debug(options, 1);
+    sentry_init(options);
+    sentry_options_set_dsn(options, "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277");
+    if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) {
+        sentry_options_set_environment(options, "prod");
+    } else {
+        sentry_options_set_environment(options, "test");
+    }
+}
+
+void SentryWriter::shutdown() {
+    sentry_shutdown();
+}
+
+void SentryWriter::onFault(
+    int sig,
+    const siginfo_t & info,
+    const ucontext_t & context,
+    const StackTrace & stack_trace
+    )
+{
+    const std::string & error_message = signalToErrorMessage(sig, info, context);
+    sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str());
+    sentry_set_tag("signal", strsignal(sig));
+    sentry_set_tag("server_name", getFQDNOrHostName().c_str());
+    sentry_set_extra("signal_number", sentry_value_new_int32(sig));
+    setExtras();
+
+    sentry_value_t frames = sentry_value_new_list();
+
+    size_t stack_size = stack_trace.getSize();
+    if (stack_size > 0)
+    {
+        size_t offset = stack_trace.getOffset();
+        if (stack_size == 1)
+        {
+            offset = 1;
+        }
+        char instruction_addr[100];
+        for (size_t i = stack_size - 1; i >= offset; --i)
+        {
+            const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i];
+            sentry_value_t frame = sentry_value_new_object();
+            unsigned long long frame_ptr = reinterpret_cast<unsigned long long>(current_frame.virtual_addr);
+            snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr);
+            sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr));
+
+            if (current_frame.symbol.has_value())
+            {
+                sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str()));
+            }
+
+            if (current_frame.file.has_value())
+            {
+                sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str()));
+            }
+
+            if (current_frame.line.has_value())
+            {
+                sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value()));
+            }
+
+            sentry_value_append(frames, frame);
+        }
+    }
+
+    sentry_value_t stacktrace = sentry_value_new_object();
+    sentry_value_set_by_key(stacktrace, "frames", frames);
+
+    sentry_value_t thread = sentry_value_new_object();
+    sentry_value_set_by_key(thread, "stacktrace", stacktrace);
+
+    sentry_value_t values = sentry_value_new_list();
+    sentry_value_append(values, thread);
+
+    sentry_value_t threads = sentry_value_new_object();
+    sentry_value_set_by_key(threads, "values", values);
+
+    sentry_value_set_by_key(event, "threads", threads);
+
+    sentry_capture_event(event);
+    shutdown();
+}
diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h
new file mode 100644
index 00000000000..6c85ef04dd3
--- /dev/null
+++ b/base/daemon/SentryWriter.h
@@ -0,0 +1,21 @@
+#pragma once
+
+#include
+#include
+
+#include <Common/StackTrace.h>
+
+class SentryWriter
+{
+public:
+    SentryWriter() = delete;
+
+    static void initialize();
+    static void shutdown();
+    static void onFault(
+        int sig,
+        const siginfo_t & info,
+        const ucontext_t & context,
+        const StackTrace & stack_trace
+    );
+};
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
new file mode 100644
index 00000000000..f94b53ffb00
--- /dev/null
+++ b/cmake/find/sentry.cmake
@@ -0,0 +1,19 @@
+set (SENTRY_LIBRARY "sentry")
+set (SENTRY_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/sentry-native/include")
+if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
+    message (WARNING "submodule contrib/sentry-native is missing. to fix try run: \n git submodule update --init --recursive")
+    return()
+endif ()
+
+option (USE_SENTRY "Use Sentry" ON)
+
+set (BUILD_SHARED_LIBS OFF)
+set (SENTRY_PIC OFF)
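+# The crash-handler backend is disabled on purpose: ClickHouse catches fatal signals
+# itself and assembles the report from its own stack trace, so only the curl transport is needed.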
+set (SENTRY_BACKEND "none")
+set (SENTRY_TRANSPORT "curl")
+set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
+set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
+
+message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}")
+
+include_directories("${SENTRY_INCLUDE_DIR}")
\ No newline at end of file
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 99f7be2cbb7..1d1d7756de2 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -274,7 +274,7 @@ if (USE_INTERNAL_HDFS3_LIBRARY)
     add_subdirectory(libhdfs3-cmake)
 endif ()
 
-if (USE_INTERNAL_AWS_S3_LIBRARY)
+if (USE_INTERNAL_AWS_S3_LIBRARY OR USE_SENTRY)
     set (save_CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
     set (save_CMAKE_REQUIRED_LIBRARIES ${CMAKE_REQUIRED_LIBRARIES})
     set (save_CMAKE_REQUIRED_INCLUDES ${CMAKE_REQUIRED_INCLUDES})
@@ -286,12 +286,18 @@ if (USE_INTERNAL_AWS_S3_LIBRARY)
     set (CMAKE_CMAKE_REQUIRED_INCLUDES ${save_CMAKE_REQUIRED_INCLUDES})
     set (CMAKE_REQUIRED_FLAGS ${save_CMAKE_REQUIRED_FLAGS})
     set (CMAKE_CMAKE_MODULE_PATH ${save_CMAKE_MODULE_PATH})
+
+    # The library is large - avoid bloat.
+    target_compile_options (curl PRIVATE -g0)
+endif ()
+
+if (USE_INTERNAL_AWS_S3_LIBRARY)
     add_subdirectory(aws-s3-cmake)
 
     # The library is large - avoid bloat.
     target_compile_options (aws_s3 PRIVATE -g0)
     target_compile_options (aws_s3_checksums PRIVATE -g0)
-    target_compile_options (curl PRIVATE -g0)
+
 endif ()
 
 if (USE_BASE64)
@@ -318,4 +324,8 @@ if (USE_FASTOPS)
     add_subdirectory (fastops-cmake)
 endif()
 
+if (USE_SENTRY)
+    add_subdirectory (sentry-native)
+endif()
+
 add_subdirectory (fmtlib-cmake)
diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt
index d9805612ffe..d0f6a7773b0 100644
--- a/contrib/curl-cmake/CMakeLists.txt
+++ b/contrib/curl-cmake/CMakeLists.txt
@@ -1,4 +1,6 @@
 set (CURL_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl)
+set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
+set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
 
 set (SRCS
     ${CURL_DIR}/lib/file.c
diff --git a/contrib/sentry-native b/contrib/sentry-native
new file mode 160000
index 00000000000..3bfce2d17c1
--- /dev/null
+++ b/contrib/sentry-native
@@ -0,0 +1 @@
+Subproject commit 3bfce2d17c1b80fbbaae83bb5ef41c1b290d34fb
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index c1a520030f4..8383fa2d9bf 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -60,6 +60,8 @@
 #include
 #include
 #include "MySQLHandlerFactory.h"
+#include
+
 
 #if !defined(ARCADIA_BUILD)
 #    include "config_core.h"
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 6d0b6a0f7d2..5cc8c43a27a 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -190,6 +190,63 @@ static void * getCallerAddress(const ucontext_t & context)
 #endif
 }
 
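+/// Resolve raw frame pointers into the containing object, demangled symbol and file:line
+/// (ELF/DWARF builds only; other platforms get bare virtual addresses).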
+static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames)
+{
+#if defined(__ELF__) && !defined(__FreeBSD__)
+
+    const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
+    std::unordered_map<std::string, DB::Dwarf> dwarfs;
+
+    for (size_t i = 0; i < offset; ++i) {
+        frames.value()[i].virtual_addr = frame_pointers[i];
+    }
+
+    for (size_t i = offset; i < size; ++i)
+    {
+        StackTrace::Frame & current_frame = frames.value()[i];
+        current_frame.virtual_addr = frame_pointers[i];
+        const auto * object = symbol_index.findObject(current_frame.virtual_addr);
+        uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
+        current_frame.physical_addr = reinterpret_cast<void *>(uintptr_t(current_frame.virtual_addr) - virtual_offset);
+
+        if (object)
+        {
+            current_frame.object = object->name;
+            if (std::filesystem::exists(current_frame.object.value()))
+            {
+                auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
+
+                DB::Dwarf::LocationInfo location;
+                if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) {
+                    current_frame.file = location.file.toString();
+                    current_frame.line = location.line;
+                }
+            }
+        }
+        else
+        {
+            current_frame.object = "?";
+        }
+
+        const auto * symbol = symbol_index.findSymbol(current_frame.virtual_addr);
+        if (symbol)
+        {
+            int status = 0;
+            current_frame.symbol = demangle(symbol->name, status);
+        }
+        else
+        {
+            current_frame.symbol = "?";
+        }
+    }
+# else
+    for (size_t i = 0; i < size; ++i) {
+        frames.value()[i].virtual_addr = frame_pointers[i];
+    }
+    UNUSED(offset);
+#endif
+}
+
 StackTrace::StackTrace()
 {
     tryCapture();
@@ -203,7 +260,7 @@ StackTrace::StackTrace(const ucontext_t & signal_context)
 
     if (size == 0 && caller_address)
     {
-        frames[0] = caller_address;
+        frame_pointers[0] = caller_address;
         size = 1;
     }
     else
@@ -212,7 +269,7 @@
 
         for (size_t i = 0; i < size; ++i)
         {
-            if (frames[i] == caller_address)
+            if (frame_pointers[i] == caller_address)
             {
                 offset = i;
                 break;
@@ -229,8 +286,8 @@ void StackTrace::tryCapture()
 {
     size = 0;
 #if USE_UNWIND
-    size = unw_backtrace(frames.data(), capacity);
-    __msan_unpoison(frames.data(), size * sizeof(frames[0]));
+    size = unw_backtrace(frame_pointers.data(), capacity);
+    __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0]));
 #endif
 }
 
@@ -244,102 +301,89 @@ size_t StackTrace::getOffset() const
     return offset;
 }
 
-const StackTrace::Frames & StackTrace::getFrames() const
+const StackTrace::FramePointers & StackTrace::getFramePointers() const
 {
-    return frames;
+    return frame_pointers;
 }
 
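+/// Symbolizing a trace is expensive, so it is done lazily on first access and cached.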
+const StackTrace::Frames & StackTrace::getFrames() const
+{
+    if (!frames.has_value()) {
+        frames = {{}};
+        symbolize(frame_pointers.data(), offset, size, frames);
+    }
+    return frames;
+}
 
 static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function<void(const std::string &)> callback)
 {
     if (size == 0)
         return callback("<Empty trace>");
 
-#if defined(__ELF__) && !defined(__FreeBSD__)
-    const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
-    std::unordered_map<std::string, DB::Dwarf> dwarfs;
-
     std::stringstream out;
 
     for (size_t i = offset; i < size; ++i)
     {
-        const void * virtual_addr = frames[i];
-        const auto * object = symbol_index.findObject(virtual_addr);
-        uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
-        const void * physical_addr = reinterpret_cast<const void *>(uintptr_t(virtual_addr) - virtual_offset);
-
+        const StackTrace::Frame& current_frame = frames.value()[i];
         out << i << ". ";
 
-        if (object)
+        if (current_frame.file.has_value() && current_frame.line.has_value())
         {
-            if (std::filesystem::exists(object->name))
-            {
-                auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first;
-
-                DB::Dwarf::LocationInfo location;
-                if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, DB::Dwarf::LocationInfoMode::FAST))
-                    out << location.file.toString() << ":" << location.line << ": ";
-            }
+            out << current_frame.file.value() << ":" << current_frame.line.value() << ": ";
         }
 
-        const auto * symbol = symbol_index.findSymbol(virtual_addr);
-        if (symbol)
+        if (current_frame.symbol.has_value())
         {
-            int status = 0;
-            out << demangle(symbol->name, status);
+            out << current_frame.symbol.value();
         }
-        else
-            out << "?";
 
-        out << " @ " << physical_addr;
-        out << " in " << (object ? object->name : "?");
+        out << " @ " << current_frame.physical_addr;
+        if (current_frame.object.has_value()) {
+            out << " in " << current_frame.object.value();
+        }
 
         callback(out.str());
         out.str({});
     }
-#else
-    std::stringstream out;
-
-    for (size_t i = offset; i < size; ++i)
-    {
-        const void * addr = frames[i];
-        out << i << ". " << addr;
-
-        callback(out.str());
-        out.str({});
-    }
-#endif
 }
 
-static std::string toStringImpl(const StackTrace::Frames & frames, size_t offset, size_t size)
+static std::string toStringImpl(const void * const * frame_pointers, size_t offset, size_t size)
 {
     std::stringstream out;
+    StackTrace::Frames frames{};
+    frames = {{}};
+    symbolize(frame_pointers, offset, size, frames);
     toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; });
     return out.str();
 }
 
 void StackTrace::toStringEveryLine(std::function<void(const std::string &)> callback) const
 {
-    toStringEveryLineImpl(frames, offset, size, std::move(callback));
+    toStringEveryLineImpl(getFrames(), offset, size, std::move(callback));
 }
 
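+/// Drops the lazily cached symbolized frames, e.g. after the object has been copied
+/// as raw bytes from another process and the cache is no longer meaningful.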
"; - if (object) + if (current_frame.file.has_value() && current_frame.line.has_value()) { - if (std::filesystem::exists(object->name)) - { - auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; - - DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress(uintptr_t(physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) - out << location.file.toString() << ":" << location.line << ": "; - } + out << current_frame.file.value() << ":" << current_frame.line.value() << ": "; } - const auto * symbol = symbol_index.findSymbol(virtual_addr); - if (symbol) + if (current_frame.symbol.has_value()) { - int status = 0; - out << demangle(symbol->name, status); + out << current_frame.symbol.value(); } - else - out << "?"; - out << " @ " << physical_addr; - out << " in " << (object ? object->name : "?"); + out << " @ " << current_frame.physical_addr; + if (current_frame.object.has_value()) { + out << " in " << current_frame.object.value(); + } callback(out.str()); out.str({}); } -#else - std::stringstream out; - - for (size_t i = offset; i < size; ++i) - { - const void * addr = frames[i]; - out << i << ". " << addr; - - callback(out.str()); - out.str({}); - } -#endif } -static std::string toStringImpl(const StackTrace::Frames & frames, size_t offset, size_t size) +static std::string toStringImpl(const void * const * frame_pointers, size_t offset, size_t size) { std::stringstream out; + StackTrace::Frames frames{}; + frames = {{}}; + symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); } void StackTrace::toStringEveryLine(std::function callback) const { - toStringEveryLineImpl(frames, offset, size, std::move(callback)); + toStringEveryLineImpl(getFrames(), offset, size, std::move(callback)); } +void StackTrace::resetFrames() { + frames.reset(); +} + + std::string StackTrace::toString() const { /// Calculation of stack trace text is extremely slow. /// We use simple cache because otherwise the server could be overloaded by trash queries. 
+    struct Frame
+    {
+        const void * virtual_addr = nullptr;
+        void * physical_addr = nullptr;
+        std::optional<std::string> symbol;
+        std::optional<std::string> object;
+        std::optional<std::string> file;
+        std::optional<size_t> line;
+    };
     static constexpr size_t capacity = 32;
-    using Frames = std::array<void *, capacity>;
+    using FramePointers = std::array<void *, capacity>;
+    using Frames = std::optional<std::array<Frame, capacity>>;
 
     /// Tries to capture stack trace
     StackTrace();
@@ -38,19 +50,23 @@ public:
     size_t getSize() const;
     size_t getOffset() const;
+    const FramePointers & getFramePointers() const;
     const Frames & getFrames() const;
     std::string toString() const;
 
-    static std::string toString(void ** frames, size_t offset, size_t size);
+    static std::string toString(void ** frame_pointers, size_t offset, size_t size);
 
     void toStringEveryLine(std::function<void(const std::string &)> callback) const;
 
+    void resetFrames();
+
 protected:
     void tryCapture();
 
     size_t size = 0;
     size_t offset = 0;  /// How many frames to skip while displaying.
-    Frames frames{};
+    FramePointers frame_pointers{};
+    mutable Frames frames{};
 };
 
 std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context);
diff --git a/src/Common/TraceCollector.cpp b/src/Common/TraceCollector.cpp
index 7df06dc7892..f5bdfd2b826 100644
--- a/src/Common/TraceCollector.cpp
+++ b/src/Common/TraceCollector.cpp
@@ -81,7 +81,7 @@ void TraceCollector::collect(TraceType trace_type, const StackTrace & stack_trac
     size_t stack_trace_offset = stack_trace.getOffset();
     writeIntBinary(UInt8(stack_trace_size - stack_trace_offset), out);
     for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
-        writePODBinary(stack_trace.getFrames()[i], out);
+        writePODBinary(stack_trace.getFramePointers()[i], out);
 
     writePODBinary(trace_type, out);
     writePODBinary(thread_id, out);
diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp
index a8966ad0307..bdce70894d5 100644
--- a/src/Storages/System/StorageSystemStackTrace.cpp
+++ b/src/Storages/System/StorageSystemStackTrace.cpp
@@ -198,7 +198,7 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte
             Array arr;
             arr.reserve(stack_trace_size - stack_trace_offset);
             for (size_t i = stack_trace_offset; i < stack_trace_size; ++i)
-                arr.emplace_back(reinterpret_cast<intptr_t>(stack_trace->getFrames()[i]));
+                arr.emplace_back(reinterpret_cast<intptr_t>(stack_trace->getFramePointers()[i]));
 
             res_columns[0]->insert(tid);
             res_columns[1]->insertData(query_id_data, query_id_size);
diff --git a/utils/check-style/check-include b/utils/check-style/check-include
index 211172979bd..35f94d6e706 100755
--- a/utils/check-style/check-include
+++ b/utils/check-style/check-include
@@ -59,6 +59,7 @@ inc="-I. \
 -I./contrib/lz4/lib \
 -I./contrib/hyperscan/src \
 -I./contrib/simdjson/include \
+-I./contrib/sentry-native/include \
 -I./src \
 -I${BUILD_DIR}/src"
 

From 31123236cb359f1783dcadf8c3062ddb1ca6b8cf Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 27 May 2020 23:15:33 +0300
Subject: [PATCH 012/318] Settings for crash report opt-in

---
 base/daemon/BaseDaemon.cpp   |   2 +-
 base/daemon/SentryWriter.cpp | 164 +++++++++++++++++++++--------------
 base/daemon/SentryWriter.h   |   4 +-
 src/Common/config.h.in       |   1 +
 4 files changed, 105 insertions(+), 66 deletions(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index f269c3923e0..a8a79827552 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -514,7 +514,6 @@ void debugIncreaseOOMScore() {}
 void BaseDaemon::initialize(Application & self)
 {
     closeFDs();
-    SentryWriter::initialize();
 
     task_manager = std::make_unique<Poco::TaskManager>();
     ServerApplication::initialize(self);
@@ -533,6 +532,7 @@
     }
 
     reloadConfiguration();
+    SentryWriter::initialize(config());
 
     /// This must be done before creation of any files (including logs).
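+    /// Set once the Sentry SDK has been configured; the handlers below do nothing until then.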
     mode_t umask_num = 0027;
diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 8859adc1c2e..5c7d6eadd98 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -1,14 +1,21 @@
 #include <daemon/SentryWriter.h>
+#include <Common/config.h>
 
 #include <Common/getFQDNOrHostName.h>
 #if !defined(ARCADIA_BUILD)
 #    include "Common/config_version.h"
 #endif
 
+#if USE_SENTRY
 #include <sentry.h>
+#endif
+
 namespace {
+    static bool initialized = false;
+
     void setExtras() {
+#if USE_SENTRY
         sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH));
         sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE));
         sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER));
@@ -16,24 +23,47 @@ namespace {
         sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
         sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
         sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
+#endif
     }
 }
 
-void SentryWriter::initialize() {
-    sentry_options_t * options = sentry_options_new();
-    sentry_options_set_release(options, VERSION_STRING);
-    sentry_options_set_debug(options, 1);
-    sentry_init(options);
-    sentry_options_set_dsn(options, "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277");
-    if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) {
-        sentry_options_set_environment(options, "prod");
-    } else {
-        sentry_options_set_environment(options, "test");
+void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) {
+#if USE_SENTRY
+    bool enabled = false;
+    if (config.getBool("send_crash_reports.enabled", false))
+    {
+        if ((strlen(VERSION_OFFICIAL) > 0) || config.getBool("send_crash_reports.debug", false))
+        {
+            enabled = true;
+        }
     }
+    if (enabled)
+    {
+        const std::string & endpoint = config.getString(
+            "send_crash_reports.endpoint",
+            "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"
+        );
+        sentry_options_t * options = sentry_options_new();
+        sentry_options_set_release(options, VERSION_STRING);
+        sentry_options_set_debug(options, 1);
+        sentry_init(options);
+        sentry_options_set_dsn(options, endpoint.c_str());
+        if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) {
+            sentry_options_set_environment(options, "prod");
+        } else {
+            sentry_options_set_environment(options, "test");
+        }
+        initialized = true;
+    }
+#endif
 }
 
 void SentryWriter::shutdown() {
-    sentry_shutdown();
+#if USE_SENTRY
+    if (initialized) {
+        sentry_shutdown();
+    }
+#endif
 }
 
 void SentryWriter::onFault(
@@ -43,65 +73,71 @@ void SentryWriter::onFault(
     const StackTrace & stack_trace
     )
 {
-    const std::string & error_message = signalToErrorMessage(sig, info, context);
-    sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str());
-    sentry_set_tag("signal", strsignal(sig));
-    sentry_set_tag("server_name", getFQDNOrHostName().c_str());
sentry_set_extra("signal_number", sentry_value_new_int32(sig)); + setExtras(); + + /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/stacktrace/ + sentry_value_t frames = sentry_value_new_list(); + size_t stack_size = stack_trace.getSize(); + if (stack_size > 0) { - offset = 1; + size_t offset = stack_trace.getOffset(); + if (stack_size == 1) + { + offset = 1; + } + char instruction_addr[100]; + for (size_t i = stack_size - 1; i >= offset; --i) + { + const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; + sentry_value_t frame = sentry_value_new_object(); + unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr); + snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr); + sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + + if (current_frame.symbol.has_value()) + { + sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); + } + + if (current_frame.file.has_value()) + { + sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); + } + + if (current_frame.line.has_value()) + { + sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); + } + + sentry_value_append(frames, frame); + } } - char instruction_addr[100]; - for (size_t i = stack_size - 1; i >= offset; --i) - { - const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i]; - sentry_value_t frame = sentry_value_new_object(); - unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr); - snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr); - sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr)); - if (current_frame.symbol.has_value()) - { - sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str())); - } + /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/ + sentry_value_t stacktrace = sentry_value_new_object(); + sentry_value_set_by_key(stacktrace, "frames", frames); - if (current_frame.file.has_value()) - { - sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str())); - } + sentry_value_t thread = sentry_value_new_object(); + sentry_value_set_by_key(thread, "stacktrace", stacktrace); - if (current_frame.line.has_value()) - { - sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value())); - } + sentry_value_t values = sentry_value_new_list(); + sentry_value_append(values, thread); - sentry_value_append(frames, frame); - } + sentry_value_t threads = sentry_value_new_object(); + sentry_value_set_by_key(threads, "values", values); + + sentry_value_set_by_key(event, "threads", threads); + + sentry_capture_event(event); + shutdown(); } - - sentry_value_t stacktrace = sentry_value_new_object(); - sentry_value_set_by_key(stacktrace, "frames", frames); - - sentry_value_t thread = sentry_value_new_object(); - sentry_value_set_by_key(thread, "stacktrace", stacktrace); - - sentry_value_t values = sentry_value_new_list(); - sentry_value_append(values, thread); - - sentry_value_t threads = sentry_value_new_object(); - sentry_value_set_by_key(threads, "values", values); - - sentry_value_set_by_key(event, "threads", threads); - - sentry_capture_event(event); - shutdown(); +#endif } diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h index 
index 6c85ef04dd3..ee45ae4f203 100644
--- a/base/daemon/SentryWriter.h
+++ b/base/daemon/SentryWriter.h
@@ -3,6 +3,8 @@
 #include
 #include
 
+#include <Poco/Util/LayeredConfiguration.h>
+
 #include <Common/StackTrace.h>
 
 class SentryWriter
@@ -10,7 +12,7 @@ class SentryWriter
 public:
     SentryWriter() = delete;
 
-    static void initialize();
+    static void initialize(Poco::Util::LayeredConfiguration & config);
     static void shutdown();
     static void onFault(
         int sig,
diff --git a/src/Common/config.h.in b/src/Common/config.h.in
index df2359c1c29..dd6263c3948 100644
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@@ -9,4 +9,5 @@
 #cmakedefine01 USE_BROTLI
 #cmakedefine01 USE_UNWIND
 #cmakedefine01 USE_OPENCL
+#cmakedefine01 USE_SENTRY
 #cmakedefine01 CLICKHOUSE_SPLIT_BINARY

From 52e4a0293d622072bbd8d9f09d37bc7257b83174 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 27 May 2020 23:21:53 +0300
Subject: [PATCH 013/318] Keep sentry-native in debug mode only under setting

---
 base/daemon/SentryWriter.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 5c7d6eadd98..7e2a95c8369 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -30,9 +30,10 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) {
 #if USE_SENTRY
     bool enabled = false;
    /// The debug setting forces sending reports even from unofficial builds, for testing.
+    bool debug = config.getBool("send_crash_reports.debug", false);
     if (config.getBool("send_crash_reports.enabled", false))
     {
-        if ((strlen(VERSION_OFFICIAL) > 0) || config.getBool("send_crash_reports.debug", false))
+        if (debug || (strlen(VERSION_OFFICIAL) > 0))
         {
             enabled = true;
         }
@@ -45,7 +46,10 @@
         );
         sentry_options_t * options = sentry_options_new();
         sentry_options_set_release(options, VERSION_STRING);
-        sentry_options_set_debug(options, 1);
+        if (debug)
+        {
+            sentry_options_set_debug(options, 1);
+        }
         sentry_init(options);
         sentry_options_set_dsn(options, endpoint.c_str());
         if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) {

From d9bb3ef91ba801f4752dde77032b43e892395e14 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 29 May 2020 22:48:32 +0300
Subject: [PATCH 014/318] Add logging and adjust initialization

---
 base/daemon/BaseDaemon.cpp   |  2 +-
 base/daemon/SentryWriter.cpp | 38 ++++++++++++++++++++++++++++++++++--
 base/daemon/ya.make          |  1 +
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index a8a79827552..4fd5bfa1379 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -532,7 +532,6 @@ void BaseDaemon::initialize(Application & self)
     }
 
     reloadConfiguration();
-    SentryWriter::initialize(config());
 
     /// This must be done before creation of any files (including logs).
     mode_t umask_num = 0027;
@@ -657,6 +657,7 @@ void BaseDaemon::initialize(Application & self)
 
 void BaseDaemon::initializeTerminationAndSignalProcessing()
 {
+    SentryWriter::initialize(config());
     std::set_terminate(terminate_handler);
 
     /// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead.
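+        /// sentry-native needs a writable directory on disk for its internal state and queued reports.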
diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 7e2a95c8369..d5c2766cf21 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -1,7 +1,11 @@
 #include
+#include
+#include
+
 #include
 #include
+#include
 #if !defined(ARCADIA_BUILD)
 # include "Common/config_version.h"
 #endif
@@ -44,20 +48,45 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) {
         "send_crash_reports.endpoint",
         "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"
     );
+    const std::string & temp_folder_path = config.getString(
+        "send_crash_reports.tmp_path",
+        config.getString("tmp_path", Poco::Path::temp()) + "sentry/"
+    );
+    Poco::File(temp_folder_path).createDirectories();
+
     sentry_options_t * options = sentry_options_new();
     sentry_options_set_release(options, VERSION_STRING);
     if (debug)
     {
         sentry_options_set_debug(options, 1);
     }
-    sentry_init(options);
     sentry_options_set_dsn(options, endpoint.c_str());
+    sentry_options_set_database_path(options, temp_folder_path.c_str());
     if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) {
         sentry_options_set_environment(options, "prod");
     } else {
         sentry_options_set_environment(options, "test");
     }
-    initialized = true;
+    int init_status = sentry_init(options);
+    if (!init_status)
+    {
+        initialized = true;
+        LOG_INFO(
+            &Logger::get("SentryWriter"),
+            "Sending crash reports is initialized with {} endpoint and {} temp folder",
+            endpoint,
+            temp_folder_path
+        );
+    }
+    else
+    {
+        LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialize with {} status", init_status);
+    }
+
+    }
+    else
+    {
+        LOG_INFO(&Logger::get("SentryWriter"), "Sending crash reports is disabled");
     }
 #endif
 }
@@ -140,8 +169,13 @@ void SentryWriter::onFault(
 
         sentry_value_set_by_key(event, "threads", threads);
 
+        LOG_INFO(&Logger::get("SentryWriter"), "Sending crash report");
         sentry_capture_event(event);
         shutdown();
     }
+    else
+    {
+        LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report");
+    }
 #endif
 }
diff --git a/base/daemon/ya.make b/base/daemon/ya.make
index 1c72af3ed53..125417adca5 100644
--- a/base/daemon/ya.make
+++ b/base/daemon/ya.make
@@ -9,6 +9,7 @@ PEERDIR(
 SRCS(
     BaseDaemon.cpp
     GraphiteWriter.cpp
+    SentryWriter.cpp
 )
 
 END()
From 4ef322274d117ecb6d04f79c4f73d0447b961c64 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 29 May 2020 22:53:16 +0300
Subject: [PATCH 015/318] Add integration test

---
 .gitignore | 1 +
 programs/server/config.xml | 6 +++
 .../configs/config_send_crash_reports.xml | 8 ++++
 .../test_send_crash_reports/http_server.py | 43 ++++++++++++++++++
 .../test_send_crash_reports/test.py | 44 +++++++++++++++++++
 5 files changed, 102 insertions(+)
 create mode 100644 tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml
 create mode 100644 tests/integration/test_send_crash_reports/http_server.py
 create mode 100644 tests/integration/test_send_crash_reports/test.py
diff --git a/.gitignore b/.gitignore
index 6bd57911ac8..afb4e67a1b8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -12,6 +12,7 @@
 /build
 /build_*
 /build-*
+/tests/venv
 
 /docs/build
 /docs/publish
diff --git a/programs/server/config.xml b/programs/server/config.xml
index e16af9d75d7..d07f20aa0e0 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -42,6 +42,12 @@
     -->
 
+
+
+
+        false
+
+
     8123
     9000
diff --git a/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml b/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml
new file mode 100644
index 00000000000..10f559b0054
--- /dev/null
+++ b/tests/integration/test_send_crash_reports/configs/config_send_crash_reports.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<yandex>
+    <send_crash_reports>
+        <enabled>true</enabled>
+        <debug>true</debug>
+        <endpoint>http://6f33034cfe684dd7a3ab9875e57b1c8d@localhost:9500/5226277</endpoint>
+    </send_crash_reports>
+</yandex>
diff --git a/tests/integration/test_send_crash_reports/http_server.py b/tests/integration/test_send_crash_reports/http_server.py
new file mode 100644
index 00000000000..e3fa2e1cb57
--- /dev/null
+++ b/tests/integration/test_send_crash_reports/http_server.py
@@ -0,0 +1,43 @@
+import BaseHTTPServer
+
+RESULT_PATH = '/result.txt'
+
+class SentryHandler(BaseHTTPServer.BaseHTTPRequestHandler):
+    def do_POST(self):
+        post_data = self.__read_and_decode_post_data()
+        with open(RESULT_PATH, 'w') as f:
+            if self.headers.get("content-type") != "application/x-sentry-envelope":
+                f.write("INCORRECT_CONTENT_TYPE")
+            elif len(post_data) < 3000:
+                f.write("INCORRECT_CONTENT_LENGTH")
+            elif '"http://6f33034cfe684dd7a3ab9875e57b1c8d@localhost:9500/5226277"' not in post_data:
+                f.write('INCORRECT_POST_DATA')
+            else:
+                f.write("OK")
+        self.send_response(200)
+
+    def __read_and_decode_post_data(self):
+        transfer_encoding = self.headers.get("transfer-Encoding")
+        decoded = ""
+        if transfer_encoding == "chunked":
+            while True:
+                s = self.rfile.readline()
+                chunk_length = int(s, 16)
+                if not chunk_length:
+                    break
+                decoded += self.rfile.read(chunk_length)
+                self.rfile.readline()
+        else:
+            content_length = int(self.headers.get("content-length", 0))
+            decoded = self.rfile.read(content_length)
+        return decoded
+
+
+if __name__ == "__main__":
+    with open(RESULT_PATH, 'w') as f:
+        f.write("INITIAL_STATE")
+    httpd = BaseHTTPServer.HTTPServer(("localhost", 9500,), SentryHandler)
+    try:
+        httpd.serve_forever()
+    finally:
+        httpd.server_close()
\ No newline at end of file
diff --git a/tests/integration/test_send_crash_reports/test.py b/tests/integration/test_send_crash_reports/test.py
new file mode 100644
index 00000000000..f9e95f953d0
--- /dev/null
+++ b/tests/integration/test_send_crash_reports/test.py
@@ -0,0 +1,44 @@
+import os
+import time
+
+import pytest
+
+import helpers.cluster
+import helpers.test_tools
+import http_server
+
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+
+
+@pytest.fixture(scope="module")
+def started_node():
+    cluster = helpers.cluster.ClickHouseCluster(__file__)
+    try:
+        node = cluster.add_instance("node", main_configs=[
+            os.path.join(SCRIPT_DIR, "configs", "config_send_crash_reports.xml")
+        ])
+        cluster.start()
+        yield node
+    finally:
+        cluster.shutdown()
+
+
+def test_send_segfault(started_node):
+    started_node.copy_file_to_container(os.path.join(SCRIPT_DIR, "http_server.py"), "/http_server.py")
+    started_node.exec_in_container(["bash", "-c", "python2 /http_server.py"], detach=True, user="root")
+    time.sleep(0.5)
+    started_node.exec_in_container(["bash", "-c", "pkill -11 clickhouse"], user="root")
+
+    result = None
+    for attempt in range(1, 6):
+        time.sleep(0.25 * attempt)
+        result = started_node.exec_in_container(['cat', http_server.RESULT_PATH], user='root')
+        if result == 'OK':
+            break
+        elif result == 'INITIAL_STATE':
+            continue
+        elif result:
+            assert False, 'Unexpected state: ' + result
+
+    assert result == 'OK', 'Crash report not sent'
From 0386e526b2c7cbf13e017f5445ea79eb4f24f67a Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 29 May 2020 23:03:59 +0300
Subject: [PATCH 016/318] grammar

---
programs/server/config.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index d07f20aa0e0..6086fcd7b1d 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -43,7 +43,7 @@ - + false From a84123195b7fe4677c417de9fe5483c5c283ec13 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:08:05 +0300 Subject: [PATCH 017/318] adjust comments --- base/daemon/CMakeLists.txt | 1 - base/daemon/SentryWriter.h | 1 + src/Common/config.h.in | 3 --- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 46fa4a0fe34..0b6a7188c83 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -9,5 +9,4 @@ target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickh if (USE_SENTRY) target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) -# target_include_directories (daemon SYSTEM BEFORE PRIVATE ${SENTRY_INCLUDE_DIR}) endif () \ No newline at end of file diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h index ee45ae4f203..0b3f1ddd2b7 100644 --- a/base/daemon/SentryWriter.h +++ b/base/daemon/SentryWriter.h @@ -7,6 +7,7 @@ #include +/// Sends crash reports to ClickHouse core developer team via https://sentry.io class SentryWriter { public: diff --git a/src/Common/config.h.in b/src/Common/config.h.in index 08fa03d659f..ed818b53167 100644 --- a/src/Common/config.h.in +++ b/src/Common/config.h.in @@ -9,9 +9,6 @@ #cmakedefine01 USE_BROTLI #cmakedefine01 USE_UNWIND #cmakedefine01 USE_OPENCL -<<<<<<< HEAD #cmakedefine01 USE_SENTRY -======= #cmakedefine01 USE_GRPC ->>>>>>> a4e40fb5f209539cfee6af5da7f27c1c96e02eac #cmakedefine01 CLICKHOUSE_SPLIT_BINARY From d0339413993371c37577ce513ecf0555683878b8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:20:28 +0300 Subject: [PATCH 018/318] try to fix merge issues --- contrib/grpc | 2 +- contrib/jemalloc | 2 +- programs/server/Server.cpp | 6 ------ 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/contrib/grpc b/contrib/grpc index c1d176528fd..8aea4e168e7 160000 --- a/contrib/grpc +++ b/contrib/grpc @@ -1 +1 @@ -Subproject commit c1d176528fd8da9dd4066d16554bcd216d29033f +Subproject commit 8aea4e168e78f3eb9828080740fc8cb73d53bf79 diff --git a/contrib/jemalloc b/contrib/jemalloc index cd2931ad9bb..ea6b3e973b4 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit cd2931ad9bbd78208565716ab102e86d858c2fff +Subproject commit ea6b3e973b477b8061e0076bb257dbd7f3faa756 diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 77dc5305fa8..ce1d35e65d4 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -59,13 +59,7 @@ #include #include #include -<<<<<<< HEAD -#include "MySQLHandlerFactory.h" -#include - -======= #include ->>>>>>> a4e40fb5f209539cfee6af5da7f27c1c96e02eac #if !defined(ARCADIA_BUILD) # include "config_core.h" From b6e4a2ec61c928a433037fefa0657df7ebf8b8ac Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Fri, 29 May 2020 23:21:53 +0300 Subject: [PATCH 019/318] one more merge issue --- contrib/cppkafka | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/cppkafka b/contrib/cppkafka index 9b184d881c1..f555ee36aaa 160000 --- a/contrib/cppkafka +++ b/contrib/cppkafka @@ -1 +1 @@ -Subproject commit 9b184d881c15cc50784b28688c7c99d3d764db24 +Subproject commit 
f555ee36aaa74d17ca0dab3ce472070a610b2966 From 69dedcbe21bd323f3d87508ba78f36b587a7dff5 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:28:55 +0300 Subject: [PATCH 020/318] Move sending crash reports below logging --- base/daemon/BaseDaemon.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 4fd5bfa1379..72da1984287 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -249,7 +249,6 @@ private: UInt32 thread_num, const std::string & query_id) const { - SentryWriter::onFault(sig, info, context, stack_trace); LOG_FATAL(log, "########################################"); { @@ -282,6 +281,9 @@ private: /// Write symbolized stack trace line by line for better grep-ability. stack_trace.toStringEveryLine([&](const std::string & s) { LOG_FATAL(log, s); }); + + /// Send crash report if configured + SentryWriter::onFault(sig, info, context, stack_trace); } }; From f88b85625a44cbcb1628dc76283567c6fceeedd7 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:36:47 +0300 Subject: [PATCH 021/318] style --- base/daemon/SentryWriter.cpp | 71 +++++++++++++++++------------------- src/Common/StackTrace.cpp | 52 +++++++++++++------------- 2 files changed, 61 insertions(+), 62 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index d5c2766cf21..7bbf3c62e97 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -3,35 +3,38 @@ #include #include -#include #include #include #if !defined(ARCADIA_BUILD) -# include "Common/config_version.h" +# include "Common/config_version.h" +# include #endif #if USE_SENTRY -#include +# include #endif -namespace { - static bool initialized = false; +namespace +{ +static bool initialized = false; - void setExtras() { +void setExtras() +{ #if USE_SENTRY - sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); - sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); - sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); - sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION)); - sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); - sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); - sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); + sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); + sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); + sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); + sentry_set_extra("version_revision", sentry_value_new_int32(VERSION_REVISION)); + sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR)); + sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR)); + sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH)); #endif - } +} } -void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { +void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) +{ #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); @@ -44,14 +47,10 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { } if (enabled) { - const std::string & endpoint = config.getString( - "send_crash_reports.endpoint", - "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277" - ); - const 
std::string & temp_folder_path = config.getString( - "send_crash_reports.tmp_path", - config.getString("tmp_path", Poco::Path::temp()) + "sentry/" - ); + const std::string & endpoint + = config.getString("send_crash_reports.endpoint", "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); + const std::string & temp_folder_path + = config.getString("send_crash_reports.tmp_path", config.getString("tmp_path", Poco::Path::temp()) + "sentry/"); Poco::File(temp_folder_path).createDirectories(); sentry_options_t * options = sentry_options_new(); @@ -62,9 +61,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { } sentry_options_set_dsn(options, endpoint.c_str()); sentry_options_set_database_path(options, temp_folder_path.c_str()); - if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) { + if (strstr(VERSION_DESCRIBE, "-stable") || strstr(VERSION_DESCRIBE, "-lts")) + { sentry_options_set_environment(options, "prod"); - } else { + } + else + { sentry_options_set_environment(options, "test"); } int init_status = sentry_init(options); @@ -75,14 +77,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { &Logger::get("SentryWriter"), "Sending crash reports is initialized with {} endpoint and {} temp folder", endpoint, - temp_folder_path - ); + temp_folder_path); } else { LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialized with {} status", init_status); } - } else { @@ -91,20 +91,17 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { #endif } -void SentryWriter::shutdown() { +void SentryWriter::shutdown() +{ #if USE_SENTRY - if (initialized) { + if (initialized) + { sentry_shutdown(); } #endif } -void SentryWriter::onFault( - int sig, - const siginfo_t & info, - const ucontext_t & context, - const StackTrace & stack_trace - ) +void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY if (initialized) @@ -178,4 +175,4 @@ void SentryWriter::onFault( LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); } #endif -} +} \ No newline at end of file diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 5cc8c43a27a..2fd554fd008 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -1,12 +1,12 @@ #include +#include #include #include -#include #include +#include #include #include -#include #include #include @@ -26,8 +26,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext std::stringstream error; switch (sig) { - case SIGSEGV: - { + case SIGSEGV: { /// Print info about address and reason. 
if (nullptr == info.si_addr) error << "Address: NULL pointer."; @@ -59,8 +58,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGBUS: - { + case SIGBUS: { switch (info.si_code) { case BUS_ADRALN: @@ -92,8 +90,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGILL: - { + case SIGILL: { switch (info.si_code) { case ILL_ILLOPC: @@ -127,8 +124,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGFPE: - { + case SIGFPE: { switch (info.si_code) { case FPE_INTDIV: @@ -162,8 +158,7 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGTSTP: - { + case SIGTSTP: { error << "This is a signal used for debugging purposes by the user."; break; } @@ -176,13 +171,13 @@ static void * getCallerAddress(const ucontext_t & context) { #if defined(__x86_64__) /// Get the address at the time the signal was raised from the RIP (x86-64) -#if defined(__FreeBSD__) +# if defined(__FreeBSD__) return reinterpret_cast(context.uc_mcontext.mc_rip); -#elif defined(__APPLE__) +# elif defined(__APPLE__) return reinterpret_cast(context.uc_mcontext->__ss.__rip); -#else +# else return reinterpret_cast(context.uc_mcontext.gregs[REG_RIP]); -#endif +# endif #elif defined(__aarch64__) return reinterpret_cast(context.uc_mcontext.pc); #else @@ -197,7 +192,8 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance(); std::unordered_map dwarfs; - for (size_t i = 0; i < offset; ++i) { + for (size_t i = 0; i < offset; ++i) + { frames.value()[i].virtual_addr = frame_pointers[i]; } @@ -217,7 +213,8 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t auto dwarf_it = dwarfs.try_emplace(object->name, *object->elf).first; DB::Dwarf::LocationInfo location; - if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) { + if (dwarf_it->second.findAddress(uintptr_t(current_frame.physical_addr), location, DB::Dwarf::LocationInfoMode::FAST)) + { current_frame.file = location.file.toString(); current_frame.line = location.line; } @@ -239,8 +236,9 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t current_frame.symbol = "?"; } } -# else - for (size_t i = 0; i < size; ++i) { +#else + for (size_t i = 0; i < size; ++i) + { frames.value()[i].virtual_addr = frame_pointers[i]; } UNUSED(offset); @@ -308,14 +306,16 @@ const StackTrace::FramePointers & StackTrace::getFramePointers() const const StackTrace::Frames & StackTrace::getFrames() const { - if (!frames.has_value()) { + if (!frames.has_value()) + { frames = {{}}; symbolize(frame_pointers.data(), offset, size, frames); } return frames; } -static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) +static void +toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback) { if (size == 0) return callback(""); @@ -324,7 +324,7 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs for (size_t i = offset; i < size; ++i) { - const StackTrace::Frame& current_frame = frames.value()[i]; + const StackTrace::Frame & current_frame = frames.value()[i]; out << i << ". 
"; if (current_frame.file.has_value() && current_frame.line.has_value()) @@ -338,7 +338,8 @@ static void toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offs } out << " @ " << current_frame.physical_addr; - if (current_frame.object.has_value()) { + if (current_frame.object.has_value()) + { out << " in " << current_frame.object.value(); } @@ -362,7 +363,8 @@ void StackTrace::toStringEveryLine(std::function call toStringEveryLineImpl(getFrames(), offset, size, std::move(callback)); } -void StackTrace::resetFrames() { +void StackTrace::resetFrames() +{ frames.reset(); } From 444026494f9383d465ab9b9611c8bfc935661d85 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 00:52:49 +0300 Subject: [PATCH 022/318] brief docs --- .../settings.md | 31 ++++++++++++++++--- 1 file changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 5961c701283..a103473a4ea 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -307,11 +307,11 @@ Logging settings. Keys: -- level – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. -- log – The log file. Contains all the entries according to `level`. -- errorlog – Error log file. -- size – Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. -- count – The number of archived log files that ClickHouse stores. +- `level` – Logging level. Acceptable values: `trace`, `debug`, `information`, `warning`, `error`. +- `log` – The log file. Contains all the entries according to `level`. +- `errorlog` – Error log file. +- `size` – Size of the file. Applies to `log`and`errorlog`. Once the file reaches `size`, ClickHouse archives and renames it, and creates a new log file in its place. +- `count` – The number of archived log files that ClickHouse stores. **Example** @@ -348,6 +348,27 @@ Keys: Default value: `LOG_USER` if `address` is specified, `LOG_DAEMON otherwise.` - format – Message format. Possible values: `bsd` and `syslog.` +## send_crash_reports {#server_configuration_parameters-logger} + +Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). +Enabling it, especially in pre-production environments, is strongly appreciated. + +Keys: + +- `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports. +- `endpoint` – Overrides the Sentry endpoint. +- `debug` - Sets the Sentry client into debug mode. +- `tmp_path` - Filesystem path for temporary crash report state. + + +**Recommended way to use** + +``` xml + + true + +``` + ## macros {#macros} Parameter substitutions for replicated tables. 
From 95ca1c648da4e95ec1cb252afd6e79216f4f5aec Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 10:59:43 +0300 Subject: [PATCH 023/318] fix __msan_unpoison --- src/Common/StackTrace.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 2fd554fd008..e38bfa25dff 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -378,13 +378,13 @@ std::string StackTrace::toString() const return func_cached(frame_pointers.data(), offset, size); } -std::string StackTrace::toString(void ** frame_pointers, size_t offset, size_t size) +std::string StackTrace::toString(void ** frame_pointers_, size_t offset, size_t size) { - __msan_unpoison(frames_, size * sizeof(*frames_)); + __msan_unpoison(frame_pointers_, size * sizeof(*frame_pointers_)); StackTrace::FramePointers frame_pointers_copy{}; for (size_t i = 0; i < size; ++i) - frame_pointers_copy[i] = frame_pointers[i]; + frame_pointers_copy[i] = frame_pointers_[i]; static SimpleCache func_cached; return func_cached(frame_pointers_copy.data(), offset, size); From 6dfe44f437c393ded72f8859ca16d9625b8fbb53 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:01:15 +0300 Subject: [PATCH 024/318] style --- src/Common/StackTrace.cpp | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index e38bfa25dff..aa78ab62f9b 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -26,7 +26,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext std::stringstream error; switch (sig) { - case SIGSEGV: { + case SIGSEGV: + { /// Print info about address and reason. if (nullptr == info.si_addr) error << "Address: NULL pointer."; @@ -58,7 +59,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGBUS: { + case SIGBUS: + { switch (info.si_code) { case BUS_ADRALN: @@ -90,7 +92,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGILL: { + case SIGILL: + { switch (info.si_code) { case ILL_ILLOPC: @@ -124,7 +127,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGFPE: { + case SIGFPE: + { switch (info.si_code) { case FPE_INTDIV: @@ -158,7 +162,8 @@ std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext break; } - case SIGTSTP: { + case SIGTSTP: + { error << "This is a signal used for debugging purposes by the user."; break; } From 77d8c9bacae6b833a28a85ab45442355c0f4b2df Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 11:02:13 +0300 Subject: [PATCH 025/318] Add anonymize option and version tag --- base/daemon/SentryWriter.cpp | 17 +++++++++++++---- cmake/version.cmake | 1 + .../server-configuration-parameters/settings.md | 1 + src/Common/config_version.h.in | 1 + 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 7bbf3c62e97..878ce6548aa 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -18,10 +18,16 @@ namespace { static bool initialized = false; +static bool anonymize = false; void setExtras() { #if USE_SENTRY + if (!anonymize) + { + sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str())); + } + sentry_set_tag("version", VERSION_STRING_SHORT); sentry_set_extra("version_githash", 
sentry_value_new_string(VERSION_GITHASH)); sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); @@ -69,15 +75,19 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { sentry_options_set_environment(options, "test"); } + int init_status = sentry_init(options); if (!init_status) { initialized = true; + anonymize = config.getBool("send_crash_reports.anonymize", false); + const std::string& anonymize_status = anonymize ? " (anonymized)" : ""; LOG_INFO( &Logger::get("SentryWriter"), - "Sending crash reports is initialized with {} endpoint and {} temp folder", + "Sending crash reports is initialized with {} endpoint and {} temp folder{}", endpoint, - temp_folder_path); + temp_folder_path, + anonymize_status); } else { @@ -109,7 +119,6 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c const std::string & error_message = signalToErrorMessage(sig, info, context); sentry_value_t event = sentry_value_new_message_event(SENTRY_LEVEL_FATAL, "fault", error_message.c_str()); sentry_set_tag("signal", strsignal(sig)); - sentry_set_tag("server_name", getFQDNOrHostName().c_str()); sentry_set_extra("signal_number", sentry_value_new_int32(sig)); setExtras(); @@ -175,4 +184,4 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); } #endif -} \ No newline at end of file +} diff --git a/cmake/version.cmake b/cmake/version.cmake index eea17f68c47..963f291c0f3 100644 --- a/cmake/version.cmake +++ b/cmake/version.cmake @@ -14,6 +14,7 @@ endif () set (VERSION_NAME "${PROJECT_NAME}") set (VERSION_FULL "${VERSION_NAME} ${VERSION_STRING}") set (VERSION_SO "${VERSION_STRING}") +set (VERSION_STRING_SHORT "${VERSION_MAJOR}.${VERSION_MINOR}") math (EXPR VERSION_INTEGER "${VERSION_PATCH} + ${VERSION_MINOR}*1000 + ${VERSION_MAJOR}*1000000") diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index a103473a4ea..ba8f3df9ad0 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -357,6 +357,7 @@ Keys: - `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports. - `endpoint` – Overrides the Sentry endpoint. +- `anonymize` - Avoid attaching the server hostname to crash report. - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. 
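The `VERSION_INTEGER` expression next to the new `VERSION_STRING_SHORT` in `cmake/version.cmake` packs the three version components into a single integer. A quick sanity check of the arithmetic in C++ (version 20.5.1 is an assumed example, not taken from the patches):

``` cpp
#include <cstdio>

/// Mirrors cmake/version.cmake: VERSION_INTEGER = PATCH + MINOR*1000 + MAJOR*1000000
constexpr int versionInteger(int major, int minor, int patch)
{
    return patch + minor * 1000 + major * 1000000;
}

int main()
{
    std::printf("VERSION_STRING_SHORT: %d.%d\n", 20, 5);            /// "20.5"
    std::printf("VERSION_INTEGER: %d\n", versionInteger(20, 5, 1)); /// 20005001
    return 0;
}
```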
diff --git a/src/Common/config_version.h.in b/src/Common/config_version.h.in
index bc90e63e39c..c3c0c6df87b 100644
--- a/src/Common/config_version.h.in
+++ b/src/Common/config_version.h.in
@@ -20,6 +20,7 @@
 #cmakedefine VERSION_MINOR @VERSION_MINOR@
 #cmakedefine VERSION_PATCH @VERSION_PATCH@
 #cmakedefine VERSION_STRING "@VERSION_STRING@"
+#cmakedefine VERSION_STRING_SHORT "@VERSION_STRING_SHORT@"
 #cmakedefine VERSION_OFFICIAL "@VERSION_OFFICIAL@"
 #cmakedefine VERSION_FULL "@VERSION_FULL@"
 #cmakedefine VERSION_DESCRIBE "@VERSION_DESCRIBE@"
From d154415a5bedf24a0217306c1d7798b718a2995a Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Sat, 30 May 2020 11:13:04 +0300
Subject: [PATCH 026/318] adjust comments

---
 .../en/operations/server-configuration-parameters/settings.md | 4 +++-
 programs/server/config.xml | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index ba8f3df9ad0..3dc68e7fa6a 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -351,7 +351,9 @@ Keys:
 ## send_crash_reports {#server_configuration_parameters-send_crash_reports}
 
 Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io).
-Enabling it, especially in pre-production environments, is strongly appreciated.
+Enabling it, especially in pre-production environments, is greatly appreciated.
+
+The server will need access to the public Internet for this feature to function properly.
 
 Keys:
 
diff --git a/programs/server/config.xml b/programs/server/config.xml
index 6086fcd7b1d..d8d75222bc0 100644
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@@ -46,6 +46,8 @@
         false
+
+        false
 
From 52f7b9545b17304aa8e23373b77ab620fb338d50 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Sat, 30 May 2020 11:24:21 +0300
Subject: [PATCH 027/318] Add http_proxy option

---
 base/daemon/SentryWriter.cpp | 6 ++++++
 .../operations/server-configuration-parameters/settings.md | 3 ++-
 2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 878ce6548aa..b2b1c69af8c 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -76,6 +76,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
         sentry_options_set_environment(options, "test");
     }
 
+    const std::string & http_proxy = config.getString("send_crash_reports.http_proxy", "");
+    if (!http_proxy.empty())
+    {
+        sentry_options_set_http_proxy(options, http_proxy.c_str());
+    }
+
     int init_status = sentry_init(options);
     if (!init_status)
     {
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 3dc68e7fa6a..194293d5a19 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -359,7 +359,8 @@ Keys:
 
 - `enabled` – Boolean flag to enable the feature. Set to `true` to allow sending crash reports.
 - `endpoint` – Overrides the Sentry endpoint.
-- `anonymize` - Avoid attaching the server hostname to crash report.
+- `anonymize` - Avoid attaching the server hostname to crash report.
+- `http_proxy` - Configure HTTP proxy for sending crash reports.
 - `debug` - Sets the Sentry client into debug mode.
- `tmp_path` - Filesystem path for temporary crash report state. From be94d8454dcefa117d43a8d96a01e4164be4ea51 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Sat, 30 May 2020 13:54:57 +0300 Subject: [PATCH 028/318] fix Arcadia build --- base/daemon/SentryWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index b2b1c69af8c..2fd846b720a 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -11,7 +11,7 @@ #endif #if USE_SENTRY -# include +# include // Y_IGNORE #endif From 3543da3ca463fc4deb1002e5b5bf91df13306931 Mon Sep 17 00:00:00 2001 From: Sofia Antipushina Date: Sun, 31 May 2020 17:44:49 +0300 Subject: [PATCH 029/318] fix stylecheck --- src/AggregateFunctions/AggregateFunctionDistinct.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 57e17ffb13c..32f5df6d8f0 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -106,7 +106,8 @@ public: void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { SipHash hash; - for (size_t i = 0; i < num_arguments; ++i) { + for (size_t i = 0; i < num_arguments; ++i) + { columns[i]->updateHashWithValue(row_num, hash); } From fd3279f9f16e704d78a0fd425216679651bc45ac Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 00:02:08 +0300 Subject: [PATCH 030/318] trigger ci --- docs/en/operations/server-configuration-parameters/settings.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index 194293d5a19..a3a6d1a0955 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -364,7 +364,6 @@ Keys: - `debug` - Sets the Sentry client into debug mode. - `tmp_path` - Filesystem path for temporary crash report state. 
- **Recommended way to use** ``` xml From 81989bd95a91f0e1d70fb49bcfb4167ecbdd59c1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 10:51:22 +0300 Subject: [PATCH 031/318] submodule via https --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index daa5d12a62c..4175eb223db 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = git@github.com:getsentry/sentry-native.git + url = https://github.com/getsentry/sentry-native.git From ba112e84cb10891cfdfa561ef6da4fd40693693e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 13:30:11 +0300 Subject: [PATCH 032/318] trigger ci From 3e0811f297cfdc77c657aa0a8ce04eb387e55ec6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 17:15:14 +0300 Subject: [PATCH 033/318] Adapt to recent logging changes --- base/daemon/SentryWriter.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 2fd846b720a..f7edc8d1e93 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -44,6 +44,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); + auto logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { if (debug || (strlen(VERSION_OFFICIAL) > 0)) @@ -89,7 +90,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) anonymize = config.getBool("send_crash_reports.anonymize", false); const std::string& anonymize_status = anonymize ? 
" (anonymized)" : ""; LOG_INFO( - &Logger::get("SentryWriter"), + logger, "Sending crash reports is initialized with {} endpoint and {} temp folder{}", endpoint, temp_folder_path, @@ -97,12 +98,12 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) } else { - LOG_WARNING(&Logger::get("SentryWriter"), "Sending crash reports failed to initialized with {} status", init_status); + LOG_WARNING(logger, "Sending crash reports failed to initialized with {} status", init_status); } } else { - LOG_INFO(&Logger::get("SentryWriter"), "Sending crash reports is disabled"); + LOG_INFO(logger, "Sending crash reports is disabled"); } #endif } @@ -120,6 +121,7 @@ void SentryWriter::shutdown() void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY + auto logger = &Poco::Logger::get("SentryWriter"); if (initialized) { const std::string & error_message = signalToErrorMessage(sig, info, context); @@ -181,13 +183,13 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c sentry_value_set_by_key(event, "threads", threads); - LOG_INFO(&Logger::get("SentryWriter"), "Sending crash report"); + LOG_INFO(logger, "Sending crash report"); sentry_capture_event(event); shutdown(); } else { - LOG_INFO(&Logger::get("SentryWriter"), "Not sending crash report"); + LOG_INFO(logger, "Not sending crash report"); } #endif } From 9ad1bb8d9398cb83a95b39df8d36a17d340afc16 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 18:26:20 +0300 Subject: [PATCH 034/318] trigger ci From acf22bfb19292c5ae56e54dcedd06895577e2914 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 19:42:59 +0300 Subject: [PATCH 035/318] fix sanitizers build --- base/daemon/BaseDaemon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 3aeebd369e5..9da8849342d 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -317,7 +317,7 @@ static void sanitizerDeathCallback() std::stringstream bare_stacktrace; bare_stacktrace << "Stack trace:"; for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i) - bare_stacktrace << ' ' << stack_trace.getFrames()[i]; + bare_stacktrace << ' ' << stack_trace.getFramePointers()[i]; LOG_FATAL(log, bare_stacktrace.str()); } From 1ce25238f80fc9435c82766f44da896639e97ee1 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 19:49:11 +0300 Subject: [PATCH 036/318] try fix some more build issues --- cmake/find/sentry.cmake | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index f94b53ffb00..30b8b28f6f1 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,15 +5,17 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -option (USE_SENTRY "Use Sentry" ON) +if (NOT OS_FREEBSD AND NOT UNBUNDLED) + option (USE_SENTRY "Use Sentry" ON) -set (BUILD_SHARED_LIBS OFF) -set (SENTRY_PIC OFF) -set (SENTRY_BACKEND "none") -set (SENTRY_TRANSPORT "curl") -set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) -set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) + set (BUILD_SHARED_LIBS OFF) + set (SENTRY_PIC OFF) + set (SENTRY_BACKEND "none") + set (SENTRY_TRANSPORT "curl") + set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib) + set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include) -message (STATUS "Using 
sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") + message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}") -include_directories("${SENTRY_INCLUDE_DIR}") \ No newline at end of file + include_directories("${SENTRY_INCLUDE_DIR}") +endif () \ No newline at end of file From f6e69355faa1f131fd22bfca93bc1cee0c1aca1e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:10:19 +0300 Subject: [PATCH 037/318] experiment --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index aa78ab62f9b..526edd7792f 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -3,8 +3,8 @@ #include #include #include -#include #include +#include #include #include From 1797a47a9f26d4a97e8aa044af5a45a6c34e6d4f Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:25:25 +0300 Subject: [PATCH 038/318] fix clang warnings --- base/daemon/SentryWriter.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index f7edc8d1e93..6bfc07ea2fb 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -17,8 +17,8 @@ namespace { -static bool initialized = false; -static bool anonymize = false; +bool initialized = false; +bool anonymize = false; void setExtras() { @@ -44,7 +44,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) #if USE_SENTRY bool enabled = false; bool debug = config.getBool("send_crash_reports.debug", false); - auto logger = &Poco::Logger::get("SentryWriter"); + auto * logger = &Poco::Logger::get("SentryWriter"); if (config.getBool("send_crash_reports.enabled", false)) { if (debug || (strlen(VERSION_OFFICIAL) > 0)) @@ -121,7 +121,7 @@ void SentryWriter::shutdown() void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & context, const StackTrace & stack_trace) { #if USE_SENTRY - auto logger = &Poco::Logger::get("SentryWriter"); + auto * logger = &Poco::Logger::get("SentryWriter"); if (initialized) { const std::string & error_message = signalToErrorMessage(sig, info, context); From 8babd4d18c093f44d96e616a31d5551b24e73de2 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:36:33 +0300 Subject: [PATCH 039/318] experiment --- cmake/find/sentry.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 30b8b28f6f1..06312b64495 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -6,6 +6,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") endif () if (NOT OS_FREEBSD AND NOT UNBUNDLED) + cmake_policy (SET CMP0077 NEW) option (USE_SENTRY "Use Sentry" ON) set (BUILD_SHARED_LIBS OFF) From 965204dfb161953616b6fa5168bde03375d87205 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Mon, 1 Jun 2020 21:48:34 +0300 Subject: [PATCH 040/318] Try to fix the msan build --- .gitmodules | 2 +- contrib/sentry-native | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitmodules b/.gitmodules index 4175eb223db..ff4e644f657 100644 --- a/.gitmodules +++ b/.gitmodules @@ -162,4 +162,4 @@ url = https://github.com/fmtlib/fmt.git [submodule "contrib/sentry-native"] path = contrib/sentry-native - url = https://github.com/getsentry/sentry-native.git + url = https://github.com/blinkov/sentry-native.git diff --git a/contrib/sentry-native b/contrib/sentry-native index 3bfce2d17c1..9e214a1265a 160000 --- a/contrib/sentry-native +++ b/contrib/sentry-native 
@@ -1 +1 @@
-Subproject commit 3bfce2d17c1b80fbbaae83bb5ef41c1b290d34fb
+Subproject commit 9e214a1265a4ea628c21045b7f43d1aec15e385d
From 65ff11aeac99061b53de62ab120d9ff75ae0dc03 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Mon, 1 Jun 2020 22:49:00 +0300
Subject: [PATCH 041/318] old cmake compatibility

---
 cmake/find/sentry.cmake | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 06312b64495..d10c15cd334 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -6,7 +6,9 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
 endif ()
 
 if (NOT OS_FREEBSD AND NOT UNBUNDLED)
-    cmake_policy (SET CMP0077 NEW)
+    if (POLICY CMP0077)
+        cmake_policy (SET CMP0077 NEW)
+    endif ()
     option (USE_SENTRY "Use Sentry" ON)
 
     set (BUILD_SHARED_LIBS OFF)
From e9a04f7741550a48d6c963fe4a225bf3ce616141 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Mon, 1 Jun 2020 23:48:42 +0300
Subject: [PATCH 042/318] more build fixes

---
 base/daemon/SentryWriter.cpp | 4 ++--
 src/Common/StackTrace.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 6bfc07ea2fb..95189b72e81 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -145,8 +145,8 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
         {
             const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i];
             sentry_value_t frame = sentry_value_new_object();
-            unsigned long long frame_ptr = reinterpret_cast(current_frame.virtual_addr);
-            snprintf(instruction_addr, sizeof(instruction_addr), "0x%llx", frame_ptr);
+            UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr);
+            std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr);
             sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr));
 
             if (current_frame.symbol.has_value())
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 526edd7792f..e71ce1e1139 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -192,7 +192,7 @@ static void * getCallerAddress(const ucontext_t & context)
 
 static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames)
 {
-#if defined(__ELF__) && !defined(__FreeBSD__)
+#if defined(__ELF__) && !defined(__FreeBSD__) && !defined(ARCADIA_BUILD)
 
     const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
     std::unordered_map dwarfs;
From 5a32d7913524a139ce43b835f53f73e1f0b42943 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Mon, 1 Jun 2020 23:55:32 +0300
Subject: [PATCH 043/318] experiment

---
 cmake/find/sentry.cmake | 2 +-
 src/Common/StackTrace.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index d10c15cd334..309f63e9165 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()
 
-if (NOT OS_FREEBSD AND NOT UNBUNDLED)
+if (NOT OS_FREEBSD AND NOT UNBUNDLED AND NOT SPLITTED AND NOT (COMPILER_CLANG AND OS_DARWIN))
     if (POLICY CMP0077)
         cmake_policy (SET CMP0077 NEW)
     endif ()
     option (USE_SENTRY "Use Sentry" ON)
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index e71ce1e1139..dbe3d005be7 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const
 {
     if (!frames.has_value())
     {
- frames = {{}}; + frames = std::array(); symbolize(frame_pointers.data(), offset, size, frames); } return frames; From 35734aadde1b7cd0f68cc6a513bab1e8497229f8 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:15:11 +0300 Subject: [PATCH 044/318] apply comment --- src/Common/StackTrace.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index dbe3d005be7..793de7709cc 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const { if (!frames.has_value()) { - frames = std::array(); + frames.emplace({}); symbolize(frame_pointers.data(), offset, size, frames); } return frames; From 03f4aa19aa64e10e8433e938931e95400db7fdf6 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:16:41 +0300 Subject: [PATCH 045/318] apply comment --- src/Common/StackTrace.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp index 793de7709cc..819f74f37cb 100644 --- a/src/Common/StackTrace.cpp +++ b/src/Common/StackTrace.cpp @@ -313,7 +313,7 @@ const StackTrace::Frames & StackTrace::getFrames() const { if (!frames.has_value()) { - frames.emplace({}); + frames.emplace(); symbolize(frame_pointers.data(), offset, size, frames); } return frames; @@ -357,7 +357,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs { std::stringstream out; StackTrace::Frames frames{}; - frames = {{}}; + frames.emplace(); symbolize(frame_pointers, offset, size, frames); toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; }); return out.str(); From 40f6e559e2d2424e29604d5191dee6b941bc3d6e Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 08:29:13 +0300 Subject: [PATCH 046/318] fix compiling when disabled --- base/daemon/SentryWriter.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 95189b72e81..15602be2581 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -3,6 +3,7 @@ #include #include +#include #include #include #if !defined(ARCADIA_BUILD) @@ -105,6 +106,8 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { LOG_INFO(logger, "Sending crash reports is disabled"); } +#else + UNUSED(config); #endif } @@ -191,5 +194,10 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c { LOG_INFO(logger, "Not sending crash report"); } +#else + UNUSED(sig); + UNUSED(info); + UNUSED(context); + UNUSED(stack_trace); #endif } From 9c1ac2f1c1af35b20bb7ea031370a3c8e347f4df Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 2 Jun 2020 09:46:36 +0300 Subject: [PATCH 047/318] experiment --- cmake/find/sentry.cmake | 9 +-------- contrib/CMakeLists.txt | 2 ++ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake index 309f63e9165..94c4f4a6e93 100644 --- a/cmake/find/sentry.cmake +++ b/cmake/find/sentry.cmake @@ -5,16 +5,9 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h") return() endif () -if (NOT OS_FREEBSD AND NOT UNBUNDLED AND NOT SPLITTED AND NOT (COMPILER_CLANG AND OS_DARWIN)) - if (POLICY CMP0077) - cmake_policy (SET CMP0077 NEW) - endif () +if (NOT OS_FREEBSD) option (USE_SENTRY "Use Sentry" ON) - set (BUILD_SHARED_LIBS OFF) - set (SENTRY_PIC OFF) - set (SENTRY_BACKEND "none") - set 
(SENTRY_TRANSPORT "curl")
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index ea13969db16..d9af4bc0ac5 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -321,6 +321,8 @@ if (USE_FASTOPS)
 endif()
 
 if (USE_SENTRY)
+    set (SENTRY_BACKEND "none")
+    set (SENTRY_TRANSPORT "curl")
     add_subdirectory (sentry-native)
 endif()
 

From 280eea1e12fa4770f114ad952efa0be0eecc3e34 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 10:33:11 +0300
Subject: [PATCH 048/318] fix compiling when disabled

---
 base/daemon/SentryWriter.cpp | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 15602be2581..c8197d8a160 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -16,14 +16,16 @@
 #endif
 
 
+#if USE_SENTRY
 namespace
 {
+
 bool initialized = false;
 bool anonymize = false;
 
 void setExtras()
 {
-#if USE_SENTRY
+
     if (!anonymize)
     {
         sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str()));
@@ -36,9 +38,9 @@ void setExtras()
     sentry_set_extra("version_major", sentry_value_new_int32(VERSION_MAJOR));
     sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
     sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
+}
+}
 #endif
-}
-}
 
 void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
 {

From 0e8d559d832df40c23942673aac254728b0e77b1 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 13:13:21 +0300
Subject: [PATCH 049/318] disable for splitted

---
 cmake/find/sentry.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 94c4f4a6e93..4a5fe6f2478 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()
 
-if (NOT OS_FREEBSD)
+if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES)
     option (USE_SENTRY "Use Sentry" ON)
 
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)

From 5036ad7c6afad995da38bf38de76f1e7134a4137 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 13:13:29 +0300
Subject: [PATCH 050/318] back to upstream

---
 .gitmodules           | 2 +-
 contrib/sentry-native | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index ff4e644f657..4175eb223db 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -162,4 +162,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "contrib/sentry-native"]
 	path = contrib/sentry-native
-	url = https://github.com/blinkov/sentry-native.git
+	url = https://github.com/getsentry/sentry-native.git
diff --git a/contrib/sentry-native b/contrib/sentry-native
index 9e214a1265a..aed9c18536d 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit 9e214a1265a4ea628c21045b7f43d1aec15e385d
+Subproject commit aed9c18536dff1851b1240f84263a55ef716acb6

From 862693d78dc1924f472646584fbd70f955239c0c Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 16:59:45 +0300
Subject: [PATCH 051/318] change sentry-native commit

---
 contrib/sentry-native | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/contrib/sentry-native b/contrib/sentry-native
index aed9c18536d..b48c21d2440 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit aed9c18536dff1851b1240f84263a55ef716acb6
+Subproject commit b48c21d244092658d6e2d1bb243b705fd968b9f7

From 711e7d101da19c8364b761ee09ca042bf9c680f8 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 21:50:55 +0300
Subject: [PATCH 052/318] experiment

---
 base/CMakeLists.txt        |  5 +++++
 base/daemon/CMakeLists.txt | 10 +++-------
 cmake/find/sentry.cmake    |  2 ++
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt
index cfa54fe2ca4..ad3bf56cd00 100644
--- a/base/CMakeLists.txt
+++ b/base/CMakeLists.txt
@@ -11,3 +11,8 @@ add_subdirectory (widechar_width)
 if (USE_MYSQL)
     add_subdirectory (mysqlxx)
 endif ()
+
+if (USE_SENTRY)
+    target_link_libraries (daemon PRIVATE curl)
+    target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
+endif ()
\ No newline at end of file
diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt
index 0b6a7188c83..8f70f30aeb1 100644
--- a/base/daemon/CMakeLists.txt
+++ b/base/daemon/CMakeLists.txt
@@ -1,12 +1,8 @@
 add_library (daemon
     BaseDaemon.cpp
     GraphiteWriter.cpp
-    SentryWriter.cpp)
+    SentryWriter.cpp
+)
 
 target_include_directories (daemon PUBLIC ..)
-target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
-
-if (USE_SENTRY)
-    target_link_libraries (daemon PRIVATE curl)
-    target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
-endif ()
\ No newline at end of file
+target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
\ No newline at end of file
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 4a5fe6f2478..449d995935d 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -8,6 +8,8 @@ endif ()
 if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES)
     option (USE_SENTRY "Use Sentry" ON)
 
+    set (SENTRY_TRANSPORT "url")
+    set (SENTRY_BACKEND "none")
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)

From 921b7c748000cc4a33c9db618716922fc34f1f17 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 22:25:34 +0300
Subject: [PATCH 053/318] partial revert

---
 base/CMakeLists.txt        | 5 -----
 base/daemon/CMakeLists.txt | 7 ++++++-
 cmake/find/sentry.cmake    | 2 +-
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt
index ad3bf56cd00..a8dedec9269 100644
--- a/base/CMakeLists.txt
+++ b/base/CMakeLists.txt
@@ -10,9 +10,4 @@ add_subdirectory (widechar_width)
 
 if (USE_MYSQL)
     add_subdirectory (mysqlxx)
-endif ()
-
-if (USE_SENTRY)
-    target_link_libraries (daemon PRIVATE curl)
-    target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
 endif ()
\ No newline at end of file
diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt
index 8f70f30aeb1..36de193bccd 100644
--- a/base/daemon/CMakeLists.txt
+++ b/base/daemon/CMakeLists.txt
@@ -5,4 +5,9 @@ add_library (daemon
 )
 
 target_include_directories (daemon PUBLIC ..)
-target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
\ No newline at end of file
+target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickhouse_common_config common ${EXECINFO_LIBRARIES})
+
+if (USE_SENTRY)
+    target_link_libraries (daemon PRIVATE curl)
+    target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY})
+endif ()
\ No newline at end of file
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 449d995935d..6848dc00b43 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()
 
-if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES)
+if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG))
     option (USE_SENTRY "Use Sentry" ON)
 
     set (SENTRY_TRANSPORT "url")

From 2f74c58b0598a14db9583b660f2316b01013f052 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 2 Jun 2020 23:50:18 +0300
Subject: [PATCH 054/318] experiment with BUILD_SHARED_LIBS

---
 cmake/find/sentry.cmake | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 6848dc00b43..08f712d5574 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -12,8 +12,11 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE
     set (SENTRY_BACKEND "none")
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
+    if (NOT_UNBUNDLED)
+        set (BUILD_SHARED_LIBS OFF)
+    endif()
 
     message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}")
 
     include_directories("${SENTRY_INCLUDE_DIR}")
-endif ()
\ No newline at end of file
+endif ()

From 6f0e754f1e6bfd5753c04b1f81b231eece4f82fa Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 4 Jun 2020 11:57:01 +0300
Subject: [PATCH 055/318] try to fix the glibc compatibility

---
 .gitmodules             | 2 +-
 cmake/find/sentry.cmake | 9 ++++-----
 contrib/CMakeLists.txt  | 2 --
 contrib/sentry-native   | 2 +-
 4 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 4175eb223db..ff4e644f657 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -162,4 +162,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "contrib/sentry-native"]
 	path = contrib/sentry-native
-	url = https://github.com/getsentry/sentry-native.git
+	url = https://github.com/blinkov/sentry-native.git
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 08f712d5574..e1cd28c1d59 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -7,15 +7,14 @@ endif ()
 
 if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG))
     option (USE_SENTRY "Use Sentry" ON)
-
-    set (SENTRY_TRANSPORT "url")
-    set (SENTRY_BACKEND "none")
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
-    if (NOT_UNBUNDLED)
+    set (SENTRY_TRANSPORT "curl" CACHE STRING "")
+    set (SENTRY_BACKEND "none" CACHE STRING "")
+    set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "")
+    if (OS_LINUX AND NOT_UNBUNDLED)
         set (BUILD_SHARED_LIBS OFF)
     endif()
-
     message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}")
 
     include_directories("${SENTRY_INCLUDE_DIR}")
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index d9af4bc0ac5..ea13969db16 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -321,8 +321,6 @@ if (USE_FASTOPS)
 endif()
 
 if (USE_SENTRY)
-    set (SENTRY_BACKEND "none")
-    set (SENTRY_TRANSPORT "curl")
     add_subdirectory (sentry-native)
 endif()
 
diff --git a/contrib/sentry-native b/contrib/sentry-native
index b48c21d2440..18835dd8c49 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit b48c21d244092658d6e2d1bb243b705fd968b9f7
+Subproject commit 18835dd8c496f22859bd6a1a7054a2bd4762e7ed

From 966593e0a8e42266138cbd5af917ec0be60a3c0a Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 9 Jun 2020 15:50:18 +0300
Subject: [PATCH 056/318] try to completely remove sentry from odbc-bridge

---
 programs/odbc-bridge/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index ab8d94f2a0c..51abf4a9adb 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -10,7 +10,7 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
     PingHandler.cpp
     validateODBCConnectionString.cpp
 )
-
+set (USE_SENTRY OFF CACHE BOOL "" FORCE)
 set (CLICKHOUSE_ODBC_BRIDGE_LINK
     PRIVATE
         clickhouse_parsers

From d91f0bd580aa8632cc89aae0531281f300b48740 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 9 Jun 2020 19:07:40 +0300
Subject: [PATCH 057/318] Switch back to sentry upstream

---
 .gitmodules           | 2 +-
 contrib/sentry-native | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index ff4e644f657..4175eb223db 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -162,4 +162,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "contrib/sentry-native"]
 	path = contrib/sentry-native
-	url = https://github.com/blinkov/sentry-native.git
+	url = https://github.com/getsentry/sentry-native.git
diff --git a/contrib/sentry-native b/contrib/sentry-native
index 18835dd8c49..9651561d45e 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit 18835dd8c496f22859bd6a1a7054a2bd4762e7ed
+Subproject commit 9651561d45e4d00e9fe708275c086a3cfeb496bd

From f872c639ed6893d0731ed61c1927f3c6f313f0d2 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Tue, 9 Jun 2020 20:44:56 +0300
Subject: [PATCH 058/318] Try to disable linker options from sentry

---
 .gitmodules                         | 2 +-
 cmake/find/sentry.cmake             | 1 +
 contrib/sentry-native               | 2 +-
 programs/odbc-bridge/CMakeLists.txt | 1 -
 4 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 4175eb223db..ff4e644f657 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -162,4 +162,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "contrib/sentry-native"]
 	path = contrib/sentry-native
-	url = https://github.com/getsentry/sentry-native.git
+	url = https://github.com/blinkov/sentry-native.git
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index e1cd28c1d59..7fa384cb906 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -11,6 +11,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
     set (SENTRY_TRANSPORT "curl" CACHE STRING "")
     set (SENTRY_BACKEND "none" CACHE STRING "")
+    set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "")
     set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "")
     if (OS_LINUX AND NOT_UNBUNDLED)
         set (BUILD_SHARED_LIBS OFF)
diff --git a/contrib/sentry-native b/contrib/sentry-native
index 9651561d45e..78fb54989cd 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit 9651561d45e4d00e9fe708275c086a3cfeb496bd
+Subproject commit 78fb54989cd61cf11dcea142e12d1ecc6940c962
diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt
index 51abf4a9adb..af59383d030 100644
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@@ -10,7 +10,6 @@ set (CLICKHOUSE_ODBC_BRIDGE_SOURCES
     PingHandler.cpp
     validateODBCConnectionString.cpp
 )
-set (USE_SENTRY OFF CACHE BOOL "" FORCE)
 set (CLICKHOUSE_ODBC_BRIDGE_LINK
     PRIVATE
         clickhouse_parsers

From 6191d33bd9cb67d4dcabb79202bdd91baf467ddd Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 10 Jun 2020 16:30:12 +0300
Subject: [PATCH 059/318] Do not cache frames inside StackTrace

---
 base/daemon/BaseDaemon.cpp |  1 -
 cmake/find/sentry.cmake    |  1 +
 src/Common/StackTrace.cpp  | 30 ++++++++----------------------
 src/Common/StackTrace.h    |  7 +------
 4 files changed, 10 insertions(+), 29 deletions(-)

diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp
index 9da8849342d..1467657d31a 100644
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@@ -223,7 +223,6 @@ public:
         DB::readPODBinary(stack_trace, in);
         DB::readBinary(thread_num, in);
         DB::readBinary(query_id, in);
-        stack_trace.resetFrames();
 
         /// This allows to receive more signals if failure happens inside onFault function.
         /// Example: segfault while symbolizing stack trace.
diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 7fa384cb906..2281d870dec 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -13,6 +13,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE
     set (SENTRY_BACKEND "none" CACHE STRING "")
     set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "")
     set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "")
+    set (SENTRY_PIC OFF CACHE BOOL "")
     if (OS_LINUX AND NOT_UNBUNDLED)
         set (BUILD_SHARED_LIBS OFF)
     endif()
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 819f74f37cb..aacda116bfb 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -199,12 +199,12 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t
 
     for (size_t i = 0; i < offset; ++i)
     {
-        frames.value()[i].virtual_addr = frame_pointers[i];
+        frames[i].virtual_addr = frame_pointers[i];
     }
 
     for (size_t i = offset; i < size; ++i)
     {
-        StackTrace::Frame & current_frame = frames.value()[i];
+        StackTrace::Frame & current_frame = frames[i];
         current_frame.virtual_addr = frame_pointers[i];
         const auto * object = symbol_index.findObject(current_frame.virtual_addr);
         uintptr_t virtual_offset = object ? uintptr_t(object->address_begin) : 0;
@@ -244,7 +244,7 @@ static void symbolize(const void * const * frame_pointers, size_t offset, size_t
 #else
     for (size_t i = 0; i < size; ++i)
     {
-        frames.value()[i].virtual_addr = frame_pointers[i];
+        frames[i].virtual_addr = frame_pointers[i];
     }
     UNUSED(offset);
 #endif
@@ -309,16 +309,6 @@ const StackTrace::FramePointers & StackTrace::getFramePointers() const
     return frame_pointers;
 }
 
-const StackTrace::Frames & StackTrace::getFrames() const
-{
-    if (!frames.has_value())
-    {
-        frames.emplace();
-        symbolize(frame_pointers.data(), offset, size, frames);
-    }
-    return frames;
-}
-
 static void
 toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t size, std::function callback)
 {
@@ -329,7 +319,7 @@ toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t s
 
     for (size_t i = offset; i < size; ++i)
     {
-        const StackTrace::Frame & current_frame = frames.value()[i];
+        const StackTrace::Frame & current_frame = frames[i];
         out << i << ". ";
 
         if (current_frame.file.has_value() && current_frame.line.has_value())
@@ -356,8 +346,7 @@ toStringEveryLineImpl(const StackTrace::Frames & frames, size_t offset, size_t s
 static std::string toStringImpl(const void * const * frame_pointers, size_t offset, size_t size)
 {
     std::stringstream out;
-    StackTrace::Frames frames{};
-    frames.emplace();
+    StackTrace::Frames frames;
     symbolize(frame_pointers, offset, size, frames);
     toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; });
     return out.str();
@@ -365,12 +354,9 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs
 
 void StackTrace::toStringEveryLine(std::function callback) const
 {
-    toStringEveryLineImpl(getFrames(), offset, size, std::move(callback));
-}
-
-void StackTrace::resetFrames()
-{
-    frames.reset();
+    Frames frames;
+    symbolize(frame_pointers.data(), offset, size, frames);
+    toStringEveryLineImpl(frames, offset, size, std::move(callback));
 }
 
diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h
index 27b2c44dd94..4ec63b3cf86 100644
--- a/src/Common/StackTrace.h
+++ b/src/Common/StackTrace.h
@@ -36,7 +36,7 @@ public:
     };
 
     static constexpr size_t capacity = 32;
     using FramePointers = std::array;
-    using Frames = std::optional>;
+    using Frames = std::array;
 
     /// Tries to capture stack trace
     StackTrace();
@@ -51,22 +51,17 @@ public:
     size_t getSize() const;
     size_t getOffset() const;
     const FramePointers & getFramePointers() const;
-    const Frames & getFrames() const;
     std::string toString() const;
 
     static std::string toString(void ** frame_pointers, size_t offset, size_t size);
 
     void toStringEveryLine(std::function callback) const;
-
-    void resetFrames();
-
 protected:
     void tryCapture();
 
     size_t size = 0;
    size_t offset = 0;  /// How many frames to skip while displaying.
     FramePointers frame_pointers{};
-    mutable Frames frames{};
 };
 
 std::string signalToErrorMessage(int sig, const siginfo_t & info, const ucontext_t & context);

From 60b40f04039702a0d8d55c44cfb81e96c932836e Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 10 Jun 2020 17:51:25 +0300
Subject: [PATCH 060/318] Lost part of refactoring

---
 base/daemon/SentryWriter.cpp | 4 +++-
 src/Common/StackTrace.cpp    | 6 +++---
 src/Common/StackTrace.h      | 1 +
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index c8197d8a160..eddd5bfa49c 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -146,9 +146,11 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
             offset = 1;
         }
         char instruction_addr[100];
+        StackTrace::Frames frames;
+        StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, size, frames);
         for (size_t i = stack_size - 1; i >= offset; --i)
         {
-            const StackTrace::Frame & current_frame = stack_trace.getFrames().value()[i];
+            const StackTrace::Frame & current_frame = frames[i];
             sentry_value_t frame = sentry_value_new_object();
             UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr);
             std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr);
diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index aacda116bfb..8e390154838 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -190,7 +190,7 @@ static void * getCallerAddress(const ucontext_t & context)
 #endif
 }
 
-static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames)
+void StackTrace::symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames)
 {
 #if defined(__ELF__) && !defined(__FreeBSD__) && !defined(ARCADIA_BUILD)
 
@@ -347,7 +347,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs
 {
     std::stringstream out;
     StackTrace::Frames frames;
-    symbolize(frame_pointers, offset, size, frames);
+    StackTrace::symbolize(frame_pointers.data(), offset, size, frames);
     toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; });
     return out.str();
 }
@@ -355,7 +355,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs
 void StackTrace::toStringEveryLine(std::function callback) const
 {
     Frames frames;
-    symbolize(frame_pointers.data(), offset, size, frames);
+    StackTrace::symbolize(frame_pointers.data(), offset, size, frames);
     toStringEveryLineImpl(frames, offset, size, std::move(callback));
 }
 
diff --git a/src/Common/StackTrace.h b/src/Common/StackTrace.h
index 4ec63b3cf86..374f0314533 100644
--- a/src/Common/StackTrace.h
+++ b/src/Common/StackTrace.h
@@ -54,6 +54,7 @@ public:
     std::string toString() const;
 
     static std::string toString(void ** frame_pointers, size_t offset, size_t size);
+    static void symbolize(const void * const * frame_pointers, size_t offset, size_t size, StackTrace::Frames & frames);
 
     void toStringEveryLine(std::function callback) const;

From 0316464ed4eb22593bd7fc18b79584cc0f476ce0 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 10 Jun 2020 18:30:13 +0300
Subject: [PATCH 061/318] fix

---
 src/Common/StackTrace.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Common/StackTrace.cpp b/src/Common/StackTrace.cpp
index 8e390154838..cb0488b489a 100644
--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@@ -347,7 +347,7 @@ static std::string toStringImpl(const void * const * frame_pointers, size_t offs
 {
     std::stringstream out;
     StackTrace::Frames frames;
-    StackTrace::symbolize(frame_pointers.data(), offset, size, frames);
+    StackTrace::symbolize(frame_pointers, offset, size, frames);
     toStringEveryLineImpl(frames, offset, size, [&](const std::string & str) { out << str << '\n'; });
     return out.str();
 }

From 0a74c9373ec46713b3deb51e86c702418fc29a0d Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 10 Jun 2020 19:48:08 +0300
Subject: [PATCH 062/318] less confusing

---
 base/daemon/SentryWriter.cpp | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index eddd5bfa49c..003a2816ce0 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -136,7 +136,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
         setExtras();
 
         /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/stacktrace/
-        sentry_value_t frames = sentry_value_new_list();
+        sentry_value_t sentry_frames = sentry_value_new_list();
         size_t stack_size = stack_trace.getSize();
         if (stack_size > 0)
         {
@@ -151,33 +151,33 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
             for (size_t i = stack_size - 1; i >= offset; --i)
             {
                 const StackTrace::Frame & current_frame = frames[i];
-                sentry_value_t frame = sentry_value_new_object();
+                sentry_value_t sentry_frame = sentry_value_new_object();
                 UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr);
                 std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr);
-                sentry_value_set_by_key(frame, "instruction_addr", sentry_value_new_string(instruction_addr));
+                sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr));
 
                 if (current_frame.symbol.has_value())
                 {
-                    sentry_value_set_by_key(frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str()));
+                    sentry_value_set_by_key(sentry_frame, "function", sentry_value_new_string(current_frame.symbol.value().c_str()));
                 }
 
                 if (current_frame.file.has_value())
                 {
-                    sentry_value_set_by_key(frame, "filename", sentry_value_new_string(current_frame.file.value().c_str()));
+                    sentry_value_set_by_key(sentry_frame, "filename", sentry_value_new_string(current_frame.file.value().c_str()));
                 }
 
                 if (current_frame.line.has_value())
                 {
-                    sentry_value_set_by_key(frame, "lineno", sentry_value_new_int32(current_frame.line.value()));
+                    sentry_value_set_by_key(sentry_frame, "lineno", sentry_value_new_int32(current_frame.line.value()));
                 }
 
-                sentry_value_append(frames, frame);
+                sentry_value_append(sentry_frames, sentry_frame);
             }
         }
 
         /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/
         sentry_value_t stacktrace = sentry_value_new_object();
-        sentry_value_set_by_key(stacktrace, "frames", frames);
+        sentry_value_set_by_key(stacktrace, "frames", sentry_frames);
 
         sentry_value_t thread = sentry_value_new_object();
         sentry_value_set_by_key(thread, "stacktrace", stacktrace);

From 5fa44019918581a4378582bcd72ff5b160e5f659 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Wed, 10 Jun 2020 21:18:34 +0300
Subject: [PATCH 063/318] fix

---
 base/daemon/SentryWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 003a2816ce0..2ce43c9f0a2 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -147,7 +147,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
             }
             char instruction_addr[100];
             StackTrace::Frames frames;
-            StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, size, frames);
+            StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, stack_size, frames);
             for (size_t i = stack_size - 1; i >= offset; --i)
             {
                 const StackTrace::Frame & current_frame = frames[i];

From 67ccd6703ea9de805b65c7fa25a3c43620571b55 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 00:03:13 +0300
Subject: [PATCH 064/318] maybe fix the unbundled gcc build

---
 cmake/find/sentry.cmake | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 2281d870dec..2d3aa71248a 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()
 
-if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILER_CLANG))
+if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG))
     option (USE_SENTRY "Use Sentry" ON)
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)
@@ -14,9 +14,7 @@ if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT (OS_DARWIN AND COMPILE
     set (SENTRY_EXPORT_SYMBOLS OFF CACHE BOOL "")
     set (SENTRY_LINK_PTHREAD OFF CACHE BOOL "")
     set (SENTRY_PIC OFF CACHE BOOL "")
-    if (OS_LINUX AND NOT_UNBUNDLED)
-        set (BUILD_SHARED_LIBS OFF)
-    endif()
+    set (BUILD_SHARED_LIBS OFF)
     message (STATUS "Using sentry=${USE_SENTRY}: ${SENTRY_LIBRARY}")
 
     include_directories("${SENTRY_INCLUDE_DIR}")

From 22707508c1ccb85d3dd9e4fd9bbee39b73ade962 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 12:17:33 +0300
Subject: [PATCH 065/318] experiment

---
 tests/queries/0_stateless/00816_long_concurrent_alter_column.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
index 965408065cf..d3a26b0ed75 100755
--- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
+++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
@@ -59,6 +59,7 @@ wait
 
 echo "DROP TABLE concurrent_alter_column" | ${CLICKHOUSE_CLIENT}
 
+sleep 1
 # Check for deadlocks
 echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter%'" | ${CLICKHOUSE_CLIENT}
 

From 706c5452482d633b69b8fa54c0eb6b3ccd248d9e Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 12:18:12 +0300
Subject: [PATCH 066/318] experiment

---
 tests/queries/0_stateless/00816_long_concurrent_alter_column.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
index d3a26b0ed75..3ed0c6e1a6a 100755
--- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
+++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
@@ -59,7 +59,7 @@ wait
 
 echo "DROP TABLE concurrent_alter_column" | ${CLICKHOUSE_CLIENT}
 
-sleep 1
+sleep 7
 # Check for deadlocks
 echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter%'" | ${CLICKHOUSE_CLIENT}
 
From 3e5d735871c9a995c5450b05d030d3046f2d3051 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 12:21:23 +0300
Subject: [PATCH 067/318] back to upstream

---
 .gitmodules           | 2 +-
 contrib/sentry-native | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.gitmodules b/.gitmodules
index 93a0078a051..2fed57a519d 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -170,4 +170,4 @@
 	url = https://github.com/fmtlib/fmt.git
 [submodule "contrib/sentry-native"]
 	path = contrib/sentry-native
-	url = https://github.com/blinkov/sentry-native.git
+	url = https://github.com/getsentry/sentry-native.git
diff --git a/contrib/sentry-native b/contrib/sentry-native
index 78fb54989cd..f91ed3f95b5 160000
--- a/contrib/sentry-native
+++ b/contrib/sentry-native
@@ -1 +1 @@
-Subproject commit 78fb54989cd61cf11dcea142e12d1ecc6940c962
+Subproject commit f91ed3f95b5653f247189d720ab00765b4899d6f

From 5f73c87c7142e7d137a29f827d8fab8ebdc10ad2 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 15:18:19 +0300
Subject: [PATCH 068/318] change used flag

---
 cmake/find/sentry.cmake | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/find/sentry.cmake b/cmake/find/sentry.cmake
index 2d3aa71248a..eadf071141e 100644
--- a/cmake/find/sentry.cmake
+++ b/cmake/find/sentry.cmake
@@ -5,7 +5,7 @@ if (NOT EXISTS "${SENTRY_INCLUDE_DIR}/sentry.h")
     return()
 endif ()
 
-if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG))
+if (NOT OS_FREEBSD AND NOT SPLIT_SHARED_LIBRARIES AND NOT_UNBUNDLED AND NOT (OS_DARWIN AND COMPILER_CLANG))
     option (USE_SENTRY "Use Sentry" ON)
     set (CURL_LIBRARY ${ClickHouse_SOURCE_DIR}/contrib/curl/lib)
     set (CURL_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/curl/include)

From fa47fc3f30eb144aa1593ebbfc630fdb4f095c39 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 15:34:02 +0300
Subject: [PATCH 069/318] fix address formatting

---
 base/daemon/SentryWriter.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 2ce43c9f0a2..45f5bd56ca1 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -153,7 +153,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c
                 const StackTrace::Frame & current_frame = frames[i];
                 sentry_value_t sentry_frame = sentry_value_new_object();
                 UInt64 frame_ptr = reinterpret_cast(current_frame.virtual_addr);
-                std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIu64 "x", frame_ptr);
+                std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr);
                 sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr));
 
                 if (current_frame.symbol.has_value())

From 47a902a6ce593d9ee55a29fdb0b35bc6f44152a7 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Thu, 11 Jun 2020 18:55:44 +0300
Subject: [PATCH 070/318] Simple github hook

---
 utils/github-hook/hook.py | 195 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 195 insertions(+)
 create mode 100644 utils/github-hook/hook.py

diff --git a/utils/github-hook/hook.py b/utils/github-hook/hook.py
new file mode 100644
index 00000000000..13d62b311f7
--- /dev/null
+++ b/utils/github-hook/hook.py
@@ -0,0 +1,195 @@
+# -*- coding: utf-8 -*-
+import json
+import requests
+import time
+import os
+
+DB = 'gh-data'
+RETRIES = 5
+
+
+def process_issue_event(response):
+    issue = response['issue']
+    return dict(
+        action=response['action'],
+        sender=response['sender']['login'],
+        updated_at=issue['updated_at'],
+        url=issue['url'],
+        number=issue['number'],
+        author=issue['user']['login'],
+        labels=[label['name'] for label in issue['labels']],
+        state=issue['state'],
+        assignees=[assignee['login'] for assignee in issue['assignees']],
+        created_at=issue['created_at'],
+        body=issue['body'],
+        title=issue['title'],
+        comments=issue['comments'],
+        raw_json=json.dumps(response),)
+
+
+def process_issue_comment_event(response):
+    issue = response['issue']
+    comment = response['comment']
+
+    return dict(
+        action='comment_' + response['action'],
+        sender=response['sender']['login'],
+        updated_at=issue['updated_at'],
+        url=issue['url'],
+        number=issue['number'],
+        author=issue['user']['login'],
+        labels=[label['name'] for label in issue['labels']],
+        state=issue['state'],
+        assignees=[assignee['login'] for assignee in issue['assignees']],
+        created_at=issue['created_at'],
+        body=issue['body'],
+        title=issue['title'],
+        comments=issue['comments'],
+        comment_body=comment['body'],
+        comment_author=comment['user']['login'],
+        comment_url=comment['url'],
+        comment_created_at=comment['created_at'],
+        comment_updated_at=comment['updated_at'],
+        raw_json=json.dumps(response),)
+
+
+def process_pull_request_event(response):
+    pull_request = response['pull_request']
+    result = dict(
+        updated_at=pull_request['updated_at'],
+        number=pull_request['number'],
+        action=response['action'],
+        sender=response['sender']['login'],
+        url=pull_request['url'],
+        author=pull_request['user']['login'],
+        labels=[label['name'] for label in pull_request['labels']],
+        state=pull_request['state'],
+        body=pull_request['body'],
+        title=pull_request['title'],
+        created_at=pull_request['created_at'],
+        assignees=[assignee['login'] for assignee in pull_request['assignees']],
+        requested_reviewers=[reviewer['login'] for reviewer in pull_request['requested_reviewers']],
+        head_repo=pull_request['head']['repo']['full_name'],
+        head_ref=pull_request['head']['ref'],
+        head_clone_url=pull_request['head']['repo']['clone_url'],
+        head_ssh_url=pull_request['head']['repo']['ssh_url'],
+        base_repo=pull_request['base']['repo']['full_name'],
+        base_ref=pull_request['base']['ref'],
+        base_clone_url=pull_request['base']['repo']['clone_url'],
+        base_ssh_url=pull_request['base']['repo']['ssh_url'],
+        raw_json=json.dumps(response),
+    )
+
+    if 'mergeable' in pull_request and pull_request['mergeable'] is not None:
+        result['mergeable'] = 1 if pull_request['mergeable'] else 0
+
+    if 'merged_by' in pull_request and pull_request['merged_by'] is not None:
+        result['merged_by'] = pull_request['merged_by']['login']
+
+    if 'merged_at' in pull_request and pull_request['merged_at'] is not None:
+        result['merged_at'] = pull_request['merged_at']
+
+    if 'closed_at' in pull_request and pull_request['closed_at'] is not None:
+        result['closed_at'] = pull_request['closed_at']
+
+    if 'merge_commit_sha' in pull_request and pull_request['merge_commit_sha'] is not None:
+        result['merge_commit_sha'] = pull_request['merge_commit_sha']
+
+    if 'draft' in pull_request:
+        result['draft'] = 1 if pull_request['draft'] else 0
+
+    for field in ['comments', 'review_comments', 'commits', 'additions', 'deletions', 'changed_files']:
+        if field in pull_request:
+            result[field] = pull_request[field]
+
+    return result
+
+
+def process_pull_request_review(response):
+    result = process_pull_request_event(response)
+    review = response['review']
+    result['action'] = 'review_' + result['action']
+    result['review_body'] = review['body'] if review['body'] is not None else ''
+    result['review_id'] = review['id']
+    result['review_author'] = review['user']['login']
+    result['review_commit_sha'] = review['commit_id']
+    result['review_submitted_at'] = review['submitted_at']
+    result['review_state'] = review['state']
+    return result
+
+
+def process_pull_request_review_comment(response):
+    result = process_pull_request_event(response)
+    comment = response['comment']
+    result['action'] = 'review_comment_' + result['action']
+    result['review_id'] = comment['pull_request_review_id']
+    result['review_comment_path'] = comment['path']
+    result['review_commit_sha'] = comment['commit_id']
+    result['review_comment_body'] = comment['body']
+    result['review_comment_author'] = comment['user']['login']
+    result['review_comment_created_at'] = comment['created_at']
+    result['review_comment_updated_at'] = comment['updated_at']
+    return result
+
+
+def event_processor_dispatcher(headers, body, inserter):
+    if 'X-Github-Event' in headers:
+        if headers['X-Github-Event'] == 'issues':
+            result = process_issue_event(body)
+            inserter.insert_event_into(DB, 'issues', result)
+        elif headers['X-Github-Event'] == 'issue_comment':
+            result = process_issue_comment_event(body)
+            inserter.insert_event_into(DB, 'issues', result)
+        elif headers['X-Github-Event'] == 'pull_request':
+            result = process_pull_request_event(body)
+            inserter.insert_event_into(DB, 'pull_requests', result)
+        elif headers['X-Github-Event'] == 'pull_request_review':
+            result = process_pull_request_review(body)
+            inserter.insert_event_into(DB, 'pull_requests', result)
+        elif headers['X-Github-Event'] == 'pull_request_review_comment':
+            result = process_pull_request_review_comment(body)
+            inserter.insert_event_into(DB, 'pull_requests', result)
+
+
+class ClickHouseInserter(object):
+    def __init__(self, url, user, password):
+        self.url = url
+        self.auth = {
+            'X-ClickHouse-User': user,
+            'X-ClickHouse-Key': password
+        }
+
+    def insert_event_into(self, db, table, event):
+        params = {
+            'database': db,
+            'query': 'INSERT INTO {table} FORMAT JSONEachRow'.format(table=table),
+            'date_time_input_format': 'best_effort'
+        }
+        event_str = json.dumps(event)
+        for i in range(RETRIES):
+            try:
+                response = requests.post(self.url, params=params, data=event_str, headers=self.auth, verify=False)
+                response.raise_for_status()
+                break
+            except Exception as ex:
+                print("Exception inserting into ClickHouse:", ex)
+                time.sleep(0.1)
+
+
+def test(event, context):
+    inserter = ClickHouseInserter(
+        os.getenv('CLICKHOUSE_URL'),
+        os.getenv('CLICKHOUSE_USER'),
+        os.getenv('CLICKHOUSE_PASSWORD'))
+
+    body = json.loads(event['body'], strict=False)
+    headers = event['headers']
+    event_processor_dispatcher(headers, body, inserter)
+
+    return {
+        'statusCode': 200,
+        'headers': {
+            'Content-Type': 'text/plain'
+        },
+        'isBase64Encoded': False,
+    }
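(Aside, not part of any patch in this series: the dispatcher above can be exercised locally without GitHub or ClickHouse. The sketch below assumes hook.py is importable from the working directory; the payload carries only the fields process_issue_event() actually reads, and the ClickHouse inserter is replaced by a stub, so everything here except the function names is illustrative.)

import json

import hook  # the module added by the patch above


class StubInserter:
    # Stands in for ClickHouseInserter; just shows what would be inserted.
    def insert_event_into(self, db, table, event):
        print(db, table, json.dumps(event)[:100])


headers = {'X-Github-Event': 'issues'}
body = {
    'action': 'opened',
    'sender': {'login': 'alice'},
    'issue': {
        'updated_at': '2020-06-11T00:00:00Z',
        'url': 'https://api.github.com/repos/example/example/issues/1',
        'number': 1,
        'user': {'login': 'alice'},
        'labels': [],
        'state': 'open',
        'assignees': [],
        'created_at': '2020-06-11T00:00:00Z',
        'body': '',
        'title': 'example issue',
        'comments': 0,
    },
}

hook.event_processor_dispatcher(headers, body, StubInserter())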
From bbeb768a1952541a895b51ecb70eee5dd4532224 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Thu, 11 Jun 2020 21:12:48 +0300
Subject: [PATCH 071/318] use the sentry logger hook

---
 base/daemon/SentryWriter.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index 45f5bd56ca1..bb176db813c 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -13,6 +13,7 @@
 
 #if USE_SENTRY
 #    include  // Y_IGNORE
+#    include
 #endif
 
 
@@ -39,6 +40,33 @@ void setExtras()
     sentry_set_extra("version_minor", sentry_value_new_int32(VERSION_MINOR));
     sentry_set_extra("version_patch", sentry_value_new_int32(VERSION_PATCH));
 }
+
+void sentry_logger(sentry_level_t level, const char * message, va_list args)
+{
+    auto * logger = &Poco::Logger::get("SentryWriter");
+    size_t size = 1024;
+    char buffer[size];
+    if (vsnprintf(buffer, size, message, args) >= 0) {
+        switch (level) {
+            case SENTRY_LEVEL_DEBUG:
+                logger->debug(buffer);
+                break;
+            case SENTRY_LEVEL_INFO:
+                logger->information(buffer);
+                break;
+            case SENTRY_LEVEL_WARNING:
+                logger->warning(buffer);
+                break;
+            case SENTRY_LEVEL_ERROR:
+                logger->error(buffer);
+                break;
+            case SENTRY_LEVEL_FATAL:
+                logger->fatal(buffer);
+                break;
+        }
+    }
+}
+}
 }
 #endif
 
@@ -65,6 +93,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)
 
     sentry_options_t * options = sentry_options_new();
     sentry_options_set_release(options, VERSION_STRING);
+    sentry_options_set_logger(options, &sentry_logger);
     if (debug)
     {
         sentry_options_set_debug(options, 1);

From 7ba5063b7a6c80ea07ec30b473e329bf11c93879 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 12 Jun 2020 00:24:56 +0300
Subject: [PATCH 072/318] Add concurrent benchmark to performance test

After the main test, run queries from `website.xml` in parallel using
`clickhouse-benchmark`. This can be useful to test the effects of
concurrency on performance. The comparison test can miss some effects
because it always runs queries sequentially, and many of them are even
single-threaded.

---
 docker/test/performance-comparison/compare.sh |  17 +++
 docker/test/performance-comparison/perf.py    | 100 ++++++++----------
 docker/test/performance-comparison/report.py  |  31 ++++++
 3 files changed, 90 insertions(+), 58 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index a2760907cb3..3d49e9e841a 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -161,6 +161,20 @@ function run_tests
     wait
 }
 
+# Run some queries concurrently and report the resulting TPS. This additional
+# (relatively) short test helps detect concurrency-related effects, because the
+# main performance comparison testing is done query-by-query.
+function run_benchmark
+{
+    rm -rf benchmark ||:
+    mkdir bencmhark ||:
+
+    # TODO disable this when there is an explicit list of tests to run
+    "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv
+    clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json < benchmark/website-queries.tsv
+    clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json < benchmark/website-queries.tsv
+}
+
 function get_profiles_watchdog
 {
     sleep 6000
@@ -716,6 +730,9 @@ case "$stage" in
     # Ignore the errors to collect the log and build at least some report, anyway
     time run_tests ||:
     ;&
+"run_benchmark")
+    time run_benchmark 2> >(tee -a run-errors.tsv 1>&2) ||:
+    ;&
"get_profiles")
     # Getting profiles inexplicably hangs sometimes, so try to save some logs if
     # this happens again. Give the servers some time to collect all info, then
diff --git a/docker/test/performance-comparison/perf.py b/docker/test/performance-comparison/perf.py
index 308d4760b48..74d0300b074 100755
--- a/docker/test/performance-comparison/perf.py
+++ b/docker/test/performance-comparison/perf.py
@@ -14,22 +14,14 @@ import traceback
 def tsv_escape(s):
     return s.replace('\\', '\\\\').replace('\t', '\\t').replace('\n', '\\n').replace('\r','')
 
-stage_start_seconds = time.perf_counter()
-
-def report_stage_end(stage_name):
-    global stage_start_seconds
-    print('{}\t{}'.format(stage_name, time.perf_counter() - stage_start_seconds))
-    stage_start_seconds = time.perf_counter()
-
-report_stage_end('start')
-
 parser = argparse.ArgumentParser(description='Run performance test.')
 # Explicitly decode files as UTF-8 because sometimes we have Russian characters in queries, and LANG=C is set.
 parser.add_argument('file', metavar='FILE', type=argparse.FileType('r', encoding='utf-8'), nargs=1, help='test description file')
 parser.add_argument('--host', nargs='*', default=['localhost'], help="Server hostname(s). Corresponds to '--port' options.")
 parser.add_argument('--port', nargs='*', default=[9000], help="Server port(s). Corresponds to '--host' options.")
-parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 13)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
-parser.add_argument('--no-long', type=bool, default=True, help='Skip the tests tagged as long.')
+parser.add_argument('--runs', type=int, default=int(os.environ.get('CHPC_RUNS', 17)), help='Number of query runs per server. Defaults to CHPC_RUNS environment variable.')
+parser.add_argument('--long', action='store_true', help='Do not skip the tests tagged as long.')
+parser.add_argument('--print', action='store_true', help='Print test queries and exit.')
 args = parser.parse_args()
 
 test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
@@ -37,35 +29,6 @@ test_name = os.path.splitext(os.path.basename(args.file[0].name))[0]
 tree = et.parse(args.file[0])
 root = tree.getroot()
 
-# Skip long tests
-for tag in root.findall('.//tag'):
-    if tag.text == 'long':
-        print('skipped\tTest is tagged as long.')
-        sys.exit(0)
-
-# Check main metric
-main_metric_element = root.find('main_metric/*')
-if main_metric_element is not None and main_metric_element.tag != 'min_time':
-    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
-
-# FIXME another way to detect infinite tests. They should have an appropriate main_metric but sometimes they don't.
-infinite_sign = root.find('.//average_speed_not_changing_for_ms')
-if infinite_sign is not None:
-    raise Exception('Looks like the test is infinite (sign 1)')
-
-# Print report threshold for the test if it is set.
-if 'max_ignored_relative_change' in root.attrib:
-    print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
-
-# Open connections
-servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
-connections = [clickhouse_driver.Client(**server) for server in servers]
-
-for s in servers:
-    print('server\t{}\t{}'.format(s['host'], s['port']))
-
-report_stage_end('connect')
-
 # Process query parameters
 subst_elems = root.findall('substitutions/substitution')
 available_parameters = {} # { 'table': ['hits_10m', 'hits_100m'], ... }
@@ -84,7 +47,45 @@ def substitute_parameters(query_templates):
             for values_combo in itertools.product(*values)])
     return result
 
-report_stage_end('substitute')
+# Build a list of test queries, processing all substitutions
+test_query_templates = [q.text for q in root.findall('query')]
+test_queries = substitute_parameters(test_query_templates)
+
+# If we're only asked to print the queries, do that and exit
+if args.print:
+    for q in test_queries:
+        print(q)
+    exit(0)
+
+# Skip long tests
+if not args.long:
+    for tag in root.findall('.//tag'):
+        if tag.text == 'long':
+            print('skipped\tTest is tagged as long.')
+            sys.exit(0)
+
+# Check main metric to detect infinite tests. We shouldn't have such tests anymore,
+# but we did in the past, and it is convenient to be able to process old tests.
+main_metric_element = root.find('main_metric/*')
+if main_metric_element is not None and main_metric_element.tag != 'min_time':
+    raise Exception('Only the min_time main metric is supported. This test uses \'{}\''.format(main_metric_element.tag))
+
+# Another way to detect infinite tests. They should have an appropriate main_metric
+# but sometimes they don't.
+infinite_sign = root.find('.//average_speed_not_changing_for_ms')
+if infinite_sign is not None:
+    raise Exception('Looks like the test is infinite (sign 1)')
+
+# Print report threshold for the test if it is set.
+if 'max_ignored_relative_change' in root.attrib:
+    print(f'report-threshold\t{root.attrib["max_ignored_relative_change"]}')
+
+# Open connections
+servers = [{'host': host, 'port': port} for (host, port) in zip(args.host, args.port)]
+connections = [clickhouse_driver.Client(**server) for server in servers]
+
+for s in servers:
+    print('server\t{}\t{}'.format(s['host'], s['port']))
 
 # Run drop queries, ignoring errors. Do this before all other activity, because
 # clickhouse_driver disconnects on error (this is not configurable), and the new
@@ -98,8 +99,6 @@ for c in connections:
         except:
             pass
 
-report_stage_end('drop1')
-
 # Apply settings.
 # If there are errors, report them and continue -- maybe a new test uses a setting
 # that is not in master, but the queries can still run. If we have multiple
@@ -115,8 +114,6 @@ for c in connections:
         except:
             print(traceback.format_exc(), file=sys.stderr)
 
-report_stage_end('settings')
-
 # Check tables that should exist. If they don't exist, just skip this test.
 tables = [e.text for e in root.findall('preconditions/table_exists')]
 for t in tables:
@@ -129,8 +126,6 @@ for t in tables:
         print(f'skipped\t{tsv_escape(skipped_message)}')
         sys.exit(0)
 
-report_stage_end('preconditions')
-
 # Run create queries
 create_query_templates = [q.text for q in root.findall('create_query')]
 create_queries = substitute_parameters(create_query_templates)
@@ -145,14 +140,7 @@ for c in connections:
     for q in fill_queries:
         c.execute(q)
 
-report_stage_end('fill')
-
 # Run test queries
-test_query_templates = [q.text for q in root.findall('query')]
-test_queries = substitute_parameters(test_query_templates)
-
-report_stage_end('substitute2')
-
 for query_index, q in enumerate(test_queries):
     query_prefix = f'{test_name}.query{query_index}'
 
@@ -199,13 +187,9 @@ for query_index, q in enumerate(test_queries):
     client_seconds = time.perf_counter() - start_seconds
     print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')
 
-report_stage_end('benchmark')
-
 # Run drop queries
 drop_query_templates = [q.text for q in root.findall('drop_query')]
 drop_queries = substitute_parameters(drop_query_templates)
 for c in connections:
     for q in drop_queries:
         c.execute(q)
-
-report_stage_end('drop2')
diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py
index 9db37932aea..d7e30190aef 100755
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@@ -5,6 +5,7 @@ import ast
 import collections
 import csv
 import itertools
+import json
 import os
 import sys
 import traceback
@@ -321,6 +322,36 @@ if args.report == 'main':
 
     print_test_times()
 
+    def print_benchmark_results():
+        left_json = json.load(open('benchmark/website-left.json'));
+        right_json = json.load(open('benchmark/website-right.json'));
+        left_qps = left_json["statistics"]["QPS"]
+        right_qps = right_json["statistics"]["QPS"]
+        relative_diff = (right_qps - left_qps) / left_qps;
+        times_diff = max(right_qps, left_qps) / max(0.01, min(right_qps, left_qps))
+        print(tableStart('Concurrent benchmarks'))
+        print(tableHeader(['Benchmark', 'Old, queries/s', 'New, queries/s', 'Relative difference', 'Times difference']))
+        row = ['website', f'{left_qps:.3f}', f'{right_qps:.3f}', f'{relative_diff:.3f}', f'x{times_diff:.3f}']
+        attrs = ['' for r in row]
+        if abs(relative_diff) > 0.1:
+            # More queries per second is better.
+            if relative_diff > 0.:
+                attrs[3] = f'style="background: {color_good}"'
+            else:
+                attrs[3] = f'style="background: {color_bad}"'
+        else:
+            attrs[3] = ''
+        print(tableRow(row, attrs))
+        print(tableEnd())
+
+    try:
+        print_benchmark_results()
+    except:
+        report_errors.append(
+            traceback.format_exception_only(
+                *sys.exc_info()[:2])[-1])
+        pass
+
     print_report_errors()
 
     print("""
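(For reference, not part of the patch: print_benchmark_results() above relies only on the statistics.QPS field of the clickhouse-benchmark --json output; every other field in those files is ignored. A self-contained illustration of the arithmetic, with made-up QPS values standing in for the two JSON files:)

import json

left = json.loads('{"statistics": {"QPS": 120.0}}')   # stand-in for benchmark/website-left.json
right = json.loads('{"statistics": {"QPS": 150.0}}')  # stand-in for benchmark/website-right.json

left_qps = left['statistics']['QPS']
right_qps = right['statistics']['QPS']
relative_diff = (right_qps - left_qps) / left_qps  # same formula as report.py
times_diff = max(right_qps, left_qps) / max(0.01, min(right_qps, left_qps))
print(f'{relative_diff:.3f} x{times_diff:.3f}')  # prints: 0.250 x1.250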
From e92641858e6fdf132c4b1f9ecf401fc985d2693e Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 12 Jun 2020 01:00:35 +0300
Subject: [PATCH 073/318] fixes

---
 base/daemon/SentryWriter.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index bb176db813c..d7f08864e96 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -46,8 +46,10 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args)
     auto * logger = &Poco::Logger::get("SentryWriter");
     size_t size = 1024;
     char buffer[size];
-    if (vsnprintf(buffer, size, message, args) >= 0) {
-        switch (level) {
+    if (vsnprintf(buffer, size, message, args) >= 0)
+    {
+        switch (level)
+        {
             case SENTRY_LEVEL_DEBUG:
                 logger->debug(buffer);
                 break;
@@ -67,7 +69,6 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args)
         }
     }
 }
-}
 #endif
 
 void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config)

From 395ef1ecafff89a880bd47c2b11aa12b42e52c03 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 12 Jun 2020 09:35:31 +0300
Subject: [PATCH 074/318] experiment

---
 base/daemon/SentryWriter.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp
index d7f08864e96..88639d8bf94 100644
--- a/base/daemon/SentryWriter.cpp
+++ b/base/daemon/SentryWriter.cpp
@@ -46,8 +46,15 @@ void sentry_logger(sentry_level_t level, const char * message, va_list args)
     auto * logger = &Poco::Logger::get("SentryWriter");
     size_t size = 1024;
     char buffer[size];
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wformat-nonliteral"
+#endif
     if (vsnprintf(buffer, size, message, args) >= 0)
     {
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
         switch (level)
         {
             case SENTRY_LEVEL_DEBUG:

From 0da6e1c9de6d5cdd7b24e08de815589c6d2d36cc Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 12 Jun 2020 15:12:12 +0300
Subject: [PATCH 075/318] typo

---
 docker/test/performance-comparison/compare.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 3d49e9e841a..241fdaec70d 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -167,7 +167,7 @@ function run_tests
 function run_benchmark
 {
     rm -rf benchmark ||:
-    mkdir bencmhark ||:
+    mkdir benchmark ||:
 
     # TODO disable this when there is an explicit list of tests to run
     "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv

From a9514d725768d0025ed848e2a5ea016fb8ac5001 Mon Sep 17 00:00:00 2001
From: Ivan Blinkov
Date: Fri, 12 Jun 2020 16:52:41 +0300
Subject: [PATCH 076/318] trigger ci

From 5101708831d6550ac061b9d8b070c3439aad4968 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 12 Jun 2020 18:11:33 +0300
Subject: [PATCH 077/318] fixup

---
 docker/test/performance-comparison/compare.sh                    | 7 +++++--
 .../config/users.d/perf-comparison-tweaks-users.xml              | 2 ++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 241fdaec70d..5435d37e2e0 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -171,8 +171,11 @@ function run_benchmark
 
     # TODO disable this when there is an explicit list of tests to run
     "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv
-    clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json < benchmark/website-queries.tsv
-    clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json < benchmark/website-queries.tsv
+    # TODO things to fix in clickhouse-benchmark:
+    # - --max_memory_usage setting does nothing
+    # - no way to continue on error
+    clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
+    clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
 }
 
 function get_profiles_watchdog
diff --git a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
index 6e3e3df5d39..1bde2a1388b 100644
--- a/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
+++ b/docker/test/performance-comparison/config/users.d/perf-comparison-tweaks-users.xml
@@ -6,6 +6,8 @@
             1
             1
             1
+
+            30000000000

From 56869228a2db535d27bd2ab7767cf2ed64247ac9 Mon Sep 17 00:00:00 2001
From: Alexander Kuzmenkov
Date: Fri, 12 Jun 2020 21:28:07 +0300
Subject: [PATCH 078/318] add flag to continue on errors

---
 docker/test/performance-comparison/compare.sh |  5 +-
 programs/benchmark/Benchmark.cpp              | 60 ++++++++++++-------
 2 files changed, 40 insertions(+), 25 deletions(-)

diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 5435d37e2e0..1dbf712ff50 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -173,9 +173,8 @@ function run_benchmark
     "$script_dir/perf.py" --print right/performance/website.xml > benchmark/website-queries.tsv
     # TODO things to fix in clickhouse-benchmark:
     # - --max_memory_usage setting does nothing
-    # - no way to continue on error
-    clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
-    clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
+    clickhouse-benchmark --port 9001 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-left.json --continue_on_errors -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
+    clickhouse-benchmark --port 9002 --concurrency 6 --cumulative --iterations 1000 --randomize 1 --delay 0 --json benchmark/website-right.json --continue_on_errors -- --max_memory_usage 30000000000 < benchmark/website-queries.tsv
 }
 
 function get_profiles_watchdog
diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp
index e17320b39ea..6884f6faed3 100644
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@@ -59,11 +59,14 @@ public:
         bool cumulative_, bool secure_, const String & default_database_,
         const String & user_, const String & password_, const String & stage,
         bool randomize_, size_t max_iterations_, double max_time_,
-        const String & json_path_, size_t confidence_, const String & query_id_, const Settings & settings_)
+        const String & json_path_, size_t confidence_,
+        const String & query_id_, bool continue_on_errors_,
+        const Settings & settings_)
         :
         concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_),
         cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_),
-        json_path(json_path_), confidence(confidence_), query_id(query_id_), settings(settings_),
+        json_path(json_path_), confidence(confidence_), query_id(query_id_),
+        continue_on_errors(continue_on_errors_), settings(settings_),
         shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())),
         pool(concurrency)
     {
@@ -149,6 +152,7 @@ private:
     String json_path;
     size_t confidence;
     std::string query_id;
+    bool continue_on_errors;
     Settings settings;
     SharedContextHolder shared_context;
     Context global_context;
@@ -332,35 +336,45 @@ private:
        pcg64 generator(randomSeed());
        std::uniform_int_distribution distribution(0, connection_entries.size() - 1);

-        try
+        /// In these threads we do not accept INT signal.
+        sigset_t sig_set;
+        if (sigemptyset(&sig_set)
+            || sigaddset(&sig_set, SIGINT)
+            || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
         {
-            /// In these threads we do not accept INT signal.
-            sigset_t sig_set;
-            if (sigemptyset(&sig_set)
-                || sigaddset(&sig_set, SIGINT)
-                || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr))
-                throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL);
+            throwFromErrno("Cannot block signal.", ErrorCodes::CANNOT_BLOCK_SIGNAL);
+        }

-            while (true)
+        while (true)
+        {
+            bool extracted = false;
+
+            while (!extracted)
             {
-                bool extracted = false;
+                extracted = queue.tryPop(query, 100);

-                while (!extracted)
+                if (shutdown
+                    || (max_iterations && queries_executed == max_iterations))
                 {
-                    extracted = queue.tryPop(query, 100);
-
-                    if (shutdown || (max_iterations && queries_executed == max_iterations))
-                        return;
+                    return;
                 }
+            }
+
+            try
+            {
                 execute(connection_entries, query, distribution(generator));
                 ++queries_executed;
             }
-        }
-        catch (...)
-        {
-            shutdown = true;
-            std::cerr << "An error occurred while processing query:\n" << query << "\n";
-            throw;
+            catch (...)
+            {
+                std::cerr << "An error occurred while processing query:\n"
+                    << query << "\n";
+                if (!continue_on_errors)
+                {
+                    shutdown = true;
+                    throw;
+                }
+            }
         }
     }

@@ -541,6 +555,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
         ("stacktrace", "print stack traces of exceptions")
         ("confidence", value()->default_value(5), "set the level of confidence for T-test [0=80%, 1=90%, 2=95%, 3=98%, 4=99%, 5=99.5%(default)")
         ("query_id", value()->default_value(""), "")
+        ("continue_on_errors", "continue testing even if a query fails")
     ;

     Settings settings;
@@ -580,6 +595,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
             options["json"].as(),
             options["confidence"].as(),
             options["query_id"].as(),
+            options.count("continue_on_errors") > 0,
             settings);
         return benchmark.run();
     }
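(An aside on the new flag, illustrative and not part of the series: --continue_on_errors makes clickhouse-benchmark keep sending queries when one of them fails, which is what an unattended comparison run needs. Below is a hypothetical Python wrapper around the same invocation that compare.sh performs above; the ports, file names and memory limit are taken from the diff, but the wrapper itself is only a sketch.)

import subprocess

with open('benchmark/website-queries.tsv') as queries:
    subprocess.run(
        ['clickhouse-benchmark', '--port', '9001', '--concurrency', '6',
         '--cumulative', '--iterations', '1000', '--randomize', '1',
         '--delay', '0', '--json', 'benchmark/website-left.json',
         '--continue_on_errors',  # keep going when a query fails
         '--', '--max_memory_usage', '30000000000'],
        stdin=queries, check=True)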
+ ++queries_executed; } } From 857582245e894ef71e59decef44555760f8f9908 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Mon, 15 Jun 2020 19:39:00 +0300 Subject: [PATCH 085/318] fixup --- programs/benchmark/Benchmark.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/programs/benchmark/Benchmark.cpp b/programs/benchmark/Benchmark.cpp index af56aaa6db5..b8e4a0c346a 100644 --- a/programs/benchmark/Benchmark.cpp +++ b/programs/benchmark/Benchmark.cpp @@ -62,12 +62,13 @@ public: bool randomize_, size_t max_iterations_, double max_time_, const String & json_path_, size_t confidence_, const String & query_id_, bool continue_on_errors_, - const Settings & settings_) + bool print_stacktrace_, const Settings & settings_) : concurrency(concurrency_), delay(delay_), queue(concurrency), randomize(randomize_), cumulative(cumulative_), max_iterations(max_iterations_), max_time(max_time_), json_path(json_path_), confidence(confidence_), query_id(query_id_), - continue_on_errors(continue_on_errors_), settings(settings_), + continue_on_errors(continue_on_errors_), + print_stacktrace(print_stacktrace_), settings(settings_), shared_context(Context::createShared()), global_context(Context::createGlobal(shared_context.get())), pool(concurrency) { @@ -154,6 +155,7 @@ private: size_t confidence; std::string query_id; bool continue_on_errors; + bool print_stacktrace; Settings settings; SharedContextHolder shared_context; Context global_context; @@ -376,7 +378,8 @@ private: } else { - std::cerr << getCurrentExceptionMessage(print_stacktrace, true) ; + std::cerr << getCurrentExceptionMessage(print_stacktrace, + true /*check embedded stack trace*/) ; } } // Count failed queries toward executed, so that we'd reach @@ -605,6 +608,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv) options["confidence"].as(), options["query_id"].as(), options.count("continue_on_errors") > 0, + print_stacktrace, settings); return benchmark.run(); } From af2fe2ba553e7112ef474d73473fbff047c5ae60 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 19:55:33 +0300 Subject: [PATCH 086/318] Compilable setColumns, setConstraints, setIndices --- src/Storages/IStorage.cpp | 105 ++++++++---------- src/Storages/IStorage.h | 21 ++-- src/Storages/Kafka/StorageKafka.cpp | 4 +- src/Storages/LiveView/StorageBlocks.h | 4 +- src/Storages/LiveView/StorageLiveView.cpp | 4 +- src/Storages/MergeTree/MergeTreeData.cpp | 6 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 7 +- src/Storages/StorageBuffer.cpp | 8 +- src/Storages/StorageDictionary.cpp | 4 +- src/Storages/StorageDistributed.cpp | 8 +- src/Storages/StorageFile.cpp | 11 +- src/Storages/StorageGenerateRandom.cpp | 4 +- src/Storages/StorageHDFS.cpp | 7 +- src/Storages/StorageInMemoryMetadata.cpp | 15 +++ src/Storages/StorageInMemoryMetadata.h | 13 +++ src/Storages/StorageInput.cpp | 4 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMaterializedView.cpp | 6 +- src/Storages/StorageMemory.cpp | 6 +- src/Storages/StorageMerge.cpp | 6 +- src/Storages/StorageMySQL.cpp | 6 +- src/Storages/StorageNull.cpp | 2 +- src/Storages/StorageNull.h | 6 +- src/Storages/StorageS3.cpp | 6 +- src/Storages/StorageSet.cpp | 7 +- src/Storages/StorageStripeLog.cpp | 6 +- src/Storages/StorageTinyLog.cpp | 6 +- src/Storages/StorageURL.cpp | 7 +- src/Storages/StorageValues.cpp | 4 +- src/Storages/StorageView.cpp | 4 +- src/Storages/System/IStorageSystemOneBlock.h | 4 +- src/Storages/System/StorageSystemColumns.cpp | 4 +- .../System/StorageSystemDetachedParts.cpp 
| 4 +- src/Storages/System/StorageSystemDisks.cpp | 4 +- src/Storages/System/StorageSystemNumbers.cpp | 4 +- src/Storages/System/StorageSystemOne.cpp | 4 +- .../System/StorageSystemPartsBase.cpp | 4 +- src/Storages/System/StorageSystemReplicas.cpp | 4 +- .../System/StorageSystemStoragePolicies.cpp | 4 +- src/Storages/System/StorageSystemTables.cpp | 4 +- src/Storages/System/StorageSystemZeros.cpp | 5 +- 41 files changed, 211 insertions(+), 137 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index a244f836f5c..6c045a6f365 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -34,22 +34,22 @@ namespace ErrorCodes const ColumnsDescription & IStorage::getColumns() const { - return metadata.columns; + return metadata->columns; } const IndicesDescription & IStorage::getSecondaryIndices() const { - return metadata.secondary_indices; + return metadata->secondary_indices; } bool IStorage::hasSecondaryIndices() const { - return !metadata.secondary_indices.empty(); + return !metadata->secondary_indices.empty(); } const ConstraintsDescription & IStorage::getConstraints() const { - return metadata.constraints; + return metadata->constraints; } Block IStorage::getSampleBlock() const @@ -287,23 +287,6 @@ void IStorage::check(const Block & block, bool need_all) const } } -void IStorage::setColumns(ColumnsDescription columns_) -{ - if (columns_.getOrdinary().empty()) - throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); - metadata.columns = std::move(columns_); -} - -void IStorage::setSecondaryIndices(IndicesDescription secondary_indices_) -{ - metadata.secondary_indices = std::move(secondary_indices_); -} - -void IStorage::setConstraints(ConstraintsDescription constraints_) -{ - metadata.constraints = std::move(constraints_); -} - bool IStorage::isVirtualColumn(const String & column_name) const { /// Virtual column maybe overriden by real column @@ -382,7 +365,7 @@ void IStorage::alter( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } @@ -417,137 +400,137 @@ NamesAndTypesList IStorage::getVirtuals() const const KeyDescription & IStorage::getPartitionKey() const { - return metadata.partition_key; + return metadata->partition_key; } void IStorage::setPartitionKey(const KeyDescription & partition_key_) { - metadata.partition_key = partition_key_; + metadata->partition_key = partition_key_; } bool IStorage::isPartitionKeyDefined() const { - return metadata.partition_key.definition_ast != nullptr; + return metadata->partition_key.definition_ast != nullptr; } bool IStorage::hasPartitionKey() const { - return !metadata.partition_key.column_names.empty(); + return !metadata->partition_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPartitionKey() const { if (hasPartitionKey()) - return metadata.partition_key.expression->getRequiredColumns(); + return metadata->partition_key.expression->getRequiredColumns(); return {}; } const KeyDescription & IStorage::getSortingKey() const { - return metadata.sorting_key; + return metadata->sorting_key; } void IStorage::setSortingKey(const KeyDescription & sorting_key_) { - metadata.sorting_key = sorting_key_; + metadata->sorting_key = sorting_key_; } bool IStorage::isSortingKeyDefined() const { - return 
metadata.sorting_key.definition_ast != nullptr; + return metadata->sorting_key.definition_ast != nullptr; } bool IStorage::hasSortingKey() const { - return !metadata.sorting_key.column_names.empty(); + return !metadata->sorting_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSortingKey() const { if (hasSortingKey()) - return metadata.sorting_key.expression->getRequiredColumns(); + return metadata->sorting_key.expression->getRequiredColumns(); return {}; } Names IStorage::getSortingKeyColumns() const { if (hasSortingKey()) - return metadata.sorting_key.column_names; + return metadata->sorting_key.column_names; return {}; } const KeyDescription & IStorage::getPrimaryKey() const { - return metadata.primary_key; + return metadata->primary_key; } void IStorage::setPrimaryKey(const KeyDescription & primary_key_) { - metadata.primary_key = primary_key_; + metadata->primary_key = primary_key_; } bool IStorage::isPrimaryKeyDefined() const { - return metadata.primary_key.definition_ast != nullptr; + return metadata->primary_key.definition_ast != nullptr; } bool IStorage::hasPrimaryKey() const { - return !metadata.primary_key.column_names.empty(); + return !metadata->primary_key.column_names.empty(); } Names IStorage::getColumnsRequiredForPrimaryKey() const { if (hasPrimaryKey()) - return metadata.primary_key.expression->getRequiredColumns(); + return metadata->primary_key.expression->getRequiredColumns(); return {}; } Names IStorage::getPrimaryKeyColumns() const { - if (!metadata.primary_key.column_names.empty()) - return metadata.primary_key.column_names; + if (!metadata->primary_key.column_names.empty()) + return metadata->primary_key.column_names; return {}; } const KeyDescription & IStorage::getSamplingKey() const { - return metadata.sampling_key; + return metadata->sampling_key; } void IStorage::setSamplingKey(const KeyDescription & sampling_key_) { - metadata.sampling_key = sampling_key_; + metadata->sampling_key = sampling_key_; } bool IStorage::isSamplingKeyDefined() const { - return metadata.sampling_key.definition_ast != nullptr; + return metadata->sampling_key.definition_ast != nullptr; } bool IStorage::hasSamplingKey() const { - return !metadata.sampling_key.column_names.empty(); + return !metadata->sampling_key.column_names.empty(); } Names IStorage::getColumnsRequiredForSampling() const { if (hasSamplingKey()) - return metadata.sampling_key.expression->getRequiredColumns(); + return metadata->sampling_key.expression->getRequiredColumns(); return {}; } TTLTableDescription IStorage::getTableTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl; + return metadata->table_ttl; } void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) { std::lock_guard lock(ttl_mutex); - metadata.table_ttl = table_ttl_; + metadata->table_ttl = table_ttl_; } bool IStorage::hasAnyTableTTL() const @@ -558,43 +541,43 @@ bool IStorage::hasAnyTableTTL() const TTLColumnsDescription IStorage::getColumnTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.column_ttls_by_name; + return metadata->column_ttls_by_name; } void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) { std::lock_guard lock(ttl_mutex); - metadata.column_ttls_by_name = column_ttls_by_name_; + metadata->column_ttls_by_name = column_ttls_by_name_; } bool IStorage::hasAnyColumnTTL() const { std::lock_guard lock(ttl_mutex); - return !metadata.column_ttls_by_name.empty(); + return !metadata->column_ttls_by_name.empty(); } TTLDescription IStorage::getRowsTTL() const 
{ std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.rows_ttl; + return metadata->table_ttl.rows_ttl; } bool IStorage::hasRowsTTL() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.rows_ttl.expression != nullptr; + return metadata->table_ttl.rows_ttl.expression != nullptr; } TTLDescriptions IStorage::getMoveTTLs() const { std::lock_guard lock(ttl_mutex); - return metadata.table_ttl.move_ttl; + return metadata->table_ttl.move_ttl; } bool IStorage::hasAnyMoveTTL() const { std::lock_guard lock(ttl_mutex); - return !metadata.table_ttl.move_ttl.empty(); + return !metadata->table_ttl.move_ttl.empty(); } @@ -660,32 +643,32 @@ ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_colum ASTPtr IStorage::getSettingsChanges() const { - if (metadata.settings_changes) - return metadata.settings_changes->clone(); + if (metadata->settings_changes) + return metadata->settings_changes->clone(); return nullptr; } void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) { if (settings_changes_) - metadata.settings_changes = settings_changes_->clone(); + metadata->settings_changes = settings_changes_->clone(); else - metadata.settings_changes = nullptr; + metadata->settings_changes = nullptr; } const SelectQueryDescription & IStorage::getSelectQuery() const { - return metadata.select; + return metadata->select; } void IStorage::setSelectQuery(const SelectQueryDescription & select_) { - metadata.select = select_; + metadata->select = select_; } bool IStorage::hasSelectQuery() const { - return metadata.select.select_query != nullptr; + return metadata->select.select_query != nullptr; } } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index c7c8e382a87..4d01bb5370d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -140,27 +140,24 @@ public: public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. - - void setSecondaryIndices(IndicesDescription secondary_indices_); const IndicesDescription & getSecondaryIndices() const; /// Has at least one non primary index bool hasSecondaryIndices() const; const ConstraintsDescription & getConstraints() const; - void setConstraints(ConstraintsDescription constraints_); /// Storage settings ASTPtr getSettingsChanges() const; void setSettingsChanges(const ASTPtr & settings_changes_); - bool hasSettingsChanges() const { return metadata.settings_changes != nullptr; } + bool hasSettingsChanges() const { return metadata->settings_changes != nullptr; } /// Select query for *View storages. const SelectQueryDescription & getSelectQuery() const; void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; - StorageInMemoryMetadata getInMemoryMetadata() const { return metadata; } + StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } + void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. 
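// The new getInMemoryMetadata()/setInMemoryMetadata() pair above is a
// copy-in/copy-out snapshot API: readers receive a value copy of the current
// metadata, writers publish a freshly allocated shared_ptr instead of mutating
// the stored object. A reduced model of the scheme (sketch only; the real
// class relies on the storage's structure locks around these calls):
//
//     class SnapshotHolder
//     {
//         std::shared_ptr<StorageInMemoryMetadata> snapshot;
//     public:
//         StorageInMemoryMetadata get() const { return *snapshot; }
//         void set(const StorageInMemoryMetadata & m)
//         {
//             snapshot = std::make_shared<StorageInMemoryMetadata>(m);
//         }
//     };
//
// The bare pointer swap is not atomic on its own, which is what the TODO on
// the metadata member (switching to MultiVersion) is about.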
@@ -207,7 +204,7 @@ private: /// TODO (alesap) just use multiversion for atomic metadata mutable std::mutex ttl_mutex; - StorageInMemoryMetadata metadata; + StorageMetadataPtr metadata; private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; @@ -354,7 +351,7 @@ public: /** ALTER tables in the form of column changes that do not affect the change to Storage or its parameters. * This method must fully execute the ALTER query, taking care of the locks itself. - * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata. + * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata-> */ virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); @@ -445,7 +442,7 @@ public: /// struct). void setPartitionKey(const KeyDescription & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. - ASTPtr getPartitionKeyAST() const { return metadata.partition_key.definition_ast; } + ASTPtr getPartitionKeyAST() const { return metadata->partition_key.definition_ast; } /// Storage has user-defined (in CREATE query) partition key. bool isPartitionKeyDefined() const; /// Storage has partition key. @@ -460,7 +457,7 @@ public: /// struct). void setSortingKey(const KeyDescription & sorting_key_); /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. - ASTPtr getSortingKeyAST() const { return metadata.sorting_key.definition_ast; } + ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. bool isSortingKeyDefined() const; /// Storage has sorting key. It means, that it contains at least one column. @@ -477,7 +474,7 @@ public: /// struct). void setPrimaryKey(const KeyDescription & primary_key_); /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. - ASTPtr getPrimaryKeyAST() const { return metadata.primary_key.definition_ast; } + ASTPtr getPrimaryKeyAST() const { return metadata->primary_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. bool isPrimaryKeyDefined() const; /// Storage has primary key (maybe part of some other key). It means, that @@ -495,7 +492,7 @@ public: /// struct). void setSamplingKey(const KeyDescription & sampling_key_); /// Returns sampling expression AST for storage or nullptr if there is none. - ASTPtr getSamplingKeyAST() const { return metadata.sampling_key.definition_ast; } + ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; } /// Storage has user-defined (in CREATE query) sampling key. bool isSamplingKeyDefined() const; /// Storage has sampling key. 
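The storage diffs that follow all apply the same mechanical rewrite: constructors no longer call the removed IStorage::setColumns/setConstraints, but fill a local StorageInMemoryMetadata and publish it once. The shared shape, illustrated with a made-up StorageExample (each engine's real constructor differs in its extra members and checks):

    StorageExample::StorageExample(
        const StorageID & table_id_,
        const ColumnsDescription & columns_,
        const ConstraintsDescription & constraints_)
        : IStorage(table_id_)
    {
        StorageInMemoryMetadata metadata_;
        metadata_.setColumns(columns_);          /// real + materialized columns
        metadata_.setConstraints(constraints_);
        setInMemoryMetadata(metadata_);          /// publish a single snapshot
    }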
diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index bb721417c5b..2109afed932 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -137,7 +137,9 @@ StorageKafka::StorageKafka( , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 2a9d7766fd7..78d60163d5e 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -18,7 +18,9 @@ public: QueryProcessingStage::Enum to_stage_) : IStorage(table_id_), pipes(std::move(pipes_)), to_stage(to_stage_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } static StoragePtr createStorage(const StorageID & table_id, const ColumnsDescription & columns, Pipes pipes, QueryProcessingStage::Enum to_stage) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 8a04a9e49e4..ade2d1c967d 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -251,7 +251,9 @@ StorageLiveView::StorageLiveView( live_view_context = std::make_unique(global_context); live_view_context->makeQueryContext(); - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 84470088ebe..14be526d7f6 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -387,11 +387,7 @@ void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, checkProperties(new_metadata, attach); /// Other parts of metadata initialized is separate methods - setColumns(std::move(new_metadata.columns)); - setSecondaryIndices(std::move(new_metadata.secondary_indices)); - setConstraints(std::move(new_metadata.constraints)); - setSortingKey(std::move(new_metadata.sorting_key)); - setPrimaryKey(std::move(new_metadata.primary_key)); + setInMemoryMetadata(new_metadata); } namespace diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 3031402715a..342a89c38ea 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -49,12 +49,7 @@ protected: : IStorage(getIDFromPart(part_)) , part(part_) { - setColumns(part_->storage.getColumns()); - setSecondaryIndices(part_->storage.getSecondaryIndices()); - setPrimaryKey(part_->storage.getPrimaryKey()); - setSortingKey(part_->storage.getSortingKey()); - setColumnTTLs(part->storage.getColumnTTLs()); - setTableTTLs(part->storage.getTableTTLs()); + setInMemoryMetadata(part_->storage.getInMemoryMetadata()); } private: diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 08dc81b4945..007625790f4 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -77,8 
+77,10 @@ StorageBuffer::StorageBuffer( , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")")) , bg_pool(global_context.getBufferFlushSchedulePool()) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } @@ -778,7 +780,7 @@ void StorageBuffer::alter(const AlterCommands & params, const Context & context, StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 9b2c5784d85..4348973ec60 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -100,7 +100,9 @@ StorageDictionary::StorageDictionary( : IStorage(table_id_) , dictionary_name(dictionary_name_) { - setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 3383c609520..bf5f729ed19 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -283,8 +283,10 @@ StorageDistributed::StorageDistributed( , storage_policy(storage_policy_) , relative_data_path(relative_data_path_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (sharding_key_) { @@ -562,7 +564,7 @@ void StorageDistributed::alter(const AlterCommands & params, const Context & con StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 0bcb624bec4..f94a7b71e56 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -166,7 +166,10 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us auto & first_path = paths[0]; Block header = StorageDistributedDirectoryMonitor::createStreamFromFile(first_path)->getHeader(); - setColumns(ColumnsDescription(header.getNamesAndTypesList())); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription(header.getNamesAndTypesList())); + setInMemoryMetadata(metadata_); } } } @@ -188,10 +191,12 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.context.getPath()) { + StorageInMemoryMetadata metadata_; if (args.format_name != "Distributed") - setColumns(args.columns); + metadata_.setColumns(args.columns); - setConstraints(args.constraints); + metadata_.setConstraints(args.constraints); + setInMemoryMetadata(metadata_); } class StorageFileSource : public SourceWithProgress diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 8c186f38943..f69478a4bdd 100644 --- 
a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -388,7 +388,9 @@ StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { random_seed = random_seed_ ? sipHash64(*random_seed_) : randomSeed(); - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 352e0a43f39..082e40f6d6d 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -49,8 +49,11 @@ StorageHDFS::StorageHDFS(const String & uri_, , compression_method(compression_method_) { context.getRemoteHostFilter().checkURL(Poco::URI(uri)); - setColumns(columns_); - setConstraints(constraints_); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } namespace diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 8d23bd7bccf..ac2c0417c45 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -51,4 +51,19 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo } +void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) +{ + columns = std::move(columns_); +} + +void StorageInMemoryMetadata::setSecondaryIndices(IndicesDescription secondary_indices_) +{ + secondary_indices = std::move(secondary_indices_); +} + +void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_) +{ + constraints = std::move(constraints_); +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 889f8e49f69..f4d6e9b38b3 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -8,6 +8,7 @@ #include #include +#include namespace DB { @@ -47,6 +48,18 @@ struct StorageInMemoryMetadata StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); + + + //////////////////////////////////////////////////////////////////////// + void setColumns(ColumnsDescription columns_); /// sets only real columns, possibly overwrites virtual ones. 
+ + void setSecondaryIndices(IndicesDescription secondary_indices_); + + void setConstraints(ConstraintsDescription constraints_); + }; +using StorageMetadataPtr = std::shared_ptr; +using MultiVersionStorageMetadataPtr = MultiVersion; + } diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index e30ae55e715..92287051bf3 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -21,7 +21,9 @@ namespace ErrorCodes StorageInput::StorageInput(const StorageID & table_id, const ColumnsDescription & columns_) : IStorage(table_id) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index bc2bbb2ce67..09be868bcfa 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -431,8 +431,10 @@ StorageLog::StorageLog( , max_compress_block_size(max_compress_block_size_) , file_checker(disk, table_path + "sizes.json") { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a0c2fa87eb2..34d5e1d4374 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -50,7 +50,9 @@ StorageMaterializedView::StorageMaterializedView( bool attach_) : IStorage(table_id_), global_context(local_context.getGlobalContext()) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -209,7 +211,7 @@ void StorageMaterializedView::alter( /// end modify query DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index b55a6227127..bb89bdb5c48 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -78,8 +78,10 @@ private: StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_) : IStorage(table_id_) { - setColumns(std::move(columns_description_)); - setConstraints(std::move(constraints_)); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(std::move(columns_description_)); + metadata_.setConstraints(std::move(constraints_)); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8264eaa4cb6..3685a777bf0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -51,7 +51,9 @@ StorageMerge::StorageMerge( , table_name_regexp(table_name_regexp_) , global_context(context_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } template @@ -393,7 +395,7 @@ void StorageMerge::alter( StorageInMemoryMetadata storage_metadata = getInMemoryMetadata(); params.apply(storage_metadata, context); 
DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, storage_metadata); - setColumns(storage_metadata.columns); + setInMemoryMetadata(storage_metadata); } Block StorageMerge::getQueryHeader( diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index ee44ca7948e..f9aad8a58a7 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -56,8 +56,10 @@ StorageMySQL::StorageMySQL( , pool(std::move(pool_)) , global_context(context_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 182ce09ef96..7589c4b44dc 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -54,7 +54,7 @@ void StorageNull::alter( StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); - setColumns(std::move(new_metadata.columns)); + setInMemoryMetadata(new_metadata); } } diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 5fb4a16a24b..fe8bd05d53a 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -59,8 +59,10 @@ protected: StorageNull(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_) : IStorage(table_id_) { - setColumns(std::move(columns_description_)); - setConstraints(std::move(constraints_)); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_description_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } }; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 397d064ba15..acaa2bcc7d6 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -206,8 +206,10 @@ StorageS3::StorageS3( , compression_method(compression_method_) { context_global.getRemoteHostFilter().checkURL(uri_.uri); - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); auto settings = context_.getStorageS3Settings().getSettings(uri.endpoint); Aws::Auth::AWSCredentials credentials(access_key_id_, secret_access_key_); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 86bfed5ac84..38b4d30c25b 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -96,8 +96,11 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( const Context & context_) : IStorage(table_id_) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); + if (relative_path_.empty()) throw Exception("Join and Set storages require data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index b61d52657dd..b68505fa147 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -223,8 +223,10 @@ StorageStripeLog::StorageStripeLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageStripeLog")) { - setColumns(columns_); - setConstraints(constraints_); + 
StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 2a62068516e..5bca6072da0 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -336,8 +336,10 @@ StorageTinyLog::StorageTinyLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageTinyLog")) { - setColumns(columns_); - setConstraints(constraints_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index a69e140fe5a..0301412e029 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -43,8 +43,11 @@ IStorageURLBase::IStorageURLBase( , format_name(format_name_) { context_global.getRemoteHostFilter().checkURL(uri); - setColumns(columns_); - setConstraints(constraints_); + + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + metadata_.setConstraints(constraints_); + setInMemoryMetadata(metadata_); } namespace diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index cf0b39df8f1..5ba36a936e2 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -16,7 +16,9 @@ StorageValues::StorageValues( const NamesAndTypesList & virtuals_) : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); } Pipes StorageValues::read( diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 055faed5899..60ae681e002 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -38,7 +38,9 @@ StorageView::StorageView( const ColumnsDescription & columns_) : IStorage(table_id_) { - setColumns(columns_); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(columns_); + setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index b3951bc3f75..1ceff26ba83 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -23,7 +23,9 @@ protected: public: IStorageSystemOneBlock(const String & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription(Self::getNamesAndTypes())); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription(Self::getNamesAndTypes())); + setInMemoryMetadata(metadata_); } Pipes read(const Names & column_names, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 90e52ad373e..6359e367106 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -26,7 +26,8 @@ namespace ErrorCodes StorageSystemColumns::StorageSystemColumns(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata 
metadata_; + metadata_.setColumns(ColumnsDescription( { { "database", std::make_shared() }, { "table", std::make_shared() }, @@ -45,6 +46,7 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_) { "is_in_sampling_key", std::make_shared() }, { "compression_codec", std::make_shared() }, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index f3fd51330d9..ef88c3ca058 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -30,7 +30,8 @@ protected: explicit StorageSystemDetachedParts() : IStorage({"system", "detached_parts"}) { - setColumns(ColumnsDescription{{ + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription{{ {"database", std::make_shared()}, {"table", std::make_shared()}, {"partition_id", std::make_shared(std::make_shared())}, @@ -41,6 +42,7 @@ protected: {"max_block_number", std::make_shared(std::make_shared())}, {"level", std::make_shared(std::make_shared())} }}); + setInMemoryMetadata(metadata_); } Pipes read( diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index b5a5026b2e7..5905080539e 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -14,7 +14,8 @@ namespace ErrorCodes StorageSystemDisks::StorageSystemDisks(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription( { {"name", std::make_shared()}, {"path", std::make_shared()}, @@ -22,6 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) {"total_space", std::make_shared()}, {"keep_free_space", std::make_shared()}, })); + setInMemoryMetadata(metadata_); } Pipes StorageSystemDisks::read( diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 20dcc58f652..0fa7b71555e 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -118,7 +118,9 @@ private: StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, std::optional limit_, UInt64 offset_, bool even_distribution_) : IStorage(table_id), multithreaded(multithreaded_), even_distribution(even_distribution_), limit(limit_), offset(offset_) { - setColumns(ColumnsDescription({{"number", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"number", std::make_shared()}})); + setInMemoryMetadata(metadata_); } Pipes StorageSystemNumbers::read( diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index 6cbb634d2b7..e7c8c446847 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -14,7 +14,9 @@ namespace DB StorageSystemOne::StorageSystemOne(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"dummy", std::make_shared()}})); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 925a5df889e..42a432489f4 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ 
b/src/Storages/System/StorageSystemPartsBase.cpp @@ -277,7 +277,9 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL add_alias("bytes", "bytes_on_disk"); add_alias("marks_size", "marks_bytes"); - setColumns(tmp_columns); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(tmp_columns); + setInMemoryMetadata(metadata_); } NamesAndTypesList StorageSystemPartsBase::getVirtuals() const diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index c2cd3a1e4b1..ca71e7e5f74 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -19,7 +19,8 @@ namespace DB StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription({ + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({ { "database", std::make_shared() }, { "table", std::make_shared() }, { "engine", std::make_shared() }, @@ -52,6 +53,7 @@ StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) { "active_replicas", std::make_shared() }, { "zookeeper_exception", std::make_shared() }, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index acbc9d72a20..dbb47dc771a 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -17,7 +17,8 @@ namespace ErrorCodes StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & name_) : IStorage({"system", name_}) { - setColumns( + StorageInMemoryMetadata metadata_; + metadata_.setColumns( ColumnsDescription({ {"policy_name", std::make_shared()}, {"volume_name", std::make_shared()}, @@ -26,6 +27,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n {"max_data_part_size", std::make_shared()}, {"move_factor", std::make_shared()} })); + setInMemoryMetadata(metadata_); } Pipes StorageSystemStoragePolicies::read( diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 2bf6595bf53..84d441a8c6e 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -33,7 +33,8 @@ namespace ErrorCodes StorageSystemTables::StorageSystemTables(const std::string & name_) : IStorage({"system", name_}) { - setColumns(ColumnsDescription( + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription( { {"database", std::make_shared()}, {"name", std::make_shared()}, @@ -55,6 +56,7 @@ StorageSystemTables::StorageSystemTables(const std::string & name_) {"total_rows", std::make_shared(std::make_shared())}, {"total_bytes", std::make_shared(std::make_shared())}, })); + setInMemoryMetadata(metadata_); } diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index cd2fa0a6059..438d31e7e02 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -84,7 +84,10 @@ private: StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multithreaded_, std::optional limit_) : IStorage(table_id_), multithreaded(multithreaded_), limit(limit_) { - setColumns(ColumnsDescription({{"zero", std::make_shared()}})); + StorageInMemoryMetadata metadata_; + metadata_.setColumns(ColumnsDescription({{"zero", std::make_shared()}})); + 
setInMemoryMetadata(metadata_); + } Pipes StorageSystemZeros::read( From aa30649ce5eb3edc14641b595ccca6c3cba38dfa Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:10:14 +0300 Subject: [PATCH 087/318] Working setColumns, setConstraints, setIndices --- src/Storages/IStorage.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4d01bb5370d..403f5293588 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -82,7 +82,7 @@ public: IStorage() = delete; /// Storage fields should be initialized in separate methods like setColumns /// or setTableTTLs. - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)) {} //-V730 + explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_shared()) {} //-V730 virtual ~IStorage() = default; IStorage(const IStorage &) = delete; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 14be526d7f6..3414143c46b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -142,9 +142,8 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - setSettingsChanges(metadata_.settings_changes); - const auto settings = getSettings(); setProperties(metadata_, attach); + const auto settings = getSettings(); /// NOTE: using the same columns list as is read when performing actual merges. merging_params.check(getColumns().getAllPhysical()); @@ -385,8 +384,6 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool attach) { checkProperties(new_metadata, attach); - - /// Other parts of metadata initialized is separate methods setInMemoryMetadata(new_metadata); } From 5fc41c7eccc9d98d524ee997ae689d5e5333820a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:17:06 +0300 Subject: [PATCH 088/318] Move set*Key methods to StorageInMemoryMetadata --- src/Storages/IStorage.cpp | 21 --------------------- src/Storages/IStorage.h | 12 ------------ src/Storages/MergeTree/MergeTreeData.cpp | 3 --- src/Storages/StorageInMemoryMetadata.h | 12 ++++++++++++ 4 files changed, 12 insertions(+), 36 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 6c045a6f365..2bbbabbff08 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -403,11 +403,6 @@ const KeyDescription & IStorage::getPartitionKey() const return metadata->partition_key; } -void IStorage::setPartitionKey(const KeyDescription & partition_key_) -{ - metadata->partition_key = partition_key_; -} - bool IStorage::isPartitionKeyDefined() const { return metadata->partition_key.definition_ast != nullptr; @@ -430,11 +425,6 @@ const KeyDescription & IStorage::getSortingKey() const return metadata->sorting_key; } -void IStorage::setSortingKey(const KeyDescription & sorting_key_) -{ - metadata->sorting_key = sorting_key_; -} - bool IStorage::isSortingKeyDefined() const { return metadata->sorting_key.definition_ast != nullptr; @@ -464,11 +454,6 @@ const KeyDescription & IStorage::getPrimaryKey() const return metadata->primary_key; } -void IStorage::setPrimaryKey(const KeyDescription & primary_key_) -{ - metadata->primary_key = primary_key_; -} - bool 
IStorage::isPrimaryKeyDefined() const { return metadata->primary_key.definition_ast != nullptr; @@ -498,12 +483,6 @@ const KeyDescription & IStorage::getSamplingKey() const return metadata->sampling_key; } -void IStorage::setSamplingKey(const KeyDescription & sampling_key_) -{ - metadata->sampling_key = sampling_key_; -} - - bool IStorage::isSamplingKeyDefined() const { return metadata->sampling_key.definition_ast != nullptr; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 403f5293588..ec7e8fc1795 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -438,9 +438,6 @@ public: /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; - /// Set partition key for storage (methods bellow, are just wrappers for this - /// struct). - void setPartitionKey(const KeyDescription & partition_key_); /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. ASTPtr getPartitionKeyAST() const { return metadata->partition_key.definition_ast; } /// Storage has user-defined (in CREATE query) partition key. @@ -453,9 +450,6 @@ public: /// Returns structure with sorting key. const KeyDescription & getSortingKey() const; - /// Set sorting key for storage (methods bellow, are just wrappers for this - /// struct). - void setSortingKey(const KeyDescription & sorting_key_); /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. @@ -470,9 +464,6 @@ public: /// Returns structure with primary key. const KeyDescription & getPrimaryKey() const; - /// Set primary key for storage (methods bellow, are just wrappers for this - /// struct). - void setPrimaryKey(const KeyDescription & primary_key_); /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. ASTPtr getPrimaryKeyAST() const { return metadata->primary_key.definition_ast; } /// Storage has user-defined (in CREATE query) sorting key. @@ -488,9 +479,6 @@ public: /// Returns structure with sampling key. const KeyDescription & getSamplingKey() const; - /// Set sampling key for storage (methods bellow, are just wrappers for this - /// struct). - void setSamplingKey(const KeyDescription & sampling_key_); /// Returns sampling expression AST for storage or nullptr if there is none. ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; } /// Storage has user-defined (in CREATE query) sampling key. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 3414143c46b..ab0544c641b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -154,8 +154,6 @@ MergeTreeData::MergeTreeData( if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - - setSamplingKey(metadata_.sampling_key); } MergeTreeDataFormatVersion min_format_version(0); @@ -472,7 +470,6 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) } } } - setPartitionKey(new_partition_key); } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index f4d6e9b38b3..3b3c9d07c89 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -57,6 +57,18 @@ struct StorageInMemoryMetadata void setConstraints(ConstraintsDescription constraints_); + /// Set partition key for storage (methods bellow, are just wrappers for this + /// struct). + void setPartitionKey(const KeyDescription & partition_key_); + /// Set sorting key for storage (methods bellow, are just wrappers for this + /// struct). + void setSortingKey(const KeyDescription & sorting_key_); + /// Set primary key for storage (methods bellow, are just wrappers for this + /// struct). + void setPrimaryKey(const KeyDescription & primary_key_); + /// Set sampling key for storage (methods bellow, are just wrappers for this + /// struct). + void setSamplingKey(const KeyDescription & sampling_key_); }; using StorageMetadataPtr = std::shared_ptr; From e667eb57b2aeaea856c24ebbb6e869ee508a368d Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:24:49 +0300 Subject: [PATCH 089/318] Working set*Keys methods --- src/Storages/MergeTree/MergeTreeData.cpp | 36 +++++++++++++----------- src/Storages/MergeTree/MergeTreeData.h | 2 +- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ab0544c641b..8af38bdf500 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -118,7 +118,7 @@ const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt"; MergeTreeData::MergeTreeData( const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata_, + StorageInMemoryMetadata metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, @@ -142,28 +142,15 @@ MergeTreeData::MergeTreeData( if (relative_data_path.empty()) throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); - setProperties(metadata_, attach); - const auto settings = getSettings(); - - /// NOTE: using the same columns list as is read when performing actual merges. - merging_params.check(getColumns().getAllPhysical()); - - if (metadata_.sampling_key.definition_ast != nullptr) - { - const auto & pk_sample_block = getPrimaryKey().sample_block; - if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach - && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. 
- throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); - } - MergeTreeDataFormatVersion min_format_version(0); + /// TODO(alesap) Move to register methods if (!date_column_name.empty()) { try { auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); - auto partition_key = KeyDescription::getKeyFromAST(partition_by_ast, getColumns(), global_context); - initPartitionKey(partition_key); + metadata_.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata_.columns, global_context); + initPartitionKey(metadata_.partition_key); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); @@ -182,6 +169,21 @@ MergeTreeData::MergeTreeData( min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } + setProperties(metadata_, attach); + const auto settings = getSettings(); + + /// NOTE: using the same columns list as is read when performing actual merges. + merging_params.check(getColumns().getAllPhysical()); + + if (metadata_.sampling_key.definition_ast != nullptr) + { + const auto & pk_sample_block = getPrimaryKey().sample_block; + if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach + && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. + throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); + } + + setTTLExpressions(metadata_); /// format_file always contained on any data path diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 007c6898e60..12350b7bd10 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -323,7 +323,7 @@ public: /// attach - whether the existing table is attached or the new table is created. 
MergeTreeData(const StorageID & table_id_, const String & relative_data_path_, - const StorageInMemoryMetadata & metadata_, + StorageInMemoryMetadata metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, From 33a74a3ea05ee7ff405e7255c7faeeae08de144c Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 20:50:53 +0300 Subject: [PATCH 090/318] TTL methods in StorageInMemoryMetadata --- src/Storages/IStorage.cpp | 11 ----------- src/Storages/IStorage.h | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- src/Storages/StorageInMemoryMetadata.cpp | 10 ++++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ src/Storages/StorageMergeTree.cpp | 1 - 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 2bbbabbff08..afe61008553 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -506,11 +506,6 @@ TTLTableDescription IStorage::getTableTTLs() const return metadata->table_ttl; } -void IStorage::setTableTTLs(const TTLTableDescription & table_ttl_) -{ - std::lock_guard lock(ttl_mutex); - metadata->table_ttl = table_ttl_; -} bool IStorage::hasAnyTableTTL() const { @@ -523,12 +518,6 @@ TTLColumnsDescription IStorage::getColumnTTLs() const return metadata->column_ttls_by_name; } -void IStorage::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) -{ - std::lock_guard lock(ttl_mutex); - metadata->column_ttls_by_name = column_ttls_by_name_; -} - bool IStorage::hasAnyColumnTTL() const { std::lock_guard lock(ttl_mutex); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec7e8fc1795..f3081386c76 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -500,12 +500,10 @@ public: /// Common tables TTLs (for rows and moves). TTLTableDescription getTableTTLs() const; - void setTableTTLs(const TTLTableDescription & table_ttl_); bool hasAnyTableTTL() const; /// Separate TTLs for columns. TTLColumnsDescription getColumnTTLs() const; - void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); bool hasAnyColumnTTL() const; /// Just wrapper for table TTLs, return rows part of table TTLs. 
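The TTL getters kept above return by value while holding ttl_mutex, so callers end up with a private copy that remains valid after the lock is released; a reference would point into a snapshot that a concurrent metadata swap can replace. A reduced illustration of why the copy matters (hypothetical Snapshot/Holder types, not the real classes):

    struct Snapshot
    {
        TTLTableDescription table_ttl;
    };

    struct Holder
    {
        mutable std::mutex ttl_mutex;
        std::shared_ptr<const Snapshot> current;

        TTLTableDescription getTableTTLs() const
        {
            std::lock_guard<std::mutex> lock(ttl_mutex);
            return current->table_ttl;   /// the copy is made while locked
        }
        /// Returning `const TTLTableDescription &` here would dangle as soon
        /// as another thread replaced `current`.
    };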
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8af38bdf500..24c787e7c63 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -520,8 +520,8 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata) { checkTTLExpressions(new_metadata); - setColumnTTLs(new_metadata.column_ttls_by_name); - setTableTTLs(new_metadata.table_ttl); + //setColumnTTLs(new_metadata.column_ttls_by_name); + //setTableTTLs(new_metadata.table_ttl); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index ac2c0417c45..2d29ac433e9 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -66,4 +66,14 @@ void StorageInMemoryMetadata::setConstraints(ConstraintsDescription constraints_ constraints = std::move(constraints_); } +void StorageInMemoryMetadata::setTableTTLs(const TTLTableDescription & table_ttl_) +{ + table_ttl = table_ttl_; +} + +void StorageInMemoryMetadata::setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_) +{ + column_ttls_by_name = column_ttls_by_name_; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 3b3c9d07c89..b5c1a1997b6 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -69,6 +69,10 @@ struct StorageInMemoryMetadata /// Set sampling key for storage (methods bellow, are just wrappers for this /// struct). void setSamplingKey(const KeyDescription & sampling_key_); + + void setTableTTLs(const TTLTableDescription & table_ttl_); + + void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 2a7efa164d4..7007a544eac 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -281,7 +281,6 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. 
setProperties(new_metadata); - setTTLExpressions(new_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); From b47a7327fdbd5f2753b84aed98595c1a7d4df5e3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 21:08:05 +0300 Subject: [PATCH 091/318] All set methods in metadata --- src/Storages/IStorage.cpp | 13 ------------- src/Storages/IStorage.h | 2 -- src/Storages/MergeTree/MergeTreeData.cpp | 4 +++- src/Storages/StorageInMemoryMetadata.cpp | 13 +++++++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ src/Storages/StorageMaterializedView.cpp | 6 +++--- src/Storages/StorageReplicatedMergeTree.cpp | 1 - src/Storages/StorageView.cpp | 4 ++-- 8 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index afe61008553..e5ab14e046e 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -616,24 +616,11 @@ ASTPtr IStorage::getSettingsChanges() const return nullptr; } -void IStorage::setSettingsChanges(const ASTPtr & settings_changes_) -{ - if (settings_changes_) - metadata->settings_changes = settings_changes_->clone(); - else - metadata->settings_changes = nullptr; -} - const SelectQueryDescription & IStorage::getSelectQuery() const { return metadata->select; } -void IStorage::setSelectQuery(const SelectQueryDescription & select_) -{ - metadata->select = select_; -} - bool IStorage::hasSelectQuery() const { return metadata->select.select_query != nullptr; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index f3081386c76..0f48f3bf63c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -148,12 +148,10 @@ public: /// thread-unsafe part. lockStructure must be acquired /// Storage settings ASTPtr getSettingsChanges() const; - void setSettingsChanges(const ASTPtr & settings_changes_); bool hasSettingsChanges() const { return metadata->settings_changes != nullptr; } /// Select query for *View storages. 
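The settings AST and the *View SELECT description get the same treatment here as the TTLs did in the previous commit. One ordering consequence is visible in StorageMaterializedView and StorageView below: setInMemoryMetadata() moves to the end of the constructor, so metadata_ is published only once it is fully formed (columns plus select query), where previously the select query was patched onto the storage after publication. An abbreviated sketch of the resulting constructor tail (select stands for the parsed SelectQueryDescription):

    StorageInMemoryMetadata metadata_;
    metadata_.setColumns(columns_);
    metadata_.setSelectQuery(select);   /// was: setSelectQuery(select) on the storage, after publishing
    setInMemoryMetadata(metadata_);     /// publish once, fully formed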
const SelectQueryDescription & getSelectQuery() const; - void setSelectQuery(const SelectQueryDescription & select_); bool hasSelectQuery() const; StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 24c787e7c63..8971b50a0fd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1474,7 +1474,9 @@ void MergeTreeData::changeSettings( MergeTreeSettings copy = *getSettings(); copy.applyChanges(new_changes); storage_settings.set(std::make_unique(copy)); - setSettingsChanges(new_settings); + StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + new_metadata.setSettingsChanges(new_settings); + setInMemoryMetadata(new_metadata); } } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2d29ac433e9..b6dd2f38c4e 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -76,4 +76,17 @@ void StorageInMemoryMetadata::setColumnTTLs(const TTLColumnsDescription & column column_ttls_by_name = column_ttls_by_name_; } +void StorageInMemoryMetadata::setSettingsChanges(const ASTPtr & settings_changes_) +{ + if (settings_changes_) + settings_changes = settings_changes_; + else + settings_changes = nullptr; +} + +void StorageInMemoryMetadata::setSelectQuery(const SelectQueryDescription & select_) +{ + select = select_; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index b5c1a1997b6..b129cdc7756 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -73,6 +73,10 @@ struct StorageInMemoryMetadata void setTableTTLs(const TTLTableDescription & table_ttl_); void setColumnTTLs(const TTLColumnsDescription & column_ttls_by_name_); + + void setSettingsChanges(const ASTPtr & settings_changes_); + + void setSelectQuery(const SelectQueryDescription & select_); }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 34d5e1d4374..638a13612f2 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -52,7 +52,6 @@ StorageMaterializedView::StorageMaterializedView( { StorageInMemoryMetadata metadata_; metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -68,7 +67,8 @@ StorageMaterializedView::StorageMaterializedView( throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); auto select = SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); - setSelectQuery(select); + metadata_.setSelectQuery(select); + setInMemoryMetadata(metadata_); if (!has_inner_table) target_table_id = query.to_table_id; @@ -206,7 +206,7 @@ void StorageMaterializedView::alter( DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); - setSelectQuery(new_select); + new_metadata.setSelectQuery(new_select); } /// end modify query diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index d8e45b97438..cb5e5aaf701 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ 
b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3648,7 +3648,6 @@ void StorageReplicatedMergeTree::alter( StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); params.apply(future_metadata, query_context); - changeSettings(future_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, future_metadata); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 60ae681e002..d8392b2edd8 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -40,7 +40,6 @@ StorageView::StorageView( { StorageInMemoryMetadata metadata_; metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -48,7 +47,8 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); - setSelectQuery(description); + metadata_.setSelectQuery(description); + setInMemoryMetadata(metadata_); } From 36ba0192df07424d5c5b7c1ca8a197648238c38a Mon Sep 17 00:00:00 2001 From: alesapin Date: Mon, 15 Jun 2020 22:08:58 +0300 Subject: [PATCH 092/318] Metadata in read and write methods of IStorage --- src/Core/ExternalTable.cpp | 2 +- .../CreatingSetsBlockInputStream.cpp | 2 +- .../PushingToViewsBlockOutputStream.cpp | 2 +- src/DataStreams/RemoteQueryExecutor.cpp | 7 +++++-- src/Interpreters/InterpreterInsertQuery.cpp | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Interpreters/InterpreterSelectQuery.h | 1 + .../Transforms/CreatingSetsTransform.cpp | 2 +- src/Server/TCPHandler.cpp | 3 ++- src/Storages/IStorage.h | 3 +++ src/Storages/Kafka/StorageKafka.cpp | 3 ++- src/Storages/Kafka/StorageKafka.h | 2 ++ src/Storages/LiveView/StorageBlocks.h | 1 + src/Storages/LiveView/StorageLiveView.cpp | 1 + src/Storages/LiveView/StorageLiveView.h | 1 + .../MergeTree/StorageFromMergeTreeDataPart.h | 1 + src/Storages/StorageBuffer.cpp | 13 ++++++++++--- src/Storages/StorageBuffer.h | 3 ++- src/Storages/StorageDictionary.cpp | 1 + src/Storages/StorageDictionary.h | 4 +++- src/Storages/StorageDistributed.cpp | 3 ++- src/Storages/StorageDistributed.h | 3 ++- src/Storages/StorageFile.cpp | 2 ++ src/Storages/StorageFile.h | 2 ++ src/Storages/StorageGenerateRandom.cpp | 1 + src/Storages/StorageGenerateRandom.h | 1 + src/Storages/StorageHDFS.cpp | 5 +++-- src/Storages/StorageHDFS.h | 6 ++++-- src/Storages/StorageInput.cpp | 4 +++- src/Storages/StorageInput.h | 1 + src/Storages/StorageJoin.cpp | 1 + src/Storages/StorageJoin.h | 1 + src/Storages/StorageLog.cpp | 4 ++-- src/Storages/StorageLog.h | 3 ++- src/Storages/StorageMaterializedView.cpp | 11 ++++++++--- src/Storages/StorageMaterializedView.h | 3 ++- src/Storages/StorageMemory.cpp | 4 ++-- src/Storages/StorageMemory.h | 3 ++- src/Storages/StorageMerge.cpp | 4 +++- src/Storages/StorageMerge.h | 1 + src/Storages/StorageMergeTree.cpp | 3 ++- src/Storages/StorageMergeTree.h | 3 ++- src/Storages/StorageMySQL.cpp | 4 ++-- src/Storages/StorageMySQL.h | 3 ++- src/Storages/StorageNull.h | 3 ++- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.h | 3 ++- src/Storages/StorageS3.cpp | 3 ++- src/Storages/StorageS3.h | 3 ++- src/Storages/StorageSet.cpp | 2 +- src/Storages/StorageSet.h | 2 +- src/Storages/StorageStripeLog.cpp | 4 ++-- src/Storages/StorageStripeLog.h | 3 ++- src/Storages/StorageTinyLog.cpp | 4 ++-- src/Storages/StorageTinyLog.h | 3 ++- 
src/Storages/StorageURL.cpp | 6 ++++-- src/Storages/StorageURL.h | 3 ++- src/Storages/StorageValues.cpp | 1 + src/Storages/StorageValues.h | 1 + src/Storages/StorageView.cpp | 1 + src/Storages/StorageView.h | 1 + src/Storages/StorageXDBC.cpp | 8 +++++--- src/Storages/StorageXDBC.h | 17 +++++++++-------- src/Storages/System/IStorageSystemOneBlock.h | 4 +++- src/Storages/System/StorageSystemColumns.cpp | 1 + src/Storages/System/StorageSystemColumns.h | 1 + .../System/StorageSystemDetachedParts.cpp | 13 +++++++------ src/Storages/System/StorageSystemDisks.cpp | 1 + src/Storages/System/StorageSystemDisks.h | 1 + src/Storages/System/StorageSystemNumbers.cpp | 1 + src/Storages/System/StorageSystemNumbers.h | 1 + src/Storages/System/StorageSystemOne.cpp | 1 + src/Storages/System/StorageSystemOne.h | 1 + src/Storages/System/StorageSystemPartsBase.cpp | 13 +++++++------ src/Storages/System/StorageSystemPartsBase.h | 13 +++++++------ src/Storages/System/StorageSystemReplicas.cpp | 1 + src/Storages/System/StorageSystemReplicas.h | 1 + .../System/StorageSystemStoragePolicies.cpp | 13 +++++++------ .../System/StorageSystemStoragePolicies.h | 13 +++++++------ src/Storages/System/StorageSystemTables.cpp | 1 + src/Storages/System/StorageSystemTables.h | 1 + src/Storages/System/StorageSystemZeros.cpp | 13 +++++++------ src/Storages/System/StorageSystemZeros.h | 13 +++++++------ src/Storages/tests/gtest_storage_log.cpp | 6 ++++-- 84 files changed, 208 insertions(+), 111 deletions(-) diff --git a/src/Core/ExternalTable.cpp b/src/Core/ExternalTable.cpp index 5ec6980dbfa..3639a109b42 100644 --- a/src/Core/ExternalTable.cpp +++ b/src/Core/ExternalTable.cpp @@ -167,7 +167,7 @@ void ExternalTablesHandler::handlePart(const Poco::Net::MessageHeader & header, auto temporary_table = TemporaryTableHolder(context, ColumnsDescription{columns}, {}); auto storage = temporary_table.getTable(); context.addExternalTable(data->table_name, std::move(temporary_table)); - BlockOutputStreamPtr output = storage->write(ASTPtr(), context); + BlockOutputStreamPtr output = storage->write(ASTPtr(), storage->getInMemoryMetadataPtr(), context); /// Write data auto sink = std::make_shared(std::move(output)); diff --git a/src/DataStreams/CreatingSetsBlockInputStream.cpp b/src/DataStreams/CreatingSetsBlockInputStream.cpp index e40b5979b05..2a2275a4e89 100644 --- a/src/DataStreams/CreatingSetsBlockInputStream.cpp +++ b/src/DataStreams/CreatingSetsBlockInputStream.cpp @@ -101,7 +101,7 @@ void CreatingSetsBlockInputStream::createOne(SubqueryForSet & subquery) BlockOutputStreamPtr table_out; if (subquery.table) - table_out = subquery.table->write({}, context); + table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context); bool done_with_set = !subquery.set; bool done_with_join = !subquery.join; diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 2c2e6972158..fa213b054df 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -107,7 +107,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( /// Do not push to destination table if the flag is set if (!no_destination) { - output = storage->write(query_ptr, context); + output = storage->write(query_ptr, storage->getInMemoryMetadataPtr(), context); replicated_output = dynamic_cast(output.get()); } } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 
cf3b2c4abcd..071cb6e9aba 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -319,12 +319,15 @@ void RemoteQueryExecutor::sendExternalTables() for (const auto & table : external_tables) { StoragePtr cur = table.second; + auto metadata_snapshot = cur->getInMemoryMetadataPtr(); QueryProcessingStage::Enum read_from_table_stage = cur->getQueryProcessingStage(context); Pipes pipes; - pipes = cur->read(cur->getColumns().getNamesOfPhysical(), {}, context, - read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); + pipes = cur->read( + cur->getColumns().getNamesOfPhysical(), + metadata_snapshot, {}, context, + read_from_table_stage, DEFAULT_BLOCK_SIZE, 1); auto data = std::make_unique(); data->table_name = table.first; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 7deed262eda..1841c82b710 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -117,6 +117,7 @@ BlockIO InterpreterInsertQuery::execute() StoragePtr table = getTable(query); auto table_lock = table->lockStructureForShare( true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table); if (!query.table_function) @@ -226,7 +227,7 @@ BlockIO InterpreterInsertQuery::execute() /// NOTE: we explicitly ignore bound materialized views when inserting into Kafka Storage. /// Otherwise we'll get duplicates when MV reads same rows again from Kafka. if (table->noPushingToViews() && !no_destination) - out = table->write(query_ptr, context); + out = table->write(query_ptr, metadata_snapshot, context); else out = std::make_shared(table, context, query_ptr, no_destination); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index ac17a3042d8..c22296cfb26 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -255,6 +255,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( table_lock = storage->lockStructureForShare( false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); table_id = storage->getStorageID(); + metadata_snapshot = storage->getInMemoryMetadataPtr(); } if (has_input || !joined_tables.resolveTables()) @@ -1293,7 +1294,6 @@ void InterpreterSelectQuery::executeFetchColumns( else if (storage) { /// Table. 
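Across these call sites the pattern is uniform: take the structure lock, capture one StorageMetadataPtr, and hand that same snapshot to read() or write(), so the whole query runs against a single metadata version even if a concurrent ALTER publishes a new one meanwhile. A condensed caller-side sketch assembled from the hunks in this patch (table is any StoragePtr):

    auto table_lock = table->lockStructureForShare(
        false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout);
    auto metadata_snapshot = table->getInMemoryMetadataPtr();  /// pin one metadata version

    Pipes pipes = table->read(
        column_names, metadata_snapshot, query_info,
        context, processed_stage, max_block_size, num_streams);

    /// and symmetrically on the write path:
    BlockOutputStreamPtr out = table->write(query_ptr, metadata_snapshot, context);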
- if (max_streams == 0) throw Exception("Logical error: zero number of streams requested", ErrorCodes::LOGICAL_ERROR); @@ -1324,7 +1324,7 @@ void InterpreterSelectQuery::executeFetchColumns( query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); } - Pipes pipes = storage->read(required_columns, query_info, *context, processing_stage, max_block_size, max_streams); + Pipes pipes = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams); if (pipes.empty()) { diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 8ed775f60ae..8f7237ffd7e 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -184,6 +184,7 @@ private: StoragePtr storage; StorageID table_id = StorageID::createEmpty(); /// Will be initialized if storage is not nullptr TableStructureReadLockHolder table_lock; + StorageMetadataPtr metadata_snapshot; /// Used when we read from prepared input, not table or subquery. BlockInputStreamPtr input; diff --git a/src/Processors/Transforms/CreatingSetsTransform.cpp b/src/Processors/Transforms/CreatingSetsTransform.cpp index af8fa4097df..c0e34d9fbd4 100644 --- a/src/Processors/Transforms/CreatingSetsTransform.cpp +++ b/src/Processors/Transforms/CreatingSetsTransform.cpp @@ -66,7 +66,7 @@ void CreatingSetsTransform::startSubquery(SubqueryForSet & subquery) elapsed_nanoseconds = 0; if (subquery.table) - table_out = subquery.table->write({}, context); + table_out = subquery.table->write({}, subquery.table->getInMemoryMetadataPtr(), context); done_with_set = !subquery.set; done_with_join = !subquery.join; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 7e17604c4c7..a01cc4fa0aa 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -952,8 +952,9 @@ bool TCPHandler::receiveData(bool scalar) storage = temporary_table.getTable(); query_context->addExternalTable(temporary_id.table_name, std::move(temporary_table)); } + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); /// The data will be written directly to the table. - state.io.out = storage->write(ASTPtr(), *query_context); + state.io.out = storage->write(ASTPtr(), metadata_snapshot, *query_context); } if (state.need_receive_data_for_input) state.block_for_input = block; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 0f48f3bf63c..28ad7b0ea8b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -155,6 +155,7 @@ public: /// thread-unsafe part. lockStructure must be acquired bool hasSelectQuery() const; StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } + StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. 
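The two accessors shown above split the responsibilities: getInMemoryMetadata() returns a by-value copy that the caller may freely mutate (the copy-mutate-publish flow), while getInMemoryMetadataPtr() hands out the shared pointer itself as a cheap snapshot that is treated as read-only. Because setInMemoryMetadata() installs a freshly allocated object instead of mutating in place, existing snapshot holders keep seeing the version they started with. Roughly:

    StorageInMemoryMetadata copy = storage.getInMemoryMetadata();  /// independent copy
    StorageMetadataPtr snap = storage.getInMemoryMetadataPtr();    /// shared snapshot
    /// A concurrent ALTER runs storage.setInMemoryMetadata(new_metadata):
    /// 'snap' still points at the old object; 'copy' was never shared at all.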
@@ -292,6 +293,7 @@ public: */ virtual Pipes read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -309,6 +311,7 @@ public: */ virtual BlockOutputStreamPtr write( const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { throw Exception("Method write is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 2109afed932..190397bc675 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -201,6 +201,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) Pipes StorageKafka::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -231,7 +232,7 @@ Pipes StorageKafka::read( } -BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const Context & context) +BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto modified_context = std::make_shared(context); modified_context->applySettingsChanges(settings_adjustments); diff --git a/src/Storages/Kafka/StorageKafka.h b/src/Storages/Kafka/StorageKafka.h index be3f89687fe..6f479ba2089 100644 --- a/src/Storages/Kafka/StorageKafka.h +++ b/src/Storages/Kafka/StorageKafka.h @@ -39,6 +39,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -47,6 +48,7 @@ public: BlockOutputStreamPtr write( const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void pushReadBuffer(ConsumerBufferPtr buf); diff --git a/src/Storages/LiveView/StorageBlocks.h b/src/Storages/LiveView/StorageBlocks.h index 78d60163d5e..56fd0c620c2 100644 --- a/src/Storages/LiveView/StorageBlocks.h +++ b/src/Storages/LiveView/StorageBlocks.h @@ -37,6 +37,7 @@ public: Pipes read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index ade2d1c967d..cb4964f3c55 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -520,6 +520,7 @@ void StorageLiveView::refresh(const Context & context) Pipes StorageLiveView::read( const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/LiveView/StorageLiveView.h b/src/Storages/LiveView/StorageLiveView.h index 458e74eb506..13386c7a4e6 100644 --- a/src/Storages/LiveView/StorageLiveView.h +++ b/src/Storages/LiveView/StorageLiveView.h @@ -126,6 +126,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git 
a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 342a89c38ea..826af505b12 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -21,6 +21,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 007625790f4..3e419921115 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -145,6 +145,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context Pipes StorageBuffer::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -157,6 +158,7 @@ Pipes StorageBuffer::read( { auto destination = DatabaseCatalog::instance().getTable(destination_id, context); + auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); @@ -177,7 +179,9 @@ Pipes StorageBuffer::read( query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. - pipes_from_dst = destination->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + pipes_from_dst = destination->read( + column_names, destination_metadata_snapshot, query_info, + context, processed_stage, max_block_size, num_streams); } else { @@ -210,7 +214,10 @@ Pipes StorageBuffer::read( } else { - pipes_from_dst = destination->read(columns_intersection, query_info, context, processed_stage, max_block_size, num_streams); + pipes_from_dst = destination->read( + columns_intersection, destination_metadata_snapshot, query_info, + context, processed_stage, max_block_size, num_streams); + for (auto & pipe : pipes_from_dst) { pipe.addSimpleTransform(std::make_shared( @@ -425,7 +432,7 @@ private: }; -BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 02fd35136bf..7cd73dc556c 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -57,13 +57,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void startup() override; /// Flush all buffers into the subordinate table and stop background thread. 
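StorageBuffer above shows the one real subtlety in this otherwise mechanical change: a proxy storage that forwards reads to another table (Buffer here; StorageMaterializedView and StorageMerge below behave the same way) must pass the destination's snapshot rather than its own, since it is the destination whose structure is actually read:

    auto destination = DatabaseCatalog::instance().getTable(destination_id, context);
    auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr();
    auto pipes_from_dst = destination->read(
        column_names, destination_metadata_snapshot, query_info,
        context, processed_stage, max_block_size, num_streams);

Everywhere else the new parameter is accepted with its name commented out, /*metadata_snapshot*/, which keeps every override compiling without unused-parameter warnings until later commits in the series start consulting the snapshot.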
diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 4348973ec60..25126ad951d 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -113,6 +113,7 @@ void StorageDictionary::checkTableCanBeDropped() const Pipes StorageDictionary::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageDictionary.h b/src/Storages/StorageDictionary.h index 7bb6fc22480..6175902381b 100644 --- a/src/Storages/StorageDictionary.h +++ b/src/Storages/StorageDictionary.h @@ -16,7 +16,9 @@ public: void checkTableCanBeDropped() const override; - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index bf5f729ed19..719811bbc6b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -464,6 +464,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con Pipes StorageDistributed::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -510,7 +511,7 @@ Pipes StorageDistributed::read( } -BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const Context & context) +BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto cluster = getCluster(); const auto & settings = context.getSettingsRef(); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 63021e0a169..3f148cfff01 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -70,13 +70,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /// Removes temporary data in local filesystem. 
void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f94a7b71e56..07df2b4ec8a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -377,6 +377,7 @@ private: Pipes StorageFile::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -479,6 +480,7 @@ private: BlockOutputStreamPtr StorageFile::write( const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { if (format_name == "Distributed") diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index fa5034d946c..65589d245b9 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -26,6 +26,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -34,6 +35,7 @@ public: BlockOutputStreamPtr write( const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) override; diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index f69478a4bdd..f1d97a4e5c4 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -429,6 +429,7 @@ void registerStorageGenerateRandom(StorageFactory & factory) Pipes StorageGenerateRandom::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageGenerateRandom.h b/src/Storages/StorageGenerateRandom.h index 955b8bd671d..0d068eb951e 100644 --- a/src/Storages/StorageGenerateRandom.h +++ b/src/Storages/StorageGenerateRandom.h @@ -17,6 +17,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 082e40f6d6d..77afc4c47c8 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -264,9 +264,10 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c Pipes StorageHDFS::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context_, - QueryProcessingStage::Enum /*processed_stage*/, + QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { @@ -300,7 +301,7 @@ Pipes StorageHDFS::read( return pipes; } -BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(uri, format_name, diff --git a/src/Storages/StorageHDFS.h b/src/Storages/StorageHDFS.h index 5b250247b84..62425cc518f 100644 --- a/src/Storages/StorageHDFS.h +++ b/src/Storages/StorageHDFS.h @@ -19,14 +19,16 @@ class StorageHDFS final : public 
ext::shared_ptr_helper, public ISt public: String getName() const override { return "HDFS"; } - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 92287051bf3..4117a6b3a37 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -58,7 +58,9 @@ void StorageInput::setInputStream(BlockInputStreamPtr input_stream_) } -Pipes StorageInput::read(const Names & /*column_names*/, +Pipes StorageInput::read( + const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageInput.h b/src/Storages/StorageInput.h index 60bda222c2e..f4425ee8cd5 100644 --- a/src/Storages/StorageInput.h +++ b/src/Storages/StorageInput.h @@ -19,6 +19,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 5cceefe907b..7ed4c1c110b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -435,6 +435,7 @@ private: // TODO: multiple stream read and index read Pipes StorageJoin::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index f956abb4d3b..40dbf1b44dd 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -38,6 +38,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 09be868bcfa..a09a99b30e1 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -577,6 +577,7 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount() const Pipes StorageLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -620,8 +621,7 @@ Pipes StorageLog::read( return pipes; } -BlockOutputStreamPtr StorageLog::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { loadMarks(); return std::make_shared(*this); diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 2c2abdb0275..60f885ce45c 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -26,13 +26,14 @@ public: Pipes read( const Names & 
column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 638a13612f2..a387eadabe0 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -108,6 +108,7 @@ QueryProcessingStage::Enum StorageMaterializedView::getQueryProcessingStage(cons Pipes StorageMaterializedView::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -117,11 +118,12 @@ Pipes StorageMaterializedView::read( auto storage = getTargetTable(); auto lock = storage->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); - Pipes pipes = storage->read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + Pipes pipes = storage->read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); for (auto & pipe : pipes) pipe.addTableLock(lock); @@ -129,12 +131,15 @@ Pipes StorageMaterializedView::read( return pipes; } -BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const Context & context) +BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto storage = getTargetTable(); auto lock = storage->lockStructureForShare( true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto stream = storage->write(query, context); + + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto stream = storage->write(query, metadata_snapshot, context); + stream->addTableLock(lock); return stream; } diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 480c75aa114..42fe186a068 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -31,7 +31,7 @@ public: return getTargetTable()->mayBenefitFromIndexForIn(left_in_operand, query_context); } - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void drop() override; @@ -63,6 +63,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index bb89bdb5c48..f9c39d78a05 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -87,6 +87,7 @@ StorageMemory::StorageMemory(const StorageID & 
table_id_, ColumnsDescription col Pipes StorageMemory::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -119,8 +120,7 @@ Pipes StorageMemory::read( } -BlockOutputStreamPtr StorageMemory::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 5a79358d76d..3c583533462 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -30,13 +30,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void drop() override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 3685a777bf0..6656e91189c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -129,6 +129,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & Pipes StorageMerge::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -243,6 +244,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer return pipes; } + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, query_info.query); if (processed_stage <= storage_stage) { @@ -250,7 +252,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical())); - pipes = storage->read(real_column_names, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); + pipes = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); } else if (processed_stage > storage_stage) { diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index adf4a40e675..a5d3b8d2667 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -31,6 +31,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 7007a544eac..e3f48a05d6e 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -177,6 +177,7 @@ StorageMergeTree::~StorageMergeTree() Pipes StorageMergeTree::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, 
QueryProcessingStage::Enum /*processed_stage*/, @@ -196,7 +197,7 @@ std::optional StorageMergeTree::totalBytes() const return getTotalActiveSizeInBytes(); } -BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { return std::make_shared(*this, context.getSettingsRef().max_partitions_per_insert_block); } diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index c6c8f99a62a..679726826d4 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -39,6 +39,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -48,7 +49,7 @@ public: std::optional totalRows() const override; std::optional totalBytes() const override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /** Perform the next step in combining the parts. */ diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index f9aad8a58a7..dce9e0f38ec 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -65,6 +65,7 @@ StorageMySQL::StorageMySQL( Pipes StorageMySQL::read( const Names & column_names_, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info_, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, @@ -198,8 +199,7 @@ private: }; -BlockOutputStreamPtr StorageMySQL::write( - const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { return std::make_shared(*this, remote_database_name, remote_table_name, pool.get(), context.getSettingsRef().mysql_max_rows_to_insert); } diff --git a/src/Storages/StorageMySQL.h b/src/Storages/StorageMySQL.h index 8b98536e4d7..287c65db6f3 100644 --- a/src/Storages/StorageMySQL.h +++ b/src/Storages/StorageMySQL.h @@ -39,13 +39,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; private: friend class StorageMySQLBlockOutputStream; diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index fe8bd05d53a..72934d185c7 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -24,6 +24,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processing_stage*/, @@ -35,7 +36,7 @@ public: return pipes; } - BlockOutputStreamPtr write(const ASTPtr &, const Context &) override + BlockOutputStreamPtr write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context &) override { return 
std::make_shared(getSampleBlock()); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index cb5e5aaf701..650578d7560 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3381,6 +3381,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg Pipes StorageReplicatedMergeTree::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -3442,7 +3443,7 @@ void StorageReplicatedMergeTree::assertNotReadonly() const } -BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { const auto storage_settings_ptr = getSettings(); assertNotReadonly(); diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index aae0b9c81b8..5fcfd98e71d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -89,6 +89,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -98,7 +99,7 @@ public: std::optional totalRows() const override; std::optional totalBytes() const override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) override; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index acaa2bcc7d6..093f4450ecb 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -285,6 +285,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & Pipes StorageS3::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -320,7 +321,7 @@ Pipes StorageS3::read( return narrowPipes(std::move(pipes), num_streams); } -BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared( format_name, min_upload_part_size, getSampleBlock(), context_global, diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index fc19fe06da0..665c00b8033 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -48,13 +48,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; 
NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 38b4d30c25b..cddd4657cd1 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -81,7 +81,7 @@ void SetOrJoinBlockOutputStream::writeSuffix() } -BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { UInt64 id = ++increment; return std::make_shared(*this, path, path + "tmp/", toString(id) + ".bin"); diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index cf85dfd5d5b..b7785aadc6a 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -21,7 +21,7 @@ class StorageSetOrJoinBase : public IStorage public: void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; Strings getDataPaths() const override { return {path}; } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index b68505fa147..c320d0afb42 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -253,6 +253,7 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora Pipes StorageStripeLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -299,8 +300,7 @@ Pipes StorageStripeLog::read( } -BlockOutputStreamPtr StorageStripeLog::write( - const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index ed8e5da081e..d06758a60e8 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -27,13 +27,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 5bca6072da0..42b70f716f4 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -394,6 +394,7 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage Pipes StorageTinyLog::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -413,8 +414,7 @@ Pipes StorageTinyLog::read( } -BlockOutputStreamPtr StorageTinyLog::write( - const ASTPtr & 
/*query*/, const Context & /*context*/) +BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared(*this); } diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index 102ec76fda3..a55bf6d0dcf 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -26,13 +26,14 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0301412e029..0361718c616 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -155,7 +155,9 @@ std::function IStorageURLBase::getReadPOSTDataCallback(con } -Pipes IStorageURLBase::read(const Names & column_names, +Pipes IStorageURLBase::read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -183,7 +185,7 @@ Pipes IStorageURLBase::read(const Names & column_names, return pipes; } -BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const Context & /*context*/) +BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) { return std::make_shared( uri, format_name, getSampleBlock(), context_global, diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 5a6584f0301..ecd57024a44 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -21,13 +21,14 @@ class IStorageURLBase : public IStorage public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; protected: IStorageURLBase( diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 5ba36a936e2..bb29b4a0932 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -23,6 +23,7 @@ StorageValues::StorageValues( Pipes StorageValues::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageValues.h b/src/Storages/StorageValues.h index 254f3bfa8aa..88fb023fb2b 100644 --- a/src/Storages/StorageValues.h +++ b/src/Storages/StorageValues.h @@ -17,6 +17,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum 
processed_stage, diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index d8392b2edd8..52b7e8764d9 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -54,6 +54,7 @@ StorageView::StorageView( Pipes StorageView::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index c6b48d4d357..143ed3c06c4 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -23,6 +23,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 08538798389..c7fa8a88251 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -82,7 +82,9 @@ std::function StorageXDBC::getReadPOSTDataCallback(const N return [query](std::ostream & os) { os << "query=" << query; }; } -Pipes StorageXDBC::read(const Names & column_names, +Pipes StorageXDBC::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -92,10 +94,10 @@ Pipes StorageXDBC::read(const Names & column_names, check(column_names); bridge_helper->startBridgeSync(); - return IStorageURLBase::read(column_names, query_info, context, processed_stage, max_block_size, num_streams); + return IStorageURLBase::read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); } -BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const Context & context) +BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { bridge_helper->startBridgeSync(); diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index afc61dac5cd..4488122656d 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -15,13 +15,14 @@ namespace DB class StorageXDBC : public IStorageURLBase { public: - - Pipes read(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; StorageXDBC(const StorageID & table_id_, const std::string & remote_database_name, @@ -29,7 +30,7 @@ public: const ColumnsDescription & columns_, const Context & context_, BridgeHelperPtr bridge_helper_); - BlockOutputStreamPtr write(const ASTPtr & query, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; private: diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 1ceff26ba83..7644f62b96d 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -28,7 +28,9 @@ public: 
setInMemoryMetadata(metadata_); } - Pipes read(const Names & column_names, + Pipes read( + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6359e367106..646a5434b64 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -242,6 +242,7 @@ private: Pipes StorageSystemColumns::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemColumns.h b/src/Storages/System/StorageSystemColumns.h index 66b423efb96..7336b406183 100644 --- a/src/Storages/System/StorageSystemColumns.h +++ b/src/Storages/System/StorageSystemColumns.h @@ -19,6 +19,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index ef88c3ca058..3d24d90bbef 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -46,12 +46,13 @@ protected: } Pipes read( - const Names & /* column_names */, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) override + const Names & /* column_names */, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) override { StoragesInfoStream stream(query_info, context); diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 5905080539e..36fde616bd4 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -28,6 +28,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) Pipes StorageSystemDisks::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemDisks.h b/src/Storages/System/StorageSystemDisks.h index b136a217508..714a0a5428c 100644 --- a/src/Storages/System/StorageSystemDisks.h +++ b/src/Storages/System/StorageSystemDisks.h @@ -22,6 +22,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 0fa7b71555e..fd7e04cfb1f 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -125,6 +125,7 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipes StorageSystemNumbers::read( const Names & column_names, + const StorageMetadataPtr & 
/*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemNumbers.h b/src/Storages/System/StorageSystemNumbers.h index 4d205728496..88d3651c7f9 100644 --- a/src/Storages/System/StorageSystemNumbers.h +++ b/src/Storages/System/StorageSystemNumbers.h @@ -31,6 +31,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index e7c8c446847..af736c215b5 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -22,6 +22,7 @@ StorageSystemOne::StorageSystemOne(const std::string & name_) Pipes StorageSystemOne::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index 9fb3a670949..3b3e531dc86 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -23,6 +23,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 42a432489f4..e599bbb19e3 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -224,12 +224,13 @@ StoragesInfo StoragesInfoStream::next() } Pipes StorageSystemPartsBase::read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { bool has_state_column = hasStateColumn(column_names); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 7b9ce7cbae2..a46cecec9dd 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -56,12 +56,13 @@ class StorageSystemPartsBase : public IStorage { public: Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & metadata_, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; NamesAndTypesList getVirtuals() const override; diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index ca71e7e5f74..24861fcbd6a 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -59,6 +59,7 @@ 
StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) Pipes StorageSystemReplicas::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemReplicas.h b/src/Storages/System/StorageSystemReplicas.h index 94b0d6c9d06..b068ebc8b0a 100644 --- a/src/Storages/System/StorageSystemReplicas.h +++ b/src/Storages/System/StorageSystemReplicas.h @@ -20,6 +20,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index dbb47dc771a..a80747c1fa1 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -31,12 +31,13 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n } Pipes StorageSystemStoragePolicies::read( - const Names & column_names, - const SelectQueryInfo & /*query_info*/, - const Context & context, - QueryProcessingStage::Enum /*processed_stage*/, - const size_t /*max_block_size*/, - const unsigned /*num_streams*/) + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & /*query_info*/, + const Context & context, + QueryProcessingStage::Enum /*processed_stage*/, + const size_t /*max_block_size*/, + const unsigned /*num_streams*/) { check(column_names); diff --git a/src/Storages/System/StorageSystemStoragePolicies.h b/src/Storages/System/StorageSystemStoragePolicies.h index 79e89863bf0..a1427da8559 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.h +++ b/src/Storages/System/StorageSystemStoragePolicies.h @@ -21,12 +21,13 @@ public: std::string getName() const override { return "SystemStoragePolicies"; } Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; protected: StorageSystemStoragePolicies(const std::string & name_); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 84d441a8c6e..f04b3ea20c9 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -448,6 +448,7 @@ private: Pipes StorageSystemTables::read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, diff --git a/src/Storages/System/StorageSystemTables.h b/src/Storages/System/StorageSystemTables.h index bab3aef6e15..54551205684 100644 --- a/src/Storages/System/StorageSystemTables.h +++ b/src/Storages/System/StorageSystemTables.h @@ -20,6 +20,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & /*metadata_*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, diff --git 
a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 438d31e7e02..2bc53b5093e 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -91,12 +91,13 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith } Pipes StorageSystemZeros::read( - const Names & column_names, - const SelectQueryInfo &, - const Context & /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t max_block_size, - unsigned num_streams) + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo &, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t max_block_size, + unsigned num_streams) { check(column_names); diff --git a/src/Storages/System/StorageSystemZeros.h b/src/Storages/System/StorageSystemZeros.h index 3768885d03d..f169861122a 100644 --- a/src/Storages/System/StorageSystemZeros.h +++ b/src/Storages/System/StorageSystemZeros.h @@ -21,12 +21,13 @@ public: std::string getName() const override { return "SystemZeros"; } Pipes read( - const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) override; + const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) override; bool hasEvenlyDistributedRead() const override { return true; } diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index fff352210e7..618d524987b 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -71,6 +71,7 @@ TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & context) { using namespace DB; + auto metadata_snapshot = table->getInMemoryMetadataPtr(); std::string data; @@ -97,7 +98,7 @@ std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & cont block.insert(column); } - BlockOutputStreamPtr out = table->write({}, context); + BlockOutputStreamPtr out = table->write({}, metadata_snapshot, context); out->write(block); return data; @@ -107,13 +108,14 @@ std::string writeData(int rows, DB::StoragePtr & table, const DB::Context & cont std::string readData(DB::StoragePtr & table, const DB::Context & context) { using namespace DB; + auto metadata_snapshot = table->getInMemoryMetadataPtr(); Names column_names; column_names.push_back("a"); QueryProcessingStage::Enum stage = table->getQueryProcessingStage(context); - BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, {}, context, stage, 8192, 1)[0])); + BlockInputStreamPtr in = std::make_shared(std::move(table->read(column_names, metadata_snapshot, {}, context, stage, 8192, 1)[0])); Block sample; { From 977fd3e44fa08fe8427e04e75386894c785fba1f Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 02:45:05 +0300 Subject: [PATCH 093/318] Update CMakeLists.txt --- base/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/CMakeLists.txt b/base/CMakeLists.txt index a8dedec9269..cfa54fe2ca4 100644 --- a/base/CMakeLists.txt +++ b/base/CMakeLists.txt @@ -10,4 +10,4 @@ add_subdirectory (widechar_width) if (USE_MYSQL) 
add_subdirectory (mysqlxx) -endif () \ No newline at end of file +endif () From 92c7760c6e87f64dca55b78c12176c4f35b5de6c Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 02:51:33 +0300 Subject: [PATCH 094/318] Update CMakeLists.txt --- base/daemon/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/daemon/CMakeLists.txt b/base/daemon/CMakeLists.txt index 36de193bccd..04d2f059b39 100644 --- a/base/daemon/CMakeLists.txt +++ b/base/daemon/CMakeLists.txt @@ -10,4 +10,4 @@ target_link_libraries (daemon PUBLIC loggers PRIVATE clickhouse_common_io clickh if (USE_SENTRY) target_link_libraries (daemon PRIVATE curl) target_link_libraries (daemon PRIVATE ${SENTRY_LIBRARY}) -endif () \ No newline at end of file +endif () From 9b734ffded4cbf4e929bcfa2bb545c6d3e67938a Mon Sep 17 00:00:00 2001 From: alexey-milovidov Date: Tue, 16 Jun 2020 03:21:20 +0300 Subject: [PATCH 095/318] Update http_server.py --- tests/integration/test_send_crash_reports/http_server.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_send_crash_reports/http_server.py b/tests/integration/test_send_crash_reports/http_server.py index e3fa2e1cb57..74f0592504f 100644 --- a/tests/integration/test_send_crash_reports/http_server.py +++ b/tests/integration/test_send_crash_reports/http_server.py @@ -40,4 +40,4 @@ if __name__ == "__main__": try: httpd.serve_forever() finally: - httpd.server_close() \ No newline at end of file + httpd.server_close() From c43bd228ab6609d5971ff4002240697d817f8a31 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sat, 13 Jun 2020 23:43:01 -0700 Subject: [PATCH 096/318] make max global thread pool setting configurable This PR adds a server-level config for overriding the default maximum number of threads in the global thread pool that is currently allowed (10,000). This might be useful in scenarios where a large number of distributed queries are executing concurrently and where the default number of max threads might not necessarily be sufficient.
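For illustration only, here is a minimal caller-side sketch of the pool this setting governs; it is not part of the patch. It assumes the `scheduleOrThrowOnError` helper declared in `src/Common/ThreadPool.h`, and the function name is hypothetical. Callers like this need no changes; only the pool's capacity becomes configurable.

``` cpp
#include <Common/ThreadPool.h>

/// Hypothetical caller: schedules one fire-and-forget job on the global pool.
void runInBackground()
{
    /// With this patch the singleton sizes itself from <max_thread_pool_size>
    /// in config.xml (default 10000) instead of hard-coded constructor arguments.
    auto & pool = GlobalThreadPool::instance();

    /// Blocks while the job queue is full; throws if the pool is shut down
    /// or a new worker thread cannot be created.
    pool.scheduleOrThrowOnError([] { /* background work */ });
}
```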
--- programs/server/config.xml | 10 ++++++++++ src/Common/ThreadPool.cpp | 14 +++++++++++++- src/Common/ThreadPool.h | 4 +++- 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/programs/server/config.xml b/programs/server/config.xml index 0ceba85593a..f4c0f5a22fc 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -136,6 +136,16 @@ --> 0 + + + + <max_thread_pool_size>10000</max_thread_pool_size> + 0.9 diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 3e6e31ed3fc..edfb52e01ac 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -3,6 +3,10 @@ #include +#include <Poco/Util/Application.h> +#include <Poco/Util/LayeredConfiguration.h> + + namespace DB { @@ -264,6 +268,14 @@ template class ThreadPoolImpl; GlobalThreadPool & GlobalThreadPool::instance() { - static GlobalThreadPool ret; + const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config(); + + UInt64 max_threads = config.getUInt64("max_thread_pool_size", 10000); + size_t max_free_threads = 1000; + size_t max_queue_size = 10000; + const bool shutdown_on_exception = false; + + static GlobalThreadPool ret(max_threads, max_free_threads, max_queue_size, shutdown_on_exception); + return ret; } diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 9d5582db50c..3d1169d618d 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -129,7 +129,9 @@ using FreeThreadPool = ThreadPoolImpl<std::thread>; class GlobalThreadPool : public FreeThreadPool, private boost::noncopyable { public: - GlobalThreadPool() : FreeThreadPool(10000, 1000, 10000, false) {} + GlobalThreadPool(size_t max_threads_, size_t max_free_threads_, size_t queue_size_, + const bool shutdown_on_exception_) : + FreeThreadPool(max_threads_, max_free_threads_, queue_size_, shutdown_on_exception_) {} static GlobalThreadPool & instance(); }; From aa2d724ea13804ddd5b3cfc223ba005757b992d0 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 13:35:09 -0700 Subject: [PATCH 097/318] add max_thread_pool_size setting to tests This adds the `max_thread_pool_size` config to the tests/server-test.xml file. --- tests/server-test.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/server-test.xml b/tests/server-test.xml index 7f792479065..721d62ef301 100644 --- a/tests/server-test.xml +++ b/tests/server-test.xml @@ -17,6 +17,7 @@ 58443 59440 59009 + <max_thread_pool_size>10000</max_thread_pool_size> From 09e3975b9778d5849a1cd9b8cd4f156b10311cb9 Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 13:44:39 -0700 Subject: [PATCH 098/318] docs for max_thread_pool_size This adds the docs for the new server-level setting `max_thread_pool_size`. --- .../server-configuration-parameters/settings.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index b90b432da6c..b43d6bf847a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -426,6 +426,18 @@ The value 0 means that you can delete all tables without any restrictions. 0 ``` +## max\_thread\_pool\_size {#max-thread-pool-size} + +The maximum number of threads in the Global Thread pool. + +Default value: 10000. + +**Example** + +``` xml +<max_thread_pool_size>12000</max_thread_pool_size> +``` + ## merge\_tree {#server_configuration_parameters-merge_tree} Fine tuning for tables in the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
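Patches 096 through 098 above only change how the global singleton is constructed; the meaning of the four `FreeThreadPool` constructor arguments it forwards is unchanged. The sketch below illustrates them on a local pool under the same assumptions as the previous example; the numeric values and function name are arbitrary, not defaults.

``` cpp
#include <Common/ThreadPool.h>

/// Hypothetical example of the FreeThreadPool parameters that
/// GlobalThreadPool now forwards from the server configuration.
void processItems()
{
    /// max_threads = 16: hard cap on concurrently running threads (the role
    /// max_thread_pool_size now plays for the global pool).
    /// max_free_threads = 4: idle threads kept alive for reuse.
    /// queue_size = 64: bound on pending jobs; further scheduling waits for space.
    /// shutdown_on_exception = false: a throwing job does not stop the pool.
    FreeThreadPool pool(16, 4, 64, /* shutdown_on_exception = */ false);

    for (size_t i = 0; i < 100; ++i)
        pool.scheduleOrThrowOnError([i] { /* process item i */ });

    pool.wait(); /// block until all scheduled jobs have finished
}
```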
From 334c5abe9b37a314f3a7ad4d7a783ad08bfa724c Mon Sep 17 00:00:00 2001 From: Bharat Nallan Date: Sun, 14 Jun 2020 17:09:59 -0700 Subject: [PATCH 099/318] remove extra vertical space --- src/Common/ThreadPool.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index edfb52e01ac..3a669056f21 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -6,8 +6,6 @@ #include #include - - namespace DB { namespace ErrorCodes From cd769e5ebf2e2cdd1e4340e292412fafd6a6add9 Mon Sep 17 00:00:00 2001 From: Alexander Kuzmenkov Date: Tue, 16 Jun 2020 11:21:15 +0300 Subject: [PATCH 100/318] fixup --- docker/test/performance-comparison/compare.sh | 2 +- docker/test/performance-comparison/report.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh index 1dbf712ff50..c9beff6d7db 100755 --- a/docker/test/performance-comparison/compare.sh +++ b/docker/test/performance-comparison/compare.sh @@ -235,7 +235,7 @@ function build_log_column_definitions { # FIXME This loop builds column definitons from TSVWithNamesAndTypes in an # absolutely atrocious way. This should be done by the file() function itself. -for x in {right,left}-{addresses,{query,query-thread,trace,metric}-log}.tsv +for x in {right,left}-{addresses,{query,query-thread,trace,{async-,}metric}-log}.tsv do paste -d' ' \ <(sed -n '1{s/\t/\n/g;p;q}' "$x" | sed 's/\(^.*$\)/"\1"/') \ diff --git a/docker/test/performance-comparison/report.py b/docker/test/performance-comparison/report.py index d7e30190aef..d830b6e65fc 100755 --- a/docker/test/performance-comparison/report.py +++ b/docker/test/performance-comparison/report.py @@ -325,8 +325,8 @@ if args.report == 'main': def print_benchmark_results(): left_json = json.load(open('benchmark/website-left.json')); right_json = json.load(open('benchmark/website-right.json')); - left_qps = left_json["statistics"]["QPS"] - right_qps = right_json["statistics"]["QPS"] + left_qps = next(iter(left_json.values()))["statistics"]["QPS"] + right_qps = next(iter(right_json.values()))["statistics"]["QPS"] relative_diff = (right_qps - left_qps) / left_qps; times_diff = max(right_qps, left_qps) / max(0.01, min(right_qps, left_qps)) print(tableStart('Concurrent benchmarks')) From 0f286ac133fa360b997d3edbc2891b016c6134c5 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 15:03:27 +0300 Subject: [PATCH 101/318] Copy some methods to metadata --- src/Storages/StorageInMemoryMetadata.cpp | 121 +++++++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 31 +++++- 2 files changed, 151 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index b6dd2f38c4e..bf747fb9b5a 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -89,4 +89,125 @@ void StorageInMemoryMetadata::setSelectQuery(const SelectQueryDescription & sele select = select_; } +const ColumnsDescription & StorageInMemoryMetadata::getColumns() const +{ + return columns; +} + +const IndicesDescription & StorageInMemoryMetadata::getSecondaryIndices() const +{ + return secondary_indices; +} + +bool StorageInMemoryMetadata::hasSecondaryIndices() const +{ + return !secondary_indices.empty(); +} + +const ConstraintsDescription & StorageInMemoryMetadata::getConstraints() const +{ + return constraints; +} + +TTLTableDescription StorageInMemoryMetadata::getTableTTLs()
const +{ + return table_ttl; +} + +bool StorageInMemoryMetadata::hasAnyTableTTL() const +{ + return hasAnyMoveTTL() || hasRowsTTL(); +} + +TTLColumnsDescription StorageInMemoryMetadata::getColumnTTLs() const +{ + return column_ttls_by_name; +} + +bool StorageInMemoryMetadata::hasAnyColumnTTL() const +{ + return !column_ttls_by_name.empty(); +} + +TTLDescription StorageInMemoryMetadata::getRowsTTL() const +{ + return table_ttl.rows_ttl; +} + +bool StorageInMemoryMetadata::hasRowsTTL() const +{ + return table_ttl.rows_ttl.expression != nullptr; +} + +TTLDescriptions StorageInMemoryMetadata::getMoveTTLs() const +{ + return table_ttl.move_ttl; +} + +bool StorageInMemoryMetadata::hasAnyMoveTTL() const +{ + return !table_ttl.move_ttl.empty(); +} + +ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet & updated_columns) const +{ + if (updated_columns.empty()) + return {}; + + ColumnDependencies res; + + NameSet indices_columns; + NameSet required_ttl_columns; + NameSet updated_ttl_columns; + + auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set) + { + auto required_columns = expression->getRequiredColumns(); + for (const auto & dependency : required_columns) + { + if (updated_columns.count(dependency)) + { + to_set.insert(required_columns.begin(), required_columns.end()); + return true; + } + } + + return false; + }; + + for (const auto & index : getSecondaryIndices()) + add_dependent_columns(index.expression, indices_columns); + + if (hasRowsTTL()) + { + auto rows_expression = getRowsTTL().expression; + if (add_dependent_columns(rows_expression, required_ttl_columns)) + { + /// Filter all columns, if rows TTL expression has to be recalculated. + for (const auto & column : getColumns().getAllPhysical()) + updated_ttl_columns.insert(column.name); + } + } + + for (const auto & [name, entry] : getColumnTTLs()) + { + if (add_dependent_columns(entry.expression, required_ttl_columns)) + updated_ttl_columns.insert(name); + } + + for (const auto & entry : getMoveTTLs()) + add_dependent_columns(entry.expression, required_ttl_columns); + + for (const auto & column : indices_columns) + res.emplace(column, ColumnDependency::SKIP_INDEX); + for (const auto & column : required_ttl_columns) + res.emplace(column, ColumnDependency::TTL_EXPRESSION); + for (const auto & column : updated_ttl_columns) + res.emplace(column, ColumnDependency::TTL_TARGET); + + return res; + +} + + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index b129cdc7756..fb7bcbaa349 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -1,12 +1,13 @@ #pragma once #include +#include #include #include #include #include -#include #include +#include #include namespace DB { @@ -77,6 +78,34 @@ struct StorageInMemoryMetadata void setSettingsChanges(const ASTPtr & settings_changes_); void setSelectQuery(const SelectQueryDescription & select_); + + const ColumnsDescription & getColumns() const; /// returns combined set of columns + const IndicesDescription & getSecondaryIndices() const; + /// Has at least one non-primary index + bool hasSecondaryIndices() const; + + const ConstraintsDescription & getConstraints() const; + + /// Common tables TTLs (for rows and moves). + TTLTableDescription getTableTTLs() const; + bool hasAnyTableTTL() const; + + /// Separate TTLs for columns.
+ TTLColumnsDescription getColumnTTLs() const; + bool hasAnyColumnTTL() const; + + /// Just wrapper for table TTLs, return rows part of table TTLs. + TTLDescription getRowsTTL() const; + bool hasRowsTTL() const; + + /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of + /// table TTL. + TTLDescriptions getMoveTTLs() const; + bool hasAnyMoveTTL() const; + + /// Returns columns, which will be needed to calculate dependencies (skip + /// indices, TTL expressions) if we update @updated_columns set of columns. + ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; }; using StorageMetadataPtr = std::shared_ptr; From 0bcd22008a2e42c79c2ff8724b065a854e335335 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 15:19:21 +0300 Subject: [PATCH 102/318] Get column dependencies in StorageInMemoryMetadata --- src/Interpreters/MutationsInterpreter.cpp | 11 ++-- src/Interpreters/MutationsInterpreter.h | 1 + src/Storages/IStorage.cpp | 61 ------------------- src/Storages/IStorage.h | 4 -- .../MergeTree/MergeTreeDataMergerMutator.cpp | 17 ++++-- .../MergeTree/MergeTreeDataMergerMutator.h | 12 +++- src/Storages/StorageMergeTree.cpp | 7 ++- src/Storages/StorageReplicatedMergeTree.cpp | 10 ++- 8 files changed, 41 insertions(+), 82 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 123de1b6e84..ce47ce6e476 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -137,13 +137,13 @@ ASTPtr prepareQueryAffectedAST(const std::vector & commands) return select; } -ColumnDependencies getAllColumnDependencies(const StoragePtr & storage, const NameSet & updated_columns) +ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns) { NameSet new_updated_columns = updated_columns; ColumnDependencies dependencies; while (!new_updated_columns.empty()) { - auto new_dependencies = storage->getColumnDependencies(new_updated_columns); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns); new_updated_columns.clear(); for (const auto & dependency : new_dependencies) { @@ -204,6 +204,7 @@ MutationsInterpreter::MutationsInterpreter( const Context & context_, bool can_execute_) : storage(std::move(storage_)) + , metadata_snapshot(storage->getInMemoryMetadataPtr()) , commands(std::move(commands_)) , context(context_) , can_execute(can_execute_) @@ -329,7 +330,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } /// Columns, that we need to read for calculation of skip indices or TTL expressions. - auto dependencies = getAllColumnDependencies(storage, updated_columns); + auto dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns); /// First, break a sequence of commands into stages. for (const auto & command : commands) @@ -423,7 +424,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } auto all_columns_vec = all_columns.getNames(); - auto all_dependencies = getAllColumnDependencies(storage, NameSet(all_columns_vec.begin(), all_columns_vec.end())); + auto all_dependencies = getAllColumnDependencies(metadata_snapshot, NameSet(all_columns_vec.begin(), all_columns_vec.end())); for (const auto & dependency : all_dependencies) { @@ -432,7 +433,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } /// Recalc only skip indices of columns, that could be updated by TTL. 
- auto new_dependencies = storage->getColumnDependencies(new_updated_columns); + auto new_dependencies = metadata_snapshot->getColumnDependencies(new_updated_columns); for (const auto & dependency : new_dependencies) { if (dependency.kind == ColumnDependency::SKIP_INDEX) diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 35c4f8ece0a..158ed8d55af 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -47,6 +47,7 @@ private: std::optional getStorageSortDescriptionIfPossible(const Block & header) const; StoragePtr storage; + StorageMetadataPtr metadata_snapshot; MutationCommands commands; Context context; bool can_execute; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index e5ab14e046e..6dae96a3322 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -548,67 +548,6 @@ bool IStorage::hasAnyMoveTTL() const return !metadata->table_ttl.move_ttl.empty(); } - -ColumnDependencies IStorage::getColumnDependencies(const NameSet & updated_columns) const -{ - if (updated_columns.empty()) - return {}; - - ColumnDependencies res; - - NameSet indices_columns; - NameSet required_ttl_columns; - NameSet updated_ttl_columns; - - auto add_dependent_columns = [&updated_columns](const auto & expression, auto & to_set) - { - auto requiered_columns = expression->getRequiredColumns(); - for (const auto & dependency : requiered_columns) - { - if (updated_columns.count(dependency)) - { - to_set.insert(requiered_columns.begin(), requiered_columns.end()); - return true; - } - } - - return false; - }; - - for (const auto & index : getSecondaryIndices()) - add_dependent_columns(index.expression, indices_columns); - - if (hasRowsTTL()) - { - auto rows_expression = getRowsTTL().expression; - if (add_dependent_columns(rows_expression, required_ttl_columns)) - { - /// Filter all columns, if rows TTL expression have to be recalculated. - for (const auto & column : getColumns().getAllPhysical()) - updated_ttl_columns.insert(column.name); - } - } - - for (const auto & [name, entry] : getColumnTTLs()) - { - if (add_dependent_columns(entry.expression, required_ttl_columns)) - updated_ttl_columns.insert(name); - } - - for (const auto & entry : getMoveTTLs()) - add_dependent_columns(entry.expression, required_ttl_columns); - - for (const auto & column : indices_columns) - res.emplace(column, ColumnDependency::SKIP_INDEX); - for (const auto & column : required_ttl_columns) - res.emplace(column, ColumnDependency::TTL_EXPRESSION); - for (const auto & column : updated_ttl_columns) - res.emplace(column, ColumnDependency::TTL_TARGET); - - return res; - -} - ASTPtr IStorage::getSettingsChanges() const { if (metadata->settings_changes) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 28ad7b0ea8b..d3e65b6a845 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -492,10 +492,6 @@ public: /// Returns column names that need to be read for FINAL to work. Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } - /// Returns columns, which will be needed to calculate dependencies (skip - /// indices, TTL expressions) if we update @updated_columns set of columns. - ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; - /// Returns storage policy if storage supports it. 
virtual StoragePolicyPtr getStoragePolicy() const { return {}; } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index d861173d8a0..595370e7ecc 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -578,8 +578,14 @@ public: /// parts should be sorted. MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( - const FutureMergedMutatedPart & future_part, MergeList::Entry & merge_entry, TableStructureReadLockHolder &, - time_t time_of_merge, const ReservationPtr & space_reservation, bool deduplicate, bool force_ttl) + const FutureMergedMutatedPart & future_part, + const StorageMetadataPtr & /*metadata_snapshot*/, + MergeList::Entry & merge_entry, + TableStructureReadLockHolder &, + time_t time_of_merge, + const ReservationPtr & space_reservation, + bool deduplicate, + bool force_ttl) { static const String TMP_PREFIX = "tmp_merge_"; @@ -975,6 +981,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTemporaryPart( const FutureMergedMutatedPart & future_part, + const StorageMetadataPtr & metadata_snapshot, const MutationCommands & commands, MergeListEntry & merge_entry, time_t time_of_mutation, @@ -1069,7 +1076,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor bool need_remove_expired_values = false; - if (in && shouldExecuteTTL(in->getHeader().getNamesAndTypesList().getNames(), commands_for_part)) + if (in && shouldExecuteTTL(metadata_snapshot, in->getHeader().getNamesAndTypesList().getNames(), commands_for_part)) need_remove_expired_values = true; /// All columns from part are changed and may be some more that were missing before in part @@ -1556,7 +1563,7 @@ std::set MergeTreeDataMergerMutator::getIndicesToRecalculate( return indices_to_recalc; } -bool MergeTreeDataMergerMutator::shouldExecuteTTL(const Names & columns, const MutationCommands & commands) const +bool MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadataPtr & metadata_snapshot, const Names & columns, const MutationCommands & commands) const { if (!data.hasAnyTTL()) return false; @@ -1565,7 +1572,7 @@ bool MergeTreeDataMergerMutator::shouldExecuteTTL(const Names & columns, const M if (command.type == MutationCommand::MATERIALIZE_TTL) return true; - auto dependencies = data.getColumnDependencies(NameSet(columns.begin(), columns.end())); + auto dependencies = metadata_snapshot->getColumnDependencies(NameSet(columns.begin(), columns.end())); for (const auto & dependency : dependencies) if (dependency.kind == ColumnDependency::TTL_EXPRESSION || dependency.kind == ColumnDependency::TTL_TARGET) return true; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 7c2ee53fc1d..185961972a8 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -105,12 +105,18 @@ public: */ MergeTreeData::MutableDataPartPtr mergePartsToTemporaryPart( const FutureMergedMutatedPart & future_part, - MergeListEntry & merge_entry, TableStructureReadLockHolder & table_lock_holder, time_t time_of_merge, - const ReservationPtr & space_reservation, bool deduplicate, bool force_ttl); + const StorageMetadataPtr & metadata_snapshot, + MergeListEntry & merge_entry, + TableStructureReadLockHolder & 
table_lock_holder, + time_t time_of_merge, + const ReservationPtr & space_reservation, + bool deduplicate, + bool force_ttl); /// Mutate a single data part with the specified commands. Will create and return a temporary part. MergeTreeData::MutableDataPartPtr mutatePartToTemporaryPart( const FutureMergedMutatedPart & future_part, + const StorageMetadataPtr & metadata_snapshot, const MutationCommands & commands, MergeListEntry & merge_entry, time_t time_of_mutation, @@ -164,7 +170,7 @@ private: const IndicesDescription & all_indices, const MutationCommands & commands_for_removes); - bool shouldExecuteTTL(const Names & columns, const MutationCommands & commands) const; + bool shouldExecuteTTL(const StorageMetadataPtr & metadata_snapshot, const Names & columns, const MutationCommands & commands) const; /// Return set of indices which should be recalculated during mutation also /// wraps input stream into additional expression stream diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index e3f48a05d6e..1b00487c816 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -591,6 +591,7 @@ bool StorageMergeTree::merge( { auto table_lock_holder = lockStructureForShare( true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto metadata_snapshot = getInMemoryMetadataPtr(); FutureMergedMutatedPart future_part; @@ -693,7 +694,7 @@ bool StorageMergeTree::merge( bool force_ttl = (final && hasAnyTTL()); new_part = merger_mutator.mergePartsToTemporaryPart( - future_part, *merge_entry, table_lock_holder, time(nullptr), + future_part, metadata_snapshot, *merge_entry, table_lock_holder, time(nullptr), merging_tagger->reserved_space, deduplicate, force_ttl); merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr); @@ -739,6 +740,7 @@ bool StorageMergeTree::tryMutatePart() { auto table_lock_holder = lockStructureForShare( true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); size_t max_ast_elements = global_context.getSettingsRef().max_expanded_ast_elements; FutureMergedMutatedPart future_part; @@ -832,7 +834,8 @@ bool StorageMergeTree::tryMutatePart() try { - new_part = merger_mutator.mutatePartToTemporaryPart(future_part, commands, *merge_entry, + new_part = merger_mutator.mutatePartToTemporaryPart( + future_part, metadata_snapshot, commands, *merge_entry, time(nullptr), global_context, tagger->reserved_space, table_lock_holder); renameTempPartAndReplace(new_part); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7abf90d3eac..810a4fa5c97 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1304,6 +1304,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) auto table_lock = lockStructureForShare( false, RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); + StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); FutureMergedMutatedPart future_merged_part(parts, entry.new_part_type); if (future_merged_part.name != entry.new_part_name) @@ -1331,7 +1332,9 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) try { part = merger_mutator.mergePartsToTemporaryPart( - future_merged_part, *merge_entry, table_lock, entry.create_time, reserved_space, entry.deduplicate, 
entry.force_ttl); + future_merged_part, metadata_snapshot, *merge_entry, + table_lock, entry.create_time, reserved_space, entry.deduplicate, + entry.force_ttl); merger_mutator.renameMergedTemporaryPart(part, parts, &transaction); @@ -1428,6 +1431,7 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM auto table_lock = lockStructureForShare( false, RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); + StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); MutableDataPartPtr new_part; Transaction transaction(*this); @@ -1454,7 +1458,9 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM try { - new_part = merger_mutator.mutatePartToTemporaryPart(future_mutated_part, commands, *merge_entry, entry.create_time, global_context, reserved_space, table_lock); + new_part = merger_mutator.mutatePartToTemporaryPart( + future_mutated_part, metadata_snapshot, commands, *merge_entry, + entry.create_time, global_context, reserved_space, table_lock); renameTempPartAndReplace(new_part, nullptr, &transaction); try From 53cb5210debb5baa10d521d90bd6afb7988245e2 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 15:48:10 +0300 Subject: [PATCH 103/318] Move getSampleBlockNonMaterialized to StorageInMemoryMetadata --- src/Interpreters/InterpreterInsertQuery.cpp | 9 ++++++--- src/Interpreters/InterpreterInsertQuery.h | 3 ++- src/Interpreters/SystemLog.h | 3 ++- src/Storages/IStorage.cpp | 9 --------- src/Storages/IStorage.h | 1 - src/Storages/Kafka/KafkaBlockInputStream.cpp | 13 ++++++++++--- src/Storages/Kafka/KafkaBlockInputStream.h | 8 +++++++- src/Storages/Kafka/KafkaBlockOutputStream.cpp | 10 ++++++++-- src/Storages/Kafka/KafkaBlockOutputStream.h | 6 +++++- src/Storages/Kafka/StorageKafka.cpp | 11 ++++++----- src/Storages/StorageBuffer.cpp | 4 +++- src/Storages/StorageDistributed.cpp | 4 ++-- src/Storages/StorageInMemoryMetadata.cpp | 8 ++++++++ src/Storages/StorageInMemoryMetadata.h | 2 ++ 14 files changed, 61 insertions(+), 30 deletions(-) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index 1841c82b710..d281dc5ccca 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -73,9 +73,12 @@ StoragePtr InterpreterInsertQuery::getTable(ASTInsertQuery & query) return DatabaseCatalog::instance().getTable(query.table_id, context); } -Block InterpreterInsertQuery::getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) const +Block InterpreterInsertQuery::getSampleBlock( + const ASTInsertQuery & query, + const StoragePtr & table, + const StorageMetadataPtr & metadata_snapshot) const { - Block table_sample_non_materialized = table->getSampleBlockNonMaterialized(); + Block table_sample_non_materialized = metadata_snapshot->getSampleBlockNonMaterialized(); /// If the query does not include information about columns if (!query.columns) { @@ -119,7 +122,7 @@ BlockIO InterpreterInsertQuery::execute() true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); - auto query_sample_block = getSampleBlock(query, table); + auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); if (!query.table_function) context.checkAccess(AccessType::INSERT, query.table_id, query_sample_block.getNames()); diff --git a/src/Interpreters/InterpreterInsertQuery.h 
b/src/Interpreters/InterpreterInsertQuery.h index fef962d24a3..3386b471d26 100644 --- a/src/Interpreters/InterpreterInsertQuery.h +++ b/src/Interpreters/InterpreterInsertQuery.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB { @@ -34,7 +35,7 @@ public: private: StoragePtr getTable(ASTInsertQuery & query); - Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table) const; + Block getSampleBlock(const ASTInsertQuery & query, const StoragePtr & table, const StorageMetadataPtr & metadata_snapshot) const; ASTPtr query_ptr; const Context & context; diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index e49ce574478..cf163226b93 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -438,8 +438,9 @@ void SystemLog::prepareTable() if (table) { + auto metadata_snapshot = table->getInMemoryMetadataPtr(); const Block expected = LogElement::createBlock(); - const Block actual = table->getSampleBlockNonMaterialized(); + const Block actual = metadata_snapshot->getSampleBlockNonMaterialized(); if (!blocksHaveEqualStructure(actual, expected)) { diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 6dae96a3322..e675d51b4b7 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -74,15 +74,6 @@ Block IStorage::getSampleBlockWithVirtuals() const return res; } -Block IStorage::getSampleBlockNonMaterialized() const -{ - Block res; - - for (const auto & column : getColumns().getOrdinary()) - res.insert({column.type->createColumn(), column.type, column.name}); - - return res; -} Block IStorage::getSampleBlockForColumns(const Names & column_names) const { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d3e65b6a845..42581ebb63b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -160,7 +160,6 @@ public: /// thread-unsafe part. lockStructure must be acquired Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. - Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. 
/// Verify that all the requested names are in the table and are set correctly: diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 3edfcc7b9d2..dd2bb68c11a 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -13,14 +13,21 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } KafkaBlockInputStream::KafkaBlockInputStream( - StorageKafka & storage_, const std::shared_ptr & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix_) + StorageKafka & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const std::shared_ptr & context_, + const Names & columns, + size_t max_block_size_, + bool commit_in_suffix_) : storage(storage_) + , metadata_snapshot(metadata_snapshot_) , context(context_) , column_names(columns) , max_block_size(max_block_size_) , commit_in_suffix(commit_in_suffix_) - , non_virtual_header(storage.getSampleBlockNonMaterialized()) - , virtual_header(storage.getSampleBlockForColumns({"_topic", "_key", "_offset", "_partition", "_timestamp","_timestamp_ms","_headers.name","_headers.value"})) + , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) + , virtual_header(storage.getSampleBlockForColumns( + {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"})) { } diff --git a/src/Storages/Kafka/KafkaBlockInputStream.h b/src/Storages/Kafka/KafkaBlockInputStream.h index 387f5088721..4851050a56e 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.h +++ b/src/Storages/Kafka/KafkaBlockInputStream.h @@ -14,7 +14,12 @@ class KafkaBlockInputStream : public IBlockInputStream { public: KafkaBlockInputStream( - StorageKafka & storage_, const std::shared_ptr & context_, const Names & columns, size_t max_block_size_, bool commit_in_suffix = true); + StorageKafka & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const std::shared_ptr & context_, + const Names & columns, + size_t max_block_size_, + bool commit_in_suffix = true); ~KafkaBlockInputStream() override; String getName() const override { return storage.getName(); } @@ -29,6 +34,7 @@ public: private: StorageKafka & storage; + StorageMetadataPtr metadata_snapshot; const std::shared_ptr context; Names column_names; UInt64 max_block_size; diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.cpp b/src/Storages/Kafka/KafkaBlockOutputStream.cpp index 17ef5aa104c..60ac714bd52 100644 --- a/src/Storages/Kafka/KafkaBlockOutputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockOutputStream.cpp @@ -11,13 +11,19 @@ namespace ErrorCodes extern const int CANNOT_CREATE_IO_BUFFER; } -KafkaBlockOutputStream::KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr & context_) : storage(storage_), context(context_) +KafkaBlockOutputStream::KafkaBlockOutputStream( + StorageKafka & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const std::shared_ptr & context_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) { } Block KafkaBlockOutputStream::getHeader() const { - return storage.getSampleBlockNonMaterialized(); + return metadata_snapshot->getSampleBlockNonMaterialized(); } void KafkaBlockOutputStream::writePrefix() diff --git a/src/Storages/Kafka/KafkaBlockOutputStream.h b/src/Storages/Kafka/KafkaBlockOutputStream.h index 7a973724f1b..1121d2a119e 100644 --- a/src/Storages/Kafka/KafkaBlockOutputStream.h +++ b/src/Storages/Kafka/KafkaBlockOutputStream.h @@ -10,7 
+10,10 @@ namespace DB class KafkaBlockOutputStream : public IBlockOutputStream { public: - explicit KafkaBlockOutputStream(StorageKafka & storage_, const std::shared_ptr & context_); + explicit KafkaBlockOutputStream( + StorageKafka & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const std::shared_ptr & context_); Block getHeader() const override; @@ -22,6 +25,7 @@ public: private: StorageKafka & storage; + StorageMetadataPtr metadata_snapshot; const std::shared_ptr context; ProducerBufferPtr buffer; BlockOutputStreamPtr child; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index 190397bc675..b46cf0579ec 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -201,7 +201,7 @@ String StorageKafka::getDefaultClientId(const StorageID & table_id_) Pipes StorageKafka::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /* query_info */, const Context & context, QueryProcessingStage::Enum /* processed_stage */, @@ -224,7 +224,7 @@ Pipes StorageKafka::read( /// TODO: probably that leads to awful performance. /// FIXME: seems that doesn't help with extra reading and committing unprocessed messages. /// TODO: rewrite KafkaBlockInputStream to KafkaSource. Now it is used in other place. - pipes.emplace_back(std::make_shared(std::make_shared(*this, modified_context, column_names, 1))); + pipes.emplace_back(std::make_shared(std::make_shared(*this, metadata_snapshot, modified_context, column_names, 1))); } LOG_DEBUG(log, "Starting reading {} streams", pipes.size()); @@ -232,14 +232,14 @@ Pipes StorageKafka::read( } -BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) +BlockOutputStreamPtr StorageKafka::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context & context) { auto modified_context = std::make_shared(context); modified_context->applySettingsChanges(settings_adjustments); if (topics.size() > 1) throw Exception("Can't write to Kafka table with multiple topics!", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(*this, modified_context); + return std::make_shared(*this, metadata_snapshot, modified_context); } @@ -519,6 +519,7 @@ bool StorageKafka::streamToViews() auto table = DatabaseCatalog::instance().getTable(table_id, global_context); if (!table) throw Exception("Engine table " + table_id.getNameForLogs() + " doesn't exist.", ErrorCodes::LOGICAL_ERROR); + auto metadata_snapshot = getInMemoryMetadataPtr(); // Create an INSERT query for streaming data auto insert = std::make_shared(); @@ -538,7 +539,7 @@ bool StorageKafka::streamToViews() for (size_t i = 0; i < num_created_consumers; ++i) { auto stream - = std::make_shared(*this, kafka_context, block_io.out->getHeader().getNames(), block_size, false); + = std::make_shared(*this, metadata_snapshot, kafka_context, block_io.out->getHeader().getNames(), block_size, false); streams.emplace_back(stream); // Limit read batch to maximum block size to allow DDL diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 3e419921115..4754732159c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -642,6 +642,7 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl LOG_ERROR(log, "Destination table {} doesn't exist. 
Block of data is discarded.", destination_id.getNameForLogs()); return; } + auto destination_metadata_snapshot = table->getInMemoryMetadataPtr(); auto temporarily_disable_memory_tracker = getCurrentMemoryTrackerActionLock(); @@ -651,7 +652,8 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl /** We will insert columns that are the intersection set of columns of the buffer table and the subordinate table. * This will support some of the cases (but not all) when the table structure does not match. */ - Block structure_of_destination_table = allow_materialized ? table->getSampleBlock() : table->getSampleBlockNonMaterialized(); + Block structure_of_destination_table + = allow_materialized ? table->getSampleBlock() : destination_metadata_snapshot->getSampleBlockNonMaterialized(); Block block_to_write; for (size_t i : ext::range(0, structure_of_destination_table.columns())) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 719811bbc6b..66066ec3c18 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -511,7 +511,7 @@ Pipes StorageDistributed::read( } -BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) +BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context & context) { auto cluster = getCluster(); const auto & settings = context.getSettingsRef(); @@ -536,7 +536,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster return std::make_shared( - context, *this, createInsertToRemoteTableQuery(remote_database, remote_table, getSampleBlockNonMaterialized()), cluster, + context, *this, createInsertToRemoteTableQuery(remote_database, remote_table, metadata_snapshot->getSampleBlockNonMaterialized()), cluster, insert_sync, timeout); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index bf747fb9b5a..2c5b6279e10 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -209,5 +209,13 @@ ColumnDependencies StorageInMemoryMetadata::getColumnDependencies(const NameSet } +Block StorageInMemoryMetadata::getSampleBlockNonMaterialized() const +{ + Block res; + for (const auto & column : getColumns().getOrdinary()) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index fb7bcbaa349..d6c00bb35c8 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -106,6 +106,8 @@ struct StorageInMemoryMetadata /// Returns columns, which will be needed to calculate dependencies (skip /// indices, TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; + + Block getSampleBlockNonMaterialized() const; /// ordinary. 
}; using StorageMetadataPtr = std::shared_ptr; From 0e77692a278b818d03bfd0987c7115802f89a648 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 16 Jun 2020 15:56:28 +0300 Subject: [PATCH 104/318] improvements after review comments --- base/daemon/SentryWriter.cpp | 32 ++++++++++--------- base/daemon/SentryWriter.h | 10 +++++- .../settings.md | 2 +- programs/server/config.xml | 3 +- 4 files changed, 29 insertions(+), 18 deletions(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 88639d8bf94..0524285ea42 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -14,6 +14,7 @@ #if USE_SENTRY # include // Y_IGNORE # include +# include #endif @@ -31,7 +32,7 @@ void setExtras() { sentry_set_extra("server_name", sentry_value_new_string(getFQDNOrHostName().c_str())); } - sentry_set_tag("version", VERSION_STRING_SHORT); + sentry_set_tag("version", VERSION_STRING); sentry_set_extra("version_githash", sentry_value_new_string(VERSION_GITHASH)); sentry_set_extra("version_describe", sentry_value_new_string(VERSION_DESCRIBE)); sentry_set_extra("version_integer", sentry_value_new_int32(VERSION_INTEGER)); @@ -93,14 +94,15 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) } if (enabled) { + const std::filesystem::path & default_tmp_path = std::filesystem::path(config.getString("tmp_path", Poco::Path::temp())) / "sentry"; const std::string & endpoint = config.getString("send_crash_reports.endpoint", "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); const std::string & temp_folder_path - = config.getString("send_crash_reports.tmp_path", config.getString("tmp_path", Poco::Path::temp()) + "sentry/"); + = config.getString("send_crash_reports.tmp_path", default_tmp_path); Poco::File(temp_folder_path).createDirectories(); - sentry_options_t * options = sentry_options_new(); - sentry_options_set_release(options, VERSION_STRING); + sentry_options_t * options = sentry_options_new(); /// will be freed by sentry_init or sentry_shutdown + sentry_options_set_release(options, VERSION_STRING_SHORT); sentry_options_set_logger(options, &sentry_logger); if (debug) { @@ -128,17 +130,16 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { initialized = true; anonymize = config.getBool("send_crash_reports.anonymize", false); - const std::string& anonymize_status = anonymize ? " (anonymized)" : ""; LOG_INFO( logger, "Sending crash reports is initialized with {} endpoint and {} temp folder{}", endpoint, temp_folder_path, - anonymize_status); + anonymize ? 
" (anonymized)" : ""); } else { - LOG_WARNING(logger, "Sending crash reports failed to initialized with {} status", init_status); + LOG_WARNING(logger, "Sending crash reports failed to initialize with {} status", init_status); } else @@ -177,21 +178,20 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c size_t stack_size = stack_trace.getSize(); if (stack_size > 0) { - size_t offset = stack_trace.getOffset(); - if (stack_size == 1) - { - offset = 1; - } + ssize_t offset = stack_trace.getOffset(); char instruction_addr[100]; StackTrace::Frames frames; StackTrace::symbolize(stack_trace.getFramePointers().data(), offset, stack_size, frames); - for (size_t i = stack_size - 1; i >= offset; --i) + for (ssize_t i = stack_size - 1; i >= offset; --i) { const StackTrace::Frame & current_frame = frames[i]; sentry_value_t sentry_frame = sentry_value_new_object(); UInt64 frame_ptr = reinterpret_cast<UInt64>(current_frame.virtual_addr); - std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr); - sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + + if (std::snprintf(instruction_addr, sizeof(instruction_addr), "0x%" PRIx64, frame_ptr) >= 0) + { + sentry_value_set_by_key(sentry_frame, "instruction_addr", sentry_value_new_string(instruction_addr)); + } if (current_frame.symbol.has_value()) { @@ -213,6 +213,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c } /// Prepare data for https://develop.sentry.dev/sdk/event-payloads/threads/ + /// Stacktrace is filled only for a single thread that failed sentry_value_t stacktrace = sentry_value_new_object(); sentry_value_set_by_key(stacktrace, "frames", sentry_frames); @@ -225,6 +226,7 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c sentry_value_t threads = sentry_value_new_object(); sentry_value_set_by_key(threads, "values", values); + sentry_value_set_by_key(event, "threads", threads); LOG_INFO(logger, "Sending crash report"); diff --git a/base/daemon/SentryWriter.h b/base/daemon/SentryWriter.h index 0b3f1ddd2b7..655a4e93bfd 100644 --- a/base/daemon/SentryWriter.h +++ b/base/daemon/SentryWriter.h @@ -7,7 +7,13 @@ #include -/// Sends crash reports to ClickHouse core developer team via https://sentry.io +/// \brief Sends crash reports to ClickHouse core developer team via https://sentry.io +/// +/// This feature can be enabled with the "send_crash_reports.enabled" server setting, +/// in which case reports are sent only for official ClickHouse builds. +/// +/// It is possible to send those reports to your own sentry account or to the account of a consulting company you hired +/// by overriding the "send_crash_reports.endpoint" setting. The "send_crash_reports.debug" setting allows doing that for unofficial builds as well. class SentryWriter { public: @@ -15,6 +21,8 @@ public: static void initialize(Poco::Util::LayeredConfiguration & config); static void shutdown(); + + /// Not signal safe and can't be called from a signal handler static void onFault( int sig, const siginfo_t & info, diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md index e1ff3a872d1..58a02b8266a 100644 --- a/docs/en/operations/server-configuration-parameters/settings.md +++ b/docs/en/operations/server-configuration-parameters/settings.md @@ -353,7 +353,7 @@ Keys: Settings for opt-in sending crash reports to the ClickHouse core developers team via [Sentry](https://sentry.io). 
Enabling it, especially in pre-production environments, is greatly appreciated. -The server will need an access to public Internet for this feature to be functioning properly. +The server will need access to the public Internet via IPv4 (at the time of writing, IPv6 is not supported by Sentry) for this feature to function properly. Keys: diff --git a/programs/server/config.xml b/programs/server/config.xml index afb44989bbe..e5482a074a3 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -46,7 +46,8 @@ - + + false From 08b9aa6b2ed0b39e542e0077efea231374a1ba32 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 15:58:05 +0300 Subject: [PATCH 105/318] getSampleBlockWithVirtuals in StorageInMemoryMetadata --- .../PushingToViewsBlockOutputStream.cpp | 21 +++++++++++++----- .../PushingToViewsBlockOutputStream.h | 9 ++++++-- src/Interpreters/InterpreterInsertQuery.cpp | 4 ++-- src/Storages/IStorage.cpp | 13 ----------- src/Storages/IStorage.h | 1 - src/Storages/StorageInMemoryMetadata.cpp | 22 +++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 2 ++ 7 files changed, 48 insertions(+), 24 deletions(-) diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index fa213b054df..2e02c26d38c 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -19,8 +19,14 @@ namespace DB PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( const StoragePtr & storage_, - const Context & context_, const ASTPtr & query_ptr_, bool no_destination) - : storage(storage_), context(context_), query_ptr(query_ptr_) + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_, + const ASTPtr & query_ptr_, + bool no_destination) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , context(context_) + , query_ptr(query_ptr_) { /** TODO This is a very important line. At any insertion into the table one of streams should own lock. 
* Although now any insertion into the table is done via PushingToViewsBlockOutputStream, @@ -60,6 +66,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( for (const auto & database_table : dependencies) { auto dependent_table = DatabaseCatalog::instance().getTable(database_table, context); + auto dependent_metadata_snapshot = dependent_table->getInMemoryMetadataPtr(); ASTPtr query; BlockOutputStreamPtr out; @@ -97,9 +104,11 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( out = io.out; } else if (dynamic_cast(dependent_table.get())) - out = std::make_shared(dependent_table, *insert_context, ASTPtr(), true); + out = std::make_shared( + dependent_table, dependent_metadata_snapshot, *insert_context, ASTPtr(), true); else - out = std::make_shared(dependent_table, *insert_context, ASTPtr()); + out = std::make_shared( + dependent_table, dependent_metadata_snapshot, *insert_context, ASTPtr()); views.emplace_back(ViewInfo{std::move(query), database_table, std::move(out), nullptr}); } @@ -118,9 +127,9 @@ Block PushingToViewsBlockOutputStream::getHeader() const /// If we don't write directly to the destination /// then expect that we're inserting with precalculated virtual columns if (output) - return storage->getSampleBlock(); + return metadata_snapshot->getSampleBlock(); else - return storage->getSampleBlockWithVirtuals(); + return metadata_snapshot->getSampleBlockWithVirtuals(storage->getVirtuals()); } diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.h b/src/DataStreams/PushingToViewsBlockOutputStream.h index c5fef413a23..ca09126a561 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.h +++ b/src/DataStreams/PushingToViewsBlockOutputStream.h @@ -17,8 +17,12 @@ class ReplicatedMergeTreeBlockOutputStream; class PushingToViewsBlockOutputStream : public IBlockOutputStream { public: - PushingToViewsBlockOutputStream(const StoragePtr & storage_, - const Context & context_, const ASTPtr & query_ptr_, bool no_destination = false); + PushingToViewsBlockOutputStream( + const StoragePtr & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Context & context_, + const ASTPtr & query_ptr_, + bool no_destination = false); Block getHeader() const override; void write(const Block & block) override; @@ -29,6 +33,7 @@ public: private: StoragePtr storage; + StorageMetadataPtr metadata_snapshot; BlockOutputStreamPtr output; ReplicatedMergeTreeBlockOutputStream * replicated_output = nullptr; diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index d281dc5ccca..f61ef0e7381 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -83,7 +83,7 @@ Block InterpreterInsertQuery::getSampleBlock( if (!query.columns) { if (no_destination) - return table->getSampleBlockWithVirtuals(); + return metadata_snapshot->getSampleBlockWithVirtuals(table->getVirtuals()); else return table_sample_non_materialized; } @@ -232,7 +232,7 @@ BlockIO InterpreterInsertQuery::execute() if (table->noPushingToViews() && !no_destination) out = table->write(query_ptr, metadata_snapshot, context); else - out = std::make_shared(table, context, query_ptr, no_destination); + out = std::make_shared(table, metadata_snapshot, context, query_ptr, no_destination); /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order. 
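[Editorial sketch, not part of the patch.] The hunks above and below move sample-block construction off IStorage and onto the metadata snapshot, with the virtual columns supplied by the caller, since they are a property of the storage engine rather than of the schema. A minimal self-contained sketch of the resulting convention; every name below is an illustrative stand-in, not the actual ClickHouse API:

#include <iostream>
#include <string>
#include <vector>

// Illustrative stand-ins for ClickHouse's Block / metadata types.
using Block = std::vector<std::string>;
using Names = std::vector<std::string>;

struct MetadataSnapshot
{
    Names ordinary;
    Names materialized;

    // ordinary + materialized, derived purely from the snapshot:
    Block getSampleBlock() const
    {
        Block res(ordinary);
        res.insert(res.end(), materialized.begin(), materialized.end());
        return res;
    }

    // Virtual columns are appended last and must be passed in by the caller,
    // because they come from the storage engine, not from the schema:
    Block getSampleBlockWithVirtuals(const Names & virtuals) const
    {
        Block res = getSampleBlock();
        res.insert(res.end(), virtuals.begin(), virtuals.end());
        return res;
    }
};

int main()
{
    MetadataSnapshot snapshot{{"key", "value"}, {"value_len"}};
    Names kafka_virtuals{"_topic", "_offset"};   // hypothetical virtuals of a Kafka-like engine

    for (const auto & name : snapshot.getSampleBlockWithVirtuals(kafka_virtuals))
        std::cout << name << '\n';   // key value value_len _topic _offset
}

The real methods return a Block of typed empty columns rather than a list of names, but the ordering rule (ordinary, then materialized, then virtuals appended last) is the one the patch encodes.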
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index e675d51b4b7..fd012c3cd75 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -62,19 +62,6 @@ Block IStorage::getSampleBlock() const return res; } -Block IStorage::getSampleBlockWithVirtuals() const -{ - auto res = getSampleBlock(); - - /// Virtual columns must be appended after ordinary, because user can - /// override them. - for (const auto & column : getVirtuals()) - res.insert({column.type->createColumn(), column.type, column.name}); - - return res; -} - - Block IStorage::getSampleBlockForColumns(const Names & column_names) const { Block res; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 42581ebb63b..e7a7786c2d6 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -159,7 +159,6 @@ public: /// thread-unsafe part. lockStructure must be acquired void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. - Block getSampleBlockWithVirtuals() const; /// ordinary + materialized + virtuals. Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 2c5b6279e10..f3719562af7 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -218,4 +218,26 @@ Block StorageInMemoryMetadata::getSampleBlockNonMaterialized() const return res; } + +Block StorageInMemoryMetadata::getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const +{ + auto res = getSampleBlock(); + + /// Virtual columns must be appended after ordinary, because user can + /// override them. + for (const auto & column : virtuals) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} + +Block StorageInMemoryMetadata::getSampleBlock() const +{ + Block res; + + for (const auto & column : getColumns().getAllPhysical()) + res.insert({column.type->createColumn(), column.type, column.name}); + + return res; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index d6c00bb35c8..2da766caacd 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -107,7 +107,9 @@ struct StorageInMemoryMetadata /// indices, TTL expressions) if we update @updated_columns set of columns. ColumnDependencies getColumnDependencies(const NameSet & updated_columns) const; + Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockNonMaterialized() const; /// ordinary. + Block getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const; /// ordinary + materialized + virtuals. 
}; using StorageMetadataPtr = std::shared_ptr; From 71f99a274dae57e78738159792e18ee3707a865c Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 17:25:08 +0300 Subject: [PATCH 106/318] Compileable getSampleBlockWithColumns in StorageInMemoryMetadata --- src/Interpreters/InterpreterSelectQuery.cpp | 6 +- src/Storages/IStorage.cpp | 31 -------- src/Storages/IStorage.h | 1 - src/Storages/Kafka/KafkaBlockInputStream.cpp | 6 +- .../MergeTreeBaseSelectProcessor.cpp | 21 +++--- .../MergeTree/MergeTreeBaseSelectProcessor.h | 2 + .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 74 ++++++++++++++----- .../MergeTree/MergeTreeDataSelectExecutor.h | 5 ++ src/Storages/MergeTree/MergeTreeReadPool.cpp | 31 +++++--- src/Storages/MergeTree/MergeTreeReadPool.h | 3 +- .../MergeTreeReverseSelectProcessor.cpp | 5 +- .../MergeTreeReverseSelectProcessor.h | 1 + .../MergeTree/MergeTreeSelectProcessor.cpp | 5 +- .../MergeTree/MergeTreeSelectProcessor.h | 1 + .../MergeTree/MergeTreeSequentialSource.cpp | 6 +- .../MergeTree/MergeTreeSequentialSource.h | 5 +- ...rgeTreeThreadSelectBlockInputProcessor.cpp | 5 +- ...MergeTreeThreadSelectBlockInputProcessor.h | 1 + .../MergeTree/StorageFromMergeTreeDataPart.h | 6 +- src/Storages/StorageBuffer.cpp | 12 +-- src/Storages/StorageInMemoryMetadata.cpp | 50 +++++++++++++ src/Storages/StorageInMemoryMetadata.h | 2 + src/Storages/StorageJoin.cpp | 4 +- src/Storages/StorageMemory.cpp | 34 ++++++--- src/Storages/StorageMerge.cpp | 14 ++-- src/Storages/StorageMerge.h | 8 +- src/Storages/StorageMergeTree.cpp | 5 +- src/Storages/StorageNull.h | 9 ++- src/Storages/StorageReplicatedMergeTree.cpp | 6 +- src/Storages/StorageS3.cpp | 12 +-- src/Storages/StorageS3.h | 5 -- src/Storages/StorageStripeLog.cpp | 27 +++++-- src/Storages/StorageURL.cpp | 9 ++- src/Storages/StorageURL.h | 6 +- src/Storages/StorageView.cpp | 7 +- src/Storages/StorageXDBC.cpp | 19 +++-- src/Storages/StorageXDBC.h | 35 +++++---- 38 files changed, 309 insertions(+), 176 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 40d7ed9ecc9..f73245179ce 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -377,14 +377,14 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - source_header = storage->getSampleBlockForColumns(required_columns); + source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals()); /// Fix source_header for filter actions. 
if (row_policy_filter) { filter_info = std::make_shared(); filter_info->column_name = generateFilterActions(filter_info->actions, row_policy_filter, required_columns); - source_header = storage->getSampleBlockForColumns(filter_info->actions->getRequiredColumns()); + source_header = metadata_snapshot->getSampleBlockForColumns(filter_info->actions->getRequiredColumns(), storage->getVirtuals()); } } @@ -1336,7 +1336,7 @@ void InterpreterSelectQuery::executeFetchColumns( if (pipes.empty()) { - Pipe pipe(std::make_shared(storage->getSampleBlockForColumns(required_columns))); + Pipe pipe(std::make_shared(metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals()))); if (query_info.prewhere_info) { diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index fd012c3cd75..d090dc9e51d 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -62,37 +62,6 @@ Block IStorage::getSampleBlock() const return res; } -Block IStorage::getSampleBlockForColumns(const Names & column_names) const -{ - Block res; - - std::unordered_map columns_map; - - NamesAndTypesList all_columns = getColumns().getAll(); - for (const auto & elem : all_columns) - columns_map.emplace(elem.name, elem.type); - - /// Virtual columns must be appended after ordinary, because user can - /// override them. - for (const auto & column : getVirtuals()) - columns_map.emplace(column.name, column.type); - - for (const auto & name : column_names) - { - auto it = columns_map.find(name); - if (it != columns_map.end()) - { - res.insert({it->second->createColumn(), it->second, it->first}); - } - else - { - throw Exception( - "Column " + backQuote(name) + " not found in table " + getStorageID().getNameForLogs(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - } - } - - return res; -} namespace { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e7a7786c2d6..a4173c1c9fa 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -159,7 +159,6 @@ public: /// thread-unsafe part. lockStructure must be acquired void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } Block getSampleBlock() const; /// ordinary + materialized. - Block getSampleBlockForColumns(const Names & column_names) const; /// ordinary + materialized + aliases + virtuals. /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. 
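[Editorial sketch, not part of the patch.] getSampleBlockForColumns is deleted from IStorage here and reinstated on StorageInMemoryMetadata further down in this patch, with the virtuals handed in by the caller. A self-contained toy version of its lookup rule, using only the standard library: table columns are registered before virtuals, so a user-defined column shadows a virtual of the same name, and requesting an unknown name throws, mirroring the NOT_FOUND_COLUMN_IN_BLOCK error.

#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

using Names = std::vector<std::string>;
using NamesAndTypes = std::vector<std::pair<std::string, std::string>>;

// Toy model of getSampleBlockForColumns(column_names, virtuals):
NamesAndTypes sampleBlockForColumns(
    const Names & requested, const NamesAndTypes & table_columns, const NamesAndTypes & virtuals)
{
    std::map<std::string, std::string> types;
    for (const auto & [name, type] : table_columns)
        types.emplace(name, type);
    for (const auto & [name, type] : virtuals)
        types.emplace(name, type);   // emplace keeps the earlier entry, so table columns win

    NamesAndTypes res;
    for (const auto & name : requested)
    {
        auto it = types.find(name);
        if (it == types.end())
            throw std::runtime_error("Column " + name + " not found in table");
        res.emplace_back(it->first, it->second);
    }
    return res;
}

int main()
{
    NamesAndTypes columns{{"_key", "String"}};                      // user overrides a virtual name
    NamesAndTypes virtuals{{"_key", "UInt64"}, {"_topic", "String"}};

    for (const auto & [name, type] : sampleBlockForColumns({"_key", "_topic"}, columns, virtuals))
        std::cout << name << ' ' << type << '\n';                   // _key String, _topic String
}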
diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index dd2bb68c11a..847b0d915cd 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -26,8 +26,8 @@ KafkaBlockInputStream::KafkaBlockInputStream( , max_block_size(max_block_size_) , commit_in_suffix(commit_in_suffix_) , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) - , virtual_header(storage.getSampleBlockForColumns( - {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"})) + , virtual_header(metadata_snapshot->getSampleBlockForColumns( + {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"}, storage.getVirtuals())) { } @@ -44,7 +44,7 @@ KafkaBlockInputStream::~KafkaBlockInputStream() Block KafkaBlockInputStream::getHeader() const { - return storage.getSampleBlockForColumns(column_names); + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals()); } void KafkaBlockInputStream::readPrefixImpl() diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp index a2a3ca3a6cf..ec24c9ad652 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, UInt64 preferred_block_size_bytes_, @@ -27,16 +28,16 @@ MergeTreeBaseSelectProcessor::MergeTreeBaseSelectProcessor( const MergeTreeReaderSettings & reader_settings_, bool use_uncompressed_cache_, const Names & virt_column_names_) -: - SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_)), - storage(storage_), - prewhere_info(prewhere_info_), - max_block_size_rows(max_block_size_rows_), - preferred_block_size_bytes(preferred_block_size_bytes_), - preferred_max_column_in_block_size_bytes(preferred_max_column_in_block_size_bytes_), - reader_settings(reader_settings_), - use_uncompressed_cache(use_uncompressed_cache_), - virt_column_names(virt_column_names_) + : SourceWithProgress(getHeader(std::move(header), prewhere_info_, virt_column_names_)) + , storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , prewhere_info(prewhere_info_) + , max_block_size_rows(max_block_size_rows_) + , preferred_block_size_bytes(preferred_block_size_bytes_) + , preferred_max_column_in_block_size_bytes(preferred_max_column_in_block_size_bytes_) + , reader_settings(reader_settings_) + , use_uncompressed_cache(use_uncompressed_cache_) + , virt_column_names(virt_column_names_) { header_without_virtual_columns = getPort().getHeader(); diff --git a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h index 8fe8296381a..00ef131ae45 100644 --- a/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeBaseSelectProcessor.h @@ -22,6 +22,7 @@ public: MergeTreeBaseSelectProcessor( Block header, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, UInt64 max_block_size_rows_, UInt64 preferred_block_size_bytes_, @@ -54,6 +55,7 @@ protected: protected: const MergeTreeData & 
storage; + StorageMetadataPtr metadata_snapshot; PrewhereInfoPtr prewhere_info; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 595370e7ecc..829f7cac528 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -579,7 +579,7 @@ public: /// parts should be sorted. MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTemporaryPart( const FutureMergedMutatedPart & future_part, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, MergeList::Entry & merge_entry, TableStructureReadLockHolder &, time_t time_of_merge, @@ -712,7 +712,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor for (const auto & part : parts) { auto input = std::make_unique( - data, part, merging_column_names, read_with_direct_io, true); + data, metadata_snapshot, part, merging_column_names, read_with_direct_io, true); input->setProgressCallback( MergeProgressCallback(merge_entry, watch_prev_elapsed, horizontal_stage_progress)); @@ -898,7 +898,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor for (size_t part_num = 0; part_num < parts.size(); ++part_num) { auto column_part_source = std::make_shared( - data, parts[part_num], column_names, read_with_direct_io, true); + data, metadata_snapshot, parts[part_num], column_names, read_with_direct_io, true); column_part_source->setProgressCallback( MergeProgressCallback(merge_entry, watch_prev_elapsed, column_progress)); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c1fc8184206..ac2f4851185 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -147,6 +147,7 @@ static RelativeSize convertAbsoluteSampleSizeToRelative(const ASTPtr & node, siz Pipes MergeTreeDataSelectExecutor::read( const Names & column_names_to_return, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, const UInt64 max_block_size, @@ -154,13 +155,15 @@ Pipes MergeTreeDataSelectExecutor::read( const PartitionIdToMaxBlock * max_block_numbers_to_read) const { return readFromParts( - data.getDataPartsVector(), column_names_to_return, query_info, context, - max_block_size, num_streams, max_block_numbers_to_read); + data.getDataPartsVector(), column_names_to_return, metadata_snapshot, + query_info, context, max_block_size, num_streams, + max_block_numbers_to_read); } Pipes MergeTreeDataSelectExecutor::readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names_to_return, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, const UInt64 max_block_size, @@ -205,7 +208,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( } } - NamesAndTypesList available_real_columns = data.getColumns().getAllPhysical(); + NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical(); /// If there are only virtual columns in the query, you must request at least one non-virtual one. 
if (real_column_names.empty()) @@ -629,6 +632,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::move(parts_with_ranges), num_streams, column_names_to_read, + metadata_snapshot, max_block_size, settings.use_uncompressed_cache, query_info, @@ -650,6 +654,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::move(parts_with_ranges), num_streams, column_names_to_read, + metadata_snapshot, max_block_size, settings.use_uncompressed_cache, query_info, @@ -665,6 +670,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::move(parts_with_ranges), num_streams, column_names_to_read, + metadata_snapshot, max_block_size, settings.use_uncompressed_cache, query_info, @@ -727,6 +733,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, @@ -783,8 +790,18 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( num_streams = std::max((sum_marks + min_marks_for_concurrent_read - 1) / min_marks_for_concurrent_read, parts.size()); MergeTreeReadPoolPtr pool = std::make_shared( - num_streams, sum_marks, min_marks_for_concurrent_read, parts, data, query_info.prewhere_info, true, - column_names, MergeTreeReadPool::BackoffSettings(settings), settings.preferred_block_size_bytes, false); + num_streams, + sum_marks, + min_marks_for_concurrent_read, + parts, + data, + metadata_snapshot, + query_info.prewhere_info, + true, + column_names, + MergeTreeReadPool::BackoffSettings(settings), + settings.preferred_block_size_bytes, + false); /// Let's estimate total number of rows for progress bar. LOG_TRACE(log, "Reading approx. {} rows with {} streams", total_rows, num_streams); @@ -792,8 +809,9 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( for (size_t i = 0; i < num_streams; ++i) { auto source = std::make_shared( - i, pool, min_marks_for_concurrent_read, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, data, use_uncompressed_cache, + i, pool, min_marks_for_concurrent_read, max_block_size, + settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, + data, metadata_snapshot, use_uncompressed_cache, query_info.prewhere_info, reader_settings, virt_columns); if (i == 0) @@ -812,7 +830,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreams( for (const auto & part : parts) { auto source = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + data, metadata_snapshot, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, query_info.prewhere_info, true, reader_settings, virt_columns, part.part_index_in_query); @@ -845,6 +863,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, @@ -1004,18 +1023,38 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( if (input_order_info->direction == 1) { pipes.emplace_back(std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - 
settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, - use_uncompressed_cache, query_info.prewhere_info, true, reader_settings, - virt_columns, part.part_index_in_query)); + data, + metadata_snapshot, + part.data_part, + max_block_size, + settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, + column_names, + ranges_to_get_from_part, + use_uncompressed_cache, + query_info.prewhere_info, + true, + reader_settings, + virt_columns, + part.part_index_in_query)); } else { pipes.emplace_back(std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, - settings.preferred_max_column_in_block_size_bytes, column_names, ranges_to_get_from_part, - use_uncompressed_cache, query_info.prewhere_info, true, reader_settings, - virt_columns, part.part_index_in_query)); + data, + metadata_snapshot, + part.data_part, + max_block_size, + settings.preferred_block_size_bytes, + settings.preferred_max_column_in_block_size_bytes, + column_names, + ranges_to_get_from_part, + use_uncompressed_cache, + query_info.prewhere_info, + true, + reader_settings, + virt_columns, + part.part_index_in_query)); pipes.back().addSimpleTransform(std::make_shared(pipes.back().getHeader())); } @@ -1050,6 +1089,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, @@ -1088,7 +1128,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( for (const auto & part : parts) { auto source_processor = std::make_shared( - data, part.data_part, max_block_size, settings.preferred_block_size_bytes, + data, metadata_snapshot, part.data_part, max_block_size, settings.preferred_block_size_bytes, settings.preferred_max_column_in_block_size_bytes, column_names, part.ranges, use_uncompressed_cache, query_info.prewhere_info, true, reader_settings, virt_columns, part.part_index_in_query); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 85d69ead181..7811eb53b71 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -26,6 +26,7 @@ public: Pipes read( const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, UInt64 max_block_size, @@ -35,6 +36,7 @@ public: Pipes readFromParts( MergeTreeData::DataPartsVector parts, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, UInt64 max_block_size, @@ -50,6 +52,7 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, @@ -62,6 +65,7 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, @@ -75,6 +79,7 @@ private: RangesInDataParts && parts, size_t num_streams, const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, UInt64 max_block_size, bool use_uncompressed_cache, const SelectQueryInfo & query_info, diff 
--git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 9ca1446ef64..eb0b51235ad 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -17,17 +17,28 @@ namespace ErrorCodes namespace DB { - - MergeTreeReadPool::MergeTreeReadPool( - const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, - RangesInDataParts parts_, const MergeTreeData & data_, const PrewhereInfoPtr & prewhere_info_, - const bool check_columns_, const Names & column_names_, - const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, + const size_t threads_, + const size_t sum_marks_, + const size_t min_marks_for_concurrent_read_, + RangesInDataParts parts_, + const MergeTreeData & data_, + const StorageMetadataPtr & metadata_snapshot_, + const PrewhereInfoPtr & prewhere_info_, + const bool check_columns_, + const Names & column_names_, + const BackoffSettings & backoff_settings_, + size_t preferred_block_size_bytes_, const bool do_not_steal_tasks_) - : backoff_settings{backoff_settings_}, backoff_state{threads_}, data{data_}, - column_names{column_names_}, do_not_steal_tasks{do_not_steal_tasks_}, - predict_block_size_bytes{preferred_block_size_bytes_ > 0}, prewhere_info{prewhere_info_}, parts_ranges{parts_} + : backoff_settings{backoff_settings_} + , backoff_state{threads_} + , data{data_} + , metadata_snapshot{metadata_snapshot_} + , column_names{column_names_} + , do_not_steal_tasks{do_not_steal_tasks_} + , predict_block_size_bytes{preferred_block_size_bytes_ > 0} + , prewhere_info{prewhere_info_} + , parts_ranges{parts_} { /// parts don't contain duplicate MergeTreeDataPart's. const auto per_part_sum_marks = fillPerPartInfo(parts_, check_columns_); @@ -139,7 +150,7 @@ MarkRanges MergeTreeReadPool::getRestMarks(const IMergeTreeDataPart & part, cons Block MergeTreeReadPool::getHeader() const { - return data.getSampleBlockForColumns(column_names); + return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals()); } void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInfo info) diff --git a/src/Storages/MergeTree/MergeTreeReadPool.h b/src/Storages/MergeTree/MergeTreeReadPool.h index c43074f1962..c0b04c6a228 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.h +++ b/src/Storages/MergeTree/MergeTreeReadPool.h @@ -68,7 +68,7 @@ private: public: MergeTreeReadPool( const size_t threads_, const size_t sum_marks_, const size_t min_marks_for_concurrent_read_, - RangesInDataParts parts_, const MergeTreeData & data_, const PrewhereInfoPtr & prewhere_info_, + RangesInDataParts parts_, const MergeTreeData & data_, const StorageMetadataPtr & metadata_snapshot_, const PrewhereInfoPtr & prewhere_info_, const bool check_columns_, const Names & column_names_, const BackoffSettings & backoff_settings_, size_t preferred_block_size_bytes_, const bool do_not_steal_tasks_ = false); @@ -95,6 +95,7 @@ private: RangesInDataParts & parts, const size_t min_marks_for_concurrent_read); const MergeTreeData & data; + StorageMetadataPtr metadata_snapshot; Names column_names; bool do_not_steal_tasks; bool predict_block_size_bytes; diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 09bf784a293..81366614988 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -34,6 +34,7 @@ 
static Block replaceTypes(Block && header, const MergeTreeData::DataPartPtr & da MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::DataPartPtr & owned_data_part_, UInt64 max_block_size_rows_, size_t preferred_block_size_bytes_, @@ -49,8 +50,8 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(storage_.getSampleBlockForColumns(required_columns_), owned_data_part_), - storage_, prewhere_info_, max_block_size_rows_, + replaceTypes(metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals()), owned_data_part_), + storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, reader_settings_, use_uncompressed_cache_, virt_column_names_}, required_columns{std::move(required_columns_)}, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h index ea603bd468f..c9fd06c5534 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.h @@ -18,6 +18,7 @@ class MergeTreeReverseSelectProcessor : public MergeTreeBaseSelectProcessor public: MergeTreeReverseSelectProcessor( const MergeTreeData & storage, + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, size_t preferred_block_size_bytes, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index df471a8b8ec..e32fa70cb97 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -14,6 +14,7 @@ namespace ErrorCodes MergeTreeSelectProcessor::MergeTreeSelectProcessor( const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::DataPartPtr & owned_data_part_, UInt64 max_block_size_rows_, size_t preferred_block_size_bytes_, @@ -29,8 +30,8 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - storage_.getSampleBlockForColumns(required_columns_), - storage_, prewhere_info_, max_block_size_rows_, + metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals()), + storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, reader_settings_, use_uncompressed_cache_, virt_column_names_}, required_columns{std::move(required_columns_)}, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.h b/src/Storages/MergeTree/MergeTreeSelectProcessor.h index d2438e20192..dff4ebc2627 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.h +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.h @@ -18,6 +18,7 @@ class MergeTreeSelectProcessor : public MergeTreeBaseSelectProcessor public: MergeTreeSelectProcessor( const MergeTreeData & storage, + const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & owned_data_part, UInt64 max_block_size_rows, size_t preferred_block_size_bytes, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 045962f44dd..dfd60bd50ef 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ 
b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -11,13 +11,15 @@ namespace ErrorCodes MergeTreeSequentialSource::MergeTreeSequentialSource( const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) - : SourceWithProgress(storage_.getSampleBlockForColumns(columns_to_read_)) + : SourceWithProgress(metadata_snapshot_->getSampleBlockForColumns(columns_to_read_, storage_.getVirtuals())) , storage(storage_) + , metadata_snapshot(metadata_snapshot_) , data_part(std::move(data_part_)) , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) @@ -41,7 +43,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( NamesAndTypesList columns_for_reader; if (take_column_types_from_storage) { - const NamesAndTypesList & physical_columns = storage.getColumns().getAllPhysical(); + const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllPhysical(); columns_for_reader = physical_columns.addTypes(columns_to_read); } else diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index 6155fef200a..7eefdd9335b 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -14,12 +14,12 @@ class MergeTreeSequentialSource : public SourceWithProgress public: MergeTreeSequentialSource( const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, bool read_with_direct_io_, bool take_column_types_from_storage, - bool quiet = false - ); + bool quiet = false); ~MergeTreeSequentialSource() override; @@ -35,6 +35,7 @@ protected: private: const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; /// Data part will not be removed if the pointer owns it MergeTreeData::DataPartPtr data_part; diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index 0b09fad91d1..784c842d7d6 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -16,12 +16,15 @@ MergeTreeThreadSelectBlockInputProcessor::MergeTreeThreadSelectBlockInputProcess size_t preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, const MergeTreeReaderSettings & reader_settings_, const Names & virt_column_names_) : - MergeTreeBaseSelectProcessor{pool_->getHeader(), storage_, prewhere_info_, max_block_size_rows_, + MergeTreeBaseSelectProcessor{ + pool_->getHeader(), storage_, metadata_snapshot_, prewhere_info_, + max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, reader_settings_, use_uncompressed_cache_, virt_column_names_}, thread{thread_}, diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h index e214696b705..d5a11f3d93b 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h +++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.h @@ -22,6 +22,7 @@ public: size_t 
preferred_block_size_bytes_, size_t preferred_max_column_in_block_size_bytes_, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const bool use_uncompressed_cache_, const PrewhereInfoPtr & prewhere_info_, const MergeTreeReaderSettings & reader_settings_, diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 826af505b12..45ee947b81f 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -21,15 +21,15 @@ public: Pipes read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) override { - return MergeTreeDataSelectExecutor(part->storage).readFromParts( - {part}, column_names, query_info, context, max_block_size, num_streams); + return MergeTreeDataSelectExecutor(part->storage) + .readFromParts({part}, column_names, metadata_snapshot, query_info, context, max_block_size, num_streams); } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 4754732159c..42eab838f32 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -88,9 +88,11 @@ StorageBuffer::StorageBuffer( class BufferSource : public SourceWithProgress { public: - BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage) - : SourceWithProgress(storage.getSampleBlockForColumns(column_names_)) - , column_names(column_names_.begin(), column_names_.end()), buffer(buffer_) {} + BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage, const StorageMetadataPtr & metadata_snapshot) + : SourceWithProgress( + metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals())) + , column_names(column_names_.begin(), column_names_.end()) + , buffer(buffer_) {} String getName() const override { return "Buffer"; } @@ -145,7 +147,7 @@ QueryProcessingStage::Enum StorageBuffer::getQueryProcessingStage(const Context Pipes StorageBuffer::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -236,7 +238,7 @@ Pipes StorageBuffer::read( Pipes pipes_from_buffers; pipes_from_buffers.reserve(num_shards); for (auto & buf : buffers) - pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, *this)); + pipes_from_buffers.emplace_back(std::make_shared(column_names, buf, *this, metadata_snapshot)); /** If the sources from the table were processed before some non-initial stage of query execution, * then sources from the buffers must also be wrapped in the processing pipeline before the same stage. 
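[Editorial sketch, not part of the patch.] Every constructor change in this commit has the same shape as the BufferSource one above: the reading side stops asking the storage for its header and instead captures a StorageMetadataPtr when the query starts. A stand-alone toy of why that matters, with simplified stand-ins for the real classes; the shared_ptr snapshot both freezes the schema a running stream sees and keeps it alive while a concurrent ALTER publishes a new one:

#include <atomic>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

struct Metadata { std::vector<std::string> columns; };
using MetadataPtr = std::shared_ptr<const Metadata>;

struct Storage
{
    // "ALTER" publishes a whole new immutable snapshot instead of mutating in place.
    void setMetadata(Metadata m)
    {
        std::atomic_store(&metadata, std::make_shared<const Metadata>(std::move(m)));
    }
    MetadataPtr getInMemoryMetadataPtr() const { return std::atomic_load(&metadata); }

private:
    MetadataPtr metadata = std::make_shared<const Metadata>();
};

struct Source
{
    // The snapshot is injected at construction; the header is fixed for the
    // source's whole lifetime, whatever happens to the storage afterwards.
    explicit Source(MetadataPtr snapshot_) : snapshot(std::move(snapshot_)) {}
    size_t headerWidth() const { return snapshot->columns.size(); }

private:
    MetadataPtr snapshot;
};

int main()
{
    Storage storage;
    storage.setMetadata({{"a", "b"}});

    Source source(storage.getInMemoryMetadataPtr());
    storage.setMetadata({{"a", "b", "c"}});      // concurrent ALTER races with the query

    std::cout << source.headerWidth() << '\n';   // still 2
}

In the actual series the snapshot is produced once by getInMemoryMetadataPtr() (as in StorageKafka::streamToViews earlier) and threaded by hand through every source, pool, and output-stream constructor.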
diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index f3719562af7..cce3911370d 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,7 +1,24 @@ #include +#include + namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; + extern const int COLUMN_QUERIED_MORE_THAN_ONCE; + extern const int DUPLICATE_COLUMN; + extern const int EMPTY_LIST_OF_COLUMNS_PASSED; + extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; + extern const int NO_SUCH_COLUMN_IN_TABLE; + extern const int NOT_FOUND_COLUMN_IN_BLOCK; + extern const int TYPE_MISMATCH; + extern const int TABLE_IS_DROPPED; + extern const int NOT_IMPLEMENTED; + extern const int DEADLOCK_AVOIDED; +} + StorageInMemoryMetadata::StorageInMemoryMetadata( const ColumnsDescription & columns_, @@ -240,4 +257,37 @@ Block StorageInMemoryMetadata::getSampleBlock() const return res; } + +Block StorageInMemoryMetadata::getSampleBlockForColumns(const Names & column_names, const NamesAndTypesList & virtuals) const +{ + Block res; + + std::unordered_map columns_map; + + NamesAndTypesList all_columns = getColumns().getAll(); + for (const auto & elem : all_columns) + columns_map.emplace(elem.name, elem.type); + + /// Virtual columns must be appended after ordinary, because user can + /// override them. + for (const auto & column : virtuals) + columns_map.emplace(column.name, column.type); + + for (const auto & name : column_names) + { + auto it = columns_map.find(name); + if (it != columns_map.end()) + { + res.insert({it->second->createColumn(), it->second, it->first}); + } + else + { + throw Exception( + "Column " + backQuote(name) + " not found in table " /*+ getStorageID().getNameForLogs() TODO(alesap)*/, + ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + } + } + + return res; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 2da766caacd..9f9154c48fb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -110,6 +110,8 @@ struct StorageInMemoryMetadata Block getSampleBlock() const; /// ordinary + materialized. Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const; /// ordinary + materialized + virtuals. + Block getSampleBlockForColumns( + const Names & column_names, const NamesAndTypesList & virtuals) const; /// ordinary + materialized + aliases + virtuals. 
}; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 7ed4c1c110b..7d481395ef4 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -435,7 +435,7 @@ private: // TODO: multiple stream read and index read Pipes StorageJoin::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -445,7 +445,7 @@ Pipes StorageJoin::read( check(column_names); Pipes pipes; - pipes.emplace_back(std::make_shared(*join, max_block_size, getSampleBlockForColumns(column_names))); + pipes.emplace_back(std::make_shared(*join, max_block_size, metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); return pipes; } diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f9c39d78a05..442c5a3d67b 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -22,9 +22,19 @@ namespace ErrorCodes class MemorySource : public SourceWithProgress { public: - MemorySource(Names column_names_, BlocksList::iterator begin_, BlocksList::iterator end_, const StorageMemory & storage) - : SourceWithProgress(storage.getSampleBlockForColumns(column_names_)) - , column_names(std::move(column_names_)), begin(begin_), end(end_), it(begin) {} + MemorySource( + Names column_names_, + BlocksList::iterator begin_, + BlocksList::iterator end_, + const StorageMemory & storage, + const StorageMetadataPtr & metadata_snapshot) + : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals())) + , column_names(std::move(column_names_)) + , begin(begin_) + , end(end_) + , it(begin) + { + } String getName() const override { return "Memory"; } @@ -60,9 +70,14 @@ private: class MemoryBlockOutputStream : public IBlockOutputStream { public: - explicit MemoryBlockOutputStream(StorageMemory & storage_) : storage(storage_) {} + explicit MemoryBlockOutputStream( + StorageMemory & storage_, + const StorageMetadataPtr & metadata_snapshot_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + {} - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override { @@ -72,6 +87,7 @@ public: } private: StorageMemory & storage; + StorageMetadataPtr metadata_snapshot; }; @@ -87,7 +103,7 @@ StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription col Pipes StorageMemory::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, @@ -113,16 +129,16 @@ Pipes StorageMemory::read( std::advance(begin, stream * size / num_streams); std::advance(end, (stream + 1) * size / num_streams); - pipes.emplace_back(std::make_shared(column_names, begin, end, *this)); + pipes.emplace_back(std::make_shared(column_names, begin, end, *this, metadata_snapshot)); } return pipes; } -BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageMemory::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const 
Context & /*context*/) { - return std::make_shared(*this); + return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 6656e91189c..c5a3c20bb0c 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -129,7 +129,7 @@ QueryProcessingStage::Enum StorageMerge::getQueryProcessingStage(const Context & Pipes StorageMerge::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -157,7 +157,7 @@ Pipes StorageMerge::read( modified_context->setSetting("optimize_move_to_prewhere", false); /// What will be result structure depending on query processed stage in source tables? - Block header = getQueryHeader(column_names, query_info, context, processed_stage); + Block header = getQueryHeader(column_names, metadata_snapshot, query_info, context, processed_stage); /** First we make list of selected tables to find out its size. * This is necessary to correctly pass the recommended number of threads to each table. @@ -401,13 +401,17 @@ void StorageMerge::alter( } Block StorageMerge::getQueryHeader( - const Names & column_names, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage) + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage) { switch (processed_stage) { case QueryProcessingStage::FetchColumns: { - Block header = getSampleBlockForColumns(column_names); + Block header = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()); if (query_info.prewhere_info) { query_info.prewhere_info->prewhere_actions->execute(header); @@ -420,7 +424,7 @@ Block StorageMerge::getQueryHeader( case QueryProcessingStage::WithMergeableState: case QueryProcessingStage::Complete: return materializeBlock(InterpreterSelectQuery( - query_info.query, context, std::make_shared(getSampleBlockForColumns(column_names)), + query_info.query, context, std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals())), SelectQueryOptions(processed_stage).analyze()).getSampleBlock()); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index a5d3b8d2667..350f7a124fe 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -74,8 +74,12 @@ protected: const String & table_name_regexp_, const Context & context_); - Block getQueryHeader(const Names & column_names, const SelectQueryInfo & query_info, - const Context & context, QueryProcessingStage::Enum processed_stage); + Block getQueryHeader( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage); Pipes createSources( const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 1b00487c816..9c37cdd2b7c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -177,14 +177,15 @@ StorageMergeTree::~StorageMergeTree() Pipes StorageMergeTree::read( const Names & column_names, - const 
StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, const size_t max_block_size, const unsigned num_streams) { - return reader.read(column_names, query_info, context, max_block_size, num_streams); + return reader.read(column_names, metadata_snapshot, query_info, + context, max_block_size, num_streams); } std::optional StorageMergeTree::totalRows() const diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 72934d185c7..6bd102bdcda 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -24,7 +24,7 @@ public: Pipes read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processing_stage*/, @@ -32,13 +32,14 @@ public: unsigned) override { Pipes pipes; - pipes.emplace_back(std::make_shared(getSampleBlockForColumns(column_names))); + pipes.emplace_back( + std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); return pipes; } - BlockOutputStreamPtr write(const ASTPtr &, const StorageMetadataPtr & /*metadata_snapshot*/, const Context &) override + BlockOutputStreamPtr write(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &) override { - return std::make_shared(getSampleBlock()); + return std::make_shared(metadata_snapshot->getSampleBlock()); } void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 810a4fa5c97..a6f84ffe4df 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3387,7 +3387,7 @@ ReplicatedMergeTreeQuorumAddedParts::PartitionIdToMaxBlock StorageReplicatedMerg Pipes StorageReplicatedMergeTree::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -3402,10 +3402,10 @@ Pipes StorageReplicatedMergeTree::read( if (context.getSettingsRef().select_sequential_consistency) { auto max_added_blocks = getMaxAddedBlocks(); - return reader.read(column_names, query_info, context, max_block_size, num_streams, &max_added_blocks); + return reader.read(column_names, metadata_snapshot, query_info, context, max_block_size, num_streams, &max_added_blocks); } - return reader.read(column_names, query_info, context, max_block_size, num_streams); + return reader.read(column_names, metadata_snapshot, query_info, context, max_block_size, num_streams); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 093f4450ecb..7f237fd551f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -285,7 +285,7 @@ Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & Pipes StorageS3::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -309,9 +309,9 @@ Pipes StorageS3::read( need_file_column, format_name, getName(), - getHeaderBlock(column_names), + 
metadata_snapshot->getSampleBlock(), context, - getColumns().getDefaults(), + metadata_snapshot->getColumns().getDefaults(), max_block_size, chooseCompressionMethod(uri.endpoint, compression_method), client, @@ -321,11 +321,11 @@ Pipes StorageS3::read( return narrowPipes(std::move(pipes), num_streams); } -BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { return std::make_shared( - format_name, min_upload_part_size, getSampleBlock(), context_global, - chooseCompressionMethod(uri.endpoint, compression_method), + format_name, min_upload_part_size, metadata_snapshot->getSampleBlock(), + context_global, chooseCompressionMethod(uri.endpoint, compression_method), client, uri.bucket, uri.key); } diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 665c00b8033..a468d69d223 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -41,11 +41,6 @@ public: return "S3"; } - Block getHeaderBlock(const Names & /*column_names*/) const - { - return getSampleBlock(); - } - Pipes read( const Names & column_names, const StorageMetadataPtr & /*metadata_snapshot*/, diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index c320d0afb42..b0c5bcfd669 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -54,12 +54,13 @@ public: static Block getHeader( StorageStripeLog & storage, + const StorageMetadataPtr & metadata_snapshot, const Names & column_names, IndexForNativeFormat::Blocks::const_iterator index_begin, IndexForNativeFormat::Blocks::const_iterator index_end) { if (index_begin == index_end) - return storage.getSampleBlockForColumns(column_names); + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals()); /// TODO: check if possible to always return storage.getSampleBlock() @@ -74,13 +75,22 @@ public: return header; } - StripeLogSource(StorageStripeLog & storage_, const Names & column_names, size_t max_read_buffer_size_, + StripeLogSource( + StorageStripeLog & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const Names & column_names, + size_t max_read_buffer_size_, std::shared_ptr & index_, IndexForNativeFormat::Blocks::const_iterator index_begin_, IndexForNativeFormat::Blocks::const_iterator index_end_) - : SourceWithProgress(getHeader(storage_, column_names, index_begin_, index_end_)) - , storage(storage_), max_read_buffer_size(max_read_buffer_size_) - , index(index_), index_begin(index_begin_), index_end(index_end_) + : SourceWithProgress( + getHeader(storage_, metadata_snapshot_, column_names, index_begin_, index_end_)) + , storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , max_read_buffer_size(max_read_buffer_size_) + , index(index_) + , index_begin(index_begin_) + , index_end(index_end_) { } @@ -110,6 +120,7 @@ protected: private: StorageStripeLog & storage; + StorageMetadataPtr metadata_snapshot; size_t max_read_buffer_size; std::shared_ptr index; @@ -253,7 +264,7 @@ void StorageStripeLog::rename(const String & new_path_to_table_data, const Stora Pipes StorageStripeLog::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ 
-271,7 +282,7 @@ Pipes StorageStripeLog::read( String index_file = table_path + "index.mrk"; if (!disk->exists(index_file)) { - pipes.emplace_back(std::make_shared(getSampleBlockForColumns(column_names))); + pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); return pipes; } @@ -291,7 +302,7 @@ Pipes StorageStripeLog::read( std::advance(end, (stream + 1) * size / num_streams); pipes.emplace_back(std::make_shared( - *this, column_names, context.getSettingsRef().max_read_buffer_size, index, begin, end)); + *this, metadata_snapshot, column_names, context.getSettingsRef().max_read_buffer_size, index, begin, end)); } /// We do not keep read lock directly at the time of reading, because we read ranges of data that do not change. diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 0361718c616..6cea7115066 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -157,7 +157,7 @@ std::function IStorageURLBase::getReadPOSTDataCallback(con Pipes IStorageURLBase::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -170,14 +170,15 @@ Pipes IStorageURLBase::read( request_uri.addQueryParameter(param, value); Pipes pipes; - pipes.emplace_back(std::make_shared(request_uri, + pipes.emplace_back(std::make_shared( + request_uri, getReadMethod(), getReadPOSTDataCallback(column_names, query_info, context, processed_stage, max_block_size), format_name, getName(), - getHeaderBlock(column_names), + getHeaderBlock(column_names, metadata_snapshot), context, - getColumns().getDefaults(), + metadata_snapshot->getColumns().getDefaults(), max_block_size, ConnectionTimeouts::getHTTPTimeouts(context), chooseCompressionMethod(request_uri.getPath(), compression_method))); diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index ecd57024a44..04cbb278c37 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -62,7 +62,7 @@ private: QueryProcessingStage::Enum & processed_stage, size_t max_block_size) const; - virtual Block getHeaderBlock(const Names & column_names) const = 0; + virtual Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const = 0; }; class StorageURLBlockOutputStream : public IBlockOutputStream @@ -124,9 +124,9 @@ public: return "URL"; } - Block getHeaderBlock(const Names & /*column_names*/) const override + Block getHeaderBlock(const Names & /*column_names*/, const StorageMetadataPtr & metadata_snapshot) const override { - return getSampleBlock(); + return metadata_snapshot->getSampleBlock(); } }; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 52b7e8764d9..7e49580d6c2 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -54,7 +54,7 @@ StorageView::StorageView( Pipes StorageView::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -86,8 +86,9 @@ Pipes StorageView::read( /// And also convert to expected structure. 
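// Aside (editor's sketch, not part of the patch): the ConvertingTransform used
// just below with MatchColumnsMode::Name rearranges a source block's columns to
// the expected header's order, matching by name. A self-contained toy of that
// reordering, with stand-in types; the real transform also casts and validates:
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

std::vector<std::string> reorderByName(
    const std::map<std::string, std::string> & source,  // column name -> serialized column
    const std::vector<std::string> & expected_order)    // names taken from the expected header
{
    std::vector<std::string> result;
    result.reserve(expected_order.size());
    for (const auto & name : expected_order)
    {
        auto it = source.find(name);
        if (it == source.end())
            throw std::runtime_error("Cannot find column " + name + " in source block");
        result.push_back(it->second);
    }
    return result;
}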
     pipeline.addSimpleTransform([&](const Block & header)
     {
-        return std::make_shared<ConvertingTransform>(header, getSampleBlockForColumns(column_names),
-            ConvertingTransform::MatchColumnsMode::Name);
+        return std::make_shared<ConvertingTransform>(
+            header, metadata_snapshot->getSampleBlockForColumns(
+                column_names, getVirtuals()), ConvertingTransform::MatchColumnsMode::Name);
     });

     pipes = std::move(pipeline).getPipes();
diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp
index c7fa8a88251..a837cf5b4ba 100644
--- a/src/Storages/StorageXDBC.cpp
+++ b/src/Storages/StorageXDBC.cpp
@@ -97,16 +97,16 @@ Pipes StorageXDBC::read(
     return IStorageURLBase::read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams);
 }

-BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context)
+BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context)
 {
     bridge_helper->startBridgeSync();

     NamesAndTypesList cols;
     Poco::URI request_uri = uri;
     request_uri.setPath("/write");
-    for (const String & name : getSampleBlock().getNames())
+    for (const String & name : metadata_snapshot->getSampleBlock().getNames())
     {
-        auto column_data = getColumns().getPhysical(name);
+        auto column_data = metadata_snapshot->getColumns().getPhysical(name);
         cols.emplace_back(column_data.name, column_data.type);
     }
     auto url_params = bridge_helper->getURLParams(cols.toString(), 65536);
@@ -117,14 +117,17 @@ BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageM
     request_uri.addQueryParameter("format_name", format_name);

     return std::make_shared<StorageURLBlockOutputStream>(
-        request_uri, format_name, getSampleBlock(), context,
-        ConnectionTimeouts::getHTTPTimeouts(context),
-        chooseCompressionMethod(uri.toString(), compression_method));
+        request_uri,
+        format_name,
+        metadata_snapshot->getSampleBlock(),
+        context,
+        ConnectionTimeouts::getHTTPTimeouts(context),
+        chooseCompressionMethod(uri.toString(), compression_method));
 }

-Block StorageXDBC::getHeaderBlock(const Names & column_names) const
+Block StorageXDBC::getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const
 {
-    return getSampleBlockForColumns(column_names);
+    return metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals());
 }

 std::string StorageXDBC::getName() const
diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h
index 4488122656d..0e227d7d432 100644
--- a/src/Storages/StorageXDBC.h
+++ b/src/Storages/StorageXDBC.h
@@ -24,11 +24,12 @@ public:
         size_t max_block_size,
         unsigned num_streams) override;

-    StorageXDBC(const StorageID & table_id_,
-        const std::string & remote_database_name,
-        const std::string & remote_table_name,
-        const ColumnsDescription & columns_,
-        const Context & context_, BridgeHelperPtr bridge_helper_);
+    StorageXDBC(
+        const StorageID & table_id_,
+        const std::string & remote_database_name,
+        const std::string & remote_table_name,
+        const ColumnsDescription & columns_,
+        const Context & context_, BridgeHelperPtr bridge_helper_);

     BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override;

@@ -42,19 +43,21 @@ private:

     std::string getReadMethod() const override;

-    std::vector<std::pair<String, String>> getReadURIParams(const Names & column_names,
-        const SelectQueryInfo & query_info,
-        const Context & context,
-        QueryProcessingStage::Enum & processed_stage,
-        size_t
max_block_size) const override; + std::vector> getReadURIParams( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size) const override; - std::function getReadPOSTDataCallback(const Names & column_names, - const SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum & processed_stage, - size_t max_block_size) const override; + std::function getReadPOSTDataCallback( + const Names & column_names, + const SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum & processed_stage, + size_t max_block_size) const override; - Block getHeaderBlock(const Names & column_names) const override; + Block getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const override; std::string getName() const override; }; From 824d6667d97c945d72aafc87b531eba177305f9e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 17:27:44 +0300 Subject: [PATCH 107/318] Seems to be working getSampleBlockWithColumns in StorageInMemoryMetadata From 1ddeb3d149a19b21adc1c426287995dbfd4b3249 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 18:51:29 +0300 Subject: [PATCH 108/318] Buildable getSampleBlock in StorageInMemoryMetadata --- src/DataStreams/RemoteQueryExecutor.cpp | 3 +- src/Interpreters/InterpreterAlterQuery.cpp | 3 +- src/Interpreters/InterpreterInsertQuery.cpp | 2 +- src/Interpreters/executeQuery.cpp | 5 ++- src/Server/TCPHandler.cpp | 2 +- .../DistributedBlockOutputStream.cpp | 25 +++++++---- .../DistributedBlockOutputStream.h | 12 +++++- src/Storages/IStorage.cpp | 10 ----- src/Storages/IStorage.h | 4 +- .../MergeTree/IMergedBlockOutputStream.cpp | 5 ++- .../MergeTree/IMergedBlockOutputStream.h | 4 +- .../MergeTree/MergeTreeBlockOutputStream.cpp | 4 +- .../MergeTree/MergeTreeBlockOutputStream.h | 13 +++++- .../MergeTree/MergeTreeDataMergerMutator.cpp | 8 ++++ .../MergeTree/MergeTreeDataMergerMutator.h | 2 + .../MergeTree/MergeTreeDataWriter.cpp | 4 +- src/Storages/MergeTree/MergeTreeDataWriter.h | 2 +- src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 +- .../MergeTreeReverseSelectProcessor.cpp | 2 +- .../MergeTree/MergeTreeSelectProcessor.cpp | 2 +- .../MergeTree/MergedBlockOutputStream.cpp | 11 ++++- .../MergeTree/MergedBlockOutputStream.h | 4 +- .../MergedColumnOnlyOutputStream.cpp | 4 +- .../MergeTree/MergedColumnOnlyOutputStream.h | 1 + .../ReplicatedMergeTreeBlockOutputStream.cpp | 20 ++++++--- .../ReplicatedMergeTreeBlockOutputStream.h | 9 +++- src/Storages/StorageBuffer.cpp | 24 +++++++---- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageFile.cpp | 32 +++++++++------ src/Storages/StorageHDFS.cpp | 8 ++-- src/Storages/StorageInput.cpp | 4 +- src/Storages/StorageJoin.cpp | 12 ++++-- src/Storages/StorageLog.cpp | 17 ++++---- src/Storages/StorageMaterializedView.cpp | 5 ++- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageMerge.cpp | 36 +++++++++++----- src/Storages/StorageMerge.h | 18 +++++--- src/Storages/StorageMergeTree.cpp | 12 +++--- src/Storages/StorageMergeTree.h | 6 ++- src/Storages/StorageMySQL.cpp | 25 +++++++---- src/Storages/StorageReplicatedMergeTree.cpp | 17 ++++---- src/Storages/StorageReplicatedMergeTree.h | 8 +++- src/Storages/StorageSet.cpp | 41 ++++++++++++------- src/Storages/StorageStripeLog.cpp | 27 ++++++------ src/Storages/StorageTinyLog.cpp | 15 +++---- src/Storages/StorageURL.cpp | 4 +- 
src/Storages/System/IStorageSystemOneBlock.h | 4 +- src/Storages/System/StorageSystemColumns.cpp | 4 +- .../System/StorageSystemDetachedParts.cpp | 4 +- src/Storages/System/StorageSystemDisks.cpp | 4 +- .../System/StorageSystemPartsBase.cpp | 6 +-- src/Storages/System/StorageSystemReplicas.cpp | 8 ++-- .../System/StorageSystemStoragePolicies.cpp | 4 +- src/Storages/System/StorageSystemTables.cpp | 4 +- 54 files changed, 328 insertions(+), 188 deletions(-) diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 071cb6e9aba..45ddd7c0893 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -333,7 +333,8 @@ void RemoteQueryExecutor::sendExternalTables() data->table_name = table.first; if (pipes.empty()) - data->pipe = std::make_unique(std::make_shared(cur->getSampleBlock(), Chunk())); + data->pipe = std::make_unique( + std::make_shared(metadata_snapshot->getSampleBlock(), Chunk())); else if (pipes.size() == 1) data->pipe = std::make_unique(std::move(pipes.front())); else diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index bd20d78279d..869c3ae98d3 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -43,6 +43,7 @@ BlockIO InterpreterAlterQuery::execute() context.checkAccess(getRequiredAccess()); auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary); StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); /// Add default database to table identifiers that we can encounter in e.g. default expressions, /// mutation expression, etc. @@ -91,7 +92,7 @@ BlockIO InterpreterAlterQuery::execute() if (!partition_commands.empty()) { - table->alterPartition(query_ptr, partition_commands, context); + table->alterPartition(query_ptr, metadata_snapshot, partition_commands, context); } if (!live_view_commands.empty()) diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index f61ef0e7381..443e2714ec7 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -88,7 +88,7 @@ Block InterpreterInsertQuery::getSampleBlock( return table_sample_non_materialized; } - Block table_sample = table->getSampleBlock(); + Block table_sample = metadata_snapshot->getSampleBlock(); /// Form the block based on the column names from the query Block res; for (const auto & identifier : query.columns->children) diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index e1e2108c0fc..2b8ebf12a20 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -326,8 +326,9 @@ static std::tuple executeQueryImpl( { StoragePtr storage = context.executeTableFunction(input_function); auto & input_storage = dynamic_cast(*storage); - BlockInputStreamPtr input_stream = std::make_shared(ast, istr, - input_storage.getSampleBlock(), context, input_function); + auto input_metadata_snapshot = input_storage.getInMemoryMetadataPtr(); + BlockInputStreamPtr input_stream = std::make_shared( + ast, istr, input_metadata_snapshot->getSampleBlock(), context, input_function); input_storage.setInputStream(input_stream); } } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index a01cc4fa0aa..009f7ad80f0 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -223,7 +223,7 @@ 
void TCPHandler::runImpl() } /// Send block to the client - input storage structure. - state.input_header = input_storage->getSampleBlock(); + state.input_header = input_storage->getInMemoryMetadataPtr()->getSampleBlock(); sendData(state.input_header); }); diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index 4e28923ebfc..bf9efef1ba6 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -83,18 +83,29 @@ static void writeBlockConvert(const BlockOutputStreamPtr & out, const Block & bl DistributedBlockOutputStream::DistributedBlockOutputStream( - const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_, const ClusterPtr & cluster_, - bool insert_sync_, UInt64 insert_timeout_) - : context(context_), storage(storage_), query_ast(query_ast_), query_string(queryToString(query_ast_)), - cluster(cluster_), insert_sync(insert_sync_), - insert_timeout(insert_timeout_), log(&Poco::Logger::get("DistributedBlockOutputStream")) + const Context & context_, + StorageDistributed & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const ASTPtr & query_ast_, + const ClusterPtr & cluster_, + bool insert_sync_, + UInt64 insert_timeout_) + : context(context_) + , storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , query_ast(query_ast_) + , query_string(queryToString(query_ast_)) + , cluster(cluster_) + , insert_sync(insert_sync_) + , insert_timeout(insert_timeout_) + , log(&Poco::Logger::get("DistributedBlockOutputStream")) { } Block DistributedBlockOutputStream::getHeader() const { - return storage.getSampleBlock(); + return metadata_snapshot->getSampleBlock(); } @@ -109,7 +120,7 @@ void DistributedBlockOutputStream::write(const Block & block) /* They are added by the AddingDefaultBlockOutputStream, and we will get * different number of columns eventually */ - for (const auto & col : storage.getColumns().getMaterialized()) + for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) { if (ordinary_block.has(col.name)) { diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h index 17db955431c..53d71ffc424 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -2,6 +2,7 @@ #include #include +#include #include #include #include @@ -36,8 +37,14 @@ class StorageDistributed; class DistributedBlockOutputStream : public IBlockOutputStream { public: - DistributedBlockOutputStream(const Context & context_, StorageDistributed & storage_, const ASTPtr & query_ast_, - const ClusterPtr & cluster_, bool insert_sync_, UInt64 insert_timeout_); + DistributedBlockOutputStream( + const Context & context_, + StorageDistributed & storage_, + const StorageMetadataPtr & metadata_snapshot_, + const ASTPtr & query_ast_, + const ClusterPtr & cluster_, + bool insert_sync_, + UInt64 insert_timeout_); Block getHeader() const override; void write(const Block & block) override; @@ -79,6 +86,7 @@ private: private: const Context & context; StorageDistributed & storage; + StorageMetadataPtr metadata_snapshot; ASTPtr query_ast; String query_string; ClusterPtr cluster; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index d090dc9e51d..43e9a5dd040 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -52,16 +52,6 @@ const 
ConstraintsDescription & IStorage::getConstraints() const return metadata->constraints; } -Block IStorage::getSampleBlock() const -{ - Block res; - - for (const auto & column : getColumns().getAllPhysical()) - res.insert({column.type->createColumn(), column.type, column.name}); - - return res; -} - namespace { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index a4173c1c9fa..df1e1685a2e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -158,8 +158,6 @@ public: /// thread-unsafe part. lockStructure must be acquired StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } - Block getSampleBlock() const; /// ordinary + materialized. - /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. void check(const Names & column_names, bool include_virtuals = false) const; @@ -361,7 +359,7 @@ public: /** ALTER tables with regard to its partitions. * Should handle locks for each command on its own. */ - virtual void alterPartition(const ASTPtr & /* query */, const PartitionCommands & /* commands */, const Context & /* context */) + virtual void alterPartition(const ASTPtr & /* query */, const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, const Context & /* context */) { throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp index 329a8ee4508..cfb4d8ba4ba 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.cpp @@ -5,10 +5,11 @@ namespace DB { - IMergedBlockOutputStream::IMergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part) + const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_) : storage(data_part->storage) + , metadata_snapshot(metadata_snapshot_) , volume(data_part->volume) , part_path(data_part->getFullRelativePath()) { diff --git a/src/Storages/MergeTree/IMergedBlockOutputStream.h b/src/Storages/MergeTree/IMergedBlockOutputStream.h index 6a06d4b0c75..7ec9f85ff28 100644 --- a/src/Storages/MergeTree/IMergedBlockOutputStream.h +++ b/src/Storages/MergeTree/IMergedBlockOutputStream.h @@ -13,7 +13,8 @@ class IMergedBlockOutputStream : public IBlockOutputStream { public: IMergedBlockOutputStream( - const MergeTreeDataPartPtr & data_part); + const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_); using WrittenOffsetColumns = std::set; @@ -36,6 +37,7 @@ protected: protected: const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; VolumePtr volume; String part_path; diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index b6376dd3779..1ea6b049bf6 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -8,7 +8,7 @@ namespace DB Block MergeTreeBlockOutputStream::getHeader() const { - return storage.getSampleBlock(); + return metadata_snapshot->getSampleBlock(); } @@ -21,7 +21,7 @@ void MergeTreeBlockOutputStream::write(const Block & block) { Stopwatch watch; - MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block); + 
MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, metadata_snapshot); storage.renameTempPartAndAdd(part, &storage.increment); PartLog::addNewPart(storage.global_context, part, watch.elapsed()); diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.h b/src/Storages/MergeTree/MergeTreeBlockOutputStream.h index 8f957d631d3..71e126b07ef 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.h +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.h @@ -1,6 +1,7 @@ #pragma once #include +#include namespace DB @@ -13,14 +14,22 @@ class StorageMergeTree; class MergeTreeBlockOutputStream : public IBlockOutputStream { public: - MergeTreeBlockOutputStream(StorageMergeTree & storage_, size_t max_parts_per_block_) - : storage(storage_), max_parts_per_block(max_parts_per_block_) {} + MergeTreeBlockOutputStream( + StorageMergeTree & storage_, + const StorageMetadataPtr metadata_snapshot_, + size_t max_parts_per_block_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , max_parts_per_block(max_parts_per_block_) + { + } Block getHeader() const override; void write(const Block & block) override; private: StorageMergeTree & storage; + StorageMetadataPtr metadata_snapshot; size_t max_parts_per_block; }; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 829f7cac528..39e6cfdc275 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -808,6 +808,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor const auto & index_factory = MergeTreeIndexFactory::instance(); MergedBlockOutputStream to{ new_data_part, + metadata_snapshot, merging_columns, index_factory.getMany(data.getSecondaryIndices()), compression_codec, @@ -912,6 +913,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor MergedColumnOnlyOutputStream column_to( new_data_part, + metadata_snapshot, column_gathered_stream.getHeader(), compression_codec, /// we don't need to recalc indices here @@ -1085,6 +1087,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor auto part_indices = getIndicesForNewDataPart(data.getSecondaryIndices(), for_file_renames); mutateAllPartColumns( new_data_part, + metadata_snapshot, part_indices, in, time_of_mutation, @@ -1137,6 +1140,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor { mutateSomePartColumns( source_part, + metadata_snapshot, indices_to_recalc, updated_header, new_data_part, @@ -1582,6 +1586,7 @@ bool MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadataPtr & met void MergeTreeDataMergerMutator::mutateAllPartColumns( MergeTreeData::MutableDataPartPtr new_data_part, + const StorageMetadataPtr & metadata_snapshot, const MergeTreeIndices & skip_indices, BlockInputStreamPtr mutating_stream, time_t time_of_mutation, @@ -1603,6 +1608,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( MergedBlockOutputStream out{ new_data_part, + metadata_snapshot, new_data_part->getColumns(), skip_indices, compression_codec}; @@ -1629,6 +1635,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( void MergeTreeDataMergerMutator::mutateSomePartColumns( const MergeTreeDataPartPtr & source_part, + const StorageMetadataPtr & metadata_snapshot, const std::set & indices_to_recalc, const Block & mutation_header, MergeTreeData::MutableDataPartPtr new_data_part, @@ 
-1647,6 +1654,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( IMergedBlockOutputStream::WrittenOffsetColumns unused_written_offsets; MergedColumnOnlyOutputStream out( new_data_part, + metadata_snapshot, mutation_header, compression_codec, std::vector(indices_to_recalc.begin(), indices_to_recalc.end()), diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 185961972a8..3625c9bbe26 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -182,6 +182,7 @@ private: /// Override all columns of new part using mutating_stream void mutateAllPartColumns( MergeTreeData::MutableDataPartPtr new_data_part, + const StorageMetadataPtr & metadata_snapshot, const MergeTreeIndices & skip_indices, BlockInputStreamPtr mutating_stream, time_t time_of_mutation, @@ -192,6 +193,7 @@ private: /// Mutate some columns of source part with mutation_stream void mutateSomePartColumns( const MergeTreeDataPartPtr & source_part, + const StorageMetadataPtr & metadata_snapshot, const std::set & indices_to_recalc, const Block & mutation_header, MergeTreeData::MutableDataPartPtr new_data_part, diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f3da98f0ba3..71501a0e19a 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -192,7 +192,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block return result; } -MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPartition & block_with_partition) +MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPartition & block_with_partition, const StorageMetadataPtr & metadata_snapshot) { Block & block = block_with_partition.block; @@ -302,7 +302,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - MergedBlockOutputStream out(new_data_part, columns, index_factory.getMany(data.getSecondaryIndices()), compression_codec); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(data.getSecondaryIndices()), compression_codec); out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index ffaa227641e..dabdcbd2148 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -45,7 +45,7 @@ public: /** All rows must correspond to same partition. * Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData. 
*/ - MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithPartition & block); + MergeTreeData::MutableDataPartPtr writeTempPart(BlockWithPartition & block, const StorageMetadataPtr & metadata_snapshot); private: MergeTreeData & data; diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index eb0b51235ad..8c73dc39dfb 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -197,7 +197,7 @@ std::vector MergeTreeReadPool::fillPerPartInfo( RangesInDataParts & parts, const bool check_columns) { std::vector per_part_sum_marks; - Block sample_block = data.getSampleBlock(); + Block sample_block = metadata_snapshot->getSampleBlock(); for (const auto i : ext::range(0, parts.size())) { diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index 81366614988..c47dd7fb669 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -115,7 +115,7 @@ try auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr - : std::make_unique(data_part, ordered_names, data_part->storage.getSampleBlock()); + : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); task = std::make_unique( data_part, mark_ranges_for_task, part_index_in_query, ordered_names, column_name_set, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index e32fa70cb97..84c0f44c109 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -72,7 +72,7 @@ try auto size_predictor = (preferred_block_size_bytes == 0) ? 
nullptr - : std::make_unique(data_part, ordered_names, data_part->storage.getSampleBlock()); + : std::make_unique(data_part, ordered_names, metadata_snapshot->getSampleBlock()); /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & column_names = task_columns.columns.getNames(); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index bce50918ac0..c768678c454 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -15,12 +15,18 @@ namespace ErrorCodes MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, bool blocks_are_granules_size) : MergedBlockOutputStream( - data_part, columns_list_, skip_indices, default_codec, {}, + data_part, + metadata_snapshot_, + columns_list_, + skip_indices, + default_codec, + {}, data_part->storage.global_context.getSettings().min_bytes_to_use_direct_io, blocks_are_granules_size) { @@ -28,13 +34,14 @@ MergedBlockOutputStream::MergedBlockOutputStream( MergedBlockOutputStream::MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, const MergeTreeData::DataPart::ColumnToSize & merged_column_to_size, size_t aio_threshold, bool blocks_are_granules_size) - : IMergedBlockOutputStream(data_part) + : IMergedBlockOutputStream(data_part, metadata_snapshot_) , columns_list(columns_list_) { MergeTreeWriterSettings writer_settings(data_part->storage.global_context.getSettings(), diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.h b/src/Storages/MergeTree/MergedBlockOutputStream.h index 5a92977640e..1a8bf9da822 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.h +++ b/src/Storages/MergeTree/MergedBlockOutputStream.h @@ -15,6 +15,7 @@ class MergedBlockOutputStream final : public IMergedBlockOutputStream public: MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, @@ -22,6 +23,7 @@ public: MergedBlockOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const NamesAndTypesList & columns_list_, const MergeTreeIndices & skip_indices, CompressionCodecPtr default_codec, @@ -29,7 +31,7 @@ public: size_t aio_threshold, bool blocks_are_granules_size = false); - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } /// If the data is pre-sorted. 
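// Aside (editor's sketch, not part of the patch): every stream in these hunks
// now captures its own StorageMetadataPtr. The point of the shared-ptr-to-const
// idiom, in a self-contained toy with illustrative names: a concurrent ALTER
// installs a brand-new metadata object and never mutates the old one, so a
// stream's header stays consistent for its whole lifetime.
#include <cassert>
#include <memory>
#include <string>

struct Metadata { std::string sample_block_description; };
using MetadataPtr = std::shared_ptr<const Metadata>;

struct Stream
{
    MetadataPtr snapshot; // captured once at stream creation
    std::string getHeader() const { return snapshot->sample_block_description; }
};

int main()
{
    auto meta = std::make_shared<const Metadata>(Metadata{"a UInt64, b String"});
    Stream stream{meta};
    // "ALTER": replace the pointer, leaving the old immutable object intact.
    meta = std::make_shared<const Metadata>(Metadata{"a UInt64, b String, c Date"});
    assert(stream.getHeader() == "a UInt64, b String");
}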
void write(const Block & block) override; diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index b5eefbe3f0c..b74a8243437 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -10,13 +10,15 @@ namespace ErrorCodes MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const Block & header_, CompressionCodecPtr default_codec, const std::vector & indices_to_recalc, WrittenOffsetColumns * offset_columns_, const MergeTreeIndexGranularity & index_granularity, const MergeTreeIndexGranularityInfo * index_granularity_info) - : IMergedBlockOutputStream(data_part), header(header_) + : IMergedBlockOutputStream(data_part, metadata_snapshot_) + , header(header_) { const auto & global_settings = data_part->storage.global_context.getSettings(); MergeTreeWriterSettings writer_settings( diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h index 2c5024bbcfe..902138ced9d 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.h @@ -15,6 +15,7 @@ public: /// if you want to serialize elements of Nested data structure in different instances of MergedColumnOnlyOutputStream. MergedColumnOnlyOutputStream( const MergeTreeDataPartPtr & data_part, + const StorageMetadataPtr & metadata_snapshot_, const Block & header_, CompressionCodecPtr default_codec_, const std::vector & indices_to_recalc_, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 1bbc56d940d..8319b0e018d 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -31,9 +31,19 @@ namespace ErrorCodes ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream( - StorageReplicatedMergeTree & storage_, size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block_, bool deduplicate_) - : storage(storage_), quorum(quorum_), quorum_timeout_ms(quorum_timeout_ms_), max_parts_per_block(max_parts_per_block_), deduplicate(deduplicate_), - log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) + StorageReplicatedMergeTree & storage_, + const StorageMetadataPtr & metadata_snapshot_, + size_t quorum_, + size_t quorum_timeout_ms_, + size_t max_parts_per_block_, + bool deduplicate_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , quorum(quorum_) + , quorum_timeout_ms(quorum_timeout_ms_) + , max_parts_per_block(max_parts_per_block_) + , deduplicate(deduplicate_) + , log(&Poco::Logger::get(storage.getLogName() + " (Replicated OutputStream)")) { /// The quorum value `1` has the same meaning as if it is disabled. if (quorum == 1) @@ -43,7 +53,7 @@ ReplicatedMergeTreeBlockOutputStream::ReplicatedMergeTreeBlockOutputStream( Block ReplicatedMergeTreeBlockOutputStream::getHeader() const { - return storage.getSampleBlock(); + return metadata_snapshot->getSampleBlock(); } @@ -128,7 +138,7 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) /// Write part to the filesystem under temporary name. Calculate a checksum. 
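// Aside (editor's sketch, not part of the patch): writeTempPart(), called just
// below, follows the classic write-to-temporary-then-rename pattern so that
// readers never observe a half-written part. A generic file-level sketch of the
// same idea; the real code works on part directories (with a "tmp_" prefix)
// and ZooKeeper, not on a single file:
#include <filesystem>
#include <fstream>
#include <stdexcept>
#include <string>

void writeAtomically(const std::filesystem::path & target, const std::string & payload)
{
    auto tmp = target;
    tmp += ".tmp";
    {
        std::ofstream out(tmp, std::ios::binary | std::ios::trunc);
        if (!out.write(payload.data(), static_cast<std::streamsize>(payload.size())))
            throw std::runtime_error("cannot write " + tmp.string());
    } // closed and flushed before the rename below
    std::filesystem::rename(tmp, target); // atomic on POSIX within one filesystem
}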
- MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block); + MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, metadata_snapshot); String block_id; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h index b8650c25c7d..ac169d248c2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.h @@ -22,8 +22,12 @@ class StorageReplicatedMergeTree; class ReplicatedMergeTreeBlockOutputStream : public IBlockOutputStream { public: - ReplicatedMergeTreeBlockOutputStream(StorageReplicatedMergeTree & storage_, - size_t quorum_, size_t quorum_timeout_ms_, size_t max_parts_per_block_, + ReplicatedMergeTreeBlockOutputStream( + StorageReplicatedMergeTree & storage_, + const StorageMetadataPtr & metadata_snapshot_, + size_t quorum_, + size_t quorum_timeout_ms_, + size_t max_parts_per_block_, bool deduplicate_); Block getHeader() const override; @@ -55,6 +59,7 @@ private: void commitPart(zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id); StorageReplicatedMergeTree & storage; + StorageMetadataPtr metadata_snapshot; size_t quorum; size_t quorum_timeout_ms; size_t max_parts_per_block; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 42eab838f32..b08e4e93bed 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -167,10 +167,10 @@ Pipes StorageBuffer::read( auto destination_lock = destination->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [this, destination](const String& column_name) + const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [metadata_snapshot, destination](const String& column_name) { const auto & dest_columns = destination->getColumns(); - const auto & our_columns = getColumns(); + const auto & our_columns = metadata_snapshot->getColumns(); return dest_columns.hasPhysical(column_name) && dest_columns.get(column_name).type->equals(*our_columns.get(column_name).type); }); @@ -188,7 +188,7 @@ Pipes StorageBuffer::read( else { /// There is a struct mismatch and we need to convert read blocks from the destination table. 
- const Block header = getSampleBlock(); + const Block header = metadata_snapshot->getSampleBlock(); Names columns_intersection = column_names; Block header_after_adding_defaults = header; const auto & dest_columns = destination->getColumns(); @@ -326,9 +326,14 @@ static void appendBlock(const Block & from, Block & to) class BufferBlockOutputStream : public IBlockOutputStream { public: - explicit BufferBlockOutputStream(StorageBuffer & storage_) : storage(storage_) {} + explicit BufferBlockOutputStream( + StorageBuffer & storage_, + const StorageMetadataPtr & metadata_snapshot_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + {} - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override { @@ -404,6 +409,7 @@ public: } private: StorageBuffer & storage; + StorageMetadataPtr metadata_snapshot; void insertIntoBuffer(const Block & block, StorageBuffer::Buffer & buffer) { @@ -434,9 +440,9 @@ private: }; -BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { - return std::make_shared(*this); + return std::make_shared(*this, metadata_snapshot); } @@ -654,8 +660,8 @@ void StorageBuffer::writeBlockToDestination(const Block & block, StoragePtr tabl /** We will insert columns that are the intersection set of columns of the buffer table and the subordinate table. * This will support some of the cases (but not all) when the table structure does not match. */ - Block structure_of_destination_table - = allow_materialized ? table->getSampleBlock() : destination_metadata_snapshot->getSampleBlockNonMaterialized(); + Block structure_of_destination_table = allow_materialized ? 
destination_metadata_snapshot->getSampleBlock() + : destination_metadata_snapshot->getSampleBlockNonMaterialized(); Block block_to_write; for (size_t i : ext::range(0, structure_of_destination_table.columns())) { diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 66066ec3c18..6868f468f2e 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -536,7 +536,7 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta /// DistributedBlockOutputStream will not own cluster, but will own ConnectionPools of the cluster return std::make_shared( - context, *this, createInsertToRemoteTableQuery(remote_database, remote_table, metadata_snapshot->getSampleBlockNonMaterialized()), cluster, + context, *this, metadata_snapshot, createInsertToRemoteTableQuery(remote_database, remote_table, metadata_snapshot->getSampleBlockNonMaterialized()), cluster, insert_sync, timeout); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 07df2b4ec8a..65f36a48170 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -214,9 +214,9 @@ public: using FilesInfoPtr = std::shared_ptr; - static Block getHeader(StorageFile & storage, bool need_path_column, bool need_file_column) + static Block getHeader(const StorageMetadataPtr & metadata_snapshot, bool need_path_column, bool need_file_column) { - auto header = storage.getSampleBlock(); + auto header = metadata_snapshot->getSampleBlock(); /// Note: AddingDefaultsBlockInputStream doesn't change header. @@ -230,12 +230,14 @@ public: StorageFileSource( std::shared_ptr storage_, + const StorageMetadataPtr & metadata_snapshot_, const Context & context_, UInt64 max_block_size_, FilesInfoPtr files_info_, ColumnDefaults column_defaults_) - : SourceWithProgress(getHeader(*storage_, files_info_->need_path_column, files_info_->need_file_column)) + : SourceWithProgress(getHeader(metadata_snapshot_, files_info_->need_path_column, files_info_->need_file_column)) , storage(std::move(storage_)) + , metadata_snapshot(metadata_snapshot_) , files_info(std::move(files_info_)) , column_defaults(std::move(column_defaults_)) , context(context_) @@ -310,7 +312,7 @@ public: read_buf = wrapReadBufferWithCompressionMethod(std::move(nested_buffer), method); reader = FormatFactory::instance().getInput( - storage->format_name, *read_buf, storage->getSampleBlock(), context, max_block_size); + storage->format_name, *read_buf, metadata_snapshot->getSampleBlock(), context, max_block_size); if (!column_defaults.empty()) reader = std::make_shared(reader, column_defaults, context); @@ -357,6 +359,7 @@ public: private: std::shared_ptr storage; + StorageMetadataPtr metadata_snapshot; FilesInfoPtr files_info; String current_path; Block sample_block; @@ -377,7 +380,7 @@ private: Pipes StorageFile::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -414,7 +417,7 @@ Pipes StorageFile::read( for (size_t i = 0; i < num_streams; ++i) pipes.emplace_back(std::make_shared( - this_ptr, context, max_block_size, files_info, getColumns().getDefaults())); + this_ptr, metadata_snapshot, context, max_block_size, files_info, getColumns().getDefaults())); return pipes; } @@ -423,10 +426,14 @@ Pipes StorageFile::read( class StorageFileBlockOutputStream : public IBlockOutputStream { 
public: - explicit StorageFileBlockOutputStream(StorageFile & storage_, + explicit StorageFileBlockOutputStream( + StorageFile & storage_, + const StorageMetadataPtr & metadata_snapshot_, const CompressionMethod compression_method, const Context & context) - : storage(storage_), lock(storage.rwlock) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , lock(storage.rwlock) { if (storage.use_table_fd) { @@ -446,10 +453,10 @@ public: compression_method, 3); } - writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, storage.getSampleBlock(), context); + writer = FormatFactory::instance().getOutput(storage.format_name, *write_buf, metadata_snapshot->getSampleBlock(), context); } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override { @@ -473,6 +480,7 @@ public: private: StorageFile & storage; + StorageMetadataPtr metadata_snapshot; std::unique_lock lock; std::unique_ptr write_buf; BlockOutputStreamPtr writer; @@ -480,13 +488,13 @@ private: BlockOutputStreamPtr StorageFile::write( const ASTPtr & /*query*/, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const Context & context) { if (format_name == "Distributed") throw Exception("Method write is not implemented for Distributed format", ErrorCodes::NOT_IMPLEMENTED); - return std::make_shared(*this, + return std::make_shared(*this, metadata_snapshot, chooseCompressionMethod(paths[0], compression_method), context); } diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index 77afc4c47c8..ee5a426cedc 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -264,7 +264,7 @@ Strings LSWithRegexpMatching(const String & path_for_ls, const HDFSFSPtr & fs, c Pipes StorageHDFS::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, @@ -296,16 +296,16 @@ Pipes StorageHDFS::read( for (size_t i = 0; i < num_streams; ++i) pipes.emplace_back(std::make_shared( - sources_info, uri_without_path, format_name, compression_method, getSampleBlock(), context_, max_block_size)); + sources_info, uri_without_path, format_name, compression_method, metadata_snapshot->getSampleBlock(), context_, max_block_size)); return pipes; } -BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageHDFS::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { return std::make_shared(uri, format_name, - getSampleBlock(), + metadata_snapshot->getSampleBlock(), context, chooseCompressionMethod(uri, compression_method)); } diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 4117a6b3a37..4430fb11186 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -60,7 +60,7 @@ void StorageInput::setInputStream(BlockInputStreamPtr input_stream_) Pipes StorageInput::read( const Names & /*column_names*/, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -74,7 
+74,7 @@ Pipes StorageInput::read( { /// Send structure to the client. query_context.initializeInput(shared_from_this()); - pipes.emplace_back(std::make_shared(query_context, getSampleBlock())); + pipes.emplace_back(std::make_shared(query_context, metadata_snapshot->getSampleBlock())); return pipes; } diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 7d481395ef4..68b974c0dde 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -53,29 +53,33 @@ StorageJoin::StorageJoin( , strictness(strictness_) , overwrite(overwrite_) { + auto metadata_snapshot = getInMemoryMetadataPtr(); for (const auto & key : key_names) - if (!getColumns().hasPhysical(key)) + if (!metadata_snapshot->getColumns().hasPhysical(key)) throw Exception{"Key column (" + key + ") does not exist in table declaration.", ErrorCodes::NO_SUCH_COLUMN_IN_TABLE}; table_join = std::make_shared(limits, use_nulls, kind, strictness, key_names); - join = std::make_shared(table_join, getSampleBlock().sortColumns(), overwrite); + join = std::make_shared(table_join, metadata_snapshot->getSampleBlock().sortColumns(), overwrite); restore(); } void StorageJoin::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) { + /// TODO(alesap) FIXME + auto metadata_snapshot = getInMemoryMetadataPtr(); Poco::File(path).remove(true); Poco::File(path).createDirectories(); Poco::File(path + "tmp/").createDirectories(); increment = 0; - join = std::make_shared(table_join, getSampleBlock().sortColumns(), overwrite); + join = std::make_shared(table_join, metadata_snapshot->getSampleBlock().sortColumns(), overwrite); } HashJoinPtr StorageJoin::getJoin(std::shared_ptr analyzed_join) const { + auto metadata_snapshot = getInMemoryMetadataPtr(); if (!analyzed_join->sameStrictnessAndKind(strictness, kind)) throw Exception("Table " + getStorageID().getNameForLogs() + " has incompatible type of JOIN.", ErrorCodes::INCOMPATIBLE_TYPE_OF_JOIN); @@ -89,7 +93,7 @@ HashJoinPtr StorageJoin::getJoin(std::shared_ptr analyzed_join) const /// Some HACK to remove wrong names qualifiers: table.column -> column. 
analyzed_join->setRightKeys(key_names); - HashJoinPtr join_clone = std::make_shared(analyzed_join, getSampleBlock().sortColumns()); + HashJoinPtr join_clone = std::make_shared(analyzed_join, metadata_snapshot->getSampleBlock().sortColumns()); join_clone->reuseJoinedData(*join); return join_clone; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index a09a99b30e1..79cc3e5bf68 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -114,10 +114,12 @@ private: class LogBlockOutputStream final : public IBlockOutputStream { public: - explicit LogBlockOutputStream(StorageLog & storage_) - : storage(storage_), - lock(storage.rwlock), - marks_stream(storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) + explicit LogBlockOutputStream(StorageLog & storage_, const StorageMetadataPtr & metadata_snapshot_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , lock(storage.rwlock) + , marks_stream( + storage.disk->writeFile(storage.marks_file_path, 4096, WriteMode::Rewrite)) { } @@ -133,12 +135,13 @@ public: } } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override; void writeSuffix() override; private: StorageLog & storage; + StorageMetadataPtr metadata_snapshot; std::unique_lock lock; bool done = false; @@ -621,10 +624,10 @@ Pipes StorageLog::read( return pipes; } -BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { loadMarks(); - return std::make_shared(*this); + return std::make_shared(*this, metadata_snapshot); } CheckResults StorageLog::checkData(const ASTPtr & /* query */, const Context & /* context */) diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index a387eadabe0..e96a48efc9e 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -244,10 +244,11 @@ void StorageMaterializedView::checkAlterIsPossible(const AlterCommands & command } } -void StorageMaterializedView::alterPartition(const ASTPtr & query, const PartitionCommands &commands, const Context &context) +void StorageMaterializedView::alterPartition( + const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, const Context & context) { checkStatementCanBeForwarded(); - getTargetTable()->alterPartition(query, commands, context); + getTargetTable()->alterPartition(query, metadata_snapshot, commands, context); } void StorageMaterializedView::mutate(const MutationCommands & commands, const Context & context) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 42fe186a068..672be800c8f 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -43,7 +43,7 @@ public: void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const override; - void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; + void alterPartition(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, const Context & context) override; void mutate(const 
MutationCommands & commands, const Context & context) override;
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index c5a3c20bb0c..b3aed291735 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -168,7 +168,9 @@ Pipes StorageMerge::read(
     if (selected_tables.empty())
         /// FIXME: do we support sampling in this case?
         return createSources(
-            query_info, processed_stage, max_block_size, header, {}, real_column_names, modified_context, 0, has_table_virtual_column);
+            metadata_snapshot, query_info, processed_stage,
+            max_block_size, header, {}, real_column_names,
+            modified_context, 0, has_table_virtual_column);

     size_t tables_count = selected_tables.size();
     Float64 num_streams_multiplier = std::min(unsigned(tables_count), std::max(1U, unsigned(context.getSettingsRef().max_streams_multiplier_for_merge_tables)));
@@ -207,8 +209,9 @@ Pipes StorageMerge::read(
             throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);

         auto source_pipes = createSources(
-            query_info, processed_stage, max_block_size, header, table, real_column_names, modified_context,
-            current_streams, has_table_virtual_column);
+            metadata_snapshot, query_info, processed_stage,
+            max_block_size, header, table, real_column_names, modified_context,
+            current_streams, has_table_virtual_column);

         for (auto & pipe : source_pipes)
             res.emplace_back(std::move(pipe));
@@ -220,10 +223,17 @@ Pipes StorageMerge::read(
     return narrowPipes(std::move(res), num_streams);
 }

-Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage,
-    const UInt64 max_block_size, const Block & header, const StorageWithLockAndName & storage_with_lock,
+Pipes StorageMerge::createSources(
+    const StorageMetadataPtr & metadata_snapshot,
+    const SelectQueryInfo & query_info,
+    const QueryProcessingStage::Enum & processed_stage,
+    const UInt64 max_block_size,
+    const Block & header,
+    const StorageWithLockAndName & storage_with_lock,
     Names & real_column_names,
-    const std::shared_ptr<Context> & modified_context, size_t streams_num, bool has_table_virtual_column,
+    const std::shared_ptr<Context> & modified_context,
+    size_t streams_num,
+    bool has_table_virtual_column,
     bool concat_streams)
 {
     const auto & [storage, struct_lock, table_name] = storage_with_lock;
@@ -244,7 +254,6 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer
         return pipes;
     }

-    auto metadata_snapshot = storage->getInMemoryMetadataPtr();
     auto storage_stage = storage->getQueryProcessingStage(*modified_context, QueryProcessingStage::Complete, query_info.query);
     if (processed_stage <= storage_stage)
     {
@@ -295,7 +304,7 @@ Pipes StorageMerge::createSources(const SelectQueryInfo & query_info, const Quer
         /// Underlying tables could have different but convertible types, like numeric types of different width.
         /// We must return streams with structure equal to the structure of the Merge table.
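The createSources() hunks above all apply one convention, visible throughout this series: the snapshot is taken once at the top of read() and handed down as an argument, so every per-table source is built against the same immutable view of the structure. A minimal sketch of the idea; readSketch is a hypothetical name, not a function from the patch:

    /// One snapshot per operation: a cheap shared_ptr copy that stays valid
    /// even if a concurrent ALTER swaps the storage's current metadata.
    Pipes readSketch(const StoragePtr & storage)
    {
        auto metadata_snapshot = storage->getInMemoryMetadataPtr();
        Block header = metadata_snapshot->getSampleBlock();  /// structure now fixed for the whole query
        Pipes pipes;
        /// ...helpers receive metadata_snapshot as an argument instead of
        /// calling storage->getSampleBlock() again...
        return pipes;
    }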
- convertingSourceStream(header, *modified_context, modified_query_info.query, pipe, processed_stage); + convertingSourceStream(header, metadata_snapshot, *modified_context, modified_query_info.query, pipe, processed_stage); pipe.addTableLock(struct_lock); pipe.addInterpreterContext(modified_context); @@ -430,8 +439,13 @@ Block StorageMerge::getQueryHeader( throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); } -void StorageMerge::convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, - Pipe & pipe, QueryProcessingStage::Enum processed_stage) +void StorageMerge::convertingSourceStream( + const Block & header, + const StorageMetadataPtr & metadata_snapshot, + const Context & context, + ASTPtr & query, + Pipe & pipe, + QueryProcessingStage::Enum processed_stage) { Block before_block_header = pipe.getHeader(); pipe.addSimpleTransform(std::make_shared(before_block_header, header, ConvertingTransform::MatchColumnsMode::Name)); @@ -450,7 +464,7 @@ void StorageMerge::convertingSourceStream(const Block & header, const Context & /// So we need to throw exception. if (!header_column.type->equals(*before_column.type.get()) && processed_stage > QueryProcessingStage::FetchColumns) { - NamesAndTypesList source_columns = getSampleBlock().getNamesAndTypesList(); + NamesAndTypesList source_columns = metadata_snapshot->getSampleBlock().getNamesAndTypesList(); auto virtual_column = *getVirtuals().tryGetByName("_table"); source_columns.emplace_back(NameAndTypePair{virtual_column.name, virtual_column.type}); auto syntax_result = SyntaxAnalyzer(context).analyze(where_expression, source_columns); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 350f7a124fe..14bf83f8534 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -82,14 +82,22 @@ protected: QueryProcessingStage::Enum processed_stage); Pipes createSources( - const SelectQueryInfo & query_info, const QueryProcessingStage::Enum & processed_stage, - const UInt64 max_block_size, const Block & header, const StorageWithLockAndName & storage_with_lock, + const StorageMetadataPtr & metadata_snapshot, + const SelectQueryInfo & query_info, + const QueryProcessingStage::Enum & processed_stage, + const UInt64 max_block_size, + const Block & header, + const StorageWithLockAndName & storage_with_lock, Names & real_column_names, - const std::shared_ptr & modified_context, size_t streams_num, bool has_table_virtual_column, + const std::shared_ptr & modified_context, + size_t streams_num, + bool has_table_virtual_column, bool concat_streams = false); - void convertingSourceStream(const Block & header, const Context & context, ASTPtr & query, - Pipe & pipe, QueryProcessingStage::Enum processed_stage); + void convertingSourceStream( + const Block & header, const StorageMetadataPtr & metadata_snapshot, + const Context & context, ASTPtr & query, + Pipe & pipe, QueryProcessingStage::Enum processed_stage); }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9c37cdd2b7c..cfa5c34bece 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -198,9 +198,9 @@ std::optional StorageMergeTree::totalBytes() const return getTotalActiveSizeInBytes(); } -BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) +BlockOutputStreamPtr StorageMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & 
metadata_snapshot, const Context & context) { - return std::make_shared(*this, context.getSettingsRef().max_partitions_per_insert_block); + return std::make_shared(*this, metadata_snapshot, context.getSettingsRef().max_partitions_per_insert_block); } void StorageMergeTree::checkTableCanBeDropped() const @@ -1017,7 +1017,8 @@ bool StorageMergeTree::optimize( return true; } -void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) +void StorageMergeTree::alterPartition( + const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const PartitionCommands & commands, const Context & context) { for (const PartitionCommand & command : commands) { @@ -1085,7 +1086,7 @@ void StorageMergeTree::alterPartition(const ASTPtr & query, const PartitionComma break; default: - IStorage::alterPartition(query, commands, context); // should throw an exception. + IStorage::alterPartition(query, metadata_snapshot, commands, context); // should throw an exception. } } } @@ -1126,7 +1127,8 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons } -void StorageMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & context) +void StorageMergeTree::attachPartition( + const ASTPtr & partition, bool attach_part, const Context & context) { // TODO: should get some locks to prevent race with 'alter … modify column' diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 679726826d4..4b6da58572b 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -55,7 +55,11 @@ public: */ bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; - void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & context) override; + void alterPartition( + const ASTPtr & query, + const StorageMetadataPtr & /* metadata_snapshot */, + const PartitionCommands & commands, + const Context & context) override; void mutate(const MutationCommands & commands, const Context & context) override; diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index dce9e0f38ec..b1262771d21 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -65,7 +65,7 @@ StorageMySQL::StorageMySQL( Pipes StorageMySQL::read( const Names & column_names_, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info_, const Context & context_, QueryProcessingStage::Enum /*processed_stage*/, @@ -74,12 +74,17 @@ Pipes StorageMySQL::read( { check(column_names_); String query = transformQueryForExternalDatabase( - query_info_, getColumns().getOrdinary(), IdentifierQuotingStyle::BackticksMySQL, remote_database_name, remote_table_name, context_); + query_info_, + metadata_snapshot->getColumns().getOrdinary(), + IdentifierQuotingStyle::BackticksMySQL, + remote_database_name, + remote_table_name, + context_); Block sample_block; for (const String & column_name : column_names_) { - auto column_data = getColumns().getPhysical(column_name); + auto column_data = metadata_snapshot->getColumns().getPhysical(column_name); sample_block.insert({ column_data.type, column_data.name }); } @@ -95,12 +100,15 @@ Pipes StorageMySQL::read( class StorageMySQLBlockOutputStream : public IBlockOutputStream { public: - explicit StorageMySQLBlockOutputStream(const StorageMySQL & storage_, + explicit 
StorageMySQLBlockOutputStream( + const StorageMySQL & storage_, + const StorageMetadataPtr & metadata_snapshot_, const std::string & remote_database_name_, const std::string & remote_table_name_, const mysqlxx::PoolWithFailover::Entry & entry_, const size_t & mysql_max_rows_to_insert) : storage{storage_} + , metadata_snapshot{metadata_snapshot_} , remote_database_name{remote_database_name_} , remote_table_name{remote_table_name_} , entry{entry_} @@ -108,7 +116,7 @@ public: { } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override { @@ -136,7 +144,7 @@ public: sqlbuf << backQuoteMySQL(remote_database_name) << "." << backQuoteMySQL(remote_table_name); sqlbuf << " (" << dumpNamesWithBackQuote(block) << ") VALUES "; - auto writer = FormatFactory::instance().getOutput("Values", sqlbuf, storage.getSampleBlock(), storage.global_context); + auto writer = FormatFactory::instance().getOutput("Values", sqlbuf, metadata_snapshot->getSampleBlock(), storage.global_context); writer->write(block); if (!storage.on_duplicate_clause.empty()) @@ -192,6 +200,7 @@ public: private: const StorageMySQL & storage; + StorageMetadataPtr metadata_snapshot; std::string remote_database_name; std::string remote_table_name; mysqlxx::PoolWithFailover::Entry entry; @@ -199,9 +208,9 @@ private: }; -BlockOutputStreamPtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) +BlockOutputStreamPtr StorageMySQL::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { - return std::make_shared(*this, remote_database_name, remote_table_name, pool.get(), context.getSettingsRef().mysql_max_rows_to_insert); + return std::make_shared(*this, metadata_snapshot, remote_database_name, remote_table_name, pool.get(), context.getSettingsRef().mysql_max_rows_to_insert); } void registerStorageMySQL(StorageFactory & factory) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a6f84ffe4df..8ae5a887013 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3449,7 +3449,7 @@ void StorageReplicatedMergeTree::assertNotReadonly() const } -BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) +BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & context) { const auto storage_settings_ptr = getSettings(); assertNotReadonly(); @@ -3457,8 +3457,7 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, const Settings & query_settings = context.getSettingsRef(); bool deduplicate = storage_settings_ptr->replicated_deduplication_window != 0 && query_settings.insert_deduplicate; - return std::make_shared(*this, - query_settings.insert_quorum, query_settings.insert_quorum_timeout.totalMilliseconds(), query_settings.max_partitions_per_insert_block, deduplicate); + return std::make_shared(*this, metadata_snapshot, query_settings.insert_quorum, query_settings.insert_quorum_timeout.totalMilliseconds(), query_settings.max_partitions_per_insert_block, deduplicate); } @@ -3830,7 +3829,11 @@ void StorageReplicatedMergeTree::alter( } } -void StorageReplicatedMergeTree::alterPartition(const 
ASTPtr & query, const PartitionCommands & commands, const Context & query_context) +void StorageReplicatedMergeTree::alterPartition( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const PartitionCommands & commands, + const Context & query_context) { for (const PartitionCommand & command : commands) { @@ -3846,7 +3849,7 @@ void StorageReplicatedMergeTree::alterPartition(const ASTPtr & query, const Part break; case PartitionCommand::ATTACH_PARTITION: - attachPartition(command.partition, command.part, query_context); + attachPartition(command.partition, metadata_snapshot, command.part, query_context); break; case PartitionCommand::MOVE_PARTITION: { @@ -4014,7 +4017,7 @@ void StorageReplicatedMergeTree::truncate(const ASTPtr & query, const Context & } -void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool attach_part, const Context & query_context) +void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool attach_part, const Context & query_context) { // TODO: should get some locks to prevent race with 'alter … modify column' @@ -4023,7 +4026,7 @@ void StorageReplicatedMergeTree::attachPartition(const ASTPtr & partition, bool PartsTemporaryRename renamed_parts(*this, "detached/"); MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); - ReplicatedMergeTreeBlockOutputStream output(*this, 0, 0, 0, false); /// TODO Allow to use quorum here. + ReplicatedMergeTreeBlockOutputStream output(*this, metadata_snapshot, 0, 0, 0, false); /// TODO Allow to use quorum here. for (size_t i = 0; i < loaded_parts.size(); ++i) { String old_name = loaded_parts[i]->name; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 5fcfd98e71d..50530070d19 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -105,7 +105,11 @@ public: void alter(const AlterCommands & params, const Context & query_context, TableStructureWriteLockHolder & table_lock_holder) override; - void alterPartition(const ASTPtr & query, const PartitionCommands & commands, const Context & query_context) override; + void alterPartition( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const PartitionCommands & commands, + const Context & query_context) override; void mutate(const MutationCommands & commands, const Context & context) override; void waitMutation(const String & znode_name, size_t mutations_sync) const; @@ -527,7 +531,7 @@ private: // Partition helpers void dropPartition(const ASTPtr & query, const ASTPtr & partition, bool detach, const Context & query_context); - void attachPartition(const ASTPtr & partition, bool part, const Context & query_context); + void attachPartition(const ASTPtr & partition, const StorageMetadataPtr & metadata_snapshot, bool part, const Context & query_context); void replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & query_context); void movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & query_context); void fetchPartition(const ASTPtr & partition, const String & from, const Context & query_context); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index cddd4657cd1..235251c0761 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -32,15 +32,18 @@ namespace 
ErrorCodes class SetOrJoinBlockOutputStream : public IBlockOutputStream { public: - SetOrJoinBlockOutputStream(StorageSetOrJoinBase & table_, - const String & backup_path_, const String & backup_tmp_path_, const String & backup_file_name_); + SetOrJoinBlockOutputStream( + StorageSetOrJoinBase & table_, const StorageMetadataPtr & metadata_snapshot_, + const String & backup_path_, const String & backup_tmp_path_, + const String & backup_file_name_); - Block getHeader() const override { return table.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override; void writeSuffix() override; private: StorageSetOrJoinBase & table; + StorageMetadataPtr metadata_snapshot; String backup_path; String backup_tmp_path; String backup_file_name; @@ -50,14 +53,20 @@ private: }; -SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream(StorageSetOrJoinBase & table_, - const String & backup_path_, const String & backup_tmp_path_, const String & backup_file_name_) - : table(table_), - backup_path(backup_path_), backup_tmp_path(backup_tmp_path_), - backup_file_name(backup_file_name_), - backup_buf(backup_tmp_path + backup_file_name), - compressed_backup_buf(backup_buf), - backup_stream(compressed_backup_buf, 0, table.getSampleBlock()) +SetOrJoinBlockOutputStream::SetOrJoinBlockOutputStream( + StorageSetOrJoinBase & table_, + const StorageMetadataPtr & metadata_snapshot_, + const String & backup_path_, + const String & backup_tmp_path_, + const String & backup_file_name_) + : table(table_) + , metadata_snapshot(metadata_snapshot_) + , backup_path(backup_path_) + , backup_tmp_path(backup_tmp_path_) + , backup_file_name(backup_file_name_) + , backup_buf(backup_tmp_path + backup_file_name) + , compressed_backup_buf(backup_buf) + , backup_stream(compressed_backup_buf, 0, metadata_snapshot->getSampleBlock()) { } @@ -81,10 +90,10 @@ void SetOrJoinBlockOutputStream::writeSuffix() } -BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageSetOrJoinBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { UInt64 id = ++increment; - return std::make_shared(*this, path, path + "tmp/", toString(id) + ".bin"); + return std::make_shared(*this, metadata_snapshot, path, path + "tmp/", toString(id) + ".bin"); } @@ -119,7 +128,8 @@ StorageSet::StorageSet( : StorageSetOrJoinBase{relative_path_, table_id_, columns_, constraints_, context_}, set(std::make_shared(SizeLimits(), false, true)) { - Block header = getSampleBlock(); + + Block header = getInMemoryMetadataPtr()->getSampleBlock(); header = header.sortColumns(); set->setHeader(header); @@ -134,11 +144,12 @@ size_t StorageSet::getSize() const { return set->getTotalRowCount(); } void StorageSet::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) { + auto metadata_snapshot = getInMemoryMetadataPtr(); Poco::File(path).remove(true); Poco::File(path).createDirectories(); Poco::File(path + "tmp/").createDirectories(); - Block header = getSampleBlock(); + Block header = metadata_snapshot->getSampleBlock(); header = header.sortColumns(); increment = 0; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index b0c5bcfd669..4d9f08a60b7 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -155,15 +155,17 @@ private: class 
StripeLogBlockOutputStream final : public IBlockOutputStream { public: - explicit StripeLogBlockOutputStream(StorageStripeLog & storage_) - : storage(storage_), lock(storage.rwlock), - data_out_file(storage.table_path + "data.bin"), - data_out_compressed(storage.disk->writeFile(data_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append)), - data_out(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size), - index_out_file(storage.table_path + "index.mrk"), - index_out_compressed(storage.disk->writeFile(index_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append)), - index_out(*index_out_compressed), - block_out(data_out, 0, storage.getSampleBlock(), false, &index_out, storage.disk->getFileSize(data_out_file)) + explicit StripeLogBlockOutputStream(StorageStripeLog & storage_, const StorageMetadataPtr & metadata_snapshot_) + : storage(storage_) + , metadata_snapshot(metadata_snapshot_) + , lock(storage.rwlock) + , data_out_file(storage.table_path + "data.bin") + , data_out_compressed(storage.disk->writeFile(data_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append)) + , data_out(*data_out_compressed, CompressionCodecFactory::instance().getDefaultCodec(), storage.max_compress_block_size) + , index_out_file(storage.table_path + "index.mrk") + , index_out_compressed(storage.disk->writeFile(index_out_file, DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Append)) + , index_out(*index_out_compressed) + , block_out(data_out, 0, metadata_snapshot->getSampleBlock(), false, &index_out, storage.disk->getFileSize(data_out_file)) { } @@ -179,7 +181,7 @@ public: } } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override { @@ -205,6 +207,7 @@ public: private: StorageStripeLog & storage; + StorageMetadataPtr metadata_snapshot; std::unique_lock lock; String data_out_file; @@ -311,9 +314,9 @@ Pipes StorageStripeLog::read( } -BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageStripeLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { - return std::make_shared(*this); + return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 42b70f716f4..ba524c7761e 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -109,8 +109,8 @@ private: class TinyLogBlockOutputStream final : public IBlockOutputStream { public: - explicit TinyLogBlockOutputStream(StorageTinyLog & storage_) - : storage(storage_), lock(storage_.rwlock) + explicit TinyLogBlockOutputStream(StorageTinyLog & storage_, const StorageMetadataPtr & metadata_snapshot_) + : storage(storage_), metadata_snapshot(metadata_snapshot_), lock(storage_.rwlock) { } @@ -126,13 +126,14 @@ public: } } - Block getHeader() const override { return storage.getSampleBlock(); } + Block getHeader() const override { return metadata_snapshot->getSampleBlock(); } void write(const Block & block) override; void writeSuffix() override; private: StorageTinyLog & storage; + StorageMetadataPtr metadata_snapshot; std::unique_lock lock; bool done = false; @@ -394,7 +395,7 @@ void StorageTinyLog::rename(const String & new_path_to_table_data, const Storage Pipes StorageTinyLog::read( const Names & column_names, - const 
StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -408,15 +409,15 @@ Pipes StorageTinyLog::read( // When reading, we lock the entire storage, because we only have one file // per column and can't modify it concurrently. pipes.emplace_back(std::make_shared( - max_block_size, Nested::collect(getColumns().getAllPhysical().addTypes(column_names)), *this, context.getSettingsRef().max_read_buffer_size)); + max_block_size, Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)), *this, context.getSettingsRef().max_read_buffer_size)); return pipes; } -BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr StorageTinyLog::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { - return std::make_shared(*this); + return std::make_shared(*this, metadata_snapshot); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 6cea7115066..949d922b611 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -186,10 +186,10 @@ Pipes IStorageURLBase::read( return pipes; } -BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & /*context*/) +BlockOutputStreamPtr IStorageURLBase::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, const Context & /*context*/) { return std::make_shared( - uri, format_name, getSampleBlock(), context_global, + uri, format_name, metadata_snapshot->getSampleBlock(), context_global, ConnectionTimeouts::getHTTPTimeouts(context_global), chooseCompressionMethod(uri.toString(), compression_method)); } diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index 7644f62b96d..de7e1a0e933 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -30,7 +30,7 @@ public: Pipes read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -39,7 +39,7 @@ public: { check(column_names); - Block sample_block = getSampleBlock(); + Block sample_block = metadata_snapshot->getSampleBlock(); MutableColumns res_columns = sample_block.cloneEmptyColumns(); fillData(res_columns, context, query_info); diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 646a5434b64..14a59da1bf9 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -242,7 +242,7 @@ private: Pipes StorageSystemColumns::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -255,7 +255,7 @@ Pipes StorageSystemColumns::read( NameSet names_set(column_names.begin(), column_names.end()); - Block sample_block = getSampleBlock(); + Block sample_block = metadata_snapshot->getSampleBlock(); Block header; std::vector columns_mask(sample_block.columns()); diff --git 
a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 3d24d90bbef..7228651d140 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -47,7 +47,7 @@ protected: Pipes read( const Names & /* column_names */, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -57,7 +57,7 @@ protected: StoragesInfoStream stream(query_info, context); /// Create the result. - Block block = getSampleBlock(); + Block block = metadata_snapshot->getSampleBlock(); MutableColumns new_columns = block.cloneEmptyColumns(); while (StoragesInfo info = stream.next()) diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index 36fde616bd4..d13ea29804d 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -28,7 +28,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_) Pipes StorageSystemDisks::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -63,7 +63,7 @@ Pipes StorageSystemDisks::read( Chunk chunk(std::move(res_columns), num_rows); Pipes pipes; - pipes.emplace_back(std::make_shared(getSampleBlock(), std::move(chunk))); + pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlock(), std::move(chunk))); return pipes; } diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index e599bbb19e3..4f99e1e8c6a 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -225,7 +225,7 @@ StoragesInfo StoragesInfoStream::next() Pipes StorageSystemPartsBase::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -238,7 +238,7 @@ Pipes StorageSystemPartsBase::read( /// Create the result. 
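The system-table readers continued below share one result-building recipe: the snapshot's sample block fixes the output column types, empty clones of those columns are filled row by row, and the result ships as a single chunk. A condensed sketch, assuming metadata_snapshot is in scope and the first column holds strings:

    Block sample_block = metadata_snapshot->getSampleBlock();
    MutableColumns res_columns = sample_block.cloneEmptyColumns();  /// same types, zero rows
    res_columns[0]->insert(Field(String("example")));               /// append one row's value
    size_t num_rows = res_columns.at(0)->size();
    Chunk chunk(std::move(res_columns), num_rows);                  /// handed to a one-chunk source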
- MutableColumns res_columns = getSampleBlock().cloneEmptyColumns(); + MutableColumns res_columns = metadata_snapshot->getSampleBlock().cloneEmptyColumns(); if (has_state_column) res_columns.push_back(ColumnString::create()); @@ -247,7 +247,7 @@ Pipes StorageSystemPartsBase::read( processNextStorage(res_columns, info, has_state_column); } - Block header = getSampleBlock(); + Block header = metadata_snapshot->getSampleBlock(); if (has_state_column) header.insert(ColumnWithTypeAndName(std::make_shared(), "_state")); diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 24861fcbd6a..8fb6a89ddd1 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -59,7 +59,7 @@ StorageSystemReplicas::StorageSystemReplicas(const std::string & name_) Pipes StorageSystemReplicas::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -146,7 +146,7 @@ Pipes StorageSystemReplicas::read( col_engine = filtered_block.getByName("engine").column; } - MutableColumns res_columns = getSampleBlock().cloneEmptyColumns(); + MutableColumns res_columns = metadata_snapshot->getSampleBlock().cloneEmptyColumns(); for (size_t i = 0, size = col_database->size(); i < size; ++i) { @@ -187,7 +187,7 @@ Pipes StorageSystemReplicas::read( res_columns[col_num++]->insert(status.zookeeper_exception); } - Block header = getSampleBlock(); + Block header = metadata_snapshot->getSampleBlock(); Columns fin_columns; fin_columns.reserve(res_columns.size()); @@ -203,7 +203,7 @@ Pipes StorageSystemReplicas::read( Chunk chunk(std::move(fin_columns), num_rows); Pipes pipes; - pipes.emplace_back(std::make_shared(getSampleBlock(), std::move(chunk))); + pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlock(), std::move(chunk))); return pipes; } diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index a80747c1fa1..44252a788b9 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -32,7 +32,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n Pipes StorageSystemStoragePolicies::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -78,7 +78,7 @@ Pipes StorageSystemStoragePolicies::read( Chunk chunk(std::move(res_columns), num_rows); Pipes pipes; - pipes.emplace_back(std::make_shared(getSampleBlock(), std::move(chunk))); + pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlock(), std::move(chunk))); return pipes; } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index f04b3ea20c9..b33886ce179 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -448,7 +448,7 @@ private: Pipes StorageSystemTables::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, @@ -461,7 +461,7 @@ 
Pipes StorageSystemTables::read(
     NameSet names_set(column_names.begin(), column_names.end());

-    Block sample_block = getSampleBlock();
+    Block sample_block = metadata_snapshot->getSampleBlock();
     Block res_block;

     std::vector<UInt8> columns_mask(sample_block.columns());

From ccc2bda66666f1ac548c04f28dcdd465b00d20d5 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Tue, 16 Jun 2020 19:55:04 +0300
Subject: [PATCH 109/318] getConstraints() in StorageInMemoryMetadata
 (suspicious commit, but pretends to work)

---
 src/Interpreters/InterpreterCreateQuery.cpp |  7 +--
 src/Interpreters/InterpreterInsertQuery.cpp |  4 +-
 src/Storages/IStorage.cpp                   |  5 --
 src/Storages/IStorage.h                     |  3 +-
 .../ReplicatedMergeTreeTableMetadata.cpp    |  8 +--
 .../ReplicatedMergeTreeTableMetadata.h      |  2 +-
 src/Storages/StorageInMemoryMetadata.cpp    |  1 +
 src/Storages/StorageReplicatedMergeTree.cpp | 51 ++++++++++---------
 src/Storages/StorageReplicatedMergeTree.h   |  6 +--
 9 files changed, 43 insertions(+), 44 deletions(-)

diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index 5d8c43aed0d..bb82c94a764 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -430,14 +430,15 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS
         /// as_storage->getColumns() and setEngine(...) must be called under structure lock of other_table for CREATE ... AS other_table.
         as_storage_lock = as_storage->lockStructureForShare(
             false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
-        properties.columns = as_storage->getColumns();
+        auto as_storage_metadata = as_storage->getInMemoryMetadataPtr();
+        properties.columns = as_storage_metadata->getColumns();

         /// Secondary indices make sense only for MergeTree family of storage engines.
         /// We should not copy them for other storages.
         if (create.storage && endsWith(create.storage->engine->name, "MergeTree"))
-            properties.indices = as_storage->getSecondaryIndices();
+            properties.indices = as_storage_metadata->getSecondaryIndices();

-        properties.constraints = as_storage->getConstraints();
+        properties.constraints = as_storage_metadata->getConstraints();
     }
     else if (create.select)
     {
diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp
index 443e2714ec7..e7fdf80e297 100644
--- a/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/src/Interpreters/InterpreterInsertQuery.cpp
@@ -237,9 +237,9 @@ BlockIO InterpreterInsertQuery::execute()
             /// Note that we wrap transforms one on top of another, so we write them in reverse of data processing order.

             /// Checking constraints. It must be done after calculation of all defaults, so we can check them on calculated columns.
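Because the output streams wrap one another, the constraint stream added in the hunk below runs before the storage writer in data order even though it is constructed after it. A hedged sketch of the stacking, assuming the stream class whose template argument was lost in this rendering is CheckConstraintsBlockOutputStream:

    BlockOutputStreamPtr out = table->write(query_ptr, metadata_snapshot, context);  /// innermost: the actual writer
    if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty())
        out = std::make_shared<CheckConstraintsBlockOutputStream>(
            query.table_id, out, out->getHeader(), constraints, context);
    /// Blocks now pass the constraint check first, then reach the writer.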
- if (const auto & constraints = table->getConstraints(); !constraints.empty()) + if (const auto & constraints = metadata_snapshot->getConstraints(); !constraints.empty()) out = std::make_shared( - query.table_id, out, out->getHeader(), table->getConstraints(), context); + query.table_id, out, out->getHeader(), metadata_snapshot->getConstraints(), context); /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 43e9a5dd040..1fb3e095229 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -47,11 +47,6 @@ bool IStorage::hasSecondaryIndices() const return !metadata->secondary_indices.empty(); } -const ConstraintsDescription & IStorage::getConstraints() const -{ - return metadata->constraints; -} - namespace { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index df1e1685a2e..e45d6a1128b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -130,7 +130,7 @@ public: virtual bool hasEvenlyDistributedRead() const { return false; } /// Returns true if there is set table TTL, any column TTL or any move TTL. - virtual bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); } + bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); } /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. @@ -144,7 +144,6 @@ public: /// thread-unsafe part. lockStructure must be acquired /// Has at least one non primary index bool hasSecondaryIndices() const; - const ConstraintsDescription & getConstraints() const; /// Storage settings ASTPtr getSettingsChanges() const; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 2444affdbff..820f41326f1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -23,7 +23,7 @@ static String formattedAST(const ASTPtr & ast) return ss.str(); } -ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTreeData & data) +ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot) { if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; @@ -53,15 +53,15 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) partition_key = formattedAST(data.getPartitionKey().expression_list_ast); - ttl_table = formattedAST(data.getTableTTLs().definition_ast); + ttl_table = formattedAST(metadata_snapshot->getTableTTLs().definition_ast); - skip_indices = data.getSecondaryIndices().toString(); + skip_indices = metadata_snapshot->getSecondaryIndices().toString(); if (data.canUseAdaptiveGranularity()) index_granularity_bytes = data_settings->index_granularity_bytes; else index_granularity_bytes = 0; - constraints = data.getConstraints().toString(); + constraints = metadata_snapshot->getConstraints().toString(); } void ReplicatedMergeTreeTableMetadata::write(WriteBuffer & out) const diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h 
b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 280a8c8b403..f7174140ee1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -32,7 +32,7 @@ struct ReplicatedMergeTreeTableMetadata UInt64 index_granularity_bytes; ReplicatedMergeTreeTableMetadata() = default; - explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data); + explicit ReplicatedMergeTreeTableMetadata(const MergeTreeData & data, const StorageMetadataPtr & metadata_snapshot); void read(ReadBuffer & in); static ReplicatedMergeTreeTableMetadata parse(const String & s); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index cce3911370d..359d561cd1f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -290,4 +290,5 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns(const Names & column_nam return res; } + } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8ae5a887013..170a77e3508 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -248,6 +248,8 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( return; } + auto metadata_snapshot = getInMemoryMetadataPtr(); + if (!attach) { if (!getDataParts().empty()) @@ -255,21 +257,21 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( try { - bool is_first_replica = createTableIfNotExists(); + bool is_first_replica = createTableIfNotExists(metadata_snapshot); /// We have to check granularity on other replicas. If it's fixed we /// must create our new replica with fixed granularity and store this /// information in /replica/metadata. other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); - checkTableStructure(zookeeper_path); + checkTableStructure(zookeeper_path, metadata_snapshot); Coordination::Stat metadata_stat; current_zookeeper->get(zookeeper_path + "/metadata", &metadata_stat); metadata_version = metadata_stat.version; if (!is_first_replica) - createReplica(); + createReplica(metadata_snapshot); } catch (...) { @@ -288,11 +290,11 @@ StorageReplicatedMergeTree::StorageReplicatedMergeTree( { /// We have to check shared node granularity before we create ours. other_replicas_fixed_granularity = checkFixedGranualrityInZookeeper(); - ReplicatedMergeTreeTableMetadata current_metadata(*this); + ReplicatedMergeTreeTableMetadata current_metadata(*this, metadata_snapshot); current_zookeeper->createOrUpdate(replica_path + "/metadata", current_metadata.toString(), zkutil::CreateMode::Persistent); } - checkTableStructure(replica_path); + checkTableStructure(replica_path, metadata_snapshot); checkParts(skip_sanity_checks); if (current_zookeeper->exists(replica_path + "/metadata_version")) @@ -418,7 +420,7 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() } -bool StorageReplicatedMergeTree::createTableIfNotExists() +bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot) { auto zookeeper = getZooKeeper(); zookeeper->createAncestors(zookeeper_path); @@ -483,7 +485,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() LOG_DEBUG(log, "Creating table {}", zookeeper_path); /// We write metadata of table so that the replicas can check table parameters with them. 
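Both directions of that check ride on the same serialized form, as the hunks that follow show: the first replica publishes a string built from its snapshot, and checkTableStructure() rebuilds one from the local snapshot to diff against the ZooKeeper copy. A condensed sketch stitched from those calls, with error handling elided:

    /// Writer side (createTableIfNotExists): publish the snapshot's view.
    String metadata_str = ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString();

    /// Reader side (checkTableStructure): compare the local snapshot with ZooKeeper.
    ReplicatedMergeTreeTableMetadata old_metadata(*this, metadata_snapshot);
    Coordination::Stat metadata_stat;
    String zk_metadata = zookeeper->get(zookeeper_prefix + "/metadata", &metadata_stat);
    old_metadata.checkAndFindDiff(ReplicatedMergeTreeTableMetadata::parse(zk_metadata));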
- String metadata_str = ReplicatedMergeTreeTableMetadata(*this).toString(); + String metadata_str = ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(); Coordination::Requests ops; ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path, "", zkutil::CreateMode::Persistent)); @@ -552,7 +554,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists() throw Exception("Cannot create table, because it is created concurrently every time or because of logical error", ErrorCodes::LOGICAL_ERROR); } -void StorageReplicatedMergeTree::createReplica() +void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metadata_snapshot) { auto zookeeper = getZooKeeper(); @@ -588,7 +590,7 @@ void StorageReplicatedMergeTree::createReplica() zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/is_lost", is_lost_value, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this).toString(), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(), zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), zkutil::CreateMode::Persistent)); @@ -728,11 +730,11 @@ void StorageReplicatedMergeTree::drop() /** Verify that list of columns and table storage_settings_ptr match those specified in ZK (/ metadata). * If not, throw an exception. */ -void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_prefix) +void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot) { auto zookeeper = getZooKeeper(); - ReplicatedMergeTreeTableMetadata old_metadata(*this); + ReplicatedMergeTreeTableMetadata old_metadata(*this, metadata_snapshot); Coordination::Stat metadata_stat; String metadata_str = zookeeper->get(zookeeper_prefix + "/metadata", &metadata_stat); @@ -3624,7 +3626,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer LOG_INFO(log, "Metadata changed in ZooKeeper. 
Applying changes locally."); - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this).checkAndFindDiff(metadata_from_entry); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); setTableStructure(std::move(columns_from_entry), metadata_diff); metadata_version = entry.alter_version; @@ -3683,24 +3685,24 @@ void StorageReplicatedMergeTree::alter( throw Exception("Can't ALTER readonly table", ErrorCodes::TABLE_IS_READ_ONLY); - StorageInMemoryMetadata current_metadata = getInMemoryMetadata(); + auto current_metadata = getInMemoryMetadataPtr(); - StorageInMemoryMetadata future_metadata = current_metadata; + StorageInMemoryMetadata future_metadata = *current_metadata; params.apply(future_metadata, query_context); - ReplicatedMergeTreeTableMetadata future_metadata_in_zk(*this); - if (ast_to_str(future_metadata.sorting_key.definition_ast) != ast_to_str(current_metadata.sorting_key.definition_ast)) + ReplicatedMergeTreeTableMetadata future_metadata_in_zk(*this, current_metadata); + if (ast_to_str(future_metadata.sorting_key.definition_ast) != ast_to_str(current_metadata->sorting_key.definition_ast)) future_metadata_in_zk.sorting_key = serializeAST(*future_metadata.sorting_key.expression_list_ast); - if (ast_to_str(future_metadata.table_ttl.definition_ast) != ast_to_str(current_metadata.table_ttl.definition_ast)) + if (ast_to_str(future_metadata.table_ttl.definition_ast) != ast_to_str(current_metadata->table_ttl.definition_ast)) future_metadata_in_zk.ttl_table = serializeAST(*future_metadata.table_ttl.definition_ast); String new_indices_str = future_metadata.secondary_indices.toString(); - if (new_indices_str != current_metadata.secondary_indices.toString()) + if (new_indices_str != current_metadata->secondary_indices.toString()) future_metadata_in_zk.skip_indices = new_indices_str; String new_constraints_str = future_metadata.constraints.toString(); - if (new_constraints_str != current_metadata.constraints.toString()) + if (new_constraints_str != current_metadata->constraints.toString()) future_metadata_in_zk.constraints = new_constraints_str; Coordination::Requests ops; @@ -3711,14 +3713,15 @@ void StorageReplicatedMergeTree::alter( String new_columns_str = future_metadata.columns.toString(); ops.emplace_back(zkutil::makeSetRequest(zookeeper_path + "/columns", new_columns_str, -1)); - if (ast_to_str(current_metadata.settings_changes) != ast_to_str(future_metadata.settings_changes)) + if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) { lockStructureExclusively( table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// Just change settings - current_metadata.settings_changes = future_metadata.settings_changes; - changeSettings(current_metadata.settings_changes, table_lock_holder); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, current_metadata); + StorageInMemoryMetadata metadata_copy = *current_metadata; + metadata_copy.settings_changes = future_metadata.settings_changes; + changeSettings(metadata_copy.settings_changes, table_lock_holder); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(query_context, table_id, metadata_copy); } /// We can be sure, that in case of successfull commit in zookeeper our @@ -3733,7 +3736,7 @@ void StorageReplicatedMergeTree::alter( alter_entry->create_time = time(nullptr); auto maybe_mutation_commands = 
params.getMutationCommands( - current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify, query_context); + *current_metadata, query_context.getSettingsRef().materialize_ttl_after_modify, query_context); alter_entry->have_mutation = !maybe_mutation_commands.empty(); ops.emplace_back(zkutil::makeCreateRequest( diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 50530070d19..49dc09dbcf4 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -301,17 +301,17 @@ private: /** Creates the minimum set of nodes in ZooKeeper and create first replica. * Returns true if was created, false if exists. */ - bool createTableIfNotExists(); + bool createTableIfNotExists(const StorageMetadataPtr & metadata_snapshot); /** Creates a replica in ZooKeeper and adds to the queue all that it takes to catch up with the rest of the replicas. */ - void createReplica(); + void createReplica(const StorageMetadataPtr & metadata_snapshot); /** Create nodes in the ZK, which must always be, but which might not exist when older versions of the server are running. */ void createNewZooKeeperNodes(); - void checkTableStructure(const String & zookeeper_prefix); + void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); /// A part of ALTER: apply metadata changes only (data parts are altered separately). /// Must be called under IStorage::lockStructureForAlter() lock. From fa60903620b46dccc310ecca309b0e67a53600a6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 16 Jun 2020 21:41:11 +0300 Subject: [PATCH 110/318] Fix race condition --- src/Storages/StorageBuffer.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b08e4e93bed..f8df14aa482 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -160,13 +160,14 @@ Pipes StorageBuffer::read( { auto destination = DatabaseCatalog::instance().getTable(destination_id, context); - auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); if (destination.get() == this) throw Exception("Destination table is myself. 
Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); auto destination_lock = destination->lockStructureForShare( false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); + const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [metadata_snapshot, destination](const String& column_name) { const auto & dest_columns = destination->getColumns(); From 8b50e3450b93e436c5d205ad9e4905c735ad5291 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Tue, 16 Jun 2020 23:01:15 +0300 Subject: [PATCH 111/318] move the default endpoint to config --- base/daemon/SentryWriter.cpp | 2 +- programs/server/config.xml | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index 0524285ea42..d979c671290 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -96,7 +96,7 @@ void SentryWriter::initialize(Poco::Util::LayeredConfiguration & config) { const std::filesystem::path & default_tmp_path = std::filesystem::path(config.getString("tmp_path", Poco::Path::temp())) / "sentry"; const std::string & endpoint - = config.getString("send_crash_reports.endpoint", "https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277"); + = config.getString("send_crash_reports.endpoint"); const std::string & temp_folder_path = config.getString("send_crash_reports.tmp_path", default_tmp_path); Poco::File(temp_folder_path).createDirectories(); diff --git a/programs/server/config.xml b/programs/server/config.xml index e5482a074a3..d1226e6f09c 100644 --- a/programs/server/config.xml +++ b/programs/server/config.xml @@ -52,6 +52,9 @@ false false + + + https://6f33034cfe684dd7a3ab9875e57b1c8d@o388870.ingest.sentry.io/5226277 From 102628ff099b65c2bd764b2ea66ef8bcd1c606f5 Mon Sep 17 00:00:00 2001 From: Ivan Blinkov Date: Wed, 17 Jun 2020 10:54:06 +0300 Subject: [PATCH 112/318] remove extra line --- base/daemon/SentryWriter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/base/daemon/SentryWriter.cpp b/base/daemon/SentryWriter.cpp index d979c671290..b59df1ba55c 100644 --- a/base/daemon/SentryWriter.cpp +++ b/base/daemon/SentryWriter.cpp @@ -226,7 +226,6 @@ void SentryWriter::onFault(int sig, const siginfo_t & info, const ucontext_t & c sentry_value_t threads = sentry_value_new_object(); sentry_value_set_by_key(threads, "values", values); - sentry_value_set_by_key(event, "threads", threads); LOG_INFO(logger, "Sending crash report"); From 62f2c17a668d85cfb88a6137a14997cda1e35d5b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 12:38:47 +0300 Subject: [PATCH 113/318] Secondary indices in StorageInMemoryMetadata --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.h | 12 +++++++--- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- src/Interpreters/MutationsInterpreter.cpp | 4 ++-- src/Storages/IStorage.cpp | 11 ---------- src/Storages/IStorage.h | 6 +---- src/Storages/MergeTree/MergeTreeData.cpp | 22 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 7 +++--- .../MergeTree/MergeTreeDataMergerMutator.cpp | 21 +++++++++--------- .../MergeTree/MergeTreeDataMergerMutator.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 6 ++--- .../MergeTree/MergedBlockOutputStream.cpp | 2 +- .../MergeTree/StorageFromMergeTreeDataPart.h | 5 +++-- src/Storages/StorageBuffer.cpp | 6 +++-- 
src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageMaterializedView.h | 6 +++-- src/Storages/StorageMerge.cpp | 6 +++-- src/Storages/StorageMerge.h | 3 ++- 19 files changed, 66 insertions(+), 62 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 24c71e276d6..039001796cc 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -356,7 +356,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) const IAST & args = *func->arguments; const ASTPtr & left_in_operand = args.children.at(0); - if (storage()->mayBenefitFromIndexForIn(left_in_operand, context)) + if (storage()->mayBenefitFromIndexForIn(left_in_operand, context, metadata_snapshot)) { const ASTPtr & arg = args.children.at(1); if (arg->as() || arg->as()) diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index c69cb61162f..1cc1b33bad1 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -11,7 +11,6 @@ #include #include - namespace DB { @@ -32,6 +31,9 @@ class ASTExpressionList; class ASTSelectQuery; struct ASTTablesInSelectQueryElement; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr; + /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block); @@ -232,11 +234,14 @@ public: const ASTPtr & query_, const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, const Context & context_, + const StorageMetadataPtr & metadata_snapshot_, const NameSet & required_result_columns_ = {}, bool do_global_ = false, const SelectQueryOptions & options_ = {}) - : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, options_.subquery_depth, do_global_) - , required_result_columns(required_result_columns_), query_options(options_) + : ExpressionAnalyzer(query_, syntax_analyzer_result_, context_, options_.subquery_depth, do_global_) + , metadata_snapshot(metadata_snapshot_) + , required_result_columns(required_result_columns_) + , query_options(options_) { } @@ -260,6 +265,7 @@ public: void appendProjectResult(ExpressionActionsChain & chain) const; private: + StorageMetadataPtr metadata_snapshot; /// If non-empty, ignore all expressions not from this list. 
NameSet required_result_columns; SelectQueryOptions query_options; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f73245179ce..331093b9d53 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -130,7 +130,7 @@ String InterpreterSelectQuery::generateFilterActions( /// Using separate expression analyzer to prevent any possible alias injection auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage)); - SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context); + SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context, metadata_snapshot); actions = analyzer.simpleSelectActions(); return expr_list->children.at(0)->getColumnName(); @@ -336,7 +336,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( context->getQueryContext().addScalar(it.first, it.second); query_analyzer = std::make_unique( - query_ptr, syntax_analyzer_result, *context, + query_ptr, syntax_analyzer_result, *context, metadata_snapshot, NameSet(required_result_column_names.begin(), required_result_column_names.end()), !options.only_analyze, options); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index ce47ce6e476..7bf54d20a61 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -294,8 +294,8 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) throw Exception("Empty mutation commands list", ErrorCodes::LOGICAL_ERROR); - const ColumnsDescription & columns_desc = storage->getColumns(); - const IndicesDescription & indices_desc = storage->getSecondaryIndices(); + const ColumnsDescription & columns_desc = metadata_snapshot->getColumns(); + const IndicesDescription & indices_desc = metadata_snapshot->getSecondaryIndices(); NamesAndTypesList all_columns = columns_desc.getAllPhysical(); NameSet updated_columns; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 1fb3e095229..7d50025faff 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -37,17 +37,6 @@ const ColumnsDescription & IStorage::getColumns() const return metadata->columns; } -const IndicesDescription & IStorage::getSecondaryIndices() const -{ - return metadata->secondary_indices; -} - -bool IStorage::hasSecondaryIndices() const -{ - return !metadata->secondary_indices.empty(); -} - - namespace { #if !defined(ARCADIA_BUILD) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e45d6a1128b..78d9b7d2013 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -140,10 +140,6 @@ public: public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - const IndicesDescription & getSecondaryIndices() const; - /// Has at least one non primary index - bool hasSecondaryIndices() const; - /// Storage settings ASTPtr getSettingsChanges() const; @@ -413,7 +409,7 @@ public: virtual bool supportsIndexForIn() const { return false; } /// Provides a hint that the storage engine may evaluate the IN-condition by using an index. 
- virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */) const { return false; } + virtual bool mayBenefitFromIndexForIn(const ASTPtr & /* left_in_operand */, const Context & /* query_context */, const StorageMetadataPtr & /* metadata_snapshot */) const { return false; } /// Checks validity of the data virtual CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) { throw Exception("Check query is not supported for " + getName() + " storage", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8971b50a0fd..143ce44da5e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -408,14 +408,14 @@ ExpressionActionsPtr getCombinedIndicesExpression( } -ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression() const +ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const { - return getCombinedIndicesExpression(getPrimaryKey(), getSecondaryIndices(), getColumns(), global_context); + return getCombinedIndicesExpression(getPrimaryKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); } -ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression() const +ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const { - return getCombinedIndicesExpression(getSortingKey(), getSecondaryIndices(), getColumns(), global_context); + return getCombinedIndicesExpression(getSortingKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); } @@ -1237,9 +1237,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S { /// Check that needed transformations can be applied to the list of columns without considering type conversions. StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); commands.apply(new_metadata, global_context); - if (getSecondaryIndices().empty() && !new_metadata.secondary_indices.empty() && - !settings.allow_experimental_data_skipping_indices) + if (old_metadata.getSecondaryIndices().empty() && !new_metadata.secondary_indices.empty() + && !settings.allow_experimental_data_skipping_indices) throw Exception("You must set the setting `allow_experimental_data_skipping_indices` to 1 " \ "before using data skipping indices.", ErrorCodes::BAD_ARGUMENTS); @@ -1259,7 +1260,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S columns_alter_type_forbidden.insert(col); } - for (const auto & index : getSecondaryIndices()) + for (const auto & index : old_metadata.getSecondaryIndices()) { for (const String & col : index.expression->getRequiredColumns()) columns_alter_type_forbidden.insert(col); @@ -2932,7 +2933,8 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const A return false; } -bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const +bool MergeTreeData::mayBenefitFromIndexForIn( + const ASTPtr & left_in_operand, const Context &, const StorageMetadataPtr & metadata_snapshot) const { /// Make sure that the left side of the IN operator contain part of the key. 
/// If there is a tuple on the left side of the IN operator, at least one item of the tuple @@ -2945,7 +2947,7 @@ bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, con { if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item)) return true; - for (const auto & index : getSecondaryIndices()) + for (const auto & index : metadata_snapshot->getSecondaryIndices()) if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item)) return true; } @@ -2954,7 +2956,7 @@ bool MergeTreeData::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, con } else { - for (const auto & index : getSecondaryIndices()) + for (const auto & index : metadata_snapshot->getSecondaryIndices()) if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand)) return true; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4be9f450535..22d2a9da79c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -350,7 +350,8 @@ public: bool supportsSettings() const override { return true; } NamesAndTypesList getVirtuals() const override; - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &) const override; + bool + mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context &, const StorageMetadataPtr & metadata_snapshot) const override; /// Load the set of data parts from disk. Call once - immediately after the object is created. void loadDataParts(bool skip_sanity_checks); @@ -643,8 +644,8 @@ public: Int64 minmax_idx_date_column_pos = -1; /// In a common case minmax index includes a date column. Int64 minmax_idx_time_column_pos = -1; /// In other cases, minmax index often includes a dateTime column. - ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression() const; - ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression() const; + ExpressionActionsPtr getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; + ExpressionActionsPtr getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const; std::optional selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & ttl_infos, time_t time_of_move) const; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 3bff5c9f505..afd1586ac6c 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -612,7 +612,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor NamesAndTypesList merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( - storage_columns, data.getSortingKey().expression, data.getSecondaryIndices(), + storage_columns, data.getSortingKey().expression, metadata_snapshot->getSecondaryIndices(), data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); auto single_disk_volume = std::make_shared("volume_" + future_part.name, disk); @@ -798,10 +798,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merged_stream = std::make_shared(merged_stream, data, new_data_part, time_of_merge, force_ttl); - if (data.hasSecondaryIndices()) + if (metadata_snapshot->hasSecondaryIndices()) { - const auto & indices = data.getSecondaryIndices(); - merged_stream = std::make_shared(merged_stream, 
indices.getSingleExpressionForIndices(data.getColumns(), data.global_context)); + const auto & indices = metadata_snapshot->getSecondaryIndices(); + merged_stream = std::make_shared(merged_stream, indices.getSingleExpressionForIndices(metadata_snapshot->getColumns(), data.global_context)); merged_stream = std::make_shared(merged_stream); } @@ -810,7 +810,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor new_data_part, metadata_snapshot, merging_columns, - index_factory.getMany(data.getSecondaryIndices()), + index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec, merged_column_to_size, data_settings->min_merge_bytes_to_use_direct_io, @@ -1084,7 +1084,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor /// All columns from part are changed and may be some more that were missing before in part if (isCompactPart(source_part) || source_part->getColumns().isSubsetOf(updated_header.getNamesAndTypesList())) { - auto part_indices = getIndicesForNewDataPart(data.getSecondaryIndices(), for_file_renames); + auto part_indices = getIndicesForNewDataPart(metadata_snapshot->getSecondaryIndices(), for_file_renames); mutateAllPartColumns( new_data_part, metadata_snapshot, @@ -1101,7 +1101,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor else /// TODO: check that we modify only non-key columns in this case. { /// We will modify only some of the columns. Other columns and key values can be copied as-is. - auto indices_to_recalc = getIndicesToRecalculate(in, updated_header.getNamesAndTypesList(), context); + auto indices_to_recalc = getIndicesToRecalculate(in, updated_header.getNamesAndTypesList(), metadata_snapshot, context); NameSet files_to_skip = collectFilesToSkip(updated_header, indices_to_recalc, mrk_extension); NameToNameVector files_to_rename = collectFilesForRenames(source_part, for_file_renames, mrk_extension); @@ -1524,6 +1524,7 @@ MergeTreeIndices MergeTreeDataMergerMutator::getIndicesForNewDataPart( std::set MergeTreeDataMergerMutator::getIndicesToRecalculate( BlockInputStreamPtr & input_stream, const NamesAndTypesList & updated_columns, + const StorageMetadataPtr & metadata_snapshot, const Context & context) const { /// Checks if columns used in skipping indexes modified. @@ -1532,7 +1533,7 @@ std::set MergeTreeDataMergerMutator::getIndicesToRecalculate( ASTPtr indices_recalc_expr_list = std::make_shared(); for (const auto & col : updated_columns.getNames()) { - const auto & indices = data.getSecondaryIndices(); + const auto & indices = metadata_snapshot->getSecondaryIndices(); for (size_t i = 0; i < indices.size(); ++i) { const auto & index = indices[i]; @@ -1597,9 +1598,9 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( if (mutating_stream == nullptr) throw Exception("Cannot mutate part columns with uninitialized mutations stream. 
It's a bug", ErrorCodes::LOGICAL_ERROR); - if (data.hasPrimaryKey() || data.hasSecondaryIndices()) + if (data.hasPrimaryKey() || metadata_snapshot->hasSecondaryIndices()) mutating_stream = std::make_shared( - std::make_shared(mutating_stream, data.getPrimaryKeyAndSkipIndicesExpression())); + std::make_shared(mutating_stream, data.getPrimaryKeyAndSkipIndicesExpression(metadata_snapshot))); if (need_remove_expired_values) mutating_stream = std::make_shared(mutating_stream, data, new_data_part, time_of_mutation, true); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 3625c9bbe26..7828f79ea33 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -177,6 +177,7 @@ private: std::set getIndicesToRecalculate( BlockInputStreamPtr & input_stream, const NamesAndTypesList & updated_columns, + const StorageMetadataPtr & metadata_snapshot, const Context & context) const; /// Override all columns of new part using mutating_stream diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index ac2f4851185..9fd020d0317 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -550,7 +550,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::vector> useful_indices; - for (const auto & index : data.getSecondaryIndices()) + for (const auto & index : metadata_snapshot->getSecondaryIndices()) { auto index_helper = MergeTreeIndexFactory::instance().get(index); auto condition = index_helper->createIndexCondition(query_info, context); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 71501a0e19a..284ea02097b 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -262,8 +262,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->volume->getDisk()->createDirectories(full_path); /// If we need to calculate some columns to sort. 
- if (data.hasSortingKey() || data.hasSecondaryIndices()) - data.getSortingKeyAndSkipIndicesExpression()->execute(block); + if (data.hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) + data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot)->execute(block); Names sort_columns = data.getSortingKeyColumns(); SortDescription sort_description; @@ -302,7 +302,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa auto compression_codec = data.global_context.chooseCompressionCodec(0, 0); const auto & index_factory = MergeTreeIndexFactory::instance(); - MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(data.getSecondaryIndices()), compression_codec); + MergedBlockOutputStream out(new_data_part, metadata_snapshot, columns, index_factory.getMany(metadata_snapshot->getSecondaryIndices()), compression_codec); out.writePrefix(); out.writeWithPermutation(block, perm_ptr); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index c768678c454..9cbdc338367 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -164,7 +164,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm return; std::unordered_set skip_indexes_column_names_set; - for (const auto & index : storage.getSecondaryIndices()) + for (const auto & index : metadata_snapshot->getSecondaryIndices()) std::copy(index.column_names.cbegin(), index.column_names.cend(), std::inserter(skip_indexes_column_names_set, skip_indexes_column_names_set.end())); Names skip_indexes_column_names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); diff --git a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h index 45ee947b81f..17891fde34a 100644 --- a/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h +++ b/src/Storages/MergeTree/StorageFromMergeTreeDataPart.h @@ -35,9 +35,10 @@ public: bool supportsIndexForIn() const override { return true; } - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override + bool mayBenefitFromIndexForIn( + const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & metadata_snapshot) const override { - return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context); + return part->storage.mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } NamesAndTypesList getVirtuals() const override diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index f8df14aa482..4882b5fdc1c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -447,7 +447,8 @@ BlockOutputStreamPtr StorageBuffer::write(const ASTPtr & /*query*/, const Storag } -bool StorageBuffer::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const +bool StorageBuffer::mayBenefitFromIndexForIn( + const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const { if (!destination_id) return false; @@ -457,7 +458,8 @@ bool StorageBuffer::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, con if (destination.get() == this) throw Exception("Destination table is myself. 
Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - return destination->mayBenefitFromIndexForIn(left_in_operand, query_context); + /// TODO alesap (check destination metadata) + return destination->mayBenefitFromIndexForIn(left_in_operand, query_context, destination->getInMemoryMetadataPtr()); } diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 7cd73dc556c..403b6c53172 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -84,7 +84,7 @@ public: bool supportsFinal() const override { return true; } bool supportsIndexForIn() const override { return true; } - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override; + bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & metadata_snapshot) const override; void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 672be800c8f..ef895ff0165 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -26,9 +26,11 @@ public: bool supportsFinal() const override { return getTargetTable()->supportsFinal(); } bool supportsIndexForIn() const override { return getTargetTable()->supportsIndexForIn(); } bool supportsParallelInsert() const override { return getTargetTable()->supportsParallelInsert(); } - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override + bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & /* metadata_snapshot */) const override { - return getTargetTable()->mayBenefitFromIndexForIn(left_in_operand, query_context); + auto target_table = getTargetTable(); + auto metadata_snapshot = target_table->getInMemoryMetadataPtr(); + return target_table->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot); } BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e24e5986994..e47cde8de52 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -81,7 +81,7 @@ bool StorageMerge::isRemote() const } -bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const +bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & /*metadata_snapshot*/) const { /// It's beneficial if it is true for at least one table. 
StorageListWithLocks selected_tables = getSelectedTables( @@ -90,7 +90,9 @@ bool StorageMerge::mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, cons size_t i = 0; for (const auto & table : selected_tables) { - if (std::get<0>(table)->mayBenefitFromIndexForIn(left_in_operand, query_context)) + auto storage_ptr = std::get<0>(table); + auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); + if (storage_ptr->mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot)) return true; ++i; diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 14bf83f8534..1ad22869e39 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -44,7 +44,8 @@ public: /// the structure of sub-tables is not checked void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; - bool mayBenefitFromIndexForIn(const ASTPtr & left_in_operand, const Context & query_context) const override; + bool mayBenefitFromIndexForIn( + const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & metadata_snapshot) const override; private: String source_database; From ab61abccc1eb3901dc3154010add03e58caf3958 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 13:34:23 +0300 Subject: [PATCH 114/318] Partition key in StorageInMemoryMetadata --- src/Interpreters/MutationsInterpreter.cpp | 15 +++--- src/Storages/IStorage.cpp | 22 --------- src/Storages/IStorage.h | 12 ----- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 8 ++-- .../MergeTree/MergeTreeBlockOutputStream.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 46 ++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 12 ++--- .../MergeTree/MergeTreeDataWriter.cpp | 8 ++-- src/Storages/MergeTree/MergeTreeDataWriter.h | 2 +- src/Storages/MergeTree/MergeTreePartition.cpp | 11 +++-- .../ReplicatedMergeTreeBlockOutputStream.cpp | 2 +- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 21 +++++++++ src/Storages/StorageInMemoryMetadata.h | 11 +++++ src/Storages/StorageMergeTree.cpp | 14 ++++-- src/Storages/StorageReplicatedMergeTree.cpp | 19 ++++++-- src/Storages/System/StorageSystemColumns.cpp | 6 +-- src/Storages/System/StorageSystemTables.cpp | 3 +- 18 files changed, 118 insertions(+), 98 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 7bf54d20a61..50b68ba7ca3 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -214,7 +214,7 @@ MutationsInterpreter::MutationsInterpreter( select_interpreter = std::make_unique(mutation_ast, context, storage, limits); } -static NameSet getKeyColumns(const StoragePtr & storage) +static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) { const MergeTreeData * merge_tree_data = dynamic_cast(storage.get()); if (!merge_tree_data) @@ -222,7 +222,7 @@ static NameSet getKeyColumns(const StoragePtr & storage) NameSet key_columns; - for (const String & col : merge_tree_data->getColumnsRequiredForPartitionKey()) + for (const String & col : metadata_snapshot->getColumnsRequiredForPartitionKey()) key_columns.insert(col); for (const String & col : merge_tree_data->getColumnsRequiredForSortingKey()) @@ -239,15 +239,16 @@ static NameSet getKeyColumns(const StoragePtr & storage) } static void validateUpdateColumns( - const StoragePtr & storage, const NameSet & updated_columns, + 
const StoragePtr & storage, + const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, const std::unordered_map & column_to_affected_materialized) { - NameSet key_columns = getKeyColumns(storage); + NameSet key_columns = getKeyColumns(storage, metadata_snapshot); for (const String & column_name : updated_columns) { auto found = false; - for (const auto & col : storage->getColumns().getOrdinary()) + for (const auto & col : metadata_snapshot->getColumns().getOrdinary()) { if (col.name == column_name) { @@ -258,7 +259,7 @@ static void validateUpdateColumns( if (!found) { - for (const auto & col : storage->getColumns().getMaterialized()) + for (const auto & col : metadata_snapshot->getColumns().getMaterialized()) { if (col.name == column_name) throw Exception("Cannot UPDATE materialized column " + backQuote(column_name), ErrorCodes::CANNOT_UPDATE_COLUMN); @@ -326,7 +327,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(storage, updated_columns, column_to_affected_materialized); + validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized); } /// Columns, that we need to read for calculation of skip indices or TTL expressions. diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 7d50025faff..84afd2fcf1c 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,28 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const KeyDescription & IStorage::getPartitionKey() const -{ - return metadata->partition_key; -} - -bool IStorage::isPartitionKeyDefined() const -{ - return metadata->partition_key.definition_ast != nullptr; -} - -bool IStorage::hasPartitionKey() const -{ - return !metadata->partition_key.column_names.empty(); -} - -Names IStorage::getColumnsRequiredForPartitionKey() const -{ - if (hasPartitionKey()) - return metadata->partition_key.expression->getRequiredColumns(); - return {}; -} - const KeyDescription & IStorage::getSortingKey() const { return metadata->sorting_key; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 78d9b7d2013..5f08d48d4b0 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -427,18 +427,6 @@ public: /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } - /// Returns structure with partition key. - const KeyDescription & getPartitionKey() const; - /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. - ASTPtr getPartitionKeyAST() const { return metadata->partition_key.definition_ast; } - /// Storage has user-defined (in CREATE query) partition key. - bool isPartitionKeyDefined() const; - /// Storage has partition key. - bool hasPartitionKey() const; - /// Returns column names that need to be read to calculate partition key. - Names getColumnsRequiredForPartitionKey() const; - - /// Returns structure with sorting key. const KeyDescription & getSortingKey() const; /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. 
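The partition-key accessors removed from IStorage above reappear on StorageInMemoryMetadata (see the StorageInMemoryMetadata.cpp/.h hunks later in this patch), so every call site migrates to the snapshot. A minimal sketch of the new pattern, assuming only a storage reference in scope:

    /// Take one immutable snapshot and query it, so every read within the
    /// operation sees the same metadata version.
    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
    if (metadata_snapshot->hasPartitionKey())
    {
        const KeyDescription & partition_key = metadata_snapshot->getPartitionKey();
        Names required_columns = metadata_snapshot->getColumnsRequiredForPartitionKey();
    }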
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 287bf916c19..03b2dea23ba 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -496,7 +496,8 @@ void IMergeTreeDataPart::loadPartitionAndMinMaxIndex() minmax_idx.load(storage, volume->getDisk(), path); } - String calculated_partition_id = partition.getID(storage.getPartitionKey().sample_block); + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + String calculated_partition_id = partition.getID(metadata_snapshot->getPartitionKey().sample_block); if (calculated_partition_id != info.partition_id) throw Exception( "While loading part " + getFullPath() + ": calculated partition ID: " + calculated_partition_id @@ -840,6 +841,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const { String path = getFullRelativePath(); + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); const auto & pk = storage.getPrimaryKey(); if (!checksums.empty()) { @@ -851,7 +853,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const if (!checksums.files.count("count.txt")) throw Exception("No checksum for count.txt", ErrorCodes::NO_FILE_IN_DATA_PART); - if (storage.hasPartitionKey() && !checksums.files.count("partition.dat")) + if (metadata_snapshot->hasPartitionKey() && !checksums.files.count("partition.dat")) throw Exception("No checksum for partition.dat", ErrorCodes::NO_FILE_IN_DATA_PART); if (!isEmpty()) @@ -884,7 +886,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const { check_file_not_empty(volume->getDisk(), path + "count.txt"); - if (storage.hasPartitionKey()) + if (metadata_snapshot->hasPartitionKey()) check_file_not_empty(volume->getDisk(), path + "partition.dat"); for (const String & col_name : storage.minmax_idx_columns) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 1ea6b049bf6..5f774a97bce 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -16,7 +16,7 @@ void MergeTreeBlockOutputStream::write(const Block & block) { storage.delayInsertOrThrowIfNeeded(); - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block); + auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot); for (auto & current_block : part_blocks) { Stopwatch watch; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 143ce44da5e..b7e152fe6b4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -169,7 +169,7 @@ MergeTreeData::MergeTreeData( min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } - setProperties(metadata_, attach); + setProperties(metadata_, metadata_, attach); const auto settings = getSettings(); /// NOTE: using the same columns list as is read when performing actual merges. 
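The MergeTreeBlockOutputStream hunk above threads the snapshot through block splitting as well; end to end, the insert path now looks roughly like this (a sketch: storage, block and max_parts_per_block assumed in scope, writeTempPart() and renameTempPartAndAdd() as used elsewhere in this series, error handling omitted):

    /// One snapshot for the whole INSERT: splitting by partition key and
    /// writing the temporary parts both see the same metadata version.
    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
    auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot);
    for (auto & current_block : part_blocks)
    {
        auto part = storage.writer.writeTempPart(current_block, metadata_snapshot);
        storage.renameTempPartAndAdd(part, &storage.increment);
    }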
@@ -184,7 +184,7 @@ MergeTreeData::MergeTreeData( } - setTTLExpressions(metadata_); + setTTLExpressions(metadata_, metadata_); /// format_file always contained on any data path PathWithDisk version_file; @@ -274,7 +274,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam } } -void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, bool attach) const +void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & /*old_metadata*/, bool attach) const { if (!new_metadata.sorting_key.definition_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); @@ -381,9 +381,9 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata } -void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, bool attach) +void MergeTreeData::setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) { - checkProperties(new_metadata, attach); + checkProperties(new_metadata, old_metadata, attach); setInMemoryMetadata(new_metadata); } @@ -475,7 +475,7 @@ void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) } -void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_metadata) const +void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) const { auto new_column_ttls = new_metadata.column_ttls_by_name; @@ -483,8 +483,8 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta { NameSet columns_ttl_forbidden; - if (hasPartitionKey()) - for (const auto & col : getColumnsRequiredForPartitionKey()) + if (old_metadata.hasPartitionKey()) + for (const auto & col : old_metadata.getColumnsRequiredForPartitionKey()) columns_ttl_forbidden.insert(col); if (hasSortingKey()) @@ -517,9 +517,9 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta } /// Todo replace columns with TTL for columns -void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata) +void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) { - checkTTLExpressions(new_metadata); + checkTTLExpressions(new_metadata, old_metadata); //setColumnTTLs(new_metadata.column_ttls_by_name); //setTableTTLs(new_metadata.table_ttl); } @@ -1251,12 +1251,12 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S /// (and not as a part of some expression) and if the ALTER only affects column metadata. NameSet columns_alter_type_metadata_only; - if (hasPartitionKey()) + if (old_metadata.hasPartitionKey()) { /// Forbid altering partition key columns because it can change partition ID format. /// TODO: in some cases (e.g. adding an Enum value) a partition key column can still be ALTERed. /// We should allow it. 
- for (const String & col : getColumnsRequiredForPartitionKey()) + for (const String & col : old_metadata.getColumnsRequiredForPartitionKey()) columns_alter_type_forbidden.insert(col); } @@ -1284,7 +1284,7 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S columns_alter_type_forbidden.insert(merging_params.sign_column); std::map old_types; - for (const auto & column : getColumns().getAllPhysical()) + for (const auto & column : old_metadata.getColumns().getAllPhysical()) old_types.emplace(column.name, column.type.get()); @@ -1329,9 +1329,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S } } - checkProperties(new_metadata); + checkProperties(new_metadata, old_metadata); - checkTTLExpressions(new_metadata); + checkTTLExpressions(new_metadata, old_metadata); if (hasSettingsChanges()) { @@ -2450,7 +2450,8 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context /// Re-parse partition key fields using the information about expected field types. - size_t fields_count = getPartitionKey().sample_block.columns(); + auto metadata_snapshot = getInMemoryMetadataPtr(); + size_t fields_count = metadata_snapshot->getPartitionKey().sample_block.columns(); if (partition_ast.fields_count != fields_count) throw Exception( "Wrong number of fields in the partition expression: " + toString(partition_ast.fields_count) + @@ -2467,7 +2468,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, const Context ReadBufferFromMemory right_paren_buf(")", 1); ConcatReadBuffer buf({&left_paren_buf, &fields_buf, &right_paren_buf}); - auto input_stream = FormatFactory::instance().getInput("Values", buf, getPartitionKey().sample_block, context, context.getSettingsRef().max_block_size); + auto input_stream = FormatFactory::instance().getInput("Values", buf, metadata_snapshot->getPartitionKey().sample_block, context, context.getSettingsRef().max_block_size); auto block = input_stream->read(); if (!block || !block.rows()) @@ -2964,7 +2965,7 @@ bool MergeTreeData::mayBenefitFromIndexForIn( } } -MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & source_table) const +MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const { MergeTreeData * src_data = dynamic_cast(&source_table); if (!src_data) @@ -2972,7 +2973,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour " supports attachPartitionFrom only for MergeTree family of table engines." 
" Got " + source_table.getName(), ErrorCodes::NOT_IMPLEMENTED); - if (getColumns().getAllPhysical().sizeOfDifference(src_data->getColumns().getAllPhysical())) + if (my_snapshot->getColumns().getAllPhysical().sizeOfDifference(src_snapshot->getColumns().getAllPhysical())) throw Exception("Tables have different structure", ErrorCodes::INCOMPATIBLE_COLUMNS); auto query_to_string = [] (const ASTPtr & ast) @@ -2983,7 +2984,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour if (query_to_string(getSortingKeyAST()) != query_to_string(src_data->getSortingKeyAST())) throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); - if (query_to_string(getPartitionKeyAST()) != query_to_string(src_data->getPartitionKeyAST())) + if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) throw Exception("Tables have different partition key", ErrorCodes::BAD_ARGUMENTS); if (format_version != src_data->format_version) @@ -2992,9 +2993,10 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour return *src_data; } -MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const +MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData( + const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const { - return checkStructureAndGetMergeTreeData(*source_table); + return checkStructureAndGetMergeTreeData(*source_table, src_snapshot, my_snapshot); } MergeTreeData::MutableDataPartPtr MergeTreeData::cloneAndLoadDataPartOnSameDisk(const MergeTreeData::DataPartPtr & src_part, diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 22d2a9da79c..863b5ba1644 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -556,8 +556,8 @@ public: /// Extracts MergeTreeData of other *MergeTree* storage /// and checks that their structure suitable for ALTER TABLE ATTACH PARTITION FROM /// Tables structure should be locked. - MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table) const; - MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table) const; + MergeTreeData & checkStructureAndGetMergeTreeData(const StoragePtr & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; + MergeTreeData & checkStructureAndGetMergeTreeData(IStorage & source_table, const StorageMetadataPtr & src_snapshot, const StorageMetadataPtr & my_snapshot) const; MergeTreeData::MutableDataPartPtr cloneAndLoadDataPartOnSameDisk( const MergeTreeData::DataPartPtr & src_part, const String & tmp_part_prefix, const MergeTreePartInfo & dst_part_info); @@ -781,14 +781,14 @@ protected: /// The same for clearOldTemporaryDirectories. 
std::mutex clear_old_temporary_directories_mutex; - void checkProperties(const StorageInMemoryMetadata & new_metadata, bool attach = false) const; + void checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false) const; - void setProperties(const StorageInMemoryMetadata & new_metadata, bool attach = false); + void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false); void initPartitionKey(const KeyDescription & new_partition_key); - void checkTTLExpressions(const StorageInMemoryMetadata & new_metadata) const; - void setTTLExpressions(const StorageInMemoryMetadata & new_metadata); + void checkTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) const; + void setTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata); void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 284ea02097b..f96c9b48c4d 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -132,7 +132,7 @@ void updateTTL( } -BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block, size_t max_parts) +BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot) { BlocksWithPartition result; if (!block || !block.rows()) @@ -140,14 +140,14 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block data.check(block, true); - if (!data.hasPartitionKey()) /// Table is not partitioned. + if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned. { result.emplace_back(Block(block), Row()); return result; } Block block_copy = block; - const auto & partition_key = data.getPartitionKey(); + const auto & partition_key = metadata_snapshot->getPartitionKey(); partition_key.expression->execute(block_copy); ColumnRawPtrs partition_columns; @@ -206,7 +206,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa MergeTreePartition partition(std::move(block_with_partition.partition)); - MergeTreePartInfo new_part_info(partition.getID(data.getPartitionKey().sample_block), temp_index, temp_index, 0); + MergeTreePartInfo new_part_info(partition.getID(metadata_snapshot->getPartitionKey().sample_block), temp_index, temp_index, 0); String part_name; if (data.format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) { diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.h b/src/Storages/MergeTree/MergeTreeDataWriter.h index dabdcbd2148..c04a09185c5 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.h +++ b/src/Storages/MergeTree/MergeTreeDataWriter.h @@ -40,7 +40,7 @@ public: * (split rows by partition) * Works deterministically: if same block was passed, function will return same result in same order. */ - BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts); + BlocksWithPartition splitBlockIntoParts(const Block & block, size_t max_parts, const StorageMetadataPtr & metadata_snapshot); /** All rows must correspond to same partition. * Returns part with unique name starting with 'tmp_', yet not added to MergeTreeData. 
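checkProperties() and checkTTLExpressions() now validate an explicit old/new pair instead of reading the old state off the storage itself; assembled from the checkAlterIsPossible() hunk above (StorageMergeTree::alter below follows the same shape), the ALTER validation pattern is:

    /// Snapshot the current metadata, apply the ALTER to a copy, then
    /// validate the copy against the original rather than against live members.
    StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
    StorageInMemoryMetadata old_metadata = getInMemoryMetadata();
    commands.apply(new_metadata, global_context);

    checkProperties(new_metadata, old_metadata);
    checkTTLExpressions(new_metadata, old_metadata);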
diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 54e213fafac..cd73e9228fd 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -26,7 +26,7 @@ static std::unique_ptr openForReading(const DiskPtr & di String MergeTreePartition::getID(const MergeTreeData & storage) const { - return getID(storage.getPartitionKey().sample_block); + return getID(storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block); } /// NOTE: This ID is used to create part names which are then persisted in ZK and as directory names on the file system. @@ -89,7 +89,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffer & out, const FormatSettings & format_settings) const { - const auto & partition_key_sample = storage.getPartitionKey().sample_block; + const auto & partition_key_sample = storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block; size_t key_size = partition_key_sample.columns(); if (key_size == 0) @@ -124,10 +124,11 @@ void MergeTreePartition::serializeText(const MergeTreeData & storage, WriteBuffe void MergeTreePartition::load(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path) { - if (!storage.hasPartitionKey()) + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + if (!metadata_snapshot->hasPartitionKey()) return; - const auto & partition_key_sample = storage.getPartitionKey().sample_block; + const auto & partition_key_sample = metadata_snapshot->getPartitionKey().sample_block; auto partition_file_path = part_path + "partition.dat"; auto file = openForReading(disk, partition_file_path); value.resize(partition_key_sample.columns()); @@ -137,7 +138,7 @@ void MergeTreePartition::load(const MergeTreeData & storage, const DiskPtr & dis void MergeTreePartition::store(const MergeTreeData & storage, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const { - store(storage.getPartitionKey().sample_block, disk, part_path, checksums); + store(storage.getInMemoryMetadataPtr()->getPartitionKey().sample_block, disk, part_path, checksums); } void MergeTreePartition::store(const Block & partition_key_sample, const DiskPtr & disk, const String & part_path, MergeTreeDataPartChecksums & checksums) const diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 8319b0e018d..13df5ef23f1 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -130,7 +130,7 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) if (quorum) checkQuorumPrecondition(zookeeper); - auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block); + auto part_blocks = storage.writer.splitBlockIntoParts(block, max_parts_per_block, metadata_snapshot); for (auto & current_block : part_blocks) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 820f41326f1..1f62fba03a0 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -51,7 +51,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr data_format_version = 
data.format_version; if (data.format_version >= MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) - partition_key = formattedAST(data.getPartitionKey().expression_list_ast); + partition_key = formattedAST(metadata_snapshot->getPartitionKey().expression_list_ast); ttl_table = formattedAST(metadata_snapshot->getTableTTLs().definition_ast); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 359d561cd1f..8fbe415ead6 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -291,4 +291,25 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns(const Names & column_nam return res; } +const KeyDescription & StorageInMemoryMetadata::getPartitionKey() const +{ + return partition_key; +} + +bool StorageInMemoryMetadata::isPartitionKeyDefined() const +{ + return partition_key.definition_ast != nullptr; +} + +bool StorageInMemoryMetadata::hasPartitionKey() const +{ + return !partition_key.column_names.empty(); +} + +Names StorageInMemoryMetadata::getColumnsRequiredForPartitionKey() const +{ + if (hasPartitionKey()) + return partition_key.expression->getRequiredColumns(); + return {}; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 9f9154c48fb..8996f9fc1b9 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -112,6 +112,17 @@ struct StorageInMemoryMetadata Block getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const; /// ordinary + materialized + virtuals. Block getSampleBlockForColumns( const Names & column_names, const NamesAndTypesList & virtuals) const; /// ordinary + materialized + aliases + virtuals. + + /// Returns structure with partition key. + const KeyDescription & getPartitionKey() const; + /// Returns ASTExpressionList of partition key expression for storage or nullptr if there is none. + ASTPtr getPartitionKeyAST() const { return partition_key.definition_ast; } + /// Storage has user-defined (in CREATE query) partition key. + bool isPartitionKeyDefined() const; + /// Storage has partition key. + bool hasPartitionKey() const; + /// Returns column names that need to be read to calculate partition key. + Names getColumnsRequiredForPartitionKey() const; }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index cfa5c34bece..c13070fa4a5 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -259,6 +259,7 @@ void StorageMergeTree::alter( auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); auto maybe_mutation_commands = commands.getMutationCommands(new_metadata, context.getSettingsRef().materialize_ttl_after_modify, context); String mutation_file_name; Int64 mutation_version = -1; @@ -282,8 +283,8 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. 
- setProperties(new_metadata); - setTTLExpressions(new_metadata); + setProperties(new_metadata, old_metadata); + setTTLExpressions(new_metadata, old_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); @@ -1151,9 +1152,11 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con { auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); + auto my_metadata_snapshot = getInMemoryMetadataPtr(); Stopwatch watch; - MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table); + MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, my_metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, context); DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); @@ -1232,9 +1235,12 @@ void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const " should have the same storage policy of source table " + getStorageID().getNameForLogs() + ". " + getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " + dest_table_storage->getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::LOGICAL_ERROR); + + auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); + auto metadata_snapshot = getInMemoryMetadataPtr(); Stopwatch watch; - MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this); + MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this, metadata_snapshot, dest_metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, context); DataPartsVector src_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index a7ddf96d08c..fc1bd8538e5 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -758,6 +758,7 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff) { StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); if (new_columns != new_metadata.columns) { new_metadata.columns = new_columns; @@ -820,8 +821,8 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
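Every replacePartitionFrom()/movePartitionToTable() hunk above follows the same handshake, worth spelling out once as a sketch (names exactly as in the diff):

    /// Snapshot both tables up front; checkStructureAndGetMergeTreeData()
    /// then compares the two snapshots rather than two live storages.
    auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr();
    auto my_metadata_snapshot = getInMemoryMetadataPtr();
    MergeTreeData & src_data = checkStructureAndGetMergeTreeData(
        source_table, source_metadata_snapshot, my_metadata_snapshot);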
- setProperties(new_metadata); - setTTLExpressions(new_metadata); + setProperties(new_metadata, old_metadata); + setTTLExpressions(new_metadata, old_metadata); } @@ -1794,6 +1795,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) auto table_lock_holder_dst_table = lockStructureForShare( false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto dst_metadata_snapshot = getInMemoryMetadataPtr(); for (size_t i = 0; i < entry_replace.new_part_names.size(); ++i) { @@ -1843,10 +1845,11 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) return 0; } + auto src_metadata_snapshot = source_table->getInMemoryMetadataPtr(); MergeTreeData * src_data = nullptr; try { - src_data = &checkStructureAndGetMergeTreeData(source_table); + src_data = &checkStructureAndGetMergeTreeData(source_table, src_metadata_snapshot, dst_metadata_snapshot); } catch (Exception &) { @@ -5212,8 +5215,11 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ auto lock1 = lockStructureForShare(true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); + auto metadata_snapshot = getInMemoryMetadataPtr(); + Stopwatch watch; - MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table); + MergeTreeData & src_data = checkStructureAndGetMergeTreeData(source_table, source_metadata_snapshot, metadata_snapshot); String partition_id = getPartitionIDFromQuery(partition, context); DataPartsVector src_all_parts = src_data.getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); @@ -5405,8 +5411,11 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta getStorageID().getNameForLogs() + ": " + this->getStoragePolicy()->getName() + ", " + getStorageID().getNameForLogs() + ": " + dest_table_storage->getStoragePolicy()->getName(), ErrorCodes::LOGICAL_ERROR); + auto dest_metadata_snapshot = dest_table->getInMemoryMetadataPtr(); + auto metadata_snapshot = getInMemoryMetadataPtr(); + Stopwatch watch; - MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this); + MergeTreeData & src_data = dest_table_storage->checkStructureAndGetMergeTreeData(*this, metadata_snapshot, dest_metadata_snapshot); auto src_data_id = src_data.getStorageID(); String partition_id = getPartitionIDFromQuery(partition, query_context); diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 14a59da1bf9..5860facc814 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -122,13 +122,13 @@ protected: throw; } - columns = storage->getColumns(); + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + columns = metadata_snapshot->getColumns(); - cols_required_for_partition_key = storage->getColumnsRequiredForPartitionKey(); + cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey(); cols_required_for_sorting_key = storage->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey(); cols_required_for_sampling = storage->getColumnsRequiredForSampling(); - column_sizes = storage->getColumnSizes(); } diff --git
a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index b33886ce179..78346d1968f 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -267,6 +267,7 @@ protected: throw; } } + auto metadata_snapshot = table->getInMemoryMetadataPtr(); ++rows_count; @@ -365,7 +366,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getPartitionKeyAST())) + if ((expression_ptr = metadata_snapshot->getPartitionKeyAST())) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); From ba04d02f1e23c6830a8750530d95ad3b73545bb8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 14:05:11 +0300 Subject: [PATCH 115/318] Compilable sorting key in metadata --- src/Interpreters/ExpressionAnalyzer.cpp | 3 +- src/Interpreters/ExpressionAnalyzer.h | 1 + src/Interpreters/InterpreterSelectQuery.cpp | 6 +++- src/Interpreters/MutationsInterpreter.cpp | 4 +-- src/Storages/IStorage.cpp | 29 ------------------ src/Storages/IStorage.h | 16 ---------- src/Storages/MergeTree/MergeTreeData.cpp | 20 ++++++------- .../MergeTree/MergeTreeDataMergerMutator.cpp | 16 ++++++---- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 10 +++---- .../MergeTree/MergeTreeDataWriter.cpp | 4 +-- .../ReplicatedMergeTreeTableMetadata.cpp | 4 +-- src/Storages/ReadInOrderOptimizer.cpp | 10 +++---- src/Storages/ReadInOrderOptimizer.h | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 30 +++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 17 +++++++++++ src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMerge.cpp | 4 ++- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- src/Storages/System/StorageSystemColumns.cpp | 8 ++--- src/Storages/System/StorageSystemTables.cpp | 2 +- 21 files changed, 104 insertions(+), 88 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 039001796cc..28aa42877d6 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1011,6 +1011,7 @@ ExpressionActionsPtr SelectQueryExpressionAnalyzer::simpleSelectActions() ExpressionAnalysisResult::ExpressionAnalysisResult( SelectQueryExpressionAnalyzer & query_analyzer, + const StorageMetadataPtr & metadata_snapshot, bool first_stage_, bool second_stage_, bool only_types, @@ -1068,7 +1069,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && query.final()) { - Names columns_for_final = storage->getColumnsRequiredForFinal(); + Names columns_for_final = metadata_snapshot->getColumnsRequiredForFinal(); additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), columns_for_final.begin(), columns_for_final.end()); } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 1cc1b33bad1..cd0b837b4ec 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -204,6 +204,7 @@ struct ExpressionAnalysisResult ExpressionAnalysisResult( SelectQueryExpressionAnalyzer & query_analyzer, + const StorageMetadataPtr & metadata_snapshot, bool first_stage, bool second_stage, bool only_types, diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 331093b9d53..038448ef353 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ 
b/src/Interpreters/InterpreterSelectQuery.cpp @@ -492,8 +492,12 @@ Block InterpreterSelectQuery::getSampleBlockImpl() bool second_stage = from_stage <= QueryProcessingStage::WithMergeableState && options.to_stage > QueryProcessingStage::WithMergeableState; + Names columns_required_for_sampling; + Names columns_required_for_; + analysis_result = ExpressionAnalysisResult( *query_analyzer, + metadata_snapshot, first_stage, second_stage, options.only_analyze, @@ -1329,7 +1333,7 @@ void InterpreterSelectQuery::executeFetchColumns( getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result); - query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot); } Pipes pipes = storage->read(required_columns, metadata_snapshot, query_info, *context, processing_stage, max_block_size, max_streams); diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 50b68ba7ca3..0c0227d476f 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -225,7 +225,7 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt for (const String & col : metadata_snapshot->getColumnsRequiredForPartitionKey()) key_columns.insert(col); - for (const String & col : merge_tree_data->getColumnsRequiredForSortingKey()) + for (const String & col : metadata_snapshot->getColumnsRequiredForSortingKey()) key_columns.insert(col); /// We don't process sample_by_ast separately because it must be among the primary key columns. @@ -731,7 +731,7 @@ size_t MutationsInterpreter::evaluateCommandsSize() std::optional MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const { - Names sort_columns = storage->getSortingKeyColumns(); + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 84afd2fcf1c..0c2ddc09da8 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,35 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const KeyDescription & IStorage::getSortingKey() const -{ - return metadata->sorting_key; -} - -bool IStorage::isSortingKeyDefined() const -{ - return metadata->sorting_key.definition_ast != nullptr; -} - -bool IStorage::hasSortingKey() const -{ - return !metadata->sorting_key.column_names.empty(); -} - -Names IStorage::getColumnsRequiredForSortingKey() const -{ - if (hasSortingKey()) - return metadata->sorting_key.expression->getRequiredColumns(); - return {}; -} - -Names IStorage::getSortingKeyColumns() const -{ - if (hasSortingKey()) - return metadata->sorting_key.column_names; - return {}; -} - const KeyDescription & IStorage::getPrimaryKey() const { return metadata->primary_key; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 5f08d48d4b0..4e1ca81dd10 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -427,20 +427,6 @@ public: /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } - /// Returns structure with sorting key. - const KeyDescription & getSortingKey() const; - /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. 
- ASTPtr getSortingKeyAST() const { return metadata->sorting_key.definition_ast; } - /// Storage has user-defined (in CREATE query) sorting key. - bool isSortingKeyDefined() const; - /// Storage has sorting key. It means, that it contains at least one column. - bool hasSortingKey() const; - /// Returns column names that need to be read to calculate sorting key. - Names getColumnsRequiredForSortingKey() const; - /// Returns columns names in sorting key specified by user in ORDER BY - /// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc. - Names getSortingKeyColumns() const; - /// Returns structure with primary key. const KeyDescription & getPrimaryKey() const; /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. @@ -467,8 +453,6 @@ public: /// Returns column names that need to be read to calculate sampling key. Names getColumnsRequiredForSampling() const; - /// Returns column names that need to be read for FINAL to work. - Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; } diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b7e152fe6b4..a1487b3a1fe 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -274,7 +274,7 @@ static void checkKeyExpression(const ExpressionActions & expr, const Block & sam } } -void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & /*old_metadata*/, bool attach) const +void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach) const { if (!new_metadata.sorting_key.definition_ast) throw Exception("ORDER BY cannot be empty", ErrorCodes::BAD_ARGUMENTS); @@ -312,7 +312,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata auto all_columns = new_metadata.columns.getAllPhysical(); /// Order by check AST - if (hasSortingKey()) + if (old_metadata.hasSortingKey()) { /// This is ALTER, not CREATE/ATTACH TABLE. Let us check that all new columns used in the sorting key /// expression have just been added (so that the sorting order is guaranteed to be valid with the new key). 
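The two hunks this note sits between are the heart of the change: checkProperties() stops asking the live storage (hasSortingKey(), getSortingKeyColumns(), getColumns()) and instead compares the proposed new_metadata against the old_metadata its caller passes in, so the whole ALTER validation runs against one immutable snapshot. A minimal model of that snapshot discipline, with Storage, Metadata and the method bodies as hypothetical stand-ins rather than the real ClickHouse interfaces:

#include <memory>
#include <mutex>
#include <string>
#include <vector>

// Stand-in for StorageInMemoryMetadata: an immutable value type.
struct Metadata
{
    std::vector<std::string> sorting_key_columns;
    bool hasSortingKey() const { return !sorting_key_columns.empty(); }
};
using MetadataPtr = std::shared_ptr<const Metadata>;

class Storage
{
    mutable std::mutex mutex;
    MetadataPtr metadata = std::make_shared<Metadata>();

public:
    // Readers take one snapshot and use it for the whole operation.
    MetadataPtr getInMemoryMetadataPtr() const
    {
        std::lock_guard lock(mutex);
        return metadata;
    }

    // Writers (ALTER) swap in a fresh object; existing snapshots stay valid.
    void setInMemoryMetadata(Metadata new_metadata)
    {
        std::lock_guard lock(mutex);
        metadata = std::make_shared<Metadata>(std::move(new_metadata));
    }
};

// Validation compares the proposed state against an explicit snapshot
// instead of re-reading the storage, mirroring the new
// checkProperties(new_metadata, old_metadata) signature.
bool checkProperties(const Metadata & new_metadata, const Metadata & old_metadata)
{
    if (!old_metadata.hasSortingKey())
        return true;  // CREATE/ATTACH: nothing to compare against.
    // ALTER: the real code verifies the old key is a prefix of the new one.
    return new_metadata.sorting_key_columns.size() >= old_metadata.sorting_key_columns.size();
}

int main()
{
    Storage storage;
    auto old_snapshot = storage.getInMemoryMetadataPtr();
    Metadata altered{{"id", "ts"}};
    if (checkProperties(altered, *old_snapshot))
        storage.setInMemoryMetadata(std::move(altered));
}

The design point is that a shared_ptr swap keeps every in-flight reader on the version it started with, which is what the "Fix race condition" commit later in this series relies on.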
@@ -321,7 +321,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata Names new_sorting_key_columns = new_sorting_key.column_names; ASTPtr added_key_column_expr_list = std::make_shared(); - const auto & old_sorting_key_columns = getSortingKeyColumns(); + const auto & old_sorting_key_columns = old_metadata.getSortingKeyColumns(); for (size_t new_i = 0, old_i = 0; new_i < sorting_key_size; ++new_i) { if (old_i < old_sorting_key_columns.size()) @@ -342,7 +342,7 @@ void MergeTreeData::checkProperties(const StorageInMemoryMetadata & new_metadata NamesAndTypesList deleted_columns; NamesAndTypesList added_columns; - getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns); + old_metadata.getColumns().getAllPhysical().getDifference(all_columns, deleted_columns, added_columns); for (const String & col : used_columns) { @@ -415,7 +415,7 @@ ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const { - return getCombinedIndicesExpression(getSortingKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); + return getCombinedIndicesExpression(metadata_snapshot->getSortingKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); } @@ -487,8 +487,8 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta for (const auto & col : old_metadata.getColumnsRequiredForPartitionKey()) columns_ttl_forbidden.insert(col); - if (hasSortingKey()) - for (const auto & col : getColumnsRequiredForSortingKey()) + if (old_metadata.hasSortingKey()) + for (const auto & col : old_metadata.getColumnsRequiredForSortingKey()) columns_ttl_forbidden.insert(col); for (const auto & [name, ttl_description] : new_column_ttls) @@ -1266,9 +1266,9 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S columns_alter_type_forbidden.insert(col); } - if (hasSortingKey()) + if (old_metadata.hasSortingKey()) { - auto sorting_key_expr = getSortingKey().expression; + auto sorting_key_expr = old_metadata.getSortingKey().expression; for (const ExpressionAction & action : sorting_key_expr->getActions()) { auto action_columns = action.getNeededColumns(); @@ -2981,7 +2981,7 @@ MergeTreeData & MergeTreeData::checkStructureAndGetMergeTreeData(IStorage & sour return ast ? 
queryToString(ast) : ""; }; - if (query_to_string(getSortingKeyAST()) != query_to_string(src_data->getSortingKeyAST())) + if (query_to_string(my_snapshot->getSortingKeyAST()) != query_to_string(src_snapshot->getSortingKeyAST())) throw Exception("Tables have different ordering", ErrorCodes::BAD_ARGUMENTS); if (query_to_string(my_snapshot->getPartitionKeyAST()) != query_to_string(src_snapshot->getPartitionKeyAST())) diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index afd1586ac6c..fb08f379ee3 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -612,8 +612,14 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor NamesAndTypesList merging_columns; Names gathering_column_names, merging_column_names; extractMergingAndGatheringColumns( - storage_columns, data.getSortingKey().expression, metadata_snapshot->getSecondaryIndices(), - data.merging_params, gathering_columns, gathering_column_names, merging_columns, merging_column_names); + storage_columns, + metadata_snapshot->getSortingKey().expression, + metadata_snapshot->getSecondaryIndices(), + data.merging_params, + gathering_columns, + gathering_column_names, + merging_columns, + merging_column_names); auto single_disk_volume = std::make_shared("volume_" + future_part.name, disk); MergeTreeData::MutableDataPartPtr new_data_part = data.createPart( @@ -719,16 +725,16 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor Pipe pipe(std::move(input)); - if (data.hasSortingKey()) + if (metadata_snapshot->hasSortingKey()) { - auto expr = std::make_shared(pipe.getHeader(), data.getSortingKey().expression); + auto expr = std::make_shared(pipe.getHeader(), metadata_snapshot->getSortingKey().expression); pipe.addSimpleTransform(std::move(expr)); } pipes.emplace_back(std::move(pipe)); } - Names sort_columns = data.getSortingKeyColumns(); + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9fd020d0317..9d198c7ed65 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -617,7 +617,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( if (select.final()) { /// Add columns needed to calculate the sorting expression and the sign. 
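Right below, the FINAL path swaps data.getColumnsRequiredForSortingKey() for the snapshot's version; it is worth spelling out how that accessor differs from getSortingKeyColumns(), since both recur throughout these hunks. The former returns the physical columns the key expression has to read, the latter the expression names as written in ORDER BY. A small illustration, with KeyDescription reduced to two string vectors (the real struct carries ASTs and ExpressionActions):

#include <iostream>
#include <string>
#include <vector>

// Simplified stand-in for DB::KeyDescription.
struct KeyDescription
{
    std::vector<std::string> column_names;      // entries as written in ORDER BY
    std::vector<std::string> required_columns;  // physical columns they read
};

int main()
{
    // CREATE TABLE t (...) ENGINE = MergeTree ORDER BY (id, toStartOfDay(ts))
    KeyDescription sorting_key;
    sorting_key.column_names = {"id", "toStartOfDay(ts)"};
    sorting_key.required_columns = {"id", "ts"};

    // getSortingKeyColumns(): used e.g. to build a SortDescription for merges.
    for (const auto & name : sorting_key.column_names)
        std::cout << "sort by: " << name << '\n';

    // getColumnsRequiredForSortingKey(): what FINAL appends to
    // column_names_to_read, since collapsing rows needs the key computable.
    for (const auto & name : sorting_key.required_columns)
        std::cout << "must read: " << name << '\n';
}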
- std::vector add_columns = data.getColumnsRequiredForSortingKey(); + std::vector add_columns = metadata_snapshot->getColumnsRequiredForSortingKey(); column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end()); if (!data.merging_params.sign_column.empty()) @@ -644,7 +644,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info) { size_t prefix_size = query_info.input_order_info->order_key_prefix_descr.size(); - auto order_key_prefix_ast = data.getSortingKey().expression_list_ast->clone(); + auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone(); order_key_prefix_ast->children.resize(prefix_size); auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, data.getColumns().getAllPhysical()); @@ -1064,7 +1064,7 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder( { SortDescription sort_description; for (size_t j = 0; j < input_order_info->order_key_prefix_descr.size(); ++j) - sort_description.emplace_back(data.getSortingKey().column_names[j], + sort_description.emplace_back(metadata_snapshot->getSortingKey().column_names[j], input_order_info->direction, 1); /// Drop temporary columns, added by 'sorting_key_prefix_expr' @@ -1138,11 +1138,11 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( if (!out_projection) out_projection = createProjection(pipe, data); - pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), data.getSortingKey().expression)); + pipe.addSimpleTransform(std::make_shared(pipe.getHeader(), metadata_snapshot->getSortingKey().expression)); pipes.emplace_back(std::move(pipe)); } - Names sort_columns = data.getSortingKeyColumns(); + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index f96c9b48c4d..c31cfd3da6f 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -262,10 +262,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa new_data_part->volume->getDisk()->createDirectories(full_path); /// If we need to calculate some columns to sort. 
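The writer code below follows the recipe that also recurs in the merger and the select executor: fetch getSortingKeyColumns() from the snapshot, turn each name into an ascending sort criterion, and sort the block before it becomes a part. A self-contained approximation of that step, with rows as plain tuples instead of the IColumn permutations the real writer uses:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

using Row = std::vector<long>;  // one value per column, simplified

int main()
{
    std::vector<std::string> all_columns = {"ts", "id", "value"};
    std::vector<std::string> sort_columns = {"id", "ts"};  // getSortingKeyColumns()

    // Map key names to column positions, as a SortDescription would.
    std::vector<size_t> sort_description;
    sort_description.reserve(sort_columns.size());
    for (const auto & name : sort_columns)
        sort_description.push_back(
            std::find(all_columns.begin(), all_columns.end(), name) - all_columns.begin());

    std::vector<Row> block = {{10, 2, 7}, {5, 1, 8}, {3, 2, 9}};

    // Stable sort keeps insertion order of equal keys, like the real writer.
    std::stable_sort(block.begin(), block.end(), [&](const Row & a, const Row & b)
    {
        for (size_t i : sort_description)
            if (a[i] != b[i])
                return a[i] < b[i];
        return false;
    });

    for (const auto & row : block)
        std::cout << row[0] << ' ' << row[1] << ' ' << row[2] << '\n';
}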
- if (data.hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) + if (metadata_snapshot->hasSortingKey() || metadata_snapshot->hasSecondaryIndices()) data.getSortingKeyAndSkipIndicesExpression(metadata_snapshot)->execute(block); - Names sort_columns = data.getSortingKeyColumns(); + Names sort_columns = metadata_snapshot->getSortingKeyColumns(); SortDescription sort_description; size_t sort_columns_size = sort_columns.size(); sort_description.reserve(sort_columns_size); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 1f62fba03a0..81366db5b2a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -41,11 +41,11 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata if (!data.isPrimaryKeyDefined()) - primary_key = formattedAST(data.getSortingKey().expression_list_ast); + primary_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); else { primary_key = formattedAST(data.getPrimaryKey().expression_list_ast); - sorting_key = formattedAST(data.getSortingKey().expression_list_ast); + sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); } data_format_version = data.format_version; diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index bfdbd7ef557..a6cc6211788 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -30,20 +30,20 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( forbidden_columns.insert(elem.first); } -InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage) const +InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const { Names sorting_key_columns; if (const auto * merge_tree = dynamic_cast(storage.get())) { - if (!merge_tree->hasSortingKey()) + if (!metadata_snapshot->hasSortingKey()) return {}; - sorting_key_columns = merge_tree->getSortingKeyColumns(); + sorting_key_columns = metadata_snapshot->getSortingKeyColumns(); } else if (const auto * part = dynamic_cast(storage.get())) { - if (!part->hasSortingKey()) + if (!metadata_snapshot->hasSortingKey()) return {}; - sorting_key_columns = part->getSortingKeyColumns(); + sorting_key_columns = metadata_snapshot->getSortingKeyColumns(); } else /// Inapplicable storage type { diff --git a/src/Storages/ReadInOrderOptimizer.h b/src/Storages/ReadInOrderOptimizer.h index de858e8fd92..f2a3e448f50 100644 --- a/src/Storages/ReadInOrderOptimizer.h +++ b/src/Storages/ReadInOrderOptimizer.h @@ -20,7 +20,7 @@ public: const SortDescription & required_sort_description, const SyntaxAnalyzerResultPtr & syntax_result); - InputOrderInfoPtr getInputOrder(const StoragePtr & storage) const; + InputOrderInfoPtr getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const; private: /// Actions for every element of order expression to analyze functions for monotonicity diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 4882b5fdc1c..5eaaf98d397 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -179,7 +179,7 @@ Pipes 
StorageBuffer::read( if (dst_has_same_structure) { if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination, metadata_snapshot); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. pipes_from_dst = destination->read( diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 8fbe415ead6..3c7f474c482 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -312,4 +312,34 @@ Names StorageInMemoryMetadata::getColumnsRequiredForPartitionKey() const return partition_key.expression->getRequiredColumns(); return {}; } + + +const KeyDescription & StorageInMemoryMetadata::getSortingKey() const +{ + return sorting_key; +} + +bool StorageInMemoryMetadata::isSortingKeyDefined() const +{ + return sorting_key.definition_ast != nullptr; +} + +bool StorageInMemoryMetadata::hasSortingKey() const +{ + return !sorting_key.column_names.empty(); +} + +Names StorageInMemoryMetadata::getColumnsRequiredForSortingKey() const +{ + if (hasSortingKey()) + return sorting_key.expression->getRequiredColumns(); + return {}; +} + +Names StorageInMemoryMetadata::getSortingKeyColumns() const +{ + if (hasSortingKey()) + return sorting_key.column_names; + return {}; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 8996f9fc1b9..f7370057410 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -123,6 +123,23 @@ struct StorageInMemoryMetadata bool hasPartitionKey() const; /// Returns column names that need to be read to calculate partition key. Names getColumnsRequiredForPartitionKey() const; + + /// Returns structure with sorting key. + const KeyDescription & getSortingKey() const; + /// Returns ASTExpressionList of sorting key expression for storage or nullptr if there is none. + ASTPtr getSortingKeyAST() const { return sorting_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sorting key. + bool isSortingKeyDefined() const; + /// Storage has sorting key. It means, that it contains at least one column. + bool hasSortingKey() const; + /// Returns column names that need to be read to calculate sorting key. + Names getColumnsRequiredForSortingKey() const; + /// Returns columns names in sorting key specified by user in ORDER BY + /// expression. For example: 'a', 'x * y', 'toStartOfMonth(date)', etc. + Names getSortingKeyColumns() const; + + /// Returns column names that need to be read for FINAL to work. 
+ Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index e96a48efc9e..4eba4d6a165 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -121,7 +121,7 @@ Pipes StorageMaterializedView::read( auto metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(storage, metadata_snapshot); Pipes pipes = storage->read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index e47cde8de52..9765db35fc3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -184,7 +184,9 @@ Pipes StorageMerge::read( { for (auto it = selected_tables.begin(); it != selected_tables.end(); ++it) { - auto current_info = query_info.order_optimizer->getInputOrder(std::get<0>(*it)); + auto storage_ptr = std::get<0>(*it); + auto storage_metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); + auto current_info = query_info.order_optimizer->getInputOrder(storage_ptr, storage_metadata_snapshot); if (it == selected_tables.begin()) input_sorting_info = current_info; else if (!current_info || (input_sorting_info && *current_info != *input_sorting_info)) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fc1bd8538e5..061be8b2821 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -796,7 +796,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column { /// Primary and sorting key become independent after this ALTER so we have to /// save the old ORDER BY expression as the new primary key. 
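The comment above states the invariant this hunk protects: for a table created with ORDER BY alone, the primary key is implicitly the sorting key, so an ALTER that changes ORDER BY must first pin the old expression as an explicit PRIMARY KEY, otherwise the index already written to disk would stop matching. A toy version of the rule, with keys reduced to strings and all names hypothetical:

#include <cassert>
#include <optional>
#include <string>

struct TableKeys
{
    std::string sorting_key;
    std::optional<std::string> primary_key;  // empty => implicitly == sorting_key

    void alterSortingKey(const std::string & new_sorting_key)
    {
        if (!primary_key)
            primary_key = sorting_key;  // save old ORDER BY as the new primary key
        sorting_key = new_sorting_key;
    }
};

int main()
{
    TableKeys keys{"(date, id)", std::nullopt};
    keys.alterSortingKey("(date, id, version)");
    assert(*keys.primary_key == "(date, id)");
}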
- auto old_sorting_key_ast = getSortingKey().definition_ast; + auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast; primary_key = KeyDescription::getKeyFromAST( old_sorting_key_ast, new_metadata.columns, global_context); } diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 5860facc814..016d52ffdcb 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -122,11 +122,11 @@ protected: throw; } - auto metadadata_snapshot = storage->getInMemoryMetadataPtr(); - columns = metadadata_snapshot->getColumns(); + auto metadata_snapshot = storage->getInMemoryMetadataPtr(); + columns = metadata_snapshot->getColumns(); - cols_required_for_partition_key = metadadata_snapshot->getColumnsRequiredForPartitionKey(); - cols_required_for_sorting_key = storage->getColumnsRequiredForSortingKey(); + cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey(); + cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey(); cols_required_for_sampling = storage->getColumnsRequiredForSampling(); column_sizes = storage->getColumnSizes(); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 78346d1968f..50982e3c4b7 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -375,7 +375,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getSortingKey().expression_list_ast)) + if ((expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); From eca6e9087ebae69b79d667fc5e74f1190338ab0f Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 14:52:19 +0300 Subject: [PATCH 116/318] Fix race condition --- src/Interpreters/InterpreterAlterQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 10 +++++++--- src/Interpreters/InterpreterSelectQuery.h | 6 ++++-- src/Interpreters/MutationsInterpreter.cpp | 10 ++++++---- src/Interpreters/MutationsInterpreter.h | 10 ++++++++-- src/Storages/LiveView/StorageLiveView.cpp | 4 ++-- src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp | 4 ++-- src/Storages/StorageMergeTree.cpp | 3 ++- 8 files changed, 32 insertions(+), 17 deletions(-) diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 869c3ae98d3..3736b482ddf 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -86,7 +86,7 @@ BlockIO InterpreterAlterQuery::execute() auto table_lock_holder = table->lockStructureForShare( false /* because mutation is executed asyncronously */, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - MutationsInterpreter(table, mutation_commands, context, false).validate(table_lock_holder); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, context, false).validate(table_lock_holder); table->mutate(mutation_commands, context); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 038448ef353..42ce69f34c7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -165,8 +165,9 @@ 
InterpreterSelectQuery::InterpreterSelectQuery( const ASTPtr & query_ptr_, const Context & context_, const StoragePtr & storage_, + const StorageMetadataPtr & metadata_snapshot_, const SelectQueryOptions & options_) - : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery()) + : InterpreterSelectQuery(query_ptr_, context_, nullptr, std::nullopt, storage_, options_.copy().noSubquery(), {}, metadata_snapshot_) {} InterpreterSelectQuery::~InterpreterSelectQuery() = default; @@ -214,7 +215,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( std::optional input_pipe_, const StoragePtr & storage_, const SelectQueryOptions & options_, - const Names & required_result_column_names) + const Names & required_result_column_names, + const StorageMetadataPtr & metadata_snapshot_) : options(options_) /// NOTE: the query almost always should be cloned because it will be modified during analysis. , query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone()) @@ -223,6 +225,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( , input(input_) , input_pipe(std::move(input_pipe_)) , log(&Poco::Logger::get("InterpreterSelectQuery")) + , metadata_snapshot(metadata_snapshot_) { checkStackSize(); @@ -255,7 +258,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( table_lock = storage->lockStructureForShare( false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); table_id = storage->getStorageID(); - metadata_snapshot = storage->getInMemoryMetadataPtr(); + if (metadata_snapshot == nullptr) + metadata_snapshot = storage->getInMemoryMetadataPtr(); } if (has_input || !joined_tables.resolveTables()) diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 8f7237ffd7e..2f0faa2ba72 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -70,6 +70,7 @@ public: const ASTPtr & query_ptr_, const Context & context_, const StoragePtr & storage_, + const StorageMetadataPtr & metadata_snapshot_ = nullptr, const SelectQueryOptions & = {}); ~InterpreterSelectQuery() override; @@ -98,7 +99,8 @@ private: std::optional input_pipe, const StoragePtr & storage_, const SelectQueryOptions &, - const Names & required_result_column_names = {}); + const Names & required_result_column_names = {}, + const StorageMetadataPtr & metadata_snapshot_= nullptr); ASTSelectQuery & getSelectQuery() { return query_ptr->as(); } @@ -184,13 +186,13 @@ private: StoragePtr storage; StorageID table_id = StorageID::createEmpty(); /// Will be initialized if storage is not nullptr TableStructureReadLockHolder table_lock; - StorageMetadataPtr metadata_snapshot; /// Used when we read from prepared input, not table or subquery. BlockInputStreamPtr input; std::optional input_pipe; Poco::Logger * log; + StorageMetadataPtr metadata_snapshot; }; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 0c0227d476f..1a38fcf40f3 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -163,6 +163,7 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_ bool isStorageTouchedByMutations( StoragePtr storage, + const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, Context context_copy) { @@ -183,7 +184,7 @@ bool isStorageTouchedByMutations( /// Interpreter must be alive, when we use result of execute() method. 
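The race this commit's title refers to shows up in the constructor change further down: MutationsInterpreter used to call storage->getInMemoryMetadataPtr() itself, so the validation pass and the SELECT built from it could observe two different metadata versions if an ALTER committed in between. A compressed, hypothetical illustration of the hazard and of the fix of threading one snapshot through:

#include <memory>
#include <mutex>
#include <string>

struct Metadata { std::string columns; };
using MetadataPtr = std::shared_ptr<const Metadata>;

std::mutex metadata_mutex;
MetadataPtr current = std::make_shared<Metadata>(Metadata{"a, b"});

MetadataPtr getInMemoryMetadataPtr()
{
    std::lock_guard lock(metadata_mutex);
    return current;
}

// Racy: two independent reads may straddle a concurrent ALTER.
void interpretRacy()
{
    auto for_validation = getInMemoryMetadataPtr();
    // ... ALTER may swap `current` right here ...
    auto for_select = getInMemoryMetadataPtr();
    // for_validation->columns and for_select->columns can now disagree.
    (void)for_validation; (void)for_select;
}

// Fixed: the caller takes one snapshot under its lock and passes it down,
// the way MutationsInterpreter now receives metadata_snapshot_.
void interpretFixed(const MetadataPtr & metadata_snapshot)
{
    const auto & for_validation = metadata_snapshot;
    const auto & for_select = metadata_snapshot;  // guaranteed identical
    (void)for_validation; (void)for_select;
}

int main()
{
    interpretRacy();
    interpretFixed(getInMemoryMetadataPtr());
}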
/// For some reason it may copy context and and give it into ExpressionBlockInputStream /// after that we will use context from destroyed stack frame in our stream. - InterpreterSelectQuery interpreter(select_query, context_copy, storage, SelectQueryOptions().ignoreLimits()); + InterpreterSelectQuery interpreter(select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits()); BlockInputStreamPtr in = interpreter.execute().getInputStream(); Block block = in->read(); @@ -200,18 +201,19 @@ bool isStorageTouchedByMutations( MutationsInterpreter::MutationsInterpreter( StoragePtr storage_, + const StorageMetadataPtr & metadata_snapshot_, MutationCommands commands_, const Context & context_, bool can_execute_) : storage(std::move(storage_)) - , metadata_snapshot(storage->getInMemoryMetadataPtr()) + , metadata_snapshot(metadata_snapshot_) , commands(std::move(commands_)) , context(context_) , can_execute(can_execute_) { mutation_ast = prepare(!can_execute); SelectQueryOptions limits = SelectQueryOptions().analyze(!can_execute).ignoreLimits(); - select_interpreter = std::make_unique(mutation_ast, context, storage, limits); + select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot_, limits); } static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) @@ -504,7 +506,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{select_query, context, storage, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; + InterpreterSelectQuery interpreter{select_query, context, storage, metadata_snapshot, SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits()}; auto first_stage_header = interpreter.getSampleBlock(); auto in = std::make_shared(first_stage_header); diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 158ed8d55af..3c3175c1856 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -15,7 +15,8 @@ namespace DB class Context; /// Return false if the data isn't going to be changed by mutations. -bool isStorageTouchedByMutations(StoragePtr storage, const std::vector & commands, Context context_copy); +bool isStorageTouchedByMutations( + StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, Context context_copy); /// Create an input stream that will read data from storage and apply mutation commands (UPDATEs, DELETEs, MATERIALIZEs) /// to this data. @@ -24,7 +25,12 @@ class MutationsInterpreter public: /// Storage to mutate, array of mutations commands and context. 
If you really want to execute mutation /// use can_execute = true, in other cases (validation, amount of commands) it can be false - MutationsInterpreter(StoragePtr storage_, MutationCommands commands_, const Context & context_, bool can_execute_); + MutationsInterpreter( + StoragePtr storage_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + const Context & context_, + bool can_execute_); void validate(TableStructureReadLockHolder & table_lock_holder); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index cb4964f3c55..f1b9459b3d3 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -147,7 +147,7 @@ BlockInputStreamPtr StorageLiveView::completeQuery(Pipes pipes) }; block_context->addExternalTable(getBlocksTableName(), TemporaryTableHolder(global_context, creator)); - InterpreterSelectQuery select(getInnerBlocksQuery(), *block_context, StoragePtr(), SelectQueryOptions(QueryProcessingStage::Complete)); + InterpreterSelectQuery select(getInnerBlocksQuery(), *block_context, StoragePtr(), nullptr, SelectQueryOptions(QueryProcessingStage::Complete)); BlockInputStreamPtr data = std::make_shared(select.execute().getInputStream()); /// Squashing is needed here because the view query can generate a lot of blocks @@ -214,7 +214,7 @@ void StorageLiveView::writeIntoLiveView( }; TemporaryTableHolder blocks_storage(context, creator); - InterpreterSelectQuery select_block(mergeable_query, context, blocks_storage.getTable(), + InterpreterSelectQuery select_block(mergeable_query, context, blocks_storage.getTable(), blocks_storage.getTable()->getInMemoryMetadataPtr(), QueryProcessingStage::WithMergeableState); auto data_mergeable_stream = std::make_shared( diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index fb08f379ee3..02279fe3298 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1019,7 +1019,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor commands_for_part.emplace_back(command); } - if (!isStorageTouchedByMutations(storage_from_source_part, commands_for_part, context_for_reading)) + if (!isStorageTouchedByMutations(storage_from_source_part, metadata_snapshot, commands_for_part, context_for_reading)) { LOG_TRACE(log, "Part {} doesn't change up to mutation version {}", source_part->name, future_part.part_info.mutation); return data.cloneAndLoadDataPartOnSameDisk(source_part, "tmp_clone_", future_part.part_info); @@ -1045,7 +1045,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (!for_interpreter.empty()) { - interpreter.emplace(storage_from_source_part, for_interpreter, context_for_reading, true); + interpreter.emplace(storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true); in = interpreter->execute(table_lock_holder); updated_header = interpreter->getUpdatedHeader(); in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress)); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c13070fa4a5..f259d74b9ea 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -789,7 +789,8 @@ bool StorageMergeTree::tryMutatePart() if (!commands_for_size_validation.empty()) { - MutationsInterpreter 
interpreter(shared_from_this(), commands_for_size_validation, global_context, false); + MutationsInterpreter interpreter( + shared_from_this(), metadata_snapshot, commands_for_size_validation, global_context, false); commands_size += interpreter.evaluateCommandsSize(); } From 5abbaeecf5e7185b8bd7da57ed5fd6d2850b4943 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Jun 2020 13:02:04 +0300 Subject: [PATCH 117/318] distinct combinator for single numeric arguments --- .../AggregateFunctionDistinct.cpp | 29 ++- .../AggregateFunctionDistinct.h | 205 ++++++++++++------ .../AggregateFunctionGroupUniqArray.h | 38 +--- src/AggregateFunctions/KeyHolderHelpers.h | 34 +++ .../01259_combinator_distinct.reference | 1 + .../0_stateless/01259_combinator_distinct.sql | 11 +- 6 files changed, 206 insertions(+), 112 deletions(-) create mode 100644 src/AggregateFunctions/KeyHolderHelpers.h diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 820c2f0f72c..1661277d525 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include "registerAggregateFunctions.h" @@ -9,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; } class AggregateFunctionCombinatorDistinct final : public IAggregateFunctionCombinator @@ -22,19 +24,30 @@ public: throw Exception("Incorrect number of arguments for aggregate function with " + getName() + " suffix", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - DataTypes nested_arguments; - for (const auto & type : arguments) - { - nested_arguments.push_back(type); - } - - return nested_arguments; + return arguments; } AggregateFunctionPtr transformAggregateFunction( const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override { - return std::make_shared(nested_function, arguments); + AggregateFunctionPtr res; + if (arguments.size() == 1) + { + res = AggregateFunctionPtr(createWithNumericType(*arguments[0], nested_function, arguments)); + if (res) + return res; + + if (arguments[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion()) + return std::make_shared>(nested_function, arguments); + else + return std::make_shared>(nested_function, arguments); + } + + if (!res) + throw Exception("Illegal type " /* + argument_type->getName() + */ + " of argument for aggregate function " + nested_function->getName() + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return res; } }; diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 32f5df6d8f0..72099a33cfd 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -16,34 +17,22 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } -struct AggregateFunctionDistinctData +template +struct AggregateFunctionDistinctSingleNumericData { - using Key = UInt128; - - HashSet< - Key, - UInt128TrivialHash, - HashTableGrower<3>, - HashTableAllocatorWithStackMemory - > set; - std::mutex mutex; - - bool ALWAYS_INLINE tryToInsert(const Key& key) - { - return set.insert(key).second; - } + /// When creating, the hash table must be small. 
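The struct being defined at this point is the entire per-state payload of the numeric fast path: a stack-friendly hash set laid out as a prefix in front of the nested function's own state, consulted by add() so the nested function only ever sees the first occurrence of each value. The behaviour, boiled down to standard containers with sum standing in for the wrapped aggregate:

#include <cassert>
#include <initializer_list>
#include <unordered_set>

// Toy model of sumDistinct. The set screens duplicates; the nested
// aggregate state is updated only on first insertion.
struct SumDistinct
{
    std::unordered_set<long> seen;   // plays the role of the Data prefix
    long nested_state = 0;           // the wrapped function's state

    void add(long value)
    {
        if (seen.insert(value).second)   // first occurrence only
            nested_state += value;
    }

    // merge() replays only the values new to this state into the nested
    // function, batched in the patch via addBatchSinglePlace.
    void merge(const SumDistinct & rhs)
    {
        for (long v : rhs.seen)
            if (seen.insert(v).second)
                nested_state += v;
    }
};

int main()
{
    SumDistinct a, b;
    for (long v : {1L, 2L, 2L, 3L}) a.add(v);
    for (long v : {3L, 4L}) b.add(v);
    a.merge(b);
    assert(a.nested_state == 1 + 2 + 3 + 4);
}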
+ using Set = HashSetWithStackMemory, 4>; + Set value; }; -/** Adaptor for aggregate functions. - * Adding -Distinct suffix to aggregate function -**/ - -class AggregateFunctionDistinct final : public IAggregateFunctionDataHelper +template +class AggregateFunctionDistinctBase : public IAggregateFunctionDataHelper { -private: +protected: + static constexpr size_t prefix_size = sizeof(Data); AggregateFunctionPtr nested_func; size_t num_arguments; - size_t prefix_size; + AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept { @@ -56,14 +45,22 @@ private: } public: - AggregateFunctionDistinct(AggregateFunctionPtr nested, const DataTypes & arguments) - : IAggregateFunctionDataHelper(arguments, {}) - , nested_func(nested), num_arguments(arguments.size()) - { - prefix_size = sizeof(AggregateFunctionDistinctData); - if (arguments.empty()) - throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + size_t sizeOfData() const override + { + return prefix_size + nested_func->sizeOfData(); + } + + void create(AggregateDataPtr place) const override + { + new (place) Data; + nested_func->create(getNestedPlace(place)); + } + + void destroy(AggregateDataPtr place) const noexcept override + { + this->data(place).~Data(); + nested_func->destroy(getNestedPlace(place)); } String getName() const override @@ -76,71 +73,151 @@ public: return nested_func->getReturnType(); } - void create(AggregateDataPtr place) const override + bool allocatesMemoryInArena() const override { - new (place) AggregateFunctionDistinctData; - nested_func->create(getNestedPlace(place)); + return true; } - void destroy(AggregateDataPtr place) const noexcept override + AggregateFunctionDistinctBase(AggregateFunctionPtr nested, const DataTypes & arguments) + : IAggregateFunctionDataHelper(arguments, {}) + , nested_func(nested), num_arguments(arguments.size()) { - data(place).~AggregateFunctionDistinctData(); - nested_func->destroy(getNestedPlace(place)); + if (arguments.empty()) + throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); } +}; - size_t sizeOfData() const override - { - return prefix_size + nested_func->sizeOfData(); - } - size_t alignOfData() const override - { - return nested_func->alignOfData(); - } +/** Adaptor for aggregate functions. 
+ * Adding -Distinct suffix to aggregate function +**/ +template +class AggregateFunctionDistinctSingleNumericImpl final + : public AggregateFunctionDistinctBase, + AggregateFunctionDistinctSingleNumericImpl> +{ +public: - bool hasTrivialDestructor() const override - { - return nested_func->hasTrivialDestructor(); - } + AggregateFunctionDistinctSingleNumericImpl(AggregateFunctionPtr nested, const DataTypes & arguments) + : AggregateFunctionDistinctBase< + AggregateFunctionDistinctSingleNumericData, + AggregateFunctionDistinctSingleNumericImpl>(nested, arguments) {} void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override { - SipHash hash; - for (size_t i = 0; i < num_arguments; ++i) - { - columns[i]->updateHashWithValue(row_num, hash); - } - - UInt128 key; - hash.get128(key.low, key.high); - - if (this->data(place).tryToInsert(key)) - nested_func->add(getNestedPlace(place), columns, row_num, arena); + const auto & vec = assert_cast &>(*columns[0]).getData(); + if (this->data(place).value.insert(vec[row_num]).second) + this->nested_func->add(this->getNestedPlace(place), columns, row_num, arena); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { - nested_func->merge(getNestedPlace(place), rhs, arena); + auto & cur_set = this->data(place).value; + auto & rhs_set = this->data(rhs).value; + + auto arguments = this->argument_types[0]->createColumn(); + for (auto & elem : rhs_set) + if (cur_set.insert(elem.getValue()).second) + arguments->insert(elem.getValue()); + + const auto * arguments_ptr = arguments.get(); + if (!arguments->empty()) + this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - nested_func->serialize(getNestedPlace(place), buf); + this->data(place).value.write(buf); + this->nested_func->serialize(this->getNestedPlace(place), buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override { - nested_func->deserialize(getNestedPlace(place), buf, arena); + this->data(place).value.read(buf); + this->nested_func->deserialize(this->getNestedPlace(place), buf, arena); } void insertResultInto(AggregateDataPtr place, IColumn & to) const override { - nested_func->insertResultInto(getNestedPlace(place), to); + this->nested_func->insertResultInto(this->getNestedPlace(place), to); + } +}; + +struct AggregateFunctionDistinctSingleGenericData +{ + using Set = HashSetWithSavedHashWithStackMemory; + Set value; +}; + +template +class AggregateFunctionDistinctSingleGenericImpl final + : public AggregateFunctionDistinctBase> +{ +public: + using Data = AggregateFunctionDistinctSingleGenericData; + + AggregateFunctionDistinctSingleGenericImpl(AggregateFunctionPtr nested, const DataTypes & arguments) + : AggregateFunctionDistinctBase< + AggregateFunctionDistinctSingleGenericData, + AggregateFunctionDistinctSingleGenericImpl>(nested, arguments) {} + + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + { + auto & set = this->data(place).value; + + Data::Set::LookupResult it; + bool inserted; + auto key_holder = getKeyHolder(*columns[0], row_num, *arena); + set.emplace(key_holder, it, inserted); + if (inserted) + this->nested_func->add(this->getNestedPlace(place), columns, row_num, arena); } - bool allocatesMemoryInArena() const override + void merge(AggregateDataPtr place, 
ConstAggregateDataPtr rhs, Arena * arena) const override { - return nested_func->allocatesMemoryInArena(); + auto & cur_set = this->data(place).value; + auto & rhs_set = this->data(rhs).value; + + Data::Set::LookupResult it; + bool inserted; + auto arguments = this->argument_types[0]->createColumn(); + for (auto & elem : rhs_set) + { + cur_set.emplace(ArenaKeyHolder{elem.getValue(), *arena}, it, inserted); + if (inserted) + deserializeAndInsert(elem.getValue(), *arguments); + } + + const auto * arguments_ptr = arguments.get(); + if (!arguments->empty()) + this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); + } + + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override + { + auto & set = this->data(place).value; + writeVarUInt(set.size(), buf); + for (const auto & elem : set) + writeStringBinary(elem.getValue(), buf); + + this->nested_func->serialize(this->getNestedPlace(place), buf); + } + + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + { + auto & set = this->data(place).value; + size_t size; + readVarUInt(size, buf); + for (size_t i = 0; i < size; ++i) + set.insert(readStringBinaryInto(*arena, buf)); + + this->nested_func->deserialize(this->getNestedPlace(place), buf, arena); + } + + void insertResultInto(AggregateDataPtr place, IColumn & to) const override + { + this->nested_func->insertResultInto(this->getNestedPlace(place), to); } }; diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index 88b1c87f526..b6683567404 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -16,6 +16,7 @@ #include #include +#include #define AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE 0xFFFFFF @@ -147,26 +148,6 @@ class AggregateFunctionGroupUniqArrayGeneric using State = AggregateFunctionGroupUniqArrayGenericData; - static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena) - { - if constexpr (is_plain_column) - { - return ArenaKeyHolder{column.getDataAt(row_num), arena}; - } - else - { - const char * begin = nullptr; - StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin); - assert(serialized.data != nullptr); - return SerializedKeyHolder{serialized, arena}; - } - } - - static void deserializeAndInsert(StringRef str, IColumn & data_to) - { - return deserializeAndInsertImpl(str, data_to); - } - public: AggregateFunctionGroupUniqArrayGeneric(const DataTypePtr & input_data_type_, UInt64 max_elems_ = std::numeric_limits::max()) : IAggregateFunctionDataHelper>({input_data_type_}, {}) @@ -215,7 +196,7 @@ public: bool inserted; State::Set::LookupResult it; - auto key_holder = getKeyHolder(*columns[0], row_num, *arena); + auto key_holder = getKeyHolder(*columns[0], row_num, *arena); set.emplace(key_holder, it, inserted); } @@ -247,22 +228,9 @@ public: offsets_to.push_back(offsets_to.back() + set.size()); for (auto & elem : set) - deserializeAndInsert(elem.getValue(), data_to); + deserializeAndInsert(elem.getValue(), data_to); } }; - -template <> -inline void deserializeAndInsertImpl(StringRef str, IColumn & data_to) -{ - data_to.deserializeAndInsertFromArena(str.data); -} - -template <> -inline void deserializeAndInsertImpl(StringRef str, IColumn & data_to) -{ - data_to.insertData(str.data, str.size); -} - #undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE } diff --git 
a/src/AggregateFunctions/KeyHolderHelpers.h b/src/AggregateFunctions/KeyHolderHelpers.h new file mode 100644 index 00000000000..01b3cf2a369 --- /dev/null +++ b/src/AggregateFunctions/KeyHolderHelpers.h @@ -0,0 +1,34 @@ +#pragma once + +#include +#include + +namespace DB +{ + +template +static auto getKeyHolder(const IColumn & column, size_t row_num, Arena & arena) +{ + if constexpr (is_plain_column) + { + return ArenaKeyHolder{column.getDataAt(row_num), arena}; + } + else + { + const char * begin = nullptr; + StringRef serialized = column.serializeValueIntoArena(row_num, arena, begin); + assert(serialized.data != nullptr); + return SerializedKeyHolder{serialized, arena}; + } +} + +template +static void deserializeAndInsert(StringRef str, IColumn & data_to) +{ + if constexpr (is_plain_column) + data_to.insertData(str.data, str.size); + else + data_to.deserializeAndInsertFromArena(str.data); +} + +} diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference index 739d225ad67..83756ffdaa4 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct.reference @@ -2,4 +2,5 @@ 78 [0,1,2,3,4,5,6,7,8,9,10,11,12] [0,1,2,3,4,5,6,7,8,9,10,11,12] +20 5.669227916063075e-17 diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql index 3f07dc443dd..adfddeb34e4 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.sql +++ b/tests/queries/0_stateless/01259_combinator_distinct.sql @@ -1,5 +1,6 @@ -SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers LIMIT 1000); -SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); -SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers LIMIT 1000); -SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 1000); -SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000); \ No newline at end of file +SELECT sum(DISTINCT x) FROM (SELECT number AS x FROM system.numbers_mt LIMIT 100000); +SELECT sum(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); +SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); +SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); +SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numbers_mt (100000); +-- SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000); From 1da393b2180bedc1827bdd7b8c6356e06db7a993 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 15:07:09 +0300 Subject: [PATCH 118/318] Sampling key in StorageInMemoryMetadata --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Storages/IStorage.cpp | 23 ------------------- src/Storages/IStorage.h | 14 +---------- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- .../ReplicatedMergeTreeTableMetadata.cpp | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 23 +++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 11 +++++++++ src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- 9 files changed, 41 insertions(+), 42 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp 
index 28aa42877d6..9d9a403ab59 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1062,7 +1062,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( if (storage && (query.sampleSize() || settings.parallel_replicas_count > 1)) { - Names columns_for_sampling = storage->getColumnsRequiredForSampling(); + Names columns_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); additional_required_columns_after_prewhere.insert(additional_required_columns_after_prewhere.end(), columns_for_sampling.begin(), columns_for_sampling.end()); } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0c2ddc09da8..0a50cf95559 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -348,35 +348,12 @@ Names IStorage::getPrimaryKeyColumns() const return {}; } -const KeyDescription & IStorage::getSamplingKey() const -{ - return metadata->sampling_key; -} - -bool IStorage::isSamplingKeyDefined() const -{ - return metadata->sampling_key.definition_ast != nullptr; -} - -bool IStorage::hasSamplingKey() const -{ - return !metadata->sampling_key.column_names.empty(); -} - -Names IStorage::getColumnsRequiredForSampling() const -{ - if (hasSamplingKey()) - return metadata->sampling_key.expression->getRequiredColumns(); - return {}; -} - TTLTableDescription IStorage::getTableTTLs() const { std::lock_guard lock(ttl_mutex); return metadata->table_ttl; } - bool IStorage::hasAnyTableTTL() const { return hasAnyMoveTTL() || hasRowsTTL(); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4e1ca81dd10..5aa595b1375 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -101,7 +101,7 @@ public: virtual bool isView() const { return false; } /// Returns true if the storage supports queries with the SAMPLE section. - virtual bool supportsSampling() const { return hasSamplingKey(); } + virtual bool supportsSampling() const { return getInMemoryMetadataPtr()->hasSamplingKey(); } /// Returns true if the storage supports queries with the FINAL section. virtual bool supportsFinal() const { return false; } @@ -442,18 +442,6 @@ public: /// * y', 'toStartOfMonth(date)', etc. Names getPrimaryKeyColumns() const; - /// Returns structure with sampling key. - const KeyDescription & getSamplingKey() const; - /// Returns sampling expression AST for storage or nullptr if there is none. - ASTPtr getSamplingKeyAST() const { return metadata->sampling_key.definition_ast; } - /// Storage has user-defined (in CREATE query) sampling key. - bool isSamplingKeyDefined() const; - /// Storage has sampling key. - bool hasSamplingKey() const; - /// Returns column names that need to be read to calculate sampling key. - Names getColumnsRequiredForSampling() const; - - /// Returns storage policy if storage supports it. 
virtual StoragePolicyPtr getStoragePolicy() const { return {}; } diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 9d198c7ed65..58214bae5ca 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -390,7 +390,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( used_sample_factor = 1.0 / boost::rational_cast(relative_sample_size); RelativeSize size_of_universum = 0; - const auto & sampling_key = data.getSamplingKey(); + const auto & sampling_key = metadata_snapshot->getSamplingKey(); DataTypePtr sampling_column_type = sampling_key.data_types[0]; if (typeid_cast(sampling_column_type.get())) @@ -459,7 +459,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( /// If sample and final are used together no need to calculate sampling expression twice. /// The first time it was calculated for final, because sample key is a part of the PK. /// So, assume that we already have calculated column. - ASTPtr sampling_key_ast = data.getSamplingKeyAST(); + ASTPtr sampling_key_ast = metadata_snapshot->getSamplingKeyAST(); if (select.final()) { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 81366db5b2a..c3d91f4a5a9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -29,7 +29,7 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr date_column = data.minmax_idx_columns[data.minmax_idx_date_column_pos]; const auto data_settings = data.getSettings(); - sampling_expression = formattedAST(data.getSamplingKeyAST()); + sampling_expression = formattedAST(metadata_snapshot->getSamplingKeyAST()); index_granularity = data_settings->index_granularity; merging_params_mode = static_cast(data.merging_params.mode); sign_column = data.merging_params.sign_column; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 3c7f474c482..6c5429fc556 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -342,4 +342,27 @@ Names StorageInMemoryMetadata::getSortingKeyColumns() const return sorting_key.column_names; return {}; } + +const KeyDescription & StorageInMemoryMetadata::getSamplingKey() const +{ + return sampling_key; +} + +bool StorageInMemoryMetadata::isSamplingKeyDefined() const +{ + return sampling_key.definition_ast != nullptr; +} + +bool StorageInMemoryMetadata::hasSamplingKey() const +{ + return !sampling_key.column_names.empty(); +} + +Names StorageInMemoryMetadata::getColumnsRequiredForSampling() const +{ + if (hasSamplingKey()) + return sampling_key.expression->getRequiredColumns(); + return {}; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index f7370057410..1abea7d250c 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -140,6 +140,17 @@ struct StorageInMemoryMetadata /// Returns column names that need to be read for FINAL to work. Names getColumnsRequiredForFinal() const { return getColumnsRequiredForSortingKey(); } + + /// Returns structure with sampling key. + const KeyDescription & getSamplingKey() const; + /// Returns sampling expression AST for storage or nullptr if there is none. 
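The select-executor hunk above is the one consumer where the sampling key's data type actually matters: SAMPLE n/m is translated into a half-open range over the key column's universum (2^8 up to 2^64 values depending on the type), not into a per-row filter. A rough sketch of that translation, assuming a UInt32 sampling column and ignoring the RelativeSize rational arithmetic the real code uses:

#include <cstdint>
#include <iostream>

int main()
{
    // SAMPLE 1/10 over intHash32(user_id): keep rows whose sampling key
    // falls into the first tenth of the UInt32 universum.
    const uint64_t size_of_universum = uint64_t(1) << 32;
    const uint64_t numerator = 1, denominator = 10;

    const uint64_t lower = 0;
    const uint64_t upper = size_of_universum * numerator / denominator;

    std::cout << "WHERE sampling_key >= " << lower
              << " AND sampling_key < " << upper << '\n';
    // Since MergeTree requires the sampling expression to be part of the
    // primary key, this range prunes granules instead of scanning rows.
}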
+ ASTPtr getSamplingKeyAST() const { return sampling_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sampling key. + bool isSamplingKeyDefined() const; + /// Storage has sampling key. + bool hasSamplingKey() const; + /// Returns column names that need to be read to calculate sampling key. + Names getColumnsRequiredForSampling() const; }; using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 016d52ffdcb..f998dc27cab 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -128,7 +128,7 @@ protected: cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey(); cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey(); - cols_required_for_sampling = storage->getColumnsRequiredForSampling(); + cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); column_sizes = storage->getColumnSizes(); } diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 50982e3c4b7..0852a96e8ba 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -393,7 +393,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getSamplingKeyAST())) + if ((expression_ptr = metadata_snapshot->getSamplingKeyAST())) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); From 1afdebeebdb8dcb0559ff51eacd259ea6094a409 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 15:39:20 +0300 Subject: [PATCH 119/318] Primary key in storage metadata --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/IStorage.cpp | 29 ------------------- src/Storages/IStorage.h | 15 ---------- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 5 ++-- src/Storages/MergeTree/IMergeTreeDataPart.h | 1 + .../MergeTree/IMergeTreeDataPartWriter.cpp | 6 ++-- .../MergeTree/IMergeTreeDataPartWriter.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 19 ++++++------ src/Storages/MergeTree/MergeTreeData.h | 2 +- .../MergeTree/MergeTreeDataMergerMutator.cpp | 2 +- .../MergeTree/MergeTreeDataPartCompact.cpp | 5 ++-- .../MergeTree/MergeTreeDataPartCompact.h | 1 + .../MergeTree/MergeTreeDataPartWide.cpp | 3 +- .../MergeTree/MergeTreeDataPartWide.h | 1 + .../MergeTreeDataPartWriterCompact.cpp | 8 ++--- .../MergeTreeDataPartWriterCompact.h | 1 + .../MergeTree/MergeTreeDataPartWriterWide.cpp | 8 ++--- .../MergeTree/MergeTreeDataPartWriterWide.h | 1 + .../MergeTree/MergeTreeDataSelectExecutor.cpp | 22 +++++++++----- .../MergeTree/MergeTreeDataSelectExecutor.h | 2 ++ .../MergeTree/MergeTreeWhereOptimizer.cpp | 13 +++++---- .../MergeTree/MergeTreeWhereOptimizer.h | 3 ++ .../MergeTree/MergedBlockOutputStream.cpp | 4 +-- .../MergedColumnOnlyOutputStream.cpp | 1 + .../ReplicatedMergeTreeTableMetadata.cpp | 4 +-- src/Storages/StorageInMemoryMetadata.cpp | 28 ++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 15 ++++++++++ src/Storages/System/StorageSystemTables.cpp | 2 +- 28 files changed, 114 insertions(+), 91 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 42ce69f34c7..41b2abc33c6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -330,7 +330,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( current_info.query = query_ptr; current_info.syntax_analyzer_result = syntax_analyzer_result; - MergeTreeWhereOptimizer{current_info, *context, *merge_tree, syntax_analyzer_result->requiredSourceColumns(), log}; + MergeTreeWhereOptimizer{current_info, *context, *merge_tree, metadata_snapshot, syntax_analyzer_result->requiredSourceColumns(), log}; } } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0a50cf95559..f9b7f41f139 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,35 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const KeyDescription & IStorage::getPrimaryKey() const -{ - return metadata->primary_key; -} - -bool IStorage::isPrimaryKeyDefined() const -{ - return metadata->primary_key.definition_ast != nullptr; -} - -bool IStorage::hasPrimaryKey() const -{ - return !metadata->primary_key.column_names.empty(); -} - -Names IStorage::getColumnsRequiredForPrimaryKey() const -{ - if (hasPrimaryKey()) - return metadata->primary_key.expression->getRequiredColumns(); - return {}; -} - -Names IStorage::getPrimaryKeyColumns() const -{ - if (!metadata->primary_key.column_names.empty()) - return metadata->primary_key.column_names; - return {}; -} - TTLTableDescription IStorage::getTableTTLs() const { std::lock_guard lock(ttl_mutex); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 5aa595b1375..787b96c9197 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -427,21 +427,6 @@ public: /// Returns data paths if storage supports it, empty vector otherwise. virtual Strings getDataPaths() const { return {}; } - /// Returns structure with primary key. - const KeyDescription & getPrimaryKey() const; - /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. - ASTPtr getPrimaryKeyAST() const { return metadata->primary_key.definition_ast; } - /// Storage has user-defined (in CREATE query) sorting key. - bool isPrimaryKeyDefined() const; - /// Storage has primary key (maybe part of some other key). It means, that - /// it contains at least one column. - bool hasPrimaryKey() const; - /// Returns column names that need to be read to calculate primary key. - Names getColumnsRequiredForPrimaryKey() const; - /// Returns columns names in sorting key specified by. For example: 'a', 'x - /// * y', 'toStartOfMonth(date)', etc. - Names getPrimaryKeyColumns() const; - /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; }
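[Illustrative note, not part of the patch] Commit 119 repeats the migration for the primary key. A sketch against the accessors it adds to StorageInMemoryMetadata below:

    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
    if (metadata_snapshot->hasPrimaryKey())
    {
        const KeyDescription & primary_key = metadata_snapshot->getPrimaryKey();
        Names pk_columns = metadata_snapshot->getPrimaryKeyColumns();
    }

Because StorageMetadataPtr is a shared_ptr to const metadata, the snapshot a caller holds stays valid and unchanged even if ALTER replaces the table's in-memory metadata concurrently.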
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 03b2dea23ba..17ff2259436 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -437,7 +437,8 @@ void IMergeTreeDataPart::loadIndex() if (!index_granularity.isInitialized()) throw Exception("Index granularity is not loaded before index loading", ErrorCodes::LOGICAL_ERROR); - const auto & primary_key = storage.getPrimaryKey(); + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); + const auto & primary_key = metadata_snapshot->getPrimaryKey(); size_t key_size = primary_key.column_names.size(); if (key_size) @@ -842,7 +843,7 @@ void IMergeTreeDataPart::checkConsistencyBase() const String path = getFullRelativePath(); auto metadata_snapshot = storage.getInMemoryMetadataPtr(); - const auto & pk = storage.getPrimaryKey(); + const auto & pk = metadata_snapshot->getPrimaryKey(); if (!checksums.empty()) { if (!pk.column_names.empty() && !checksums.files.count("primary.idx")) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index f5ca0fee070..04babece83e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -86,6 +86,7 @@ public: virtual MergeTreeWriterPtr getWriter( const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector<MergeTreeIndexPtr> & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index cfda613d31d..73ac7fc0064 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -65,6 +65,7 @@ void IMergeTreeDataPartWriter::Stream::addToChecksums(MergeTreeData::DataPart::C IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_list_, + const StorageMetadataPtr & metadata_snapshot_, const std::vector<MergeTreeIndexPtr> & indices_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, @@ -73,6 +74,7 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( : data_part(data_part_) , part_path(data_part_->getFullRelativePath()) , storage(data_part_->storage) + , metadata_snapshot(metadata_snapshot_) , columns_list(columns_list_) , marks_file_extension(marks_file_extension_) , index_granularity(index_granularity_) @@ -162,7 +164,7 @@ void IMergeTreeDataPartWriter::fillIndexGranularity(size_t index_granularity_for void IMergeTreeDataPartWriter::initPrimaryIndex() { - if (storage.hasPrimaryKey()) + if (metadata_snapshot->hasPrimaryKey()) { index_file_stream = data_part->volume->getDisk()->writeFile(part_path + "primary.idx", DBMS_DEFAULT_BUFFER_SIZE, WriteMode::Rewrite); index_stream = std::make_unique<HashingWriteBuffer>(*index_file_stream); @@ -221,7 +223,7 @@ void IMergeTreeDataPartWriter::calculateAndSerializePrimaryIndex(const Block & p while (index_mark < total_marks && current_row < rows) { - if (storage.hasPrimaryKey()) + if (metadata_snapshot->hasPrimaryKey()) { for (size_t j = 0; j < primary_columns_num; ++j) { diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h index 149aeaa2f0d..2f849e7c895 100644 ---
a/src/Storages/MergeTree/IMergeTreeDataPartWriter.h +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.h @@ -63,6 +63,7 @@ public: IMergeTreeDataPartWriter( const MergeTreeData::DataPartPtr & data_part, const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot_, const std::vector<MergeTreeIndexPtr> & indices_to_recalc, const String & marks_file_extension, const CompressionCodecPtr & default_codec, @@ -119,6 +120,7 @@ protected: MergeTreeData::DataPartPtr data_part; String part_path; const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; NamesAndTypesList columns_list; const String marks_file_extension; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index a1487b3a1fe..ff38a21a15f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -173,11 +173,11 @@ MergeTreeData::MergeTreeData( const auto settings = getSettings(); /// NOTE: using the same columns list as is read when performing actual merges. - merging_params.check(getColumns().getAllPhysical()); + merging_params.check(metadata_.getColumns().getAllPhysical()); if (metadata_.sampling_key.definition_ast != nullptr) { - const auto & pk_sample_block = getPrimaryKey().sample_block; + const auto & pk_sample_block = metadata_.getPrimaryKey().sample_block; if (!pk_sample_block.has(metadata_.sampling_key.column_names[0]) && !attach && !settings->compatibility_allow_sampling_expression_not_in_primary_key) /// This is for backward compatibility. throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); @@ -410,7 +410,7 @@ ExpressionActionsPtr getCombinedIndicesExpression( ExpressionActionsPtr MergeTreeData::getPrimaryKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const { - return getCombinedIndicesExpression(getPrimaryKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); + return getCombinedIndicesExpression(metadata_snapshot->getPrimaryKey(), metadata_snapshot->getSecondaryIndices(), metadata_snapshot->getColumns(), global_context); } ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const StorageMetadataPtr & metadata_snapshot) const @@ -2915,11 +2915,12 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: return total_covered_parts; } -bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const +bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions( + const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const { const String column_name = node->getColumnName(); - for (const auto & name : getPrimaryKeyColumns()) + for (const auto & name : metadata_snapshot->getPrimaryKeyColumns()) if (column_name == name) return true; @@ -2929,7 +2930,7 @@ bool MergeTreeData::isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const A if (const auto * func = node->as<ASTFunction>()) if (func->arguments->children.size() == 1) - return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front()); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(func->arguments->children.front(), metadata_snapshot); return false; } @@ -2946,14 +2947,14 @@ bool MergeTreeData::mayBenefitFromIndexForIn( { for (const auto & item : left_in_operand_tuple->arguments->children) { - if (isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item)) + if
(isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(item, metadata_snapshot)) return true; for (const auto & index : metadata_snapshot->getSecondaryIndices()) if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(item)) return true; } /// The tuple itself may be part of the primary key, so check that as a last resort. - return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot); } else { @@ -2961,7 +2962,7 @@ if (index_wrapper_factory.get(index)->mayBenefitFromIndexForIn(left_in_operand)) return true; - return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand); + return isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(left_in_operand, metadata_snapshot); } } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 863b5ba1644..fdbe36d10ec 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -812,7 +812,7 @@ protected: DataPartsLock & data_parts_lock) const; /// Checks whether the column is in the primary key, possibly wrapped in a chain of functions with single argument. - bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node) const; + bool isPrimaryOrMinMaxKeyColumnPossiblyWrappedInFunctions(const ASTPtr & node, const StorageMetadataPtr & metadata_snapshot) const; /// Common part for |freezePartition()| and |freezeAll()|. using MatcherFn = std::function<bool(const String &)>; diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 02279fe3298..fad65e492c0 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -1604,7 +1604,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( if (mutating_stream == nullptr) throw Exception("Cannot mutate part columns with uninitialized mutations stream.
It's a bug", ErrorCodes::LOGICAL_ERROR); - if (data.hasPrimaryKey() || metadata_snapshot->hasSecondaryIndices()) + if (metadata_snapshot->hasPrimaryKey() || metadata_snapshot->hasSecondaryIndices()) mutating_stream = std::make_shared( std::make_shared(mutating_stream, data.getPrimaryKeyAndSkipIndicesExpression(metadata_snapshot))); diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index 32acc266e42..d45aa882b2a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -54,6 +54,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & writer_settings, @@ -68,8 +69,8 @@ IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartCompact::getWriter( { return *getColumnPosition(lhs.name) < *getColumnPosition(rhs.name); }); return std::make_unique( - shared_from_this(), ordered_columns_list, indices_to_recalc, - index_granularity_info.marks_file_extension, + shared_from_this(), ordered_columns_list, metadata_snapshot, + indices_to_recalc, index_granularity_info.marks_file_extension, default_codec, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index b4a2b5fa797..0b27dd53339 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -46,6 +46,7 @@ public: MergeTreeWriterPtr getWriter( const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index d96b61b4bea..e4901b1f74d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -53,13 +53,14 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( IMergeTreeDataPart::MergeTreeWriterPtr MergeTreeDataPartWide::getWriter( const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec, const MergeTreeWriterSettings & writer_settings, const MergeTreeIndexGranularity & computed_index_granularity) const { return std::make_unique( - shared_from_this(), columns_list, indices_to_recalc, + shared_from_this(), columns_list, metadata_snapshot, indices_to_recalc, index_granularity_info.marks_file_extension, default_codec, writer_settings, computed_index_granularity); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index ba9e0765510..144dfa86cfb 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -39,6 +39,7 @@ public: MergeTreeWriterPtr getWriter( const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector & indices_to_recalc, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & writer_settings, diff --git 
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 1a7a757c149..f7a3ad75cf5 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -3,19 +3,17 @@ namespace DB { - - MergeTreeDataPartWriterCompact::MergeTreeDataPartWriterCompact( const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_list_, + const StorageMetadataPtr & metadata_snapshot_, const std::vector<MergeTreeIndexPtr> & indices_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, - indices_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_, columns_list_, metadata_snapshot_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { using DataPart = MergeTreeDataPartCompact; String data_file_name = DataPart::DATA_FILE_NAME; diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h index 07caba94712..8183c038c4c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.h @@ -10,6 +10,7 @@ public: MergeTreeDataPartWriterCompact( const MergeTreeData::DataPartPtr & data_part, const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot_, const std::vector<MergeTreeIndexPtr> & indices_to_recalc, const String & marks_file_extension, const CompressionCodecPtr & default_codec, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 1ab10b55409..e71ea4d4b94 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -16,16 +16,16 @@ namespace MergeTreeDataPartWriterWide::MergeTreeDataPartWriterWide( const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_list_, + const StorageMetadataPtr & metadata_snapshot_, const std::vector<MergeTreeIndexPtr> & indices_to_recalc_, const String & marks_file_extension_, const CompressionCodecPtr & default_codec_, const MergeTreeWriterSettings & settings_, const MergeTreeIndexGranularity & index_granularity_) - : IMergeTreeDataPartWriter(data_part_, columns_list_, - indices_to_recalc_, marks_file_extension_, - default_codec_, settings_, index_granularity_) + : IMergeTreeDataPartWriter( + data_part_, columns_list_, metadata_snapshot_, indices_to_recalc_, marks_file_extension_, default_codec_, settings_, index_granularity_) { - const auto & columns = storage.getColumns(); + const auto & columns = metadata_snapshot->getColumns(); for (const auto & it : columns_list) addStreams(it.name, *it.type, columns.getCodecOrDefault(it.name, default_codec), settings.estimated_size); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index acd7f749d00..f5a9d17f63c 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -13,6 +13,7 @@ public: MergeTreeDataPartWriterWide( const MergeTreeData::DataPartPtr & data_part, const NamesAndTypesList & columns_list, + const StorageMetadataPtr & metadata_snapshot, const std::vector<MergeTreeIndexPtr> &
indices_to_recalc, const String & marks_file_extension, const CompressionCodecPtr & default_codec, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 58214bae5ca..2cc5fc7dd5b 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -100,7 +100,10 @@ static Block getBlockWithPartColumn(const MergeTreeData::DataPartsVector & parts size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( - const MergeTreeData::DataPartsVector & parts, const KeyCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPartsVector & parts, + const StorageMetadataPtr & metadata_snapshot, + const KeyCondition & key_condition, + const Settings & settings) const { size_t rows_count = 0; @@ -109,7 +112,7 @@ size_t MergeTreeDataSelectExecutor::getApproximateTotalRowsToRead( for (const auto & part : parts) { - MarkRanges ranges = markRangesFromPKRange(part, key_condition, settings); + MarkRanges ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings); /** In order to get a lower bound on the number of rows that match the condition on PK, * consider only guaranteed full marks. @@ -224,7 +227,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( data.check(real_column_names); const Settings & settings = context.getSettingsRef(); - const auto & primary_key = data.getPrimaryKey(); + const auto & primary_key = metadata_snapshot->getPrimaryKey(); Names primary_key_columns = primary_key.column_names; KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression); @@ -326,7 +329,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( /// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read). size_t approx_total_rows = 0; if (relative_sample_size > 1 || relative_sample_offset > 1) - approx_total_rows = getApproximateTotalRowsToRead(parts, key_condition, settings); + approx_total_rows = getApproximateTotalRowsToRead(parts, metadata_snapshot, key_condition, settings); if (relative_sample_size > 1) { @@ -565,8 +568,8 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( { RangesInDataPart ranges(part, part_index++); - if (data.hasPrimaryKey()) - ranges.ranges = markRangesFromPKRange(part, key_condition, settings); + if (metadata_snapshot->hasPrimaryKey()) + ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings); else { size_t total_marks_count = part->getMarksCount(); @@ -1297,7 +1300,10 @@ void MergeTreeDataSelectExecutor::createPositiveSignCondition( /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( - const MergeTreeData::DataPartPtr & part, const KeyCondition & key_condition, const Settings & settings) const + const MergeTreeData::DataPartPtr & part, + const StorageMetadataPtr & metadata_snapshot, + const KeyCondition & key_condition, + const Settings & settings) const { MarkRanges res; @@ -1335,7 +1341,7 @@ MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( std::function<void(size_t, size_t, FieldRef &)> create_field_ref; /// If there are no monotonic functions, there is no need to save block reference.
/// Passing explicit field to FieldRef allows to optimize ranges and shows better performance. - const auto & primary_key = data.getPrimaryKey(); + const auto & primary_key = metadata_snapshot->getPrimaryKey(); if (key_condition.hasMonotonicFunctionsChain()) { auto index_block = std::make_shared<Block>(); diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index 7811eb53b71..ba0613a832d 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -91,6 +91,7 @@ private: /// Get the approximate value (bottom estimate - only by full marks) of the number of rows falling under the index. size_t getApproximateTotalRowsToRead( const MergeTreeData::DataPartsVector & parts, + const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings) const; @@ -102,6 +103,7 @@ private: MarkRanges markRangesFromPKRange( const MergeTreeData::DataPartPtr & part, + const StorageMetadataPtr & metadata_snapshot, const KeyCondition & key_condition, const Settings & settings) const; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 61f99ac6d88..7e51bcff27d 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -31,15 +31,16 @@ MergeTreeWhereOptimizer::MergeTreeWhereOptimizer( SelectQueryInfo & query_info, const Context & context, const MergeTreeData & data, + const StorageMetadataPtr & metadata_snapshot, const Names & queried_columns_, Poco::Logger * log_) - : table_columns{ext::map(data.getColumns().getAllPhysical(), - [] (const NameAndTypePair & col) { return col.name; })}, - queried_columns{queried_columns_}, - block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)}, - log{log_} + : table_columns{ext::map( + metadata_snapshot->getColumns().getAllPhysical(), [](const NameAndTypePair & col) { return col.name; })} , queried_columns{queried_columns_} , block_with_constants{KeyCondition::getBlockWithConstants(query_info.query, query_info.syntax_analyzer_result, context)} , log{log_} { - const auto & primary_key = data.getPrimaryKey(); + const auto & primary_key = metadata_snapshot->getPrimaryKey(); if (!primary_key.column_names.empty()) first_primary_key_column = primary_key.column_names[0]; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index f9fdc01812b..866d0a8754e 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -16,6 +16,8 @@ namespace DB class ASTSelectQuery; class ASTFunction; class MergeTreeData; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; /** Identifies WHERE expressions that can be placed in PREWHERE by calculating respective * sizes of columns used in particular expression and identifying "good" conditions of @@ -31,6 +33,7 @@ public: SelectQueryInfo & query_info, const Context & context, const MergeTreeData & data, + const StorageMetadataPtr & metadata_snapshot, const Names & queried_columns_, Poco::Logger * log_); diff --git a/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/src/Storages/MergeTree/MergedBlockOutputStream.cpp index 9cbdc338367..e776a35f21f 100644 --- a/src/Storages/MergeTree/MergedBlockOutputStream.cpp +++
b/src/Storages/MergeTree/MergedBlockOutputStream.cpp @@ -59,7 +59,7 @@ MergedBlockOutputStream::MergedBlockOutputStream( volume->getDisk()->createDirectories(part_path); - writer = data_part->getWriter(columns_list, skip_indices, default_codec, writer_settings); + writer = data_part->getWriter(columns_list, metadata_snapshot, skip_indices, default_codec, writer_settings); writer->initPrimaryIndex(); writer->initSkipIndices(); } @@ -169,7 +169,7 @@ void MergedBlockOutputStream::writeImpl(const Block & block, const IColumn::Perm std::inserter(skip_indexes_column_names_set, skip_indexes_column_names_set.end())); Names skip_indexes_column_names(skip_indexes_column_names_set.begin(), skip_indexes_column_names_set.end()); - Block primary_key_block = getBlockAndPermute(block, storage.getPrimaryKeyColumns(), permutation); + Block primary_key_block = getBlockAndPermute(block, metadata_snapshot->getPrimaryKeyColumns(), permutation); Block skip_indexes_block = getBlockAndPermute(block, skip_indexes_column_names, permutation); writer->write(block, permutation, primary_key_block, skip_indexes_block); diff --git a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp index b74a8243437..1faadd0d720 100644 --- a/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp +++ b/src/Storages/MergeTree/MergedColumnOnlyOutputStream.cpp @@ -28,6 +28,7 @@ MergedColumnOnlyOutputStream::MergedColumnOnlyOutputStream( writer = data_part->getWriter( header.getNamesAndTypesList(), + metadata_snapshot_, indices_to_recalc, default_codec, std::move(writer_settings), diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index c3d91f4a5a9..cbb0a665b88 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -40,11 +40,11 @@ ReplicatedMergeTreeTableMetadata::ReplicatedMergeTreeTableMetadata(const MergeTr /// So rules in zookeeper metadata is following: /// - When we have only ORDER BY, than store it in "primary key:" row of /metadata /// - When we have both, than store PRIMARY KEY in "primary key:" row and ORDER BY in "sorting key:" row of /metadata - if (!data.isPrimaryKeyDefined()) + if (!metadata_snapshot->isPrimaryKeyDefined()) primary_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); else { - primary_key = formattedAST(data.getPrimaryKey().expression_list_ast); + primary_key = formattedAST(metadata_snapshot->getPrimaryKey().expression_list_ast); sorting_key = formattedAST(metadata_snapshot->getSortingKey().expression_list_ast); } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 6c5429fc556..404baa6677f 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -365,4 +365,32 @@ Names StorageInMemoryMetadata::getColumnsRequiredForSampling() const return {}; } +const KeyDescription & StorageInMemoryMetadata::getPrimaryKey() const +{ + return primary_key; +} + +bool StorageInMemoryMetadata::isPrimaryKeyDefined() const +{ + return primary_key.definition_ast != nullptr; +} + +bool StorageInMemoryMetadata::hasPrimaryKey() const +{ + return !primary_key.column_names.empty(); +} + +Names StorageInMemoryMetadata::getColumnsRequiredForPrimaryKey() const +{ + if (hasPrimaryKey()) + return primary_key.expression->getRequiredColumns(); + return {}; +} + +Names 
StorageInMemoryMetadata::getPrimaryKeyColumns() const +{ + if (!primary_key.column_names.empty()) + return primary_key.column_names; + return {}; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 1abea7d250c..51036403e1f 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -151,6 +151,21 @@ struct StorageInMemoryMetadata bool hasSamplingKey() const; /// Returns column names that need to be read to calculate sampling key. Names getColumnsRequiredForSampling() const; + + /// Returns structure with primary key. + const KeyDescription & getPrimaryKey() const; + /// Returns ASTExpressionList of primary key expression for storage or nullptr if there is none. + ASTPtr getPrimaryKeyAST() const { return primary_key.definition_ast; } + /// Storage has user-defined (in CREATE query) sorting key. + bool isPrimaryKeyDefined() const; + /// Storage has primary key (maybe part of some other key). It means, that + /// it contains at least one column. + bool hasPrimaryKey() const; + /// Returns column names that need to be read to calculate primary key. + Names getColumnsRequiredForPrimaryKey() const; + /// Returns columns names in sorting key specified by. For example: 'a', 'x + /// * y', 'toStartOfMonth(date)', etc. + Names getPrimaryKeyColumns() const; }; using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 0852a96e8ba..f8f40026940 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -384,7 +384,7 @@ protected: if (columns_mask[src_index++]) { assert(table != nullptr); - if ((expression_ptr = table->getPrimaryKey().expression_list_ast)) + if ((expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else res_columns[res_index++]->insertDefault(); From 7064a366e2638588aa8352ebe105c76b86bd9e92 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 15:40:27 +0300 Subject: [PATCH 120/318] Missed change for primary key --- src/Storages/System/StorageSystemColumns.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index f998dc27cab..83178870ba9 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -127,7 +127,7 @@ protected: cols_required_for_partition_key = metadata_snapshot->getColumnsRequiredForPartitionKey(); cols_required_for_sorting_key = metadata_snapshot->getColumnsRequiredForSortingKey(); - cols_required_for_primary_key = storage->getColumnsRequiredForPrimaryKey(); + cols_required_for_primary_key = metadata_snapshot->getColumnsRequiredForPrimaryKey(); cols_required_for_sampling = metadata_snapshot->getColumnsRequiredForSampling(); column_sizes = storage->getColumnSizes(); } From ed8f3b2fc42f73a82ca133240ff0642c990ab129 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 16:39:26 +0300 Subject: [PATCH 121/318] TTL in storage in memory metadata --- src/DataStreams/TTLBlockInputStream.cpp | 32 +++++++------ src/DataStreams/TTLBlockInputStream.h | 2 + src/Interpreters/InterpreterAlterQuery.cpp | 2 +- src/Interpreters/InterpreterOptimizeQuery.cpp | 3 +- src/Interpreters/MutationsInterpreter.cpp | 2 +- src/Storages/IStorage.cpp | 47 ------------------- src/Storages/IStorage.h | 27
+++-------- src/Storages/MergeTree/MergeTreeData.cpp | 6 ++- src/Storages/MergeTree/MergeTreeData.h | 20 ++++---- .../MergeTree/MergeTreeDataMergerMutator.cpp | 8 ++-- .../MergeTree/MergeTreeDataWriter.cpp | 10 ++-- src/Storages/StorageBuffer.cpp | 15 ++++-- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageInMemoryMetadata.h | 3 ++ src/Storages/StorageMaterializedView.cpp | 12 ++++- src/Storages/StorageMaterializedView.h | 8 +++- src/Storages/StorageMergeTree.cpp | 9 +++- src/Storages/StorageMergeTree.h | 8 +++- src/Storages/StorageReplicatedMergeTree.cpp | 9 +++- src/Storages/StorageReplicatedMergeTree.h | 2 +- 20 files changed, 109 insertions(+), 118 deletions(-) diff --git a/src/DataStreams/TTLBlockInputStream.cpp b/src/DataStreams/TTLBlockInputStream.cpp index c79abff98cd..65e01a73f95 100644 --- a/src/DataStreams/TTLBlockInputStream.cpp +++ b/src/DataStreams/TTLBlockInputStream.cpp @@ -20,10 +20,12 @@ namespace ErrorCodes TTLBlockInputStream::TTLBlockInputStream( const BlockInputStreamPtr & input_, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, time_t current_time_, bool force_) : storage(storage_) + , metadata_snapshot(metadata_snapshot_) , data_part(data_part_) , current_time(current_time_) , force(force_) @@ -34,11 +36,11 @@ TTLBlockInputStream::TTLBlockInputStream( children.push_back(input_); header = children.at(0)->getHeader(); - const auto & storage_columns = storage.getColumns(); + const auto & storage_columns = metadata_snapshot->getColumns(); const auto & column_defaults = storage_columns.getDefaults(); ASTPtr default_expr_list = std::make_shared<ASTExpressionList>(); - for (const auto & [name, _] : storage.getColumnTTLs()) + for (const auto & [name, _] : metadata_snapshot->getColumnTTLs()) { auto it = column_defaults.find(name); if (it != column_defaults.end()) @@ -65,13 +67,12 @@ TTLBlockInputStream::TTLBlockInputStream( if (!default_expr_list->children.empty()) { - auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze( - default_expr_list, storage.getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(storage.global_context).analyze(default_expr_list, metadata_snapshot->getColumns().getAllPhysical()); defaults_expression = ExpressionAnalyzer{default_expr_list, syntax_result, storage.global_context}.getActions(true); } - auto storage_rows_ttl = storage.getRowsTTL(); - if (storage.hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY) + auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); + if (metadata_snapshot->hasRowsTTL() && storage_rows_ttl.mode == TTLMode::GROUP_BY) { current_key_value.resize(storage_rows_ttl.group_by_keys.size()); @@ -106,14 +107,15 @@ bool TTLBlockInputStream::isTTLExpired(time_t ttl) const Block TTLBlockInputStream::readImpl() { /// Skip all data if table ttl is expired for part - auto storage_rows_ttl = storage.getRowsTTL(); - if (storage.hasRowsTTL() && !storage_rows_ttl.where_expression && - storage_rows_ttl.mode != TTLMode::GROUP_BY && isTTLExpired(old_ttl_infos.table_ttl.max)) + auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); + if (metadata_snapshot->hasRowsTTL() && !storage_rows_ttl.where_expression && storage_rows_ttl.mode != TTLMode::GROUP_BY + && isTTLExpired(old_ttl_infos.table_ttl.max)) { rows_removed = data_part->rows_count; return {}; } + Block block = children.at(0)->read(); if (!block) { @@ -127,7 +129,7 @@ Block TTLBlockInputStream::readImpl() return block; } - if (storage.hasRowsTTL() && (force ||
isTTLExpired(old_ttl_infos.table_ttl.min))) + if (metadata_snapshot->hasRowsTTL() && (force || isTTLExpired(old_ttl_infos.table_ttl.min))) removeRowsWithExpiredTableTTL(block); removeValuesWithExpiredColumnTTL(block); @@ -153,7 +155,7 @@ void TTLBlockInputStream::readSuffixImpl() void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) { - auto rows_ttl = storage.getRowsTTL(); + auto rows_ttl = metadata_snapshot->getRowsTTL(); rows_ttl.expression->execute(block); if (rows_ttl.where_expression) @@ -201,7 +203,7 @@ void TTLBlockInputStream::removeRowsWithExpiredTableTTL(Block & block) size_t rows_aggregated = 0; size_t current_key_start = 0; size_t rows_with_current_key = 0; - auto storage_rows_ttl = storage.getRowsTTL(); + auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); for (size_t i = 0; i < block.rows(); ++i) { UInt32 cur_ttl = getTimestampByIndex(ttl_column, i); @@ -278,7 +280,7 @@ void TTLBlockInputStream::finalizeAggregates(MutableColumns & result_columns) if (!agg_result.empty()) { auto aggregated_res = aggregator->convertToBlocks(agg_result, true, 1); - auto storage_rows_ttl = storage.getRowsTTL(); + auto storage_rows_ttl = metadata_snapshot->getRowsTTL(); for (auto & agg_block : aggregated_res) { for (const auto & it : storage_rows_ttl.set_parts) @@ -310,7 +312,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) } std::vector<String> columns_to_remove; - for (const auto & [name, ttl_entry] : storage.getColumnTTLs()) + for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) { /// If we read not all table columns. E.g. while mutation. if (!block.has(name)) @@ -371,7 +373,7 @@ void TTLBlockInputStream::removeValuesWithExpiredColumnTTL(Block & block) void TTLBlockInputStream::updateMovesTTL(Block & block) { std::vector<String> columns_to_remove; - for (const auto & ttl_entry : storage.getMoveTTLs()) + for (const auto & ttl_entry : metadata_snapshot->getMoveTTLs()) { auto & new_ttl_info = new_ttl_infos.moves_ttl[ttl_entry.result_column]; diff --git a/src/DataStreams/TTLBlockInputStream.h b/src/DataStreams/TTLBlockInputStream.h index 060306f7d2d..3f37f35426c 100644 --- a/src/DataStreams/TTLBlockInputStream.h +++ b/src/DataStreams/TTLBlockInputStream.h @@ -16,6 +16,7 @@ public: TTLBlockInputStream( const BlockInputStreamPtr & input_, const MergeTreeData & storage_, + const StorageMetadataPtr & metadata_snapshot_, const MergeTreeData::MutableDataPartPtr & data_part_, time_t current_time, bool force_ @@ -33,6 +34,7 @@ protected: private: const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; /// ttl_infos and empty_columns are updating while reading const MergeTreeData::MutableDataPartPtr & data_part; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 3736b482ddf..958291d5882 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -69,7 +69,7 @@ BlockIO InterpreterAlterQuery::execute() } else if (auto mut_command = MutationCommand::parse(command_ast)) { - if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !table->hasAnyTTL()) + if (mut_command->type == MutationCommand::MATERIALIZE_TTL && !metadata_snapshot->hasAnyTTL()) throw Exception("Cannot MATERIALIZE TTL as there is no TTL set for table " + table->getStorageID().getNameForLogs(), ErrorCodes::INCORRECT_QUERY);
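[Illustrative note, not part of the patch] Commit 121 gives the TTL accessors the same treatment, so the TTL-filtering stream is built against an explicit snapshot. A sketch matching the constructor shown above:

    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
    auto ttl_stream = std::make_shared<TTLBlockInputStream>(
        input, storage, metadata_snapshot, data_part, time(nullptr), /*force_=*/ false);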
diff --git a/src/Interpreters/InterpreterOptimizeQuery.cpp b/src/Interpreters/InterpreterOptimizeQuery.cpp index c47fe1160cf..680dd9b803b 100644 --- a/src/Interpreters/InterpreterOptimizeQuery.cpp +++ b/src/Interpreters/InterpreterOptimizeQuery.cpp @@ -26,7 +26,8 @@ BlockIO InterpreterOptimizeQuery::execute() auto table_id = context.resolveStorageID(ast, Context::ResolveOrdinary); StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); - table->optimize(query_ptr, ast.partition, ast.final, ast.deduplicate, context); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, context); return {}; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 1a38fcf40f3..694e114af7a 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -411,7 +411,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } else if (command.type == MutationCommand::MATERIALIZE_TTL) { - if (storage->hasRowsTTL()) + if (metadata_snapshot->hasRowsTTL()) { for (const auto & column : all_columns) dependencies.emplace(column.name, ColumnDependency::TTL_TARGET); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index f9b7f41f139..3741a06fc4a 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,53 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -TTLTableDescription IStorage::getTableTTLs() const -{ - std::lock_guard lock(ttl_mutex); - return metadata->table_ttl; -} - -bool IStorage::hasAnyTableTTL() const -{ - return hasAnyMoveTTL() || hasRowsTTL(); -} - -TTLColumnsDescription IStorage::getColumnTTLs() const -{ - std::lock_guard lock(ttl_mutex); - return metadata->column_ttls_by_name; -} - -bool IStorage::hasAnyColumnTTL() const -{ - std::lock_guard lock(ttl_mutex); - return !metadata->column_ttls_by_name.empty(); -} - -TTLDescription IStorage::getRowsTTL() const -{ - std::lock_guard lock(ttl_mutex); - return metadata->table_ttl.rows_ttl; -} - -bool IStorage::hasRowsTTL() const -{ - std::lock_guard lock(ttl_mutex); - return metadata->table_ttl.rows_ttl.expression != nullptr; -} - -TTLDescriptions IStorage::getMoveTTLs() const -{ - std::lock_guard lock(ttl_mutex); - return metadata->table_ttl.move_ttl; -} - -bool IStorage::hasAnyMoveTTL() const -{ - std::lock_guard lock(ttl_mutex); - return !metadata->table_ttl.move_ttl.empty(); -} - ASTPtr IStorage::getSettingsChanges() const { if (metadata->settings_changes) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 787b96c9197..4eea343db5d 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -129,8 +129,6 @@ public: /// Example is StorageSystemNumbers. virtual bool hasEvenlyDistributedRead() const { return false; } - /// Returns true if there is set table TTL, any column TTL or any move TTL. - bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); } /// Optional size information of each physical column. /// Currently it's only used by the MergeTree family for query optimizations. @@ -362,7 +360,13 @@ public: /** Perform any background work. For example, combining parts in a MergeTree type table. * Returns whether any work has been done.
*/ - virtual bool optimize(const ASTPtr & /*query*/, const ASTPtr & /*partition*/, bool /*final*/, bool /*deduplicate*/, const Context & /*context*/) + virtual bool optimize( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & /*partition*/, + bool /*final*/, + bool /*deduplicate*/, + const Context & /*context*/) { throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -430,23 +434,6 @@ public: /// Returns storage policy if storage supports it. virtual StoragePolicyPtr getStoragePolicy() const { return {}; } - /// Common tables TTLs (for rows and moves). - TTLTableDescription getTableTTLs() const; - bool hasAnyTableTTL() const; - - /// Separate TTLs for columns. - TTLColumnsDescription getColumnTTLs() const; - bool hasAnyColumnTTL() const; - - /// Just wrapper for table TTLs, return rows part of table TTLs. - TTLDescription getRowsTTL() const; - bool hasRowsTTL() const; - - /// Just wrapper for table TTLs, return moves (to disks or volumes) parts of - /// table TTL. - TTLDescriptions getMoveTTLs() const; - bool hasAnyMoveTTL() const; - /// If it is possible to quickly determine exact number of rows in the table at this moment of time, then return it. /// Used for: /// - Simple count() opimization diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ff38a21a15f..8c94325cd4b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2801,8 +2801,9 @@ MergeTreeData::selectTTLEntryForTTLInfos(const IMergeTreeDataPart::TTLInfos & tt { time_t max_max_ttl = 0; TTLDescriptions::const_iterator best_entry_it; + auto metadata_snapshot = getInMemoryMetadataPtr(); - const auto & move_ttl_entries = getMoveTTLs(); + const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs(); for (auto ttl_entry_it = move_ttl_entries.begin(); ttl_entry_it != move_ttl_entries.end(); ++ttl_entry_it) { auto ttl_info_it = ttl_infos.moves_ttl.find(ttl_entry_it->result_column); @@ -3235,11 +3236,12 @@ bool MergeTreeData::selectPartsAndMove() bool MergeTreeData::areBackgroundMovesNeeded() const { auto policy = getStoragePolicy(); + auto metadata_snapshot = getInMemoryMetadataPtr(); if (policy->getVolumes().size() > 1) return true; - return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && hasAnyMoveTTL(); + return policy->getVolumes().size() == 1 && policy->getVolumes()[0]->getDisks().size() > 1 && metadata_snapshot->hasAnyMoveTTL(); } bool MergeTreeData::movePartsToSpace(const DataPartsVector & parts, SpacePtr space) diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index fdbe36d10ec..af6bee4936c 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -607,14 +607,18 @@ public: static ReservationPtr tryReserveSpace(UInt64 expected_size, SpacePtr space); /// Reserves space at least 1MB preferring best destination according to `ttl_infos`. 
- ReservationPtr reserveSpacePreferringTTLRules(UInt64 expected_size, - const IMergeTreeDataPart::TTLInfos & ttl_infos, - time_t time_of_move, - size_t min_volume_index = 0) const; - ReservationPtr tryReserveSpacePreferringTTLRules(UInt64 expected_size, - const IMergeTreeDataPart::TTLInfos & ttl_infos, - time_t time_of_move, - size_t min_volume_index = 0) const; + ReservationPtr reserveSpacePreferringTTLRules( + UInt64 expected_size, + const IMergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move, + size_t min_volume_index = 0) const; + + ReservationPtr tryReserveSpacePreferringTTLRules( + UInt64 expected_size, + const IMergeTreeDataPart::TTLInfos & ttl_infos, + time_t time_of_move, + size_t min_volume_index = 0) const; + /// Choose disk with max available free space /// Reserves 0 bytes ReservationPtr makeEmptyReservationOnLargestDisk() { return getStoragePolicy()->makeEmptyReservationOnLargestDisk(); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index fad65e492c0..3024adafb27 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -801,7 +801,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, SizeLimits(), 0 /*limit_hint*/, Names()); if (need_remove_expired_values) - merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, new_data_part, time_of_merge, force_ttl); + merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl); if (metadata_snapshot->hasSecondaryIndices()) @@ -1576,7 +1576,7 @@ std::set<MergeTreeIndexPtr> MergeTreeDataMergerMutator::getIndicesToRecalculate( bool MergeTreeDataMergerMutator::shouldExecuteTTL(const StorageMetadataPtr & metadata_snapshot, const Names & columns, const MutationCommands & commands) const { - if (!data.hasAnyTTL()) + if (!metadata_snapshot->hasAnyTTL()) return false; for (const auto & command : commands) @@ -1609,7 +1609,7 @@ void MergeTreeDataMergerMutator::mutateAllPartColumns( std::make_shared<ExpressionBlockInputStream>(mutating_stream, data.getPrimaryKeyAndSkipIndicesExpression(metadata_snapshot))); if (need_remove_expired_values) - mutating_stream = std::make_shared<TTLBlockInputStream>(mutating_stream, data, new_data_part, time_of_mutation, true); + mutating_stream = std::make_shared<TTLBlockInputStream>(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); IMergeTreeDataPart::MinMaxIndex minmax_idx; @@ -1656,7 +1656,7 @@ void MergeTreeDataMergerMutator::mutateSomePartColumns( throw Exception("Cannot mutate part columns with uninitialized mutations stream.
It's a bug", ErrorCodes::LOGICAL_ERROR); if (need_remove_expired_values) - mutating_stream = std::make_shared(mutating_stream, data, new_data_part, time_of_mutation, true); + mutating_stream = std::make_shared(mutating_stream, data, metadata_snapshot, new_data_part, time_of_mutation, true); IMergedBlockOutputStream::WrittenOffsetColumns unused_written_offsets; MergedColumnOnlyOutputStream out( diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c31cfd3da6f..5974f366b66 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -230,11 +230,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa size_t expected_size = block.bytes(); DB::IMergeTreeDataPart::TTLInfos move_ttl_infos; - const auto & move_ttl_entries = data.getMoveTTLs(); + const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs(); for (const auto & ttl_entry : move_ttl_entries) updateTTL(ttl_entry, move_ttl_infos, move_ttl_infos.moves_ttl[ttl_entry.result_column], block, false); - NamesAndTypesList columns = data.getColumns().getAllPhysical().filter(block.getNames()); + NamesAndTypesList columns = metadata_snapshot->getColumns().getAllPhysical().filter(block.getNames()); ReservationPtr reservation = data.reserveSpacePreferringTTLRules(expected_size, move_ttl_infos, time(nullptr)); VolumePtr volume = data.getStoragePolicy()->getVolume(0); @@ -289,10 +289,10 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa ProfileEvents::increment(ProfileEvents::MergeTreeDataWriterBlocksAlreadySorted); } - if (data.hasRowsTTL()) - updateTTL(data.getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); + if (metadata_snapshot->hasRowsTTL()) + updateTTL(metadata_snapshot->getRowsTTL(), new_data_part->ttl_infos, new_data_part->ttl_infos.table_ttl, block, true); - for (const auto & [name, ttl_entry] : data.getColumnTTLs()) + for (const auto & [name, ttl_entry] : metadata_snapshot->getColumnTTLs()) updateTTL(ttl_entry, new_data_part->ttl_infos, new_data_part->ttl_infos.columns_ttl[name], block, true); new_data_part->ttl_infos.update(move_ttl_infos); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 5eaaf98d397..13b37980c56 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -484,7 +484,7 @@ void StorageBuffer::shutdown() try { - optimize(nullptr /*query*/, {} /*partition*/, false /*final*/, false /*deduplicate*/, global_context); + optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, global_context); } catch (...) { @@ -503,7 +503,13 @@ void StorageBuffer::shutdown() * * This kind of race condition make very hard to implement proper tests. 
*/ -bool StorageBuffer::optimize(const ASTPtr & /*query*/, const ASTPtr & partition, bool final, bool deduplicate, const Context & /*context*/) +bool StorageBuffer::optimize( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & /*context*/) { if (partition) throw Exception("Partition cannot be specified when optimizing table of type Buffer", ErrorCodes::NOT_IMPLEMENTED); @@ -793,11 +799,12 @@ void StorageBuffer::alter(const AlterCommands & params, const Context & context, auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); + auto metadata_snapshot = getInMemoryMetadataPtr(); /// So that no blocks of the old structure remain. - optimize({} /*query*/, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); + optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context); - StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata new_metadata = *metadata_snapshot; params.apply(new_metadata, context); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); setInMemoryMetadata(new_metadata); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index 403b6c53172..ceedbd25a0c 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -69,7 +69,7 @@ public: void startup() override; /// Flush all buffers into the subordinate table and stop background thread. void shutdown() override; - bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; + bool optimize(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; bool supportsSampling() const override { return true; } bool supportsPrewhere() const override diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 51036403e1f..cc6fcfbe083 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -86,6 +86,9 @@ struct StorageInMemoryMetadata const ConstraintsDescription & getConstraints() const; + /// Returns true if there is set table TTL, any column TTL or any move TTL. + bool hasAnyTTL() const { return hasAnyColumnTTL() || hasAnyTableTTL(); } + /// Common tables TTLs (for rows and moves). 
TTLTableDescription getTableTTLs() const; bool hasAnyTableTTL() const; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 4eba4d6a165..319e1631ae6 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -187,10 +187,18 @@ void StorageMaterializedView::checkStatementCanBeForwarded() const + "Execute the statement directly on it.", ErrorCodes::INCORRECT_QUERY); } -bool StorageMaterializedView::optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) +bool StorageMaterializedView::optimize( + const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & context) { checkStatementCanBeForwarded(); - return getTargetTable()->optimize(query, partition, final, deduplicate, context); + auto storage_ptr = getTargetTable(); + auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr(); + return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, context); } void StorageMaterializedView::alter( diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index ef895ff0165..6f462c2cccc 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -39,7 +39,13 @@ public: void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; - bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; + bool optimize( + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & context) override; void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index f259d74b9ea..40b3aeffb8a 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -693,7 +693,7 @@ bool StorageMergeTree::merge( { /// Force filter by TTL in 'OPTIMIZE ... FINAL' query to remove expired values from old parts /// without TTL infos or with outdated TTL infos, e.g. after 'ALTER ... MODIFY TTL' query. - bool force_ttl = (final && hasAnyTTL()); + bool force_ttl = (final && metadata_snapshot->hasAnyTTL()); new_part = merger_mutator.mergePartsToTemporaryPart( future_part, metadata_snapshot, *merge_entry, table_lock_holder, time(nullptr), @@ -965,7 +965,12 @@ void StorageMergeTree::clearOldMutations(bool truncate) } bool StorageMergeTree::optimize( - const ASTPtr & /*query*/, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) + const ASTPtr & /*query*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & context) { String disable_reason; if (!partition && final) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 4b6da58572b..69ee6714164 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -53,7 +53,13 @@ public: /** Perform the next step in combining the parts. 
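The mechanical part of this change, passing one snapshot through the whole call chain, reduces to roughly the following sketch (hypothetical types; the real signatures also carry query, partition, deduplicate and context):

    #include <memory>

    struct Metadata { /* columns, TTLs, select query, ... */ };
    using MetadataPtr = std::shared_ptr<const Metadata>;

    struct Storage
    {
        MetadataPtr metadata;   // replaced atomically by ALTER

        MetadataPtr getInMemoryMetadataPtr() const { return metadata; }

        // The snapshot travels as an argument, so a single OPTIMIZE observes one
        // consistent schema version even if ALTER swaps `metadata` midway through.
        bool optimize(const MetadataPtr & snapshot, bool final) const
        {
            (void)snapshot;   // the merge would read columns/TTLs from *snapshot only
            return final;
        }
    };

    int main()
    {
        Storage table{std::make_shared<const Metadata>()};
        auto snapshot = table.getInMemoryMetadataPtr();            // capture once
        return table.optimize(snapshot, /*final=*/ true) ? 0 : 1;  // pass down
    }

Note how StorageMaterializedView forwards the snapshot of its target table rather than its own, since the target table is what actually gets optimized.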
*/ - bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & context) override; + bool optimize( + const ASTPtr & query, + const StorageMetadataPtr & /*metadata_snapshot*/, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & context) override; void alterPartition( const ASTPtr & query, diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 061be8b2821..b0a7e550233 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3475,7 +3475,12 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/, bool StorageReplicatedMergeTree::optimize( - const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) + const ASTPtr & query, + const StorageMetadataPtr & metadata_snapshot, + const ASTPtr & partition, + bool final, + bool deduplicate, + const Context & query_context) { assertNotReadonly(); @@ -3498,7 +3503,7 @@ bool StorageReplicatedMergeTree::optimize( return false; }; - bool force_ttl = (final && hasAnyTTL()); + bool force_ttl = (final && metadata_snapshot->hasAnyTTL()); const auto storage_settings_ptr = getSettings(); if (!partition && final) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index fe1b052b717..c98fcb0ae3d 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -101,7 +101,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; - bool optimize(const ASTPtr & query, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) override; + bool optimize(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) override; void alter(const AlterCommands & params, const Context & query_context, TableStructureWriteLockHolder & table_lock_holder) override; From eaaef837426ac9dc61d7c8c257261f82ba896744 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 16:46:01 +0300 Subject: [PATCH 122/318] Settings changes in StorageInMemoryMetadata --- src/Storages/IStorage.cpp | 7 ------- src/Storages/IStorage.h | 5 ----- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++-- src/Storages/StorageInMemoryMetadata.cpp | 8 ++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 3741a06fc4a..6b3aafa1784 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,13 +319,6 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -ASTPtr IStorage::getSettingsChanges() const -{ - if (metadata->settings_changes) - return metadata->settings_changes->clone(); - return nullptr; -} - const SelectQueryDescription & IStorage::getSelectQuery() const { return metadata->select; diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4eea343db5d..d9e9aa247c2 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -138,11 +138,6 @@ public: public: /// thread-unsafe part. 
lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - - /// Storage settings - ASTPtr getSettingsChanges() const; - bool hasSettingsChanges() const { return metadata->settings_changes != nullptr; } - /// Select query for *View storages. const SelectQueryDescription & getSelectQuery() const; bool hasSelectQuery() const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 8c94325cd4b..115e0b78bf0 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1333,10 +1333,10 @@ void MergeTreeData::checkAlterIsPossible(const AlterCommands & commands, const S checkTTLExpressions(new_metadata, old_metadata); - if (hasSettingsChanges()) + if (old_metadata.hasSettingsChanges()) { - const auto current_changes = getSettingsChanges()->as().changes; + const auto current_changes = old_metadata.getSettingsChanges()->as().changes; const auto & new_changes = new_metadata.settings_changes->as().changes; for (const auto & changed_setting : new_changes) { diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 404baa6677f..5f8c83d6e14 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -393,4 +393,12 @@ Names StorageInMemoryMetadata::getPrimaryKeyColumns() const return primary_key.column_names; return {}; } + +ASTPtr StorageInMemoryMetadata::getSettingsChanges() const +{ + if (settings_changes) + return settings_changes->clone(); + return nullptr; +} + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index cc6fcfbe083..1de17d768ae 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -169,6 +169,10 @@ struct StorageInMemoryMetadata /// Returns columns names in sorting key specified by. For example: 'a', 'x /// * y', 'toStartOfMonth(date)', etc. 
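The getter deliberately clones: callers receive a private copy of the settings AST and cannot mutate the shared snapshot behind its back. In isolation (toy AST type, assumed shape):

    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    struct ASTSetQuery
    {
        std::vector<std::pair<std::string, std::string>> changes;
        std::shared_ptr<ASTSetQuery> clone() const { return std::make_shared<ASTSetQuery>(*this); }
    };

    struct Metadata
    {
        std::shared_ptr<ASTSetQuery> settings_changes;   // may be null

        bool hasSettingsChanges() const { return settings_changes != nullptr; }

        // Defensive copy: the snapshot itself stays immutable.
        std::shared_ptr<ASTSetQuery> getSettingsChanges() const
        {
            return settings_changes ? settings_changes->clone() : nullptr;
        }
    };

This matches the ALTER check above, where both the old and the new settings changes are now read through the metadata rather than through IStorage.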
Names getPrimaryKeyColumns() const; + + /// Storage settings + ASTPtr getSettingsChanges() const; + bool hasSettingsChanges() const { return settings_changes != nullptr; } }; using StorageMetadataPtr = std::shared_ptr; From 31abbe5dbd3b43c2968bb558c747598a6904f326 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 17:06:22 +0300 Subject: [PATCH 123/318] Select query in metadata --- src/DataStreams/PushingToViewsBlockOutputStream.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Storages/IStorage.cpp | 10 ---------- src/Storages/IStorage.h | 3 --- src/Storages/StorageInMemoryMetadata.cpp | 9 +++++++++ src/Storages/StorageInMemoryMetadata.h | 4 ++++ src/Storages/StorageMaterializedView.cpp | 11 +++++++---- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageView.h | 4 ++-- 9 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 2e02c26d38c..e6e368f78e9 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -79,7 +79,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); - query = materialized_view->getSelectQuery().inner_query; + query = dependent_metadata_snapshot->getSelectQuery().inner_query; std::unique_ptr insert = std::make_unique(); insert->table_id = inner_table_id; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 41b2abc33c6..509825e75e4 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -308,7 +308,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; if (view) - view->replaceWithSubquery(getSelectQuery(), view_table); + view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect( query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage), diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 6b3aafa1784..a67229d6231 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -319,14 +319,4 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } -const SelectQueryDescription & IStorage::getSelectQuery() const -{ - return metadata->select; -} - -bool IStorage::hasSelectQuery() const -{ - return metadata->select.select_query != nullptr; -} - } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index d9e9aa247c2..4b712853b53 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -138,9 +138,6 @@ public: public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - /// Select query for *View storages. 
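Reduced to its shape (hypothetical types, not the real declarations), this move is an accessor migrating from the storage facade onto the snapshot, so view machinery reads a frozen SELECT:

    struct SelectQueryDescription { const void * inner_query = nullptr; };

    struct Metadata
    {
        SelectQueryDescription select;

        const SelectQueryDescription & getSelectQuery() const { return select; }
        bool hasSelectQuery() const { return select.inner_query != nullptr; }
    };

    // before: query = materialized_view->getSelectQuery().inner_query;
    // after:  query = metadata_snapshot->getSelectQuery().inner_query;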
- const SelectQueryDescription & getSelectQuery() const; - bool hasSelectQuery() const; StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 5f8c83d6e14..3b72dd97089 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -400,5 +400,14 @@ ASTPtr StorageInMemoryMetadata::getSettingsChanges() const return settings_changes->clone(); return nullptr; } +const SelectQueryDescription & StorageInMemoryMetadata::getSelectQuery() const +{ + return select; +} + +bool StorageInMemoryMetadata::hasSelectQuery() const +{ + return select.select_query != nullptr; +} } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 1de17d768ae..efda4377dfc 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -173,6 +173,10 @@ struct StorageInMemoryMetadata /// Storage settings ASTPtr getSettingsChanges() const; bool hasSettingsChanges() const { return settings_changes != nullptr; } + + /// Select query for *View storages. + const SelectQueryDescription & getSelectQuery() const; + bool hasSelectQuery() const; }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 319e1631ae6..3d3137fe1a6 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -165,7 +165,7 @@ static void executeDropQuery(ASTDropQuery::Kind kind, Context & global_context, void StorageMaterializedView::drop() { auto table_id = getStorageID(); - const auto & select_query = getSelectQuery(); + const auto & select_query = getInMemoryMetadataPtr()->getSelectQuery(); if (!select_query.select_table_id.empty()) DatabaseCatalog::instance().removeDependency(select_query.select_table_id, table_id); @@ -209,13 +209,14 @@ void StorageMaterializedView::alter( lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); + StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); /// start modify query if (context.getSettingsRef().allow_experimental_alter_materialized_view_structure) { const auto & new_select = new_metadata.select; - const auto & old_select = getSelectQuery(); + const auto & old_select = old_metadata.getSelectQuery(); DatabaseCatalog::instance().updateDependency(old_select.select_table_id, table_id, new_select.select_table_id, table_id); @@ -268,6 +269,7 @@ void StorageMaterializedView::mutate(const MutationCommands & commands, const Co void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) { auto old_table_id = getStorageID(); + auto metadata_snapshot = getInMemoryMetadataPtr(); bool from_atomic_to_atomic_database = old_table_id.hasUUID() && new_table_id.hasUUID(); if (has_inner_table && tryGetTargetTable() && !from_atomic_to_atomic_database) @@ -293,14 +295,15 @@ void StorageMaterializedView::renameInMemory(const StorageID & new_table_id) } IStorage::renameInMemory(new_table_id); - const auto & select_query = getSelectQuery(); + const auto & select_query = metadata_snapshot->getSelectQuery(); // TODO Actually we don't need to update dependency if MV has UUID, but 
then db and table name will be outdated DatabaseCatalog::instance().updateDependency(select_query.select_table_id, old_table_id, select_query.select_table_id, getStorageID()); } void StorageMaterializedView::shutdown() { - const auto & select_query = getSelectQuery(); + auto metadata_snapshot = getInMemoryMetadataPtr(); + const auto & select_query = metadata_snapshot->getSelectQuery(); /// Make sure the dependency is removed after DETACH TABLE if (!select_query.select_table_id.empty()) DatabaseCatalog::instance().removeDependency(select_query.select_table_id, getStorageID()); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 7e49580d6c2..006b1b3caec 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -63,7 +63,7 @@ Pipes StorageView::read( { Pipes pipes; - ASTPtr current_inner_query = getSelectQuery().inner_query; + ASTPtr current_inner_query = metadata_snapshot->getSelectQuery().inner_query; if (query_info.view_query) { diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 143ed3c06c4..9de1f3f0bd8 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -30,9 +30,9 @@ public: size_t max_block_size, unsigned num_streams) override; - void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name) const + void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) const { - replaceWithSubquery(select_query, getSelectQuery().inner_query->clone(), view_name); + replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); } static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); From 33c27de54d535fdc3f0445db55638025644cfbad Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 17:32:25 +0300 Subject: [PATCH 124/318] Check methods in metadata --- src/Storages/IStorage.cpp | 170 ----------------- src/Storages/IStorage.h | 15 -- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../ReplicatedMergeTreeBlockOutputStream.cpp | 2 +- src/Storages/StorageBuffer.cpp | 2 +- src/Storages/StorageGenerateRandom.cpp | 4 +- src/Storages/StorageInMemoryMetadata.cpp | 172 ++++++++++++++++++ src/Storages/StorageInMemoryMetadata.h | 15 ++ src/Storages/StorageJoin.cpp | 2 +- src/Storages/StorageLog.cpp | 6 +- src/Storages/StorageMemory.cpp | 4 +- src/Storages/StorageMySQL.cpp | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageTinyLog.cpp | 4 +- src/Storages/StorageValues.cpp | 4 +- src/Storages/StorageXDBC.cpp | 2 +- src/Storages/System/IStorageSystemOneBlock.h | 2 +- src/Storages/System/StorageSystemColumns.cpp | 2 +- src/Storages/System/StorageSystemDisks.cpp | 2 +- src/Storages/System/StorageSystemNumbers.cpp | 4 +- src/Storages/System/StorageSystemOne.cpp | 4 +- .../System/StorageSystemPartsBase.cpp | 6 +- src/Storages/System/StorageSystemPartsBase.h | 4 +- src/Storages/System/StorageSystemReplicas.cpp | 2 +- .../System/StorageSystemStoragePolicies.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/System/StorageSystemZeros.cpp | 4 +- 28 files changed, 223 insertions(+), 221 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index a67229d6231..38fdaa832bd 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -37,176 +37,6 @@ const ColumnsDescription & IStorage::getColumns() const return metadata->columns; } 
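The check() bodies below move, almost verbatim, from IStorage into StorageInMemoryMetadata, with the storage's virtual columns becoming an explicit parameter. Stripped of the dense-hash containers, the name validation they implement is roughly this (simplified sketch using standard containers instead of google::dense_hash_*):

    #include <stdexcept>
    #include <string>
    #include <unordered_set>
    #include <vector>

    void checkColumnNames(const std::vector<std::string> & requested,
                          const std::unordered_set<std::string> & physical,
                          const std::unordered_set<std::string> & virtuals)
    {
        if (requested.empty())
            throw std::invalid_argument("Empty list of columns queried");

        std::unordered_set<std::string> seen;
        for (const auto & name : requested)
        {
            if (physical.count(name) == 0 && virtuals.count(name) == 0)
                throw std::invalid_argument("There is no column with name " + name);
            if (!seen.insert(name).second)
                throw std::invalid_argument("Column " + name + " queried more than once");
        }
    }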
-namespace -{ -#if !defined(ARCADIA_BUILD) - using NamesAndTypesMap = google::dense_hash_map; - using UniqueStrings = google::dense_hash_set; -#else - using NamesAndTypesMap = google::sparsehash::dense_hash_map; - using UniqueStrings = google::sparsehash::dense_hash_set; -#endif - - String listOfColumns(const NamesAndTypesList & available_columns) - { - std::stringstream ss; - for (auto it = available_columns.begin(); it != available_columns.end(); ++it) - { - if (it != available_columns.begin()) - ss << ", "; - ss << it->name; - } - return ss.str(); - } - - NamesAndTypesMap getColumnsMap(const NamesAndTypesList & columns) - { - NamesAndTypesMap res; - res.set_empty_key(StringRef()); - - for (const auto & column : columns) - res.insert({column.name, column.type.get()}); - - return res; - } - - UniqueStrings initUniqueStrings() - { - UniqueStrings strings; - strings.set_empty_key(StringRef()); - return strings; - } -} - -void IStorage::check(const Names & column_names, bool include_virtuals) const -{ - NamesAndTypesList available_columns = getColumns().getAllPhysical(); - if (include_virtuals) - { - auto virtuals = getVirtuals(); - available_columns.insert(available_columns.end(), virtuals.begin(), virtuals.end()); - } - - const String list_of_columns = listOfColumns(available_columns); - - if (column_names.empty()) - throw Exception("Empty list of columns queried. There are columns: " + list_of_columns, ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); - - const auto columns_map = getColumnsMap(available_columns); - - auto unique_names = initUniqueStrings(); - for (const auto & name : column_names) - { - if (columns_map.end() == columns_map.find(name)) - throw Exception( - "There is no column with name " + backQuote(name) + " in table " + getStorageID().getNameForLogs() + ". There are columns: " + list_of_columns, - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - - if (unique_names.end() != unique_names.find(name)) - throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); - unique_names.insert(name); - } -} - -void IStorage::check(const NamesAndTypesList & provided_columns) const -{ - const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); - const auto columns_map = getColumnsMap(available_columns); - - auto unique_names = initUniqueStrings(); - for (const NameAndTypePair & column : provided_columns) - { - auto it = columns_map.find(column.name); - if (columns_map.end() == it) - throw Exception( - "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - - if (!column.type->equals(*it->second)) - throw Exception( - "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type " - + column.type->getName(), - ErrorCodes::TYPE_MISMATCH); - - if (unique_names.end() != unique_names.find(column.name)) - throw Exception("Column " + column.name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); - unique_names.insert(column.name); - } -} - -void IStorage::check(const NamesAndTypesList & provided_columns, const Names & column_names) const -{ - const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); - const auto available_columns_map = getColumnsMap(available_columns); - const auto & provided_columns_map = getColumnsMap(provided_columns); - - if (column_names.empty()) - throw Exception( - "Empty list of columns queried. 
There are columns: " + listOfColumns(available_columns), - ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); - - auto unique_names = initUniqueStrings(); - for (const String & name : column_names) - { - auto it = provided_columns_map.find(name); - if (provided_columns_map.end() == it) - continue; - - auto jt = available_columns_map.find(name); - if (available_columns_map.end() == jt) - throw Exception( - "There is no column with name " + name + ". There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - - if (!it->second->equals(*jt->second)) - throw Exception( - "Type mismatch for column " + name + ". Column has type " + jt->second->getName() + ", got type " + it->second->getName(), - ErrorCodes::TYPE_MISMATCH); - - if (unique_names.end() != unique_names.find(name)) - throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); - unique_names.insert(name); - } -} - -void IStorage::check(const Block & block, bool need_all) const -{ - const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); - const auto columns_map = getColumnsMap(available_columns); - - NameSet names_in_block; - - block.checkNumberOfRows(); - - for (const auto & column : block) - { - if (names_in_block.count(column.name)) - throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN); - - names_in_block.insert(column.name); - - auto it = columns_map.find(column.name); - if (columns_map.end() == it) - throw Exception( - "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns), - ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - - if (!column.type->equals(*it->second)) - throw Exception( - "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type " - + column.type->getName(), - ErrorCodes::TYPE_MISMATCH); - } - - if (need_all && names_in_block.size() < columns_map.size()) - { - for (const auto & available_column : available_columns) - { - if (!names_in_block.count(available_column.name)) - throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); - } - } -} bool IStorage::isVirtualColumn(const String & column_name) const { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 4b712853b53..bb4bf2ed09b 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -138,25 +138,10 @@ public: public: /// thread-unsafe part. lockStructure must be acquired const ColumnsDescription & getColumns() const; /// returns combined set of columns - StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } - /// Verify that all the requested names are in the table and are set correctly: - /// list of names is not empty and the names do not repeat. - void check(const Names & column_names, bool include_virtuals = false) const; - - /// Check that all the requested names are in the table and have the correct types. - void check(const NamesAndTypesList & columns) const; - - /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types. 
- void check(const NamesAndTypesList & columns, const Names & column_names) const; - - /// Check that the data block contains all the columns of the table with the correct types, - /// contains only the columns of the table, and all the columns are different. - /// If |need_all| is set, then checks that all the columns of the table are in the block. - void check(const Block & block, bool need_all = false) const; /// Return list of virtual columns (like _part, _table, etc). In the vast /// majority of cases virtual columns are static constant part of Storage diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 2cc5fc7dd5b..b72c46afca3 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -224,7 +224,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::multiset part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); - data.check(real_column_names); + metadata_snapshot->check(real_column_names, data.getVirtuals()); const Settings & settings = context.getSettingsRef(); const auto & primary_key = metadata_snapshot->getPrimaryKey(); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5974f366b66..099480aca2f 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -138,7 +138,7 @@ BlocksWithPartition MergeTreeDataWriter::splitBlockIntoParts(const Block & block if (!block || !block.rows()) return result; - data.check(block, true); + metadata_snapshot->check(block, true); if (!metadata_snapshot->hasPartitionKey()) /// Table is not partitioned. { diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 13df5ef23f1..bdefc5f1b14 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -211,7 +211,7 @@ void ReplicatedMergeTreeBlockOutputStream::writeExistingPart(MergeTreeData::Muta void ReplicatedMergeTreeBlockOutputStream::commitPart( zkutil::ZooKeeperPtr & zookeeper, MergeTreeData::MutableDataPartPtr & part, const String & block_id) { - storage.check(part->getColumns()); + metadata_snapshot->check(part->getColumns()); assertSessionIsNotExpired(zookeeper); /// Obtain incremental block number and lock it. The lock holds our intention to add the block to the filesystem. diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 13b37980c56..2ce258a2d25 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -342,7 +342,7 @@ public: return; // Check table structure. 
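The call sites change shape accordingly: the snapshot does not know the storage's virtual columns (_part, _table, _shard_num, ...), so they are now passed in explicitly. A toy version of the new contract (hypothetical Metadata type, simplified lookup):

    #include <algorithm>
    #include <stdexcept>
    #include <string>
    #include <vector>

    using Names = std::vector<std::string>;

    struct Metadata
    {
        Names physical_columns;

        // Virtuals are a property of the storage, not of the schema,
        // hence the extra argument.
        void check(const Names & requested, const Names & virtuals) const
        {
            for (const auto & name : requested)
            {
                bool known =
                    std::find(physical_columns.begin(), physical_columns.end(), name) != physical_columns.end()
                    || std::find(virtuals.begin(), virtuals.end(), name) != virtuals.end();
                if (!known)
                    throw std::runtime_error("There is no column with name " + name);
            }
        }
    };

    // data.check(real_column_names)  becomes
    // metadata_snapshot->check(real_column_names, data.getVirtuals());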
- storage.check(block, true); + metadata_snapshot->check(block, true); size_t rows = block.rows(); if (!rows) diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index f1d97a4e5c4..bcebeec09dd 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -429,14 +429,14 @@ void registerStorageGenerateRandom(StorageFactory & factory) Pipes StorageGenerateRandom::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { - check(column_names, true); + metadata_snapshot->check(column_names, getVirtuals()); Pipes pipes; pipes.reserve(num_streams); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 3b72dd97089..a394e196eac 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -1,6 +1,10 @@ #include +#include +#include #include +#include + namespace DB { @@ -410,4 +414,172 @@ bool StorageInMemoryMetadata::hasSelectQuery() const return select.select_query != nullptr; } +namespace +{ +#if !defined(ARCADIA_BUILD) + using NamesAndTypesMap = google::dense_hash_map; + using UniqueStrings = google::dense_hash_set; +#else + using NamesAndTypesMap = google::sparsehash::dense_hash_map; + using UniqueStrings = google::sparsehash::dense_hash_set; +#endif + + String listOfColumns(const NamesAndTypesList & available_columns) + { + std::stringstream ss; + for (auto it = available_columns.begin(); it != available_columns.end(); ++it) + { + if (it != available_columns.begin()) + ss << ", "; + ss << it->name; + } + return ss.str(); + } + + NamesAndTypesMap getColumnsMap(const NamesAndTypesList & columns) + { + NamesAndTypesMap res; + res.set_empty_key(StringRef()); + + for (const auto & column : columns) + res.insert({column.name, column.type.get()}); + + return res; + } + + UniqueStrings initUniqueStrings() + { + UniqueStrings strings; + strings.set_empty_key(StringRef()); + return strings; + } +} + +void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals) const +{ + NamesAndTypesList available_columns = getColumns().getAllPhysical(); + available_columns.insert(available_columns.end(), virtuals.begin(), virtuals.end()); + + const String list_of_columns = listOfColumns(available_columns); + + if (column_names.empty()) + throw Exception("Empty list of columns queried. There are columns: " + list_of_columns, ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); + + const auto columns_map = getColumnsMap(available_columns); + + auto unique_names = initUniqueStrings(); + for (const auto & name : column_names) + { + if (columns_map.end() == columns_map.find(name)) + throw Exception( + "There is no column with name " + backQuote(name) + " in table " + /* TODO alesap getStorageID().getNameForLogs() +*/ ". 
There are columns: " + list_of_columns, + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + if (unique_names.end() != unique_names.find(name)) + throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + unique_names.insert(name); + } +} + +void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns) const +{ + const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); + const auto columns_map = getColumnsMap(available_columns); + + auto unique_names = initUniqueStrings(); + for (const NameAndTypePair & column : provided_columns) + { + auto it = columns_map.find(column.name); + if (columns_map.end() == it) + throw Exception( + "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns), + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + if (!column.type->equals(*it->second)) + throw Exception( + "Type mismatch for column " + column.name + ". Column has type " + it->second->getName() + ", got type " + + column.type->getName(), + ErrorCodes::TYPE_MISMATCH); + + if (unique_names.end() != unique_names.find(column.name)) + throw Exception("Column " + column.name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + unique_names.insert(column.name); + } +} + +void StorageInMemoryMetadata::check(const NamesAndTypesList & provided_columns, const Names & column_names) const +{ + const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); + const auto available_columns_map = getColumnsMap(available_columns); + const auto & provided_columns_map = getColumnsMap(provided_columns); + + if (column_names.empty()) + throw Exception( + "Empty list of columns queried. There are columns: " + listOfColumns(available_columns), + ErrorCodes::EMPTY_LIST_OF_COLUMNS_QUERIED); + + auto unique_names = initUniqueStrings(); + for (const String & name : column_names) + { + auto it = provided_columns_map.find(name); + if (provided_columns_map.end() == it) + continue; + + auto jt = available_columns_map.find(name); + if (available_columns_map.end() == jt) + throw Exception( + "There is no column with name " + name + ". There are columns: " + listOfColumns(available_columns), + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + if (!it->second->equals(*jt->second)) + throw Exception( + "Type mismatch for column " + name + ". Column has type " + jt->second->getName() + ", got type " + it->second->getName(), + ErrorCodes::TYPE_MISMATCH); + + if (unique_names.end() != unique_names.find(name)) + throw Exception("Column " + name + " queried more than once", ErrorCodes::COLUMN_QUERIED_MORE_THAN_ONCE); + unique_names.insert(name); + } +} + +void StorageInMemoryMetadata::check(const Block & block, bool need_all) const +{ + const NamesAndTypesList & available_columns = getColumns().getAllPhysical(); + const auto columns_map = getColumnsMap(available_columns); + + NameSet names_in_block; + + block.checkNumberOfRows(); + + for (const auto & column : block) + { + if (names_in_block.count(column.name)) + throw Exception("Duplicate column " + column.name + " in block", ErrorCodes::DUPLICATE_COLUMN); + + names_in_block.insert(column.name); + + auto it = columns_map.find(column.name); + if (columns_map.end() == it) + throw Exception( + "There is no column with name " + column.name + ". There are columns: " + listOfColumns(available_columns), + ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + + if (!column.type->equals(*it->second)) + throw Exception( + "Type mismatch for column " + column.name + ". 
Column has type " + it->second->getName() + ", got type " + + column.type->getName(), + ErrorCodes::TYPE_MISMATCH); + } + + if (need_all && names_in_block.size() < columns_map.size()) + { + for (const auto & available_column : available_columns) + { + if (!names_in_block.count(available_column.name)) + throw Exception("Expected column " + available_column.name, ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); + } + } +} + + } diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index efda4377dfc..e4755bb0464 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -177,6 +177,21 @@ struct StorageInMemoryMetadata /// Select query for *View storages. const SelectQueryDescription & getSelectQuery() const; bool hasSelectQuery() const; + + /// Verify that all the requested names are in the table and are set correctly: + /// list of names is not empty and the names do not repeat. + void check(const Names & column_names, const NamesAndTypesList & virtuals) const; + + /// Check that all the requested names are in the table and have the correct types. + void check(const NamesAndTypesList & columns) const; + + /// Check that all names from the intersection of `names` and `columns` are in the table and have the same types. + void check(const NamesAndTypesList & columns, const Names & column_names) const; + + /// Check that the data block contains all the columns of the table with the correct types, + /// contains only the columns of the table, and all the columns are different. + /// If |need_all| is set, then checks that all the columns of the table are in the block. + void check(const Block & block, bool need_all = false) const; }; using StorageMetadataPtr = std::shared_ptr; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 68b974c0dde..300ab400a46 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -446,7 +446,7 @@ Pipes StorageJoin::read( size_t max_block_size, unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); Pipes pipes; pipes.emplace_back(std::make_shared(*join, max_block_size, metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 79cc3e5bf68..fcae9c9aa82 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -276,7 +276,7 @@ void LogSource::readData(const String & name, const IDataType & type, IColumn & void LogBlockOutputStream::write(const Block & block) { - storage.check(block, true); + metadata_snapshot->check(block, true); /// The set of written offset columns so that you do not write shared offsets of columns for nested structures multiple times WrittenStreams written_streams; @@ -580,14 +580,14 @@ const StorageLog::Marks & StorageLog::getMarksWithRealRowCount() const Pipes StorageLog::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & context, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); loadMarks(); NamesAndTypesList all_columns = Nested::collect(getColumns().getAllPhysical().addTypes(column_names)); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 442c5a3d67b..f9ef3cfcc98 100644 
--- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -81,7 +81,7 @@ public: void write(const Block & block) override { - storage.check(block, true); + metadata_snapshot->check(block, true); std::lock_guard lock(storage.mutex); storage.data.push_back(block); } @@ -110,7 +110,7 @@ Pipes StorageMemory::read( size_t /*max_block_size*/, unsigned num_streams) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); std::lock_guard lock(mutex); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index b1262771d21..3e9b48e976b 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -72,7 +72,7 @@ Pipes StorageMySQL::read( size_t max_block_size_, unsigned) { - check(column_names_); + metadata_snapshot->check(column_names_, getVirtuals()); String query = transformQueryForExternalDatabase( query_info_, metadata_snapshot->getColumns().getOrdinary(), diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 4d9f08a60b7..407c9b164ff 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -276,7 +276,7 @@ Pipes StorageStripeLog::read( { std::shared_lock lock(rwlock); - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); NameSet column_names_set(column_names.begin(), column_names.end()); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index ba524c7761e..4015d8ca574 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -309,7 +309,7 @@ void TinyLogBlockOutputStream::writeSuffix() void TinyLogBlockOutputStream::write(const Block & block) { - storage.check(block, true); + metadata_snapshot->check(block, true); /// The set of written offset columns so that you do not write shared columns for nested structures multiple times WrittenStreams written_streams; @@ -402,7 +402,7 @@ Pipes StorageTinyLog::read( const size_t max_block_size, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); Pipes pipes; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index bb29b4a0932..063cd3d5224 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -23,14 +23,14 @@ StorageValues::StorageValues( Pipes StorageValues::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t /*max_block_size*/, unsigned /*num_streams*/) { - check(column_names, true); + metadata_snapshot->check(column_names, getVirtuals()); Pipes pipes; diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index c3c62ea1f0a..ab8b37db7db 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -90,7 +90,7 @@ Pipes StorageXDBC::read( size_t max_block_size, unsigned num_streams) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); bridge_helper->startBridgeSync(); return IStorageURLBase::read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index de7e1a0e933..b3a2a6fe53b 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -37,7 
+37,7 @@ public: size_t /*max_block_size*/, unsigned /*num_streams*/) override { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); Block sample_block = metadata_snapshot->getSampleBlock(); MutableColumns res_columns = sample_block.cloneEmptyColumns(); diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 83178870ba9..319ef257d6d 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -249,7 +249,7 @@ Pipes StorageSystemColumns::read( const size_t max_block_size, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); /// Create a mask of what columns are needed in the result. diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index d13ea29804d..fbcdd78988a 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -35,7 +35,7 @@ Pipes StorageSystemDisks::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); MutableColumnPtr col_name = ColumnString::create(); MutableColumnPtr col_path = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index fd7e04cfb1f..50921c53fb6 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -125,14 +125,14 @@ StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool mult Pipes StorageSystemNumbers::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); if (limit && *limit < max_block_size) { diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index af736c215b5..20d61d5da1b 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -22,14 +22,14 @@ StorageSystemOne::StorageSystemOne(const std::string & name_) Pipes StorageSystemOne::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); Block header{ColumnWithTypeAndName( DataTypeUInt8().createColumn(), diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 4f99e1e8c6a..b998b60c02d 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -26,7 +26,7 @@ namespace ErrorCodes extern const int TABLE_IS_DROPPED; } -bool StorageSystemPartsBase::hasStateColumn(const Names & column_names) const +bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const { bool has_state_column = false; Names real_column_names; @@ -41,7 +41,7 @@ bool StorageSystemPartsBase::hasStateColumn(const Names & 
column_names) const /// Do not check if only _state column is requested if (!(has_state_column && real_column_names.empty())) - check(real_column_names); + metadata_snapshot->check(real_column_names, {}); return has_state_column; } @@ -232,7 +232,7 @@ Pipes StorageSystemPartsBase::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - bool has_state_column = hasStateColumn(column_names); + bool has_state_column = hasStateColumn(column_names, metadata_snapshot); StoragesInfoStream stream(query_info, context); diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index a46cecec9dd..8af1f46d8a7 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -57,7 +57,7 @@ class StorageSystemPartsBase : public IStorage public: Pipes read( const Names & column_names, - const StorageMetadataPtr & metadata_, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -67,7 +67,7 @@ public: NamesAndTypesList getVirtuals() const override; private: - bool hasStateColumn(const Names & column_names) const; + bool hasStateColumn(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const; protected: const FormatSettings format_settings; diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 8fb6a89ddd1..f79e9138500 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -66,7 +66,7 @@ Pipes StorageSystemReplicas::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); const auto access = context.getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index 44252a788b9..a6092a28a47 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -39,7 +39,7 @@ Pipes StorageSystemStoragePolicies::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); MutableColumnPtr col_policy_name = ColumnString::create(); MutableColumnPtr col_volume_name = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index f8f40026940..84635acb887 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -456,7 +456,7 @@ Pipes StorageSystemTables::read( const size_t max_block_size, const unsigned /*num_streams*/) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); /// Create a mask of what columns are needed in the result. 
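hasStateColumn above shows the interplay well: the virtual `_state` column is peeled off the request, and only the remainder is validated against the snapshot. Reusing the toy Metadata type from the earlier sketch (same assumptions apply):

    using Names = std::vector<std::string>;

    bool hasStateColumn(const Names & column_names, const Metadata & snapshot)
    {
        bool has_state = false;
        Names real_names;
        for (const auto & name : column_names)
        {
            if (name == "_state")
                has_state = true;
            else
                real_names.push_back(name);
        }
        // Skip validation when only _state itself was requested.
        if (!(has_state && real_names.empty()))
            snapshot.check(real_names, /*virtuals=*/ Names{});
        return has_state;
    }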
diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp index 2bc53b5093e..d325840091e 100644 --- a/src/Storages/System/StorageSystemZeros.cpp +++ b/src/Storages/System/StorageSystemZeros.cpp @@ -92,14 +92,14 @@ StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multith Pipes StorageSystemZeros::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo &, const Context & /*context*/, QueryProcessingStage::Enum /*processed_stage*/, size_t max_block_size, unsigned num_streams) { - check(column_names); + metadata_snapshot->check(column_names, getVirtuals()); bool use_multiple_streams = multithreaded; From ef8781cce77dddff57f44aaa1005b0f88e30dcdf Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 17:37:21 +0300 Subject: [PATCH 125/318] Better getVirtuals method --- src/Storages/IStorage.cpp | 4 ++-- src/Storages/IStorage.h | 2 +- src/Storages/StorageDistributed.cpp | 4 ++-- src/Storages/StorageMerge.cpp | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 38fdaa832bd..884982c93b9 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -38,10 +38,10 @@ const ColumnsDescription & IStorage::getColumns() const } -bool IStorage::isVirtualColumn(const String & column_name) const +bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// Virtual column maybe overriden by real column - return !getColumns().has(column_name) && getVirtuals().contains(column_name); + return !metadata_snapshot->getColumns().has(column_name) && getVirtuals().contains(column_name); } RWLockImpl::LockHolder IStorage::tryLockTimed( diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index bb4bf2ed09b..a125c6f8310 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -159,7 +159,7 @@ protected: /// Returns whether the column is virtual - by default all columns are real. /// Initially reserved virtual column name may be shadowed by real column. - bool isVirtualColumn(const String & column_name) const; + bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const; private: diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 6868f468f2e..ce4fcbb3513 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -464,7 +464,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con Pipes StorageDistributed::read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -497,7 +497,7 @@ Pipes StorageDistributed::read( const Scalars & scalars = context.hasQueryContext() ? 
context.getQueryContext().getScalars() : Scalars{};

     bool has_virtual_shard_num_column = std::find(column_names.begin(), column_names.end(), "_shard_num") != column_names.end();
-    if (has_virtual_shard_num_column && !isVirtualColumn("_shard_num"))
+    if (has_virtual_shard_num_column && !isVirtualColumn("_shard_num", metadata_snapshot))
         has_virtual_shard_num_column = false;

     ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr
diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp
index 9765db35fc3..92e965c420e 100644
--- a/src/Storages/StorageMerge.cpp
+++ b/src/Storages/StorageMerge.cpp
@@ -146,7 +146,7 @@ Pipes StorageMerge::read(
     for (const auto & column_name : column_names)
     {
-        if (column_name == "_table" && isVirtualColumn(column_name))
+        if (column_name == "_table" && isVirtualColumn(column_name, metadata_snapshot))
             has_table_virtual_column = true;
         else
             real_column_names.push_back(column_name);

From dffdece3501fce6ef74b1ae7c970f3f477024bb6 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Wed, 17 Jun 2020 19:39:58 +0300
Subject: [PATCH 126/318] getColumns in StorageInMemoryMetadata (only compilable)

---
 src/Core/iostream_debug_helpers.cpp           |  2 +-
 .../InputStreamFromASTInsertQuery.cpp         |  9 +++++--
 .../InputStreamFromASTInsertQuery.h           |  5 +++-
 .../PushingToViewsBlockOutputStream.cpp       |  5 ++--
 src/DataStreams/RemoteQueryExecutor.cpp       |  2 +-
 src/Databases/DatabaseMySQL.cpp               |  3 ++-
 src/Functions/hasColumnInTable.cpp            |  3 ++-
 src/Interpreters/InterpreterDescribeQuery.cpp |  3 ++-
 src/Interpreters/InterpreterInsertQuery.cpp   |  4 +--
 src/Interpreters/InterpreterSelectQuery.cpp   | 13 +++++-----
 src/Interpreters/InterpreterWatchQuery.cpp    |  2 +-
 src/Interpreters/JoinedTables.cpp             |  4 +--
 src/Interpreters/JoinedTables.h               |  4 ++-
 src/Interpreters/MutationsInterpreter.cpp     |  4 +--
 src/Interpreters/SyntaxAnalyzer.cpp           | 11 +++++---
 src/Interpreters/SyntaxAnalyzer.h             | 17 ++++++++++--
 src/Interpreters/getTableExpressions.cpp      |  6 +++--
 src/Interpreters/interpretSubquery.cpp        |  4 +--
 src/Server/MySQLHandler.cpp                   |  3 ++-
 src/Server/TCPHandler.cpp                     | 10 ++++---
 src/Storages/IStorage.cpp                     |  6 -----
 src/Storages/IStorage.h                       |  1 -
 src/Storages/LiveView/StorageLiveView.cpp     | 12 ++++++---
 src/Storages/MergeTree/IMergeTreeDataPart.cpp |  7 ++---
 src/Storages/MergeTree/IMergeTreeDataPart.h   |  3 ++-
 src/Storages/MergeTree/IMergeTreeReader.cpp   | 26 +++++++++++++------
 src/Storages/MergeTree/IMergeTreeReader.h     |  5 +++-
 .../MergeTree/MergeTreeBlockReadUtils.cpp     | 21 +++++++++------
 .../MergeTree/MergeTreeBlockReadUtils.h       | 11 +++++---
 .../MergeTree/MergeTreeDataMergerMutator.cpp  |  6 ++---
 .../MergeTree/MergeTreeDataPartCompact.cpp    |  3 ++-
 .../MergeTree/MergeTreeDataPartCompact.h      |  1 +
 .../MergeTree/MergeTreeDataPartWide.cpp       |  3 ++-
 .../MergeTree/MergeTreeDataPartWide.h         |  1 +
 .../MergeTree/MergeTreeDataSelectExecutor.cpp | 25 +----------------
 .../MergeTree/MergeTreeDataSelectExecutor.h   |  6 -----
 src/Storages/MergeTree/MergeTreeReadPool.cpp  |  2 +-
 .../MergeTree/MergeTreeReaderCompact.cpp      | 25 ++++++++++++------
 .../MergeTree/MergeTreeReaderCompact.h        |  1 +
 .../MergeTree/MergeTreeReaderWide.cpp         | 11 ++++++--
 src/Storages/MergeTree/MergeTreeReaderWide.h  |  1 +
 .../MergeTreeReverseSelectProcessor.cpp       |  9 ++++---
 .../MergeTree/MergeTreeSelectProcessor.cpp    |  8 +++---
 .../MergeTree/MergeTreeSequentialSource.cpp   |  4 +--
 ...rgeTreeThreadSelectBlockInputProcessor.cpp |  8 +++---
 src/Storages/StorageBuffer.cpp                | 10 +++----
 src/Storages/StorageDistributed.cpp           | 19 +++++++++----
 src/Storages/StorageDistributed.h             |  4 +--
 src/Storages/StorageFile.cpp                  |  2 +-
 src/Storages/StorageGenerateRandom.cpp        |  2 +-
 src/Storages/StorageLog.cpp                   | 17 ++++++------
 src/Storages/StorageLog.h                     |  4 +--
 src/Storages/StorageMerge.cpp                 | 10 ++++---
 src/Storages/StorageReplicatedMergeTree.cpp   |  8 +++---
 src/Storages/StorageTinyLog.cpp               | 21 ++++++++-------
 src/Storages/StorageURL.cpp                   | 14 +++++++---
 src/Storages/StorageURL.h                     | 15 ++++++-----
 src/Storages/StorageXDBC.cpp                  | 12 ++++++---
 src/Storages/StorageXDBC.h                    |  5 +++-
 .../System/StorageSystemPartsColumns.cpp      |  2 +-
 src/Storages/TTLDescription.cpp               |  4 +--
 src/Storages/getStructureOfRemoteTable.cpp    |  8 ++++--
 src/Storages/tests/gtest_storage_log.cpp      |  2 +-
 src/TableFunctions/TableFunctionMerge.cpp     |  2 +-
 64 files changed, 285 insertions(+), 196 deletions(-)

diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp
index 3a77b1f42be..8683bb14db6 100644
--- a/src/Core/iostream_debug_helpers.cpp
+++ b/src/Core/iostream_debug_helpers.cpp
@@ -49,7 +49,7 @@ std::ostream & operator<<(std::ostream & stream, const IStorage & what)
 {
     auto table_id = what.getStorageID();
     stream << "IStorage(name = " << what.getName() << ", tableName = " << table_id.table_name << ") {"
-           << what.getColumns().getAllPhysical().toString() << "}";
+           << what.getInMemoryMetadataPtr()->getColumns().getAllPhysical().toString() << "}";
     return stream;
 }

diff --git a/src/DataStreams/InputStreamFromASTInsertQuery.cpp b/src/DataStreams/InputStreamFromASTInsertQuery.cpp
index 47b61294da3..19c6fe41eca 100644
--- a/src/DataStreams/InputStreamFromASTInsertQuery.cpp
+++ b/src/DataStreams/InputStreamFromASTInsertQuery.cpp
@@ -21,7 +21,11 @@ namespace ErrorCodes

 InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
-    const ASTPtr & ast, ReadBuffer * input_buffer_tail_part, const Block & header, const Context & context, const ASTPtr & input_function)
+    const ASTPtr & ast,
+    ReadBuffer * input_buffer_tail_part,
+    const Block & header,
+    const Context & context,
+    const ASTPtr & input_function)
 {
     const auto * ast_insert_query = ast->as<ASTInsertQuery>();
@@ -59,7 +63,8 @@ InputStreamFromASTInsertQuery::InputStreamFromASTInsertQuery(
     if (context.getSettingsRef().input_format_defaults_for_omitted_fields && ast_insert_query->table_id && !input_function)
     {
         StoragePtr storage = DatabaseCatalog::instance().getTable(ast_insert_query->table_id, context);
-        auto column_defaults = storage->getColumns().getDefaults();
+        auto metadata_snapshot = storage->getInMemoryMetadataPtr();
+        auto column_defaults = metadata_snapshot->getColumns().getDefaults();
         if (!column_defaults.empty())
             res_stream = std::make_shared<AddingDefaultsBlockInputStream>(res_stream, column_defaults, context);
     }
diff --git a/src/DataStreams/InputStreamFromASTInsertQuery.h b/src/DataStreams/InputStreamFromASTInsertQuery.h
index a57e9199603..0604f011e28 100644
--- a/src/DataStreams/InputStreamFromASTInsertQuery.h
+++ b/src/DataStreams/InputStreamFromASTInsertQuery.h
@@ -11,6 +11,8 @@ namespace DB

 struct BlockIO;
 class Context;
+struct StorageInMemoryMetadata;
+using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

 /** Prepares an input stream which produce data containing in INSERT query
  * Head of inserting data could be stored in INSERT ast directly
@@ -19,7 +21,8 @@ class Context;
 class InputStreamFromASTInsertQuery : public IBlockInputStream
 {
 public:
-    InputStreamFromASTInsertQuery(const ASTPtr & ast,
+    InputStreamFromASTInsertQuery(
+        const ASTPtr & ast,
         ReadBuffer * input_buffer_tail_part,
         const Block & header,
         const Context & context,
diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
index e6e368f78e9..72de6b889f1 100644
--- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp
+++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp
@@ -79,6 +79,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
             StoragePtr inner_table = materialized_view->getTargetTable();
             auto inner_table_id = inner_table->getStorageID();
+            auto inner_metadata_snapshot = inner_table->getInMemoryMetadataPtr();
             query = dependent_metadata_snapshot->getSelectQuery().inner_query;

             std::unique_ptr<ASTInsertQuery> insert = std::make_unique<ASTInsertQuery>();
@@ -90,7 +91,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream(
             /// Insert only columns returned by select.
             auto list = std::make_shared<ASTExpressionList>();
-            const auto & inner_table_columns = inner_table->getColumns();
+            const auto & inner_table_columns = inner_metadata_snapshot->getColumns();
             for (auto & column : header)
                 /// But skip columns which storage doesn't have.
                 if (inner_table_columns.hasPhysical(column.name))
@@ -323,7 +324,7 @@ void PushingToViewsBlockOutputStream::process(const Block & block, size_t view_n
         Context local_context = *select_context;
         local_context.addViewSource(
             StorageValues::create(
-                storage->getStorageID(), storage->getColumns(), block, storage->getVirtuals()));
+                storage->getStorageID(), metadata_snapshot->getColumns(), block, storage->getVirtuals()));
         select.emplace(view.query, local_context, SelectQueryOptions());
         in = std::make_shared<MaterializingBlockInputStream>(select->execute().getInputStream());
diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp
index 45ddd7c0893..be09cd94814 100644
--- a/src/DataStreams/RemoteQueryExecutor.cpp
+++ b/src/DataStreams/RemoteQueryExecutor.cpp
@@ -325,7 +325,7 @@ void RemoteQueryExecutor::sendExternalTables()
                 Pipes pipes;

                 pipes = cur->read(
-                    cur->getColumns().getNamesOfPhysical(),
+                    metadata_snapshot->getColumns().getNamesOfPhysical(),
                     metadata_snapshot, {}, context,
                     read_from_table_stage, DEFAULT_BLOCK_SIZE, 1);
diff --git a/src/Databases/DatabaseMySQL.cpp b/src/Databases/DatabaseMySQL.cpp
index 5d4b81014f9..a73fbafb7f5 100644
--- a/src/Databases/DatabaseMySQL.cpp
+++ b/src/Databases/DatabaseMySQL.cpp
@@ -139,7 +139,8 @@ static ASTPtr getCreateQueryFromStorage(const StoragePtr & storage, const ASTPtr
     create_table_query->table = table_id.table_name;
     create_table_query->database = table_id.database_name;

-    for (const auto & column_type_and_name : storage->getColumns().getOrdinary())
+    auto metadata_snapshot = storage->getInMemoryMetadataPtr();
+    for (const auto & column_type_and_name : metadata_snapshot->getColumns().getOrdinary())
     {
         const auto & column_declaration = std::make_shared<ASTColumnDeclaration>();
         column_declaration->name = column_type_and_name.name;
diff --git a/src/Functions/hasColumnInTable.cpp b/src/Functions/hasColumnInTable.cpp
index b9ec2b84837..ef447070e7a 100644
--- a/src/Functions/hasColumnInTable.cpp
+++ b/src/Functions/hasColumnInTable.cpp
@@ -114,7 +114,8 @@ void FunctionHasColumnInTable::executeImpl(Block & block, const ColumnNumbers &
     if (host_name.empty())
     {
         const StoragePtr & table = DatabaseCatalog::instance().getTable({database_name, table_name}, global_context);
-        has_column = table->getColumns().hasPhysical(column_name);
+        auto table_metadata = table->getInMemoryMetadataPtr();
+        has_column = table_metadata->getColumns().hasPhysical(column_name);
     }
     else
     {
diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp
index d457fefed6a..535a4280b45 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -91,7 +91,8 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto table_lock = table->lockStructureForShare( false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); - columns = table->getColumns(); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + columns = metadata_snapshot->getColumns(); } Block sample_block = getSampleBlock(); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index e7fdf80e297..a39e8961970 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -244,7 +244,7 @@ BlockIO InterpreterInsertQuery::execute() /// Actually we don't know structure of input blocks from query/table, /// because some clients break insertion protocol (columns != header) out = std::make_shared( - out, query_sample_block, out->getHeader(), table->getColumns().getDefaults(), context); + out, query_sample_block, out->getHeader(), metadata_snapshot->getColumns().getDefaults(), context); /// It's important to squash blocks as early as possible (before other transforms), /// because other transforms may work inefficient if block size is small. @@ -295,7 +295,7 @@ BlockIO InterpreterInsertQuery::execute() if (!allow_materialized) { - for (const auto & column : table->getColumns()) + for (const auto & column : metadata_snapshot->getColumns()) if (column.default_desc.kind == ColumnDefaultKind::Materialized && header.has(column.name)) throw Exception("Cannot insert column " + column.name + ", because it is MATERIALIZED column.", ErrorCodes::ILLEGAL_COLUMN); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 509825e75e4..f601ca74112 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -129,7 +129,7 @@ String InterpreterSelectQuery::generateFilterActions( table_expr->children.push_back(table_expr->database_and_table_name); /// Using separate expression analyzer to prevent any possible alias injection - auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage)); + auto syntax_result = SyntaxAnalyzer(*context).analyzeSelect(query_ast, SyntaxAnalyzerResult({}, storage, metadata_snapshot)); SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, *context, metadata_snapshot); actions = analyzer.simpleSelectActions(); @@ -263,7 +263,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( } if (has_input || !joined_tables.resolveTables()) - joined_tables.makeFakeTable(storage, source_header); + joined_tables.makeFakeTable(storage, metadata_snapshot, source_header); /// Rewrite JOINs if (!has_input && joined_tables.tablesCount() > 1) @@ -311,8 +311,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); syntax_analyzer_result = SyntaxAnalyzer(*context).analyzeSelect( - query_ptr, SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage), - options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); + query_ptr, + SyntaxAnalyzerResult(source_header.getNamesAndTypesList(), storage, metadata_snapshot), + options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); if (view) { @@ -1087,7 +1088,7 @@ void 
InterpreterSelectQuery::executeFetchColumns( /// Detect, if ALIAS columns are required for query execution auto alias_columns_required = false; - const ColumnsDescription & storage_columns = storage->getColumns(); + const ColumnsDescription & storage_columns = metadata_snapshot->getColumns(); for (const auto & column_name : required_columns) { auto column_default = storage_columns.getDefault(column_name); @@ -1210,7 +1211,7 @@ void InterpreterSelectQuery::executeFetchColumns( prewhere_info->prewhere_actions = std::move(new_actions); auto analyzed_result - = SyntaxAnalyzer(*context).analyze(required_columns_from_prewhere_expr, storage->getColumns().getAllPhysical()); + = SyntaxAnalyzer(*context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical()); prewhere_info->alias_actions = ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, *context).getActions(true, false); diff --git a/src/Interpreters/InterpreterWatchQuery.cpp b/src/Interpreters/InterpreterWatchQuery.cpp index 489be488b4d..71ec1609046 100644 --- a/src/Interpreters/InterpreterWatchQuery.cpp +++ b/src/Interpreters/InterpreterWatchQuery.cpp @@ -47,7 +47,7 @@ BlockIO InterpreterWatchQuery::execute() ErrorCodes::UNKNOWN_TABLE); /// List of columns to read to execute the query. - Names required_columns = storage->getColumns().getNamesOfPhysical(); + Names required_columns = storage->getInMemoryMetadataPtr()->getColumns().getNamesOfPhysical(); context.checkAccess(AccessType::SELECT, table_id, required_columns); /// Get context settings for this query diff --git a/src/Interpreters/JoinedTables.cpp b/src/Interpreters/JoinedTables.cpp index 7450890952a..127df9b5eac 100644 --- a/src/Interpreters/JoinedTables.cpp +++ b/src/Interpreters/JoinedTables.cpp @@ -207,11 +207,11 @@ bool JoinedTables::resolveTables() return !tables_with_columns.empty(); } -void JoinedTables::makeFakeTable(StoragePtr storage, const Block & source_header) +void JoinedTables::makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header) { if (storage) { - const ColumnsDescription & storage_columns = storage->getColumns(); + const ColumnsDescription & storage_columns = metadata_snapshot->getColumns(); tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, storage_columns.getOrdinary()); auto & table = tables_with_columns.back(); diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index 2591b49527b..cff86c5a535 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -13,6 +13,8 @@ namespace DB class ASTSelectQuery; class TableJoin; struct SelectQueryOptions; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr; /// Joined tables' columns resolver. /// We want to get each table structure at most once per table occurance. Or even better once per table. 
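The mechanical change repeated through the Interpreter and JoinedTables hunks above is always the same one: the caller takes a StorageMetadataPtr once and threads it through, instead of each helper reaching back into the storage via storage->getColumns(). The shape of that refactoring, as a sketch with hypothetical stand-in types rather than the real ClickHouse declarations:

#include <memory>
#include <string>
#include <vector>

// Hypothetical stand-ins, only to show the shape of the change.
struct ColumnsDescription { std::vector<std::string> names; };
struct StorageInMemoryMetadata { ColumnsDescription columns; };
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;

struct Storage
{
    StorageMetadataPtr metadata;
    StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; }
};

// Before the patch, helpers like these called storage.getColumns() themselves,
// so two stages could observe different structures if an ALTER ran in between.
void analyze(const ColumnsDescription & columns) { (void)columns.names.size(); }
void planRead(const ColumnsDescription & columns) { (void)columns.names.size(); }

void executeQuery(const Storage & storage)
{
    auto metadata_snapshot = storage.getInMemoryMetadataPtr();
    analyze(metadata_snapshot->columns);   // every stage of the query now sees
    planRead(metadata_snapshot->columns);  // exactly the same column set
}

Pinning the snapshot once means an ALTER that lands mid-query can no longer make the analyzer and the reader disagree about the table structure.
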
@@ -31,7 +33,7 @@ public: bool resolveTables(); /// Make fake tables_with_columns[0] in case we have predefined input in InterpreterSelectQuery - void makeFakeTable(StoragePtr storage, const Block & source_header); + void makeFakeTable(StoragePtr storage, const StorageMetadataPtr & metadata_snapshot, const Block & source_header); std::shared_ptr makeTableJoin(const ASTSelectQuery & select_query); const TablesWithColumns & tablesWithColumns() const { return tables_with_columns; } diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 694e114af7a..3ad813a15b7 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -419,7 +419,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) else { NameSet new_updated_columns; - auto column_ttls = storage->getColumns().getColumnTTLs(); + auto column_ttls = metadata_snapshot->getColumns().getColumnTTLs(); for (const auto & elem : column_ttls) { dependencies.emplace(elem.first, ColumnDependency::TTL_TARGET); @@ -528,7 +528,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & prepared_stages, bool dry_run) { - NamesAndTypesList all_columns = storage->getColumns().getAllPhysical(); + NamesAndTypesList all_columns = metadata_snapshot->getColumns().getAllPhysical(); /// Next, for each stage calculate columns changed by this and previous stages. diff --git a/src/Interpreters/SyntaxAnalyzer.cpp b/src/Interpreters/SyntaxAnalyzer.cpp index 2dc2943d36d..9e927e1eb5a 100644 --- a/src/Interpreters/SyntaxAnalyzer.cpp +++ b/src/Interpreters/SyntaxAnalyzer.cpp @@ -681,7 +681,7 @@ void SyntaxAnalyzerResult::collectSourceColumns(bool add_special) { if (storage) { - const ColumnsDescription & columns = storage->getColumns(); + const ColumnsDescription & columns = metadata_snapshot->getColumns(); auto columns_from_storage = add_special ? 
columns.getAll() : columns.getAllPhysical(); if (source_columns.empty()) @@ -962,14 +962,19 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyzeSelect( return std::make_shared(result); } -SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(ASTPtr & query, const NamesAndTypesList & source_columns, ConstStoragePtr storage, bool allow_aggregations) const +SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( + ASTPtr & query, + const NamesAndTypesList & source_columns, + ConstStoragePtr storage, + const StorageMetadataPtr & metadata_snapshot, + bool allow_aggregations) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); const auto & settings = context.getSettingsRef(); - SyntaxAnalyzerResult result(source_columns, storage, false); + SyntaxAnalyzerResult result(source_columns, storage, metadata_snapshot, false); normalize(query, result.aliases, settings); diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/SyntaxAnalyzer.h index 175c2db295a..4308b70c45a 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/SyntaxAnalyzer.h @@ -16,10 +16,13 @@ class Context; struct Settings; struct SelectQueryOptions; using Scalars = std::map; +struct StorageInMemoryMetadata; +using StorageMetadataPtr = std::shared_ptr; struct SyntaxAnalyzerResult { ConstStoragePtr storage; + StorageMetadataPtr metadata_snapshot; std::shared_ptr analyzed_join; NamesAndTypesList source_columns; @@ -51,8 +54,13 @@ struct SyntaxAnalyzerResult /// Results of scalar sub queries Scalars scalars; - SyntaxAnalyzerResult(const NamesAndTypesList & source_columns_, ConstStoragePtr storage_ = {}, bool add_special = true) + SyntaxAnalyzerResult( + const NamesAndTypesList & source_columns_, + ConstStoragePtr storage_ = {}, + const StorageMetadataPtr & metadata_snapshot_ = {}, + bool add_special = true) : storage(storage_) + , metadata_snapshot(metadata_snapshot_) , source_columns(source_columns_) { collectSourceColumns(add_special); @@ -86,7 +94,12 @@ public: {} /// Analyze and rewrite not select query - SyntaxAnalyzerResultPtr analyze(ASTPtr & query, const NamesAndTypesList & source_columns_, ConstStoragePtr storage = {}, bool allow_aggregations = false) const; + SyntaxAnalyzerResultPtr analyze( + ASTPtr & query, + const NamesAndTypesList & source_columns_, + ConstStoragePtr storage = {}, + const StorageMetadataPtr & metadata_snapshot = {}, + bool allow_aggregations = false) const; /// Analyze and rewrite select query SyntaxAnalyzerResultPtr analyzeSelect( diff --git a/src/Interpreters/getTableExpressions.cpp b/src/Interpreters/getTableExpressions.cpp index 6e3fd516e1c..56ca614dc2d 100644 --- a/src/Interpreters/getTableExpressions.cpp +++ b/src/Interpreters/getTableExpressions.cpp @@ -87,7 +87,8 @@ static NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression const auto table_function = table_expression.table_function; auto * query_context = const_cast(&context.getQueryContext()); const auto & function_storage = query_context->executeTableFunction(table_function); - const auto & columns = function_storage->getColumns(); + auto function_metadata_snapshot = function_storage->getInMemoryMetadataPtr(); + const auto & columns = function_metadata_snapshot->getColumns(); names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); @@ -97,7 +98,8 @@ static NamesAndTypesList getColumnsFromTableExpression(const ASTTableExpression { auto table_id = 
context.resolveStorageID(table_expression.database_and_table_name); const auto & table = DatabaseCatalog::instance().getTable(table_id, context); - const auto & columns = table->getColumns(); + auto table_metadata_snapshot = table->getInMemoryMetadataPtr(); + const auto & columns = table_metadata_snapshot->getColumns(); names_and_type_list = columns.getOrdinary(); materialized = columns.getMaterialized(); aliases = columns.getAliases(); diff --git a/src/Interpreters/interpretSubquery.cpp b/src/Interpreters/interpretSubquery.cpp index c94759897f5..cf343a4fda2 100644 --- a/src/Interpreters/interpretSubquery.cpp +++ b/src/Interpreters/interpretSubquery.cpp @@ -90,14 +90,14 @@ std::shared_ptr interpretSubquery( { auto * query_context = const_cast(&context.getQueryContext()); const auto & storage = query_context->executeTableFunction(table_expression); - columns = storage->getColumns().getOrdinary(); + columns = storage->getInMemoryMetadataPtr()->getColumns().getOrdinary(); select_query->addTableFunction(*const_cast(&table_expression)); // XXX: const_cast should be avoided! } else { auto table_id = context.resolveStorageID(table_expression); const auto & storage = DatabaseCatalog::instance().getTable(table_id, context); - columns = storage->getColumns().getOrdinary(); + columns = storage->getInMemoryMetadataPtr()->getColumns().getOrdinary(); select_query->replaceDatabaseAndTable(table_id); } diff --git a/src/Server/MySQLHandler.cpp b/src/Server/MySQLHandler.cpp index 51b3d7eaef5..68f1bb8efff 100644 --- a/src/Server/MySQLHandler.cpp +++ b/src/Server/MySQLHandler.cpp @@ -254,7 +254,8 @@ void MySQLHandler::comFieldList(ReadBuffer & payload) packet.readPayload(payload); String database = connection_context.getCurrentDatabase(); StoragePtr table_ptr = DatabaseCatalog::instance().getTable({database, packet.table}, connection_context); - for (const NameAndTypePair & column: table_ptr->getColumns().getAll()) + auto metadata_snapshot = table_ptr->getInMemoryMetadataPtr(); + for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAll()) { ColumnDefinition column_definition( database, packet.table, packet.table, column.name, column.name, CharacterSet::binary, 100, ColumnType::MYSQL_TYPE_STRING, 0, 0 diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 009f7ad80f0..056234af45d 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -213,17 +213,18 @@ void TCPHandler::runImpl() if (&context != &query_context.value()) throw Exception("Unexpected context in Input initializer", ErrorCodes::LOGICAL_ERROR); + auto metadata_snapshot = input_storage->getInMemoryMetadataPtr(); state.need_receive_data_for_input = true; /// Send ColumnsDescription for input storage. if (client_revision >= DBMS_MIN_REVISION_WITH_COLUMN_DEFAULTS_METADATA && query_context->getSettingsRef().input_format_defaults_for_omitted_fields) { - sendTableColumns(input_storage->getColumns()); + sendTableColumns(metadata_snapshot->getColumns()); } /// Send block to the client - input storage structure. 
- state.input_header = input_storage->getInMemoryMetadataPtr()->getSampleBlock(); + state.input_header = metadata_snapshot->getSampleBlock(); sendData(state.input_header); }); @@ -474,7 +475,10 @@ void TCPHandler::processInsertQuery(const Settings & connection_settings) if (query_context->getSettingsRef().input_format_defaults_for_omitted_fields) { if (!table_id.empty()) - sendTableColumns(DatabaseCatalog::instance().getTable(table_id, *query_context)->getColumns()); + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, *query_context); + sendTableColumns(storage_ptr->getInMemoryMetadataPtr()->getColumns()); + } } } diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 884982c93b9..8ee9561466a 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -32,12 +32,6 @@ namespace ErrorCodes extern const int DEADLOCK_AVOIDED; } -const ColumnsDescription & IStorage::getColumns() const -{ - return metadata->columns; -} - - bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const { /// Virtual column maybe overriden by real column diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index a125c6f8310..ba1945d5c79 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -137,7 +137,6 @@ public: public: /// thread-unsafe part. lockStructure must be acquired - const ColumnsDescription & getColumns() const; /// returns combined set of columns StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared(metadata_); } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f1b9459b3d3..ac6bd48f534 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -142,8 +142,10 @@ BlockInputStreamPtr StorageLiveView::completeQuery(Pipes pipes) auto creator = [&](const StorageID & blocks_id_global) { - return StorageBlocks::createStorage(blocks_id_global, getParentStorage()->getColumns(), - std::move(pipes), QueryProcessingStage::WithMergeableState); + auto parent_table_metadata = getParentStorage()->getInMemoryMetadataPtr(); + return StorageBlocks::createStorage( + blocks_id_global, parent_table_metadata->getColumns(), + std::move(pipes), QueryProcessingStage::WithMergeableState); }; block_context->addExternalTable(getBlocksTableName(), TemporaryTableHolder(global_context, creator)); @@ -209,8 +211,10 @@ void StorageLiveView::writeIntoLiveView( auto creator = [&](const StorageID & blocks_id_global) { - return StorageBlocks::createStorage(blocks_id_global, live_view.getParentStorage()->getColumns(), - std::move(pipes), QueryProcessingStage::FetchColumns); + auto parent_metadata = live_view.getParentStorage()->getInMemoryMetadataPtr(); + return StorageBlocks::createStorage( + blocks_id_global, parent_metadata->getColumns(), + std::move(pipes), QueryProcessingStage::FetchColumns); }; TemporaryTableHolder blocks_storage(context, creator); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 17ff2259436..61dfeed6b7c 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -352,9 +352,9 @@ size_t IMergeTreeDataPart::getFileSizeOrZero(const String & file_name) const return checksum->second.file_size; } 
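The IStorage.cpp and IStorage.h hunks above delete IStorage::getColumns() outright, so column lists can no longer be read without naming the snapshot they come from. One plausible way handing out shared_ptr snapshots stays safe under a concurrent ALTER is an atomic pointer swap; the sketch below assumes that model and is not ClickHouse's actual locking scheme:

#include <memory>

struct Metadata { /* columns, keys, TTLs, ... */ };

class Storage
{
public:
    std::shared_ptr<const Metadata> getSnapshot() const
    {
        return std::atomic_load(&metadata);             // reader side: grab a stable view
    }

    void alter(std::shared_ptr<const Metadata> fresh)
    {
        std::atomic_store(&metadata, std::move(fresh)); // writer side: publish a new object
    }

private:
    std::shared_ptr<const Metadata> metadata = std::make_shared<Metadata>();
};

Readers that already hold a snapshot keep a consistent, if slightly stale, view; the writer never mutates an object that someone else may still be reading.
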
-String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize() const +String IMergeTreeDataPart::getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const { - const auto & storage_columns = storage.getColumns().getAllPhysical(); + const auto & storage_columns = metadata_snapshot->getColumns().getAllPhysical(); auto alter_conversions = storage.getAlterConversionsForPart(shared_from_this()); std::optional minimum_size_column; @@ -613,6 +613,7 @@ void IMergeTreeDataPart::loadTTLInfos() void IMergeTreeDataPart::loadColumns(bool require) { String path = getFullRelativePath() + "columns.txt"; + auto metadata_snapshot = storage.getInMemoryMetadataPtr(); if (!volume->getDisk()->exists(path)) { /// We can get list of columns only from columns.txt in compact parts. @@ -620,7 +621,7 @@ void IMergeTreeDataPart::loadColumns(bool require) throw Exception("No columns.txt in part " + name, ErrorCodes::NO_FILE_IN_DATA_PART); /// If there is no file with a list of columns, write it down. - for (const NameAndTypePair & column : storage.getColumns().getAllPhysical()) + for (const NameAndTypePair & column : metadata_snapshot->getColumns().getAllPhysical()) if (volume->getDisk()->exists(getFullRelativePath() + getFileNameForColumn(column) + ".bin")) columns.push_back(column); diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 04babece83e..0e73b1370c5 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -77,6 +77,7 @@ public: virtual MergeTreeReaderPtr getReader( const NamesAndTypesList & columns_, + const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -143,7 +144,7 @@ public: /// Returns the name of a column with minimum compressed size (as returned by getColumnSize()). /// If no checksums are present returns the name of the first physically existing column. 
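The hunk above only re-plumbs getColumnNameWithMinumumCompressedSize() to consult the snapshot; the heuristic itself is unchanged. Stripped of ClickHouse types it is roughly the following simplified sketch (the real code also applies ALTER conversions and takes sizes from the part's checksums):

#include <cstdint>
#include <optional>
#include <string>
#include <vector>

struct ColumnSize { uint64_t data_compressed = 0; }; // 0 here stands for "no checksum recorded"
struct PhysicalColumn { std::string name; ColumnSize size; };

// Assumes at least one physical column, as the storage layer guarantees.
std::string columnWithMinimumCompressedSize(const std::vector<PhysicalColumn> & columns)
{
    std::optional<size_t> best;
    for (size_t i = 0; i < columns.size(); ++i)
    {
        if (columns[i].size.data_compressed == 0)
            continue; // no checksum, cannot compare; covered by the fallback below
        if (!best || columns[i].size.data_compressed < columns[*best].size.data_compressed)
            best = i;
    }
    // Fallback mirrors the comment above: the first physically existing column.
    return best ? columns[*best].name : columns.front().name;
}
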
- String getColumnNameWithMinumumCompressedSize() const; + String getColumnNameWithMinumumCompressedSize(const StorageMetadataPtr & metadata_snapshot) const; bool contains(const IMergeTreeDataPart & other) const { return info.contains(other.info); } diff --git a/src/Storages/MergeTree/IMergeTreeReader.cpp b/src/Storages/MergeTree/IMergeTreeReader.cpp index 5d2a5ac3616..53dc47d8068 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.cpp +++ b/src/Storages/MergeTree/IMergeTreeReader.cpp @@ -22,13 +22,23 @@ namespace ErrorCodes } -IMergeTreeReader::IMergeTreeReader(const MergeTreeData::DataPartPtr & data_part_, - const NamesAndTypesList & columns_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, - const MarkRanges & all_mark_ranges_, const MergeTreeReaderSettings & settings_, +IMergeTreeReader::IMergeTreeReader( + const MergeTreeData::DataPartPtr & data_part_, + const NamesAndTypesList & columns_, + const StorageMetadataPtr & metadata_snapshot_, + UncompressedCache * uncompressed_cache_, + MarkCache * mark_cache_, + const MarkRanges & all_mark_ranges_, + const MergeTreeReaderSettings & settings_, const ValueSizeMap & avg_value_size_hints_) - : data_part(data_part_), avg_value_size_hints(avg_value_size_hints_) - , columns(columns_), uncompressed_cache(uncompressed_cache_), mark_cache(mark_cache_) - , settings(settings_), storage(data_part_->storage) + : data_part(data_part_) + , avg_value_size_hints(avg_value_size_hints_) + , columns(columns_) + , uncompressed_cache(uncompressed_cache_) + , mark_cache(mark_cache_) + , settings(settings_) + , storage(data_part_->storage) + , metadata_snapshot(metadata_snapshot_) , all_mark_ranges(all_mark_ranges_) , alter_conversions(storage.getAlterConversionsForPart(data_part)) { @@ -112,7 +122,7 @@ void IMergeTreeReader::fillMissingColumns(Columns & res_columns, bool & should_e if (res_columns[i] == nullptr) { - if (storage.getColumns().hasDefault(name)) + if (metadata_snapshot->getColumns().hasDefault(name)) { should_evaluate_missing_defaults = true; continue; @@ -170,7 +180,7 @@ void IMergeTreeReader::evaluateMissingDefaults(Block additional_columns, Columns additional_columns.insert({res_columns[pos], name_and_type->type, name_and_type->name}); } - DB::evaluateMissingDefaults(additional_columns, columns, storage.getColumns().getDefaults(), storage.global_context); + DB::evaluateMissingDefaults(additional_columns, columns, metadata_snapshot->getColumns().getDefaults(), storage.global_context); /// Move columns from block. 
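fillMissingColumns() above now asks the snapshot, not the storage, whether an absent column has a DEFAULT expression. Its decision reduces to roughly this (illustrative types; the real code materializes actual columns and defers expression evaluation to evaluateMissingDefaults()):

#include <map>
#include <optional>
#include <string>
#include <vector>

struct Column { std::string name; std::optional<std::string> data; }; // empty data = not present in the part

bool fillMissingColumns(
    std::vector<Column> & res_columns,
    const std::map<std::string, std::string> & snapshot_defaults) // column name -> DEFAULT expression
{
    bool should_evaluate_missing_defaults = false;
    for (auto & column : res_columns)
    {
        if (column.data)
            continue; // column was read from the part as-is
        if (snapshot_defaults.count(column.name))
            should_evaluate_missing_defaults = true; // evaluated later, in one batch
        else
            column.data = std::string{}; // fall back to the type's trivial default
    }
    return should_evaluate_missing_defaults;
}
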
name_and_type = columns.begin(); diff --git a/src/Storages/MergeTree/IMergeTreeReader.h b/src/Storages/MergeTree/IMergeTreeReader.h index 02d8f67f9d0..6e9922b29ed 100644 --- a/src/Storages/MergeTree/IMergeTreeReader.h +++ b/src/Storages/MergeTree/IMergeTreeReader.h @@ -18,8 +18,10 @@ public: using ValueSizeMap = std::map; using DeserializeBinaryBulkStateMap = std::map; - IMergeTreeReader(const MergeTreeData::DataPartPtr & data_part_, + IMergeTreeReader( + const MergeTreeData::DataPartPtr & data_part_, const NamesAndTypesList & columns_, + const StorageMetadataPtr & metadata_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, const MarkRanges & all_mark_ranges_, @@ -75,6 +77,7 @@ protected: MergeTreeReaderSettings settings; const MergeTreeData & storage; + StorageMetadataPtr metadata_snapshot; MarkRanges all_mark_ranges; friend class MergeTreeRangeReader::DelayedStream; diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp index 310c566fb19..03235742a68 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.cpp @@ -13,14 +13,14 @@ namespace ErrorCodes } -NameSet injectRequiredColumns(const MergeTreeData & storage, const MergeTreeData::DataPartPtr & part, Names & columns) +NameSet injectRequiredColumns(const MergeTreeData & storage, const StorageMetadataPtr & metadata_snapshot, const MergeTreeData::DataPartPtr & part, Names & columns) { NameSet required_columns{std::begin(columns), std::end(columns)}; NameSet injected_columns; auto all_column_files_missing = true; - const auto & storage_columns = storage.getColumns(); + const auto & storage_columns = metadata_snapshot->getColumns(); auto alter_conversions = storage.getAlterConversionsForPart(part); for (size_t i = 0; i < columns.size(); ++i) { @@ -66,7 +66,7 @@ NameSet injectRequiredColumns(const MergeTreeData & storage, const MergeTreeData */ if (all_column_files_missing) { - const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize(); + const auto minimum_size_column_name = part->getColumnNameWithMinumumCompressedSize(metadata_snapshot); columns.push_back(minimum_size_column_name); /// correctly report added column injected_columns.insert(columns.back()); @@ -214,14 +214,19 @@ void MergeTreeBlockSizePredictor::update(const Block & sample_block, const Colum } -MergeTreeReadTaskColumns getReadTaskColumns(const MergeTreeData & storage, const MergeTreeData::DataPartPtr & data_part, - const Names & required_columns, const PrewhereInfoPtr & prewhere_info, bool check_columns) +MergeTreeReadTaskColumns getReadTaskColumns( + const MergeTreeData & storage, + const StorageMetadataPtr & metadata_snapshot, + const MergeTreeData::DataPartPtr & data_part, + const Names & required_columns, + const PrewhereInfoPtr & prewhere_info, + bool check_columns) { Names column_names = required_columns; Names pre_column_names; /// inject columns required for defaults evaluation - bool should_reorder = !injectRequiredColumns(storage, data_part, column_names).empty(); + bool should_reorder = !injectRequiredColumns(storage, metadata_snapshot, data_part, column_names).empty(); if (prewhere_info) { @@ -233,7 +238,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(const MergeTreeData & storage, const if (pre_column_names.empty()) pre_column_names.push_back(column_names[0]); - const auto injected_pre_columns = injectRequiredColumns(storage, data_part, pre_column_names); + const auto injected_pre_columns 
= injectRequiredColumns(storage, metadata_snapshot, data_part, pre_column_names); if (!injected_pre_columns.empty()) should_reorder = true; @@ -251,7 +256,7 @@ MergeTreeReadTaskColumns getReadTaskColumns(const MergeTreeData & storage, const if (check_columns) { - const NamesAndTypesList & physical_columns = storage.getColumns().getAllPhysical(); + const NamesAndTypesList & physical_columns = metadata_snapshot->getColumns().getAllPhysical(); result.pre_columns = physical_columns.addTypes(pre_column_names); result.columns = physical_columns.addTypes(column_names); } diff --git a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h index 108742e1101..31d609e4242 100644 --- a/src/Storages/MergeTree/MergeTreeBlockReadUtils.h +++ b/src/Storages/MergeTree/MergeTreeBlockReadUtils.h @@ -22,7 +22,7 @@ using MergeTreeBlockSizePredictorPtr = std::unique_ptrgetColumns().getNamesOfPhysical(); + NamesAndTypesList storage_columns = metadata_snapshot->getColumns().getAllPhysical(); const auto data_settings = data.getSettings(); NamesAndTypesList gathering_columns; @@ -1041,7 +1041,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor UInt64 watch_prev_elapsed = 0; MergeStageProgress stage_progress(1.0); - NamesAndTypesList storage_columns = data.getColumns().getAllPhysical(); + NamesAndTypesList storage_columns = metadata_snapshot->getColumns().getAllPhysical(); if (!for_interpreter.empty()) { diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp index d45aa882b2a..65577eb4ca1 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.cpp @@ -38,6 +38,7 @@ MergeTreeDataPartCompact::MergeTreeDataPartCompact( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( const NamesAndTypesList & columns_to_read, + const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -47,7 +48,7 @@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartCompact::getReader( { auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - ptr, columns_to_read, uncompressed_cache, + ptr, columns_to_read, metadata_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartCompact.h b/src/Storages/MergeTree/MergeTreeDataPartCompact.h index 0b27dd53339..7d5c8628570 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartCompact.h +++ b/src/Storages/MergeTree/MergeTreeDataPartCompact.h @@ -37,6 +37,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, + const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp index e4901b1f74d..e9383bc917d 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.cpp @@ -37,6 +37,7 @@ MergeTreeDataPartWide::MergeTreeDataPartWide( IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( const NamesAndTypesList & columns_to_read, + const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, @@ -46,7 +47,7 
@@ IMergeTreeDataPart::MergeTreeReaderPtr MergeTreeDataPartWide::getReader( { auto ptr = std::static_pointer_cast(shared_from_this()); return std::make_unique( - ptr, columns_to_read, uncompressed_cache, + ptr, columns_to_read, metadata_snapshot, uncompressed_cache, mark_cache, mark_ranges, reader_settings, avg_value_size_hints, profile_callback); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWide.h b/src/Storages/MergeTree/MergeTreeDataPartWide.h index 144dfa86cfb..8d8b6fa678b 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWide.h @@ -30,6 +30,7 @@ public: MergeTreeReaderPtr getReader( const NamesAndTypesList & columns, + const StorageMetadataPtr & metadata_snapshot, const MarkRanges & mark_ranges, UncompressedCache * uncompressed_cache, MarkCache * mark_cache, diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index b72c46afca3..7f7fd203297 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -650,7 +650,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone(); order_key_prefix_ast->children.resize(prefix_size); - auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, data.getColumns().getAllPhysical()); + auto syntax_result = SyntaxAnalyzer(context).analyze(order_key_prefix_ast, metadata_snapshot->getColumns().getAllPhysical()); auto sorting_key_prefix_expr = ExpressionAnalyzer(order_key_prefix_ast, syntax_result, context).getActions(false); res = spreadMarkRangesAmongStreamsWithOrder( @@ -1274,29 +1274,6 @@ Pipes MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsFinal( return pipes; } - -void MergeTreeDataSelectExecutor::createPositiveSignCondition( - ExpressionActionsPtr & out_expression, String & out_column, const Context & context) const -{ - auto function = std::make_shared(); - auto arguments = std::make_shared(); - auto sign = std::make_shared(data.merging_params.sign_column); - auto one = std::make_shared(1); - - function->name = "equals"; - function->arguments = arguments; - function->children.push_back(arguments); - - arguments->children.push_back(sign); - arguments->children.push_back(one); - - ASTPtr query = function; - auto syntax_result = SyntaxAnalyzer(context).analyze(query, data.getColumns().getAllPhysical()); - out_expression = ExpressionAnalyzer(query, syntax_result, context).getActions(false); - out_column = function->getColumnName(); -} - - /// Calculates a set of mark ranges, that could possibly contain keys, required by condition. /// In other words, it removes subranges from whole range, that definitely could not contain required keys. MarkRanges MergeTreeDataSelectExecutor::markRangesFromPKRange( diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index ba0613a832d..5669e8708b6 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -95,12 +95,6 @@ private: const KeyCondition & key_condition, const Settings & settings) const; - /// Create the expression "Sign == 1". 
- void createPositiveSignCondition( - ExpressionActionsPtr & out_expression, - String & out_column, - const Context & context) const; - MarkRanges markRangesFromPKRange( const MergeTreeData::DataPartPtr & part, const StorageMetadataPtr & metadata_snapshot, diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index 8c73dc39dfb..fdf3908d21e 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -211,7 +211,7 @@ std::vector MergeTreeReadPool::fillPerPartInfo( per_part_sum_marks.push_back(sum_marks); auto [required_columns, required_pre_columns, should_reorder] = - getReadTaskColumns(data, part.data_part, column_names, prewhere_info, check_columns); + getReadTaskColumns(data, metadata_snapshot, part.data_part, column_names, prewhere_info, check_columns); /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & required_column_names = required_columns.getNames(); diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp index c4a05a8bfac..64e9deec744 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.cpp @@ -17,6 +17,7 @@ namespace ErrorCodes MergeTreeReaderCompact::MergeTreeReaderCompact( DataPartCompactPtr data_part_, NamesAndTypesList columns_, + const StorageMetadataPtr & metadata_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -24,15 +25,23 @@ MergeTreeReaderCompact::MergeTreeReaderCompact( ValueSizeMap avg_value_size_hints_, const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) - : IMergeTreeReader(std::move(data_part_), std::move(columns_), - uncompressed_cache_, mark_cache_, std::move(mark_ranges_), - std::move(settings_), std::move(avg_value_size_hints_)) + : IMergeTreeReader( + std::move(data_part_), + std::move(columns_), + metadata_snapshot_, + uncompressed_cache_, + mark_cache_, + std::move(mark_ranges_), + std::move(settings_), + std::move(avg_value_size_hints_)) , marks_loader( - data_part->volume->getDisk(), - mark_cache, - data_part->index_granularity_info.getMarksFilePath(data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME), - data_part->getMarksCount(), data_part->index_granularity_info, - settings.save_marks_in_cache, data_part->getColumns().size()) + data_part->volume->getDisk(), + mark_cache, + data_part->index_granularity_info.getMarksFilePath(data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME), + data_part->getMarksCount(), + data_part->index_granularity_info, + settings.save_marks_in_cache, + data_part->getColumns().size()) { size_t buffer_size = settings.max_read_buffer_size; const String full_data_path = data_part->getFullRelativePath() + MergeTreeDataPartCompact::DATA_FILE_NAME_WITH_EXTENSION; diff --git a/src/Storages/MergeTree/MergeTreeReaderCompact.h b/src/Storages/MergeTree/MergeTreeReaderCompact.h index 827306cd983..584d8ed2ff0 100644 --- a/src/Storages/MergeTree/MergeTreeReaderCompact.h +++ b/src/Storages/MergeTree/MergeTreeReaderCompact.h @@ -17,6 +17,7 @@ public: MergeTreeReaderCompact( DataPartCompactPtr data_part_, NamesAndTypesList columns_, + const StorageMetadataPtr & metadata_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.cpp 
b/src/Storages/MergeTree/MergeTreeReaderWide.cpp index 34bf095e57e..2326c012fee 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.cpp +++ b/src/Storages/MergeTree/MergeTreeReaderWide.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes MergeTreeReaderWide::MergeTreeReaderWide( DataPartWidePtr data_part_, NamesAndTypesList columns_, + const StorageMetadataPtr & metadata_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, @@ -36,8 +37,14 @@ MergeTreeReaderWide::MergeTreeReaderWide( const ReadBufferFromFileBase::ProfileCallback & profile_callback_, clockid_t clock_type_) : IMergeTreeReader( - std::move(data_part_), std::move(columns_), uncompressed_cache_, std::move(mark_cache_), - std::move(mark_ranges_), std::move(settings_), std::move(avg_value_size_hints_)) + std::move(data_part_), + std::move(columns_), + metadata_snapshot_, + uncompressed_cache_, + std::move(mark_cache_), + std::move(mark_ranges_), + std::move(settings_), + std::move(avg_value_size_hints_)) { try { diff --git a/src/Storages/MergeTree/MergeTreeReaderWide.h b/src/Storages/MergeTree/MergeTreeReaderWide.h index 7684d69f0a5..69652d1e954 100644 --- a/src/Storages/MergeTree/MergeTreeReaderWide.h +++ b/src/Storages/MergeTree/MergeTreeReaderWide.h @@ -17,6 +17,7 @@ public: MergeTreeReaderWide( DataPartWidePtr data_part_, NamesAndTypesList columns_, + const StorageMetadataPtr & metadata_snapshot_, UncompressedCache * uncompressed_cache_, MarkCache * mark_cache_, MarkRanges mark_ranges_, diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index c47dd7fb669..b71c343614b 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -76,7 +76,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( ordered_names = header_without_virtual_columns.getNames(); - task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns); + task_columns = getReadTaskColumns(storage, metadata_snapshot, data_part, required_columns, prewhere_info, check_columns); /// will be used to distinguish between PREWHERE and WHERE columns when applying filter const auto & column_names = task_columns.columns.getNames(); @@ -87,11 +87,12 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( owned_mark_cache = storage.global_context.getMarkCache(); - reader = data_part->getReader(task_columns.columns, all_mark_ranges, - owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings); + reader = data_part->getReader(task_columns.columns, metadata_snapshot, + all_mark_ranges, owned_uncompressed_cache.get(), + owned_mark_cache.get(), reader_settings); if (prewhere_info) - pre_reader = data_part->getReader(task_columns.pre_columns, all_mark_ranges, + pre_reader = data_part->getReader(task_columns.pre_columns, metadata_snapshot, all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings); } diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index 84c0f44c109..b46b414bfe8 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -68,7 +68,9 @@ try } is_first_task = false; - task_columns = getReadTaskColumns(storage, data_part, required_columns, prewhere_info, check_columns); + task_columns = getReadTaskColumns( + storage, 
metadata_snapshot, data_part, + required_columns, prewhere_info, check_columns); auto size_predictor = (preferred_block_size_bytes == 0) ? nullptr @@ -90,11 +92,11 @@ try owned_mark_cache = storage.global_context.getMarkCache(); - reader = data_part->getReader(task_columns.columns, all_mark_ranges, + reader = data_part->getReader(task_columns.columns, metadata_snapshot, all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings); if (prewhere_info) - pre_reader = data_part->getReader(task_columns.pre_columns, all_mark_ranges, + pre_reader = data_part->getReader(task_columns.pre_columns, metadata_snapshot, all_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings); } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index dfd60bd50ef..f8e31db2b5a 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -39,7 +39,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( addTotalRowsApprox(data_part->rows_count); /// Add columns because we don't want to read empty blocks - injectRequiredColumns(storage, data_part, columns_to_read); + injectRequiredColumns(storage, metadata_snapshot, data_part, columns_to_read); NamesAndTypesList columns_for_reader; if (take_column_types_from_storage) { @@ -60,7 +60,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( .save_marks_in_cache = false }; - reader = data_part->getReader(columns_for_reader, + reader = data_part->getReader(columns_for_reader, metadata_snapshot, MarkRanges{MarkRange(0, data_part->getMarksCount())}, /* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings); } diff --git a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp index 784c842d7d6..c332685799c 100644 --- a/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeThreadSelectBlockInputProcessor.cpp @@ -74,12 +74,12 @@ bool MergeTreeThreadSelectBlockInputProcessor::getNewTask() owned_uncompressed_cache = storage.global_context.getUncompressedCache(); owned_mark_cache = storage.global_context.getMarkCache(); - reader = task->data_part->getReader(task->columns, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, rest_mark_ranges, + pre_reader = task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, IMergeTreeReader::ValueSizeMap{}, profile_callback); } @@ -90,12 +90,12 @@ bool MergeTreeThreadSelectBlockInputProcessor::getNewTask() { auto rest_mark_ranges = pool->getRestMarks(*task->data_part, task->mark_ranges[0]); /// retain avg_value_size_hints - reader = task->data_part->getReader(task->columns, rest_mark_ranges, + reader = task->data_part->getReader(task->columns, metadata_snapshot, rest_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); if (prewhere_info) - pre_reader = task->data_part->getReader(task->pre_columns, rest_mark_ranges, + pre_reader = 
task->data_part->getReader(task->pre_columns, metadata_snapshot, rest_mark_ranges, owned_uncompressed_cache.get(), owned_mark_cache.get(), reader_settings, reader->getAvgValueSizeHints(), profile_callback); } diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 2ce258a2d25..4e659a8e7b1 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -168,9 +168,9 @@ Pipes StorageBuffer::read( auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); - const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [metadata_snapshot, destination](const String& column_name) + const bool dst_has_same_structure = std::all_of(column_names.begin(), column_names.end(), [metadata_snapshot, destination_metadata_snapshot](const String& column_name) { - const auto & dest_columns = destination->getColumns(); + const auto & dest_columns = destination_metadata_snapshot->getColumns(); const auto & our_columns = metadata_snapshot->getColumns(); return dest_columns.hasPhysical(column_name) && dest_columns.get(column_name).type->equals(*our_columns.get(column_name).type); @@ -192,8 +192,8 @@ Pipes StorageBuffer::read( const Block header = metadata_snapshot->getSampleBlock(); Names columns_intersection = column_names; Block header_after_adding_defaults = header; - const auto & dest_columns = destination->getColumns(); - const auto & our_columns = getColumns(); + const auto & dest_columns = destination_metadata_snapshot->getColumns(); + const auto & our_columns = metadata_snapshot->getColumns(); for (const String & column_name : column_names) { if (!dest_columns.hasPhysical(column_name)) @@ -224,7 +224,7 @@ Pipes StorageBuffer::read( for (auto & pipe : pipes_from_dst) { pipe.addSimpleTransform(std::make_shared( - pipe.getHeader(), header_after_adding_defaults, getColumns().getDefaults(), context)); + pipe.getHeader(), header_after_adding_defaults, metadata_snapshot->getColumns().getDefaults(), context)); pipe.addSimpleTransform(std::make_shared( pipe.getHeader(), header, ConvertingTransform::MatchColumnsMode::Name)); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index ce4fcbb3513..238623c1576 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,7 +290,7 @@ StorageDistributed::StorageDistributed( if (sharding_key_) { - sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, getColumns().getAllPhysical(), false); + sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, metadata_.getColumns().getAllPhysical(), false); sharding_key_column_name = sharding_key_->getColumnName(); } @@ -447,6 +447,7 @@ bool StorageDistributed::canForceGroupByNoMerge(const Context &context, QueryPro QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const { const auto & settings = context.getSettingsRef(); + auto metadata_snapshot = getInMemoryMetadataPtr(); if (canForceGroupByNoMerge(context, to_stage, query_ptr)) return QueryProcessingStage::Complete; @@ -454,7 +455,7 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con ClusterPtr cluster = getCluster(); if (settings.optimize_skip_unused_shards) { - ClusterPtr optimized_cluster = getOptimizedCluster(context, query_ptr); + ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_ptr); if 
(optimized_cluster) cluster = optimized_cluster; } @@ -476,7 +477,7 @@ Pipes StorageDistributed::read( ClusterPtr cluster = getCluster(); if (settings.optimize_skip_unused_shards) { - ClusterPtr optimized_cluster = getOptimizedCluster(context, query_info.query); + ClusterPtr optimized_cluster = getOptimizedCluster(context, metadata_snapshot, query_info.query); if (optimized_cluster) { LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", makeFormattedListOfShards(optimized_cluster)); @@ -683,14 +684,14 @@ ClusterPtr StorageDistributed::getCluster() const return owned_cluster ? owned_cluster : global_context->getCluster(cluster_name); } -ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, const ASTPtr & query_ptr) const +ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query_ptr) const { ClusterPtr cluster = getCluster(); const Settings & settings = context.getSettingsRef(); if (has_sharding_key) { - ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, context); + ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, metadata_snapshot, context); if (optimized) return optimized; } @@ -751,7 +752,11 @@ IColumn::Selector StorageDistributed::createSelector(const ClusterPtr cluster, c /// Returns a new cluster with fewer shards if constant folding for `sharding_key_expr` is possible /// using constraints from "PREWHERE" and "WHERE" conditions, otherwise returns `nullptr` -ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const Context & context) const +ClusterPtr StorageDistributed::skipUnusedShards( + ClusterPtr cluster, + const ASTPtr & query_ptr, + const StorageMetadataPtr & metadata_snapshot, + const Context & context) const { const auto & select = query_ptr->as(); @@ -770,7 +775,7 @@ ClusterPtr StorageDistributed::skipUnusedShards(ClusterPtr cluster, const ASTPtr condition_ast = select.prewhere() ? 
select.prewhere()->clone() : select.where()->clone(); } - replaceConstantExpressions(condition_ast, context, getColumns().getAll(), shared_from_this()); + replaceConstantExpressions(condition_ast, context, metadata_snapshot->getColumns().getAll(), shared_from_this()); const auto blocks = evaluateExpressionOverConstantCondition(condition_ast, sharding_key_expr); // Can't get definite answer if we can skip any shards diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 3f148cfff01..af508a80646 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -120,8 +120,8 @@ public: /// Apply the following settings: /// - optimize_skip_unused_shards /// - force_optimize_skip_unused_shards - ClusterPtr getOptimizedCluster(const Context &, const ASTPtr & query_ptr) const; - ClusterPtr skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const Context & context) const; + ClusterPtr getOptimizedCluster(const Context &, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & query_ptr) const; + ClusterPtr skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const StorageMetadataPtr & metadata_snapshot, const Context & context) const; ActionLock getActionLock(StorageActionBlockType type) override; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 65f36a48170..8083a8b2145 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -417,7 +417,7 @@ Pipes StorageFile::read( for (size_t i = 0; i < num_streams; ++i) pipes.emplace_back(std::make_shared( - this_ptr, metadata_snapshot, context, max_block_size, files_info, getColumns().getDefaults())); + this_ptr, metadata_snapshot, context, max_block_size, files_info, metadata_snapshot->getColumns().getDefaults())); return pipes; } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index bcebeec09dd..dad323f7b72 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -441,7 +441,7 @@ Pipes StorageGenerateRandom::read( Pipes pipes; pipes.reserve(num_streams); - const ColumnsDescription & our_columns = getColumns(); + const ColumnsDescription & our_columns = metadata_snapshot->getColumns(); Block block_header; for (const auto & name : column_names) { diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index fcae9c9aa82..e0953283a17 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -358,7 +358,7 @@ void LogBlockOutputStream::writeData(const String & name, const IDataType & type if (written_streams.count(stream_name)) return; - const auto & columns = storage.getColumns(); + const auto & columns = metadata_snapshot->getColumns(); streams.try_emplace( stream_name, storage.disk, @@ -445,7 +445,7 @@ StorageLog::StorageLog( /// create directories if they do not exist disk->createDirectories(table_path); - for (const auto & column : getColumns().getAllPhysical()) + for (const auto & column : metadata_.getColumns().getAllPhysical()) addFiles(column.name, *column.type); marks_file_path = table_path + DBMS_STORAGE_LOG_MARKS_FILE_NAME; @@ -539,13 +539,14 @@ void StorageLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLo { std::shared_lock lock(rwlock); + auto metadata_snapshot = getInMemoryMetadataPtr(); files.clear(); file_count = 0; loaded_marks = false; disk->clearDirectory(table_path); - for (const auto & column : getColumns().getAllPhysical()) + for (const auto & column : 
metadata_snapshot->getColumns().getAllPhysical()) addFiles(column.name, *column.type); file_checker = FileChecker{disk, table_path + "sizes.json"}; @@ -553,11 +554,11 @@ void StorageLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLo } -const StorageLog::Marks & StorageLog::getMarksWithRealRowCount() const +const StorageLog::Marks & StorageLog::getMarksWithRealRowCount(const StorageMetadataPtr & metadata_snapshot) const { /// There should be at least one physical column - const String column_name = getColumns().getAllPhysical().begin()->name; - const auto column_type = getColumns().getAllPhysical().begin()->type; + const String column_name = metadata_snapshot->getColumns().getAllPhysical().begin()->name; + const auto column_type = metadata_snapshot->getColumns().getAllPhysical().begin()->type; String filename; /** We take marks from first column. @@ -590,13 +591,13 @@ Pipes StorageLog::read( metadata_snapshot->check(column_names, getVirtuals()); loadMarks(); - NamesAndTypesList all_columns = Nested::collect(getColumns().getAllPhysical().addTypes(column_names)); + NamesAndTypesList all_columns = Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)); std::shared_lock lock(rwlock); Pipes pipes; - const Marks & marks = getMarksWithRealRowCount(); + const Marks & marks = getMarksWithRealRowCount(metadata_snapshot); size_t marks_size = marks.size(); if (num_streams > marks_size) diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 60f885ce45c..90d0799e1a8 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -26,7 +26,7 @@ public: Pipes read( const Names & column_names, - const StorageMetadataPtr & /*metadata_snapshot*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum processed_stage, @@ -112,7 +112,7 @@ private: * * Return the first group of marks that contain the number of rows, but not the internals of the arrays. */ - const Marks & getMarksWithRealRowCount() const; + const Marks & getMarksWithRealRowCount(const StorageMetadataPtr & metadata_snapshot) const; }; } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 92e965c420e..228cec99357 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -250,9 +250,11 @@ Pipes StorageMerge::createSources( if (!storage) { - auto pipe = InterpreterSelectQuery(modified_query_info.query, *modified_context, - std::make_shared(header), - SelectQueryOptions(processed_stage).analyze()).execute().pipeline.getPipe(); + auto pipe = InterpreterSelectQuery( + modified_query_info.query, *modified_context, + std::make_shared(header), + SelectQueryOptions(processed_stage).analyze()).execute().pipeline.getPipe(); + pipe.addInterpreterContext(modified_context); pipes.emplace_back(std::move(pipe)); return pipes; @@ -263,7 +265,7 @@ Pipes StorageMerge::createSources( { /// If there are only virtual columns in query, you must request at least one other column. 
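The comment above states a long-standing rule this patch keeps intact: a block with no physical columns cannot even carry a row count, so when only virtual columns such as _table are requested, one real column must be injected, and the getSmallestColumn() call just below picks it by the type's in-memory size (unlike the per-part heuristic earlier in this patch, which uses on-disk compressed size). Schematically, with made-up types:

#include <cstddef>
#include <string>
#include <vector>

struct ColumnWithType { std::string name; size_t fixed_size_bytes; }; // rough per-value size estimate

// Assumes a non-empty list of physical columns.
std::string smallestColumn(const std::vector<ColumnWithType> & physical)
{
    size_t best = 0;
    for (size_t i = 1; i < physical.size(); ++i)
        if (physical[i].fixed_size_bytes < physical[best].fixed_size_bytes)
            best = i;
    return physical[best].name;
}
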
if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage->getColumns().getAllPhysical())); + real_column_names.push_back(ExpressionActions::getSmallestColumn(metadata_snapshot->getColumns().getAllPhysical())); pipes = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b0a7e550233..e45d54a8c64 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -500,7 +500,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", getColumns().toString(), + ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(zookeeper_path + "/log", "", zkutil::CreateMode::Persistent)); @@ -535,7 +535,7 @@ bool StorageReplicatedMergeTree::createTableIfNotExists(const StorageMetadataPtr zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", metadata_str, zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent)); @@ -596,7 +596,7 @@ void StorageReplicatedMergeTree::createReplica(const StorageMetadataPtr & metada zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata", ReplicatedMergeTreeTableMetadata(*this, metadata_snapshot).toString(), zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", getColumns().toString(), + ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/columns", metadata_snapshot->getColumns().toString(), zkutil::CreateMode::Persistent)); ops.emplace_back(zkutil::makeCreateRequest(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent)); @@ -748,7 +748,7 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr Coordination::Stat columns_stat; auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(zookeeper_prefix + "/columns", &columns_stat)); - const ColumnsDescription & old_columns = getColumns(); + const ColumnsDescription & old_columns = metadata_snapshot->getColumns(); if (columns_from_zk != old_columns) { throw Exception("Table columns structure in ZooKeeper is different from local table structure", ErrorCodes::INCOMPATIBLE_COLUMNS); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 4015d8ca574..4578a82f650 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -237,8 +237,9 @@ void TinyLogSource::readData(const String & name, const IDataType & type, IColum } -IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter(const String & name, - WrittenStreams & 
written_streams) +IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter( + const String & name, + WrittenStreams & written_streams) { return [&] (const IDataType::SubstreamPath & path) -> WriteBuffer * { @@ -247,12 +248,13 @@ IDataType::OutputStreamGetter TinyLogBlockOutputStream::createStreamGetter(const if (!written_streams.insert(stream_name).second) return nullptr; - const auto & columns = storage.getColumns(); + const auto & columns = metadata_snapshot->getColumns(); if (!streams.count(stream_name)) - streams[stream_name] = std::make_unique(storage.disk, - storage.files[stream_name].data_file_path, - columns.getCodecOrDefault(name), - storage.max_compress_block_size); + streams[stream_name] = std::make_unique( + storage.disk, + storage.files[stream_name].data_file_path, + columns.getCodecOrDefault(name), + storage.max_compress_block_size); return &streams[stream_name]->compressed; }; @@ -351,7 +353,7 @@ StorageTinyLog::StorageTinyLog( disk->createDirectories(table_path); } - for (const auto & col : getColumns().getAllPhysical()) + for (const auto & col : metadata_.getColumns().getAllPhysical()) addFiles(col.name, *col.type); } @@ -430,13 +432,14 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context void StorageTinyLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) { std::unique_lock lock(rwlock); + auto metadata_snapshot = getInMemoryMetadataPtr(); disk->clearDirectory(table_path); files.clear(); file_checker = FileChecker{disk, table_path + "sizes.json"}; - for (const auto &column : getColumns().getAllPhysical()) + for (const auto & column : metadata_snapshot->getColumns().getAllPhysical()) addFiles(column.name, *column.type); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 949d922b611..802ad0571a8 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -136,7 +136,9 @@ std::string IStorageURLBase::getReadMethod() const return Poco::Net::HTTPRequest::HTTP_GET; } -std::vector> IStorageURLBase::getReadURIParams(const Names & /*column_names*/, +std::vector> IStorageURLBase::getReadURIParams( + const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, @@ -145,7 +147,9 @@ std::vector> IStorageURLBase::getReadURIPara return {}; } -std::function IStorageURLBase::getReadPOSTDataCallback(const Names & /*column_names*/, +std::function IStorageURLBase::getReadPOSTDataCallback( + const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, @@ -165,7 +169,7 @@ Pipes IStorageURLBase::read( unsigned /*num_streams*/) { auto request_uri = uri; - auto params = getReadURIParams(column_names, query_info, context, processed_stage, max_block_size); + auto params = getReadURIParams(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size); for (const auto & [param, value] : params) request_uri.addQueryParameter(param, value); @@ -173,7 +177,9 @@ Pipes IStorageURLBase::read( pipes.emplace_back(std::make_shared( request_uri, getReadMethod(), - getReadPOSTDataCallback(column_names, query_info, context, processed_stage, max_block_size), + getReadPOSTDataCallback( + column_names, metadata_snapshot, query_info, + context, processed_stage, max_block_size), format_name, 
getName(), getHeaderBlock(column_names, metadata_snapshot), diff --git a/src/Storages/StorageURL.h b/src/Storages/StorageURL.h index 04cbb278c37..67ad95d2f91 100644 --- a/src/Storages/StorageURL.h +++ b/src/Storages/StorageURL.h @@ -50,6 +50,7 @@ private: virtual std::vector> getReadURIParams( const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & processed_stage, @@ -57,6 +58,7 @@ private: virtual std::function getReadPOSTDataCallback( const Names & column_names, + const StorageMetadataPtr & /*metadata_snapshot*/, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & processed_stage, @@ -68,12 +70,13 @@ private: class StorageURLBlockOutputStream : public IBlockOutputStream { public: - StorageURLBlockOutputStream(const Poco::URI & uri, - const String & format, - const Block & sample_block_, - const Context & context, - const ConnectionTimeouts & timeouts, - const CompressionMethod compression_method); + StorageURLBlockOutputStream( + const Poco::URI & uri, + const String & format, + const Block & sample_block_, + const Context & context, + const ConnectionTimeouts & timeouts, + const CompressionMethod compression_method); Block getHeader() const override { diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index ab8b37db7db..05cf4ed5abf 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -50,7 +50,9 @@ std::string StorageXDBC::getReadMethod() const return Poco::Net::HTTPRequest::HTTP_POST; } -std::vector> StorageXDBC::getReadURIParams(const Names & column_names, +std::vector> StorageXDBC::getReadURIParams( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & /*query_info*/, const Context & /*context*/, QueryProcessingStage::Enum & /*processed_stage*/, @@ -59,20 +61,22 @@ std::vector> StorageXDBC::getReadURIParams(c NamesAndTypesList cols; for (const String & name : column_names) { - auto column_data = getColumns().getPhysical(name); + auto column_data = metadata_snapshot->getColumns().getPhysical(name); cols.emplace_back(column_data.name, column_data.type); } return bridge_helper->getURLParams(cols.toString(), max_block_size); } -std::function StorageXDBC::getReadPOSTDataCallback(const Names & /*column_names*/, +std::function StorageXDBC::getReadPOSTDataCallback( + const Names & /*column_names*/, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & /*processed_stage*/, size_t /*max_block_size*/) const { String query = transformQueryForExternalDatabase(query_info, - getColumns().getOrdinary(), + metadata_snapshot->getColumns().getOrdinary(), bridge_helper->getIdentifierQuotingStyle(), remote_database_name, remote_table_name, diff --git a/src/Storages/StorageXDBC.h b/src/Storages/StorageXDBC.h index 0e227d7d432..44931af4643 100644 --- a/src/Storages/StorageXDBC.h +++ b/src/Storages/StorageXDBC.h @@ -29,7 +29,8 @@ public: const std::string & remote_database_name, const std::string & remote_table_name, const ColumnsDescription & columns_, - const Context & context_, BridgeHelperPtr bridge_helper_); + const Context & context_, + BridgeHelperPtr bridge_helper_); BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; @@ -45,6 +46,7 @@ private: std::vector> getReadURIParams( const Names & 
column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & processed_stage, @@ -52,6 +54,7 @@ private: std::function getReadPOSTDataCallback( const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, const SelectQueryInfo & query_info, const Context & context, QueryProcessingStage::Enum & processed_stage, diff --git a/src/Storages/System/StorageSystemPartsColumns.cpp b/src/Storages/System/StorageSystemPartsColumns.cpp index 479621fd47f..4631bb9c4c5 100644 --- a/src/Storages/System/StorageSystemPartsColumns.cpp +++ b/src/Storages/System/StorageSystemPartsColumns.cpp @@ -71,7 +71,7 @@ void StorageSystemPartsColumns::processNextStorage(MutableColumns & columns_, co }; std::unordered_map columns_info; - for (const auto & column : info.storage->getColumns()) + for (const auto & column : info.storage->getInMemoryMetadataPtr()->getColumns()) { ColumnInfo column_info; if (column.default_desc.expression) diff --git a/src/Storages/TTLDescription.cpp b/src/Storages/TTLDescription.cpp index ea6b3e64aff..6e0d323e8a0 100644 --- a/src/Storages/TTLDescription.cpp +++ b/src/Storages/TTLDescription.cpp @@ -220,7 +220,7 @@ TTLDescription TTLDescription::getTTLFromAST( if (value->as()) { - auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, true); + auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, {}, true); auto expr_actions = ExpressionAnalyzer(value, syntax_result, context).getActions(false); for (const auto & column : expr_actions->getRequiredColumns()) { @@ -249,7 +249,7 @@ TTLDescription TTLDescription::getTTLFromAST( for (auto [name, value] : aggregations) { - auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, true); + auto syntax_result = SyntaxAnalyzer(context).analyze(value, columns.getAllPhysical(), {}, {}, true); auto expr_analyzer = ExpressionAnalyzer(value, syntax_result, context); TTLAggregateDescription set_part; diff --git a/src/Storages/getStructureOfRemoteTable.cpp b/src/Storages/getStructureOfRemoteTable.cpp index 19d1172f1ff..aca5456d85e 100644 --- a/src/Storages/getStructureOfRemoteTable.cpp +++ b/src/Storages/getStructureOfRemoteTable.cpp @@ -75,7 +75,8 @@ ColumnsDescription getStructureOfRemoteTableInShard( { const auto * table_function = table_func_ptr->as(); TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_function->name, context); - return table_function_ptr->execute(table_func_ptr, context, table_function_ptr->getName())->getColumns(); + auto storage_ptr = table_function_ptr->execute(table_func_ptr, context, table_function_ptr->getName()); + return storage_ptr->getInMemoryMetadataPtr()->getColumns(); } auto table_func_name = queryToString(table_func_ptr); @@ -84,7 +85,10 @@ ColumnsDescription getStructureOfRemoteTableInShard( else { if (shard_info.isLocal()) - return DatabaseCatalog::instance().getTable(table_id, context)->getColumns(); + { + auto storage_ptr = DatabaseCatalog::instance().getTable(table_id, context); + return storage_ptr->getInMemoryMetadataPtr()->getColumns(); + } /// Request for a table description query = "DESC TABLE " + table_id.getFullTableName(); diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index 618d524987b..c97adaf118d 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -78,7 +78,7 @@ std::string 
writeData(int rows, DB::StoragePtr & table, const DB::Context & cont Block block; { - const auto & storage_columns = table->getColumns(); + const auto & storage_columns = metadata_snapshot->getColumns(); ColumnWithTypeAndName column; column.name = "a"; column.type = storage_columns.getPhysical("a").type; diff --git a/src/TableFunctions/TableFunctionMerge.cpp b/src/TableFunctions/TableFunctionMerge.cpp index ee447a13174..7c0c1fb233f 100644 --- a/src/TableFunctions/TableFunctionMerge.cpp +++ b/src/TableFunctions/TableFunctionMerge.cpp @@ -42,7 +42,7 @@ static NamesAndTypesList chooseColumns(const String & source_database, const Str throw Exception("Error while executing table function merge. In database " + source_database + " no one matches regular expression: " + table_name_regexp_, ErrorCodes::UNKNOWN_TABLE); - return any_table->getColumns().getAllPhysical(); + return any_table->getInMemoryMetadataPtr()->getColumns().getAllPhysical(); } From 51a47560e0a980344fb34b0d06d99885a74948f9 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 17 Jun 2020 19:59:26 +0300 Subject: [PATCH 127/318] Fix segmentation fault --- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index f601ca74112..d832bcb7dc0 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -1180,7 +1180,7 @@ void InterpreterSelectQuery::executeFetchColumns( = ext::map(required_columns_after_prewhere, [](const auto & it) { return it.name; }); } - auto syntax_result = SyntaxAnalyzer(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage); + auto syntax_result = SyntaxAnalyzer(*context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot); alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, *context).getActions(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
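The commit below replaces the eager design, in which the nested function was fed row by row as each new distinct value appeared, with a deferred one: add() and merge() only maintain the per-state set of distinct values, and the nested function consumes them all at once during finalization via addBatchSinglePlace. That deferral is also what forces the insertResultInto(..., Arena *) signature change that touches every aggregate function in this commit. A minimal sketch of the idea, using standard containers instead of ClickHouse's hash sets; every name here is illustrative, not the real interface:

    #include <iostream>
    #include <set>

    struct Arena {};  // stand-in for DB::Arena, which finalization may allocate from

    // Toy nested aggregate: a sum over long values.
    struct Sum
    {
        long value = 0;
        void add(long v, Arena *) { value += v; }
    };

    // Deferred -Distinct wrapper: add()/merge() never touch the nested state.
    struct SumDistinct
    {
        std::set<long> seen;  // per-state set of distinct inputs
        Sum nested;

        void add(long v) { seen.insert(v); }
        void merge(const SumDistinct & rhs) { seen.insert(rhs.seen.begin(), rhs.seen.end()); }

        // Finalization replays every distinct value into the nested function,
        // which is why this step (alone) needs the arena.
        long insertResult(Arena * arena)
        {
            for (long v : seen)
                nested.add(v, arena);
            return nested.value;
        }
    };

    int main()
    {
        Arena arena;
        SumDistinct agg;
        for (long v : {1L, 2L, 2L, 3L, 3L, 3L})
            agg.add(v);
        std::cout << agg.insertResult(&arena) << '\n';  // prints 6 (= 1 + 2 + 3)
    }

Relative to the replaced version (the "-" lines in AggregateFunctionDistinct.h below), merge and serialization become pure set operations, and states no longer interleave nested calls with set lookups.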
From 88b325dcdc373dd8c34d0479c7cc482b618da6fe Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Wed, 17 Jun 2020 22:36:27 +0300 Subject: [PATCH 128/318] rework distinct combinator --- .../AggregateFunctionAggThrow.cpp | 2 +- .../AggregateFunctionArgMinMax.h | 2 +- .../AggregateFunctionArray.h | 4 +- src/AggregateFunctions/AggregateFunctionAvg.h | 2 +- .../AggregateFunctionBitwise.h | 2 +- .../AggregateFunctionBoundingRatio.h | 2 +- ...egateFunctionCategoricalInformationValue.h | 4 +- .../AggregateFunctionCount.h | 4 +- .../AggregateFunctionDistinct.h | 67 +++++++------------ .../AggregateFunctionEntropy.h | 2 +- .../AggregateFunctionForEach.h | 4 +- .../AggregateFunctionGroupArray.h | 6 +- .../AggregateFunctionGroupArrayInsertAt.h | 2 +- .../AggregateFunctionGroupArrayMoving.h | 2 +- .../AggregateFunctionGroupBitmap.h | 4 +- .../AggregateFunctionGroupUniqArray.h | 5 +- .../AggregateFunctionHistogram.h | 2 +- src/AggregateFunctions/AggregateFunctionIf.h | 4 +- .../AggregateFunctionMLMethod.h | 2 +- .../AggregateFunctionMaxIntersections.h | 2 +- .../AggregateFunctionMerge.h | 4 +- .../AggregateFunctionMinMaxAny.h | 2 +- .../AggregateFunctionNothing.h | 2 +- .../AggregateFunctionNull.h | 6 +- .../AggregateFunctionOrFill.h | 9 +-- .../AggregateFunctionQuantile.h | 2 +- .../AggregateFunctionResample.h | 5 +- .../AggregateFunctionRetention.h | 2 +- .../AggregateFunctionSequenceMatch.h | 4 +- .../AggregateFunctionSimpleLinearRegression.h | 4 +- .../AggregateFunctionState.h | 2 +- .../AggregateFunctionStatistics.h | 4 +- .../AggregateFunctionStatisticsSimple.h | 2 +- src/AggregateFunctions/AggregateFunctionSum.h | 2 +- .../AggregateFunctionSumMap.h | 2 +- .../AggregateFunctionTimeSeriesGroupSum.h | 2 +- .../AggregateFunctionTopK.h | 4 +- .../AggregateFunctionUniq.h | 4 +- .../AggregateFunctionUniqCombined.h | 4 +- .../AggregateFunctionUniqUpTo.h | 4 +- .../AggregateFunctionWindowFunnel.h | 2 +- src/AggregateFunctions/IAggregateFunction.h | 2 +- src/Columns/ColumnAggregateFunction.cpp | 2 +- src/Functions/array/arrayReduce.cpp | 2 +- src/Functions/array/arrayReduceInRanges.cpp | 2 +- src/Functions/runningAccumulate.cpp | 2 +- src/Interpreters/Aggregator.cpp | 28 +++++--- src/Interpreters/Aggregator.h | 7 +- .../Algorithms/AggregatingSortedAlgorithm.cpp | 2 +- .../GraphiteRollupSortedAlgorithm.cpp | 2 +- .../Algorithms/SummingSortedAlgorithm.cpp | 2 +- .../01259_combinator_distinct.reference | 4 +- .../0_stateless/01259_combinator_distinct.sql | 9 ++- 53 files changed, 128 insertions(+), 132 deletions(-) diff --git a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp index ea3eb9b1a20..fada039e20a 100644 --- a/src/AggregateFunctions/AggregateFunctionAggThrow.cpp +++ b/src/AggregateFunctions/AggregateFunctionAggThrow.cpp @@ -93,7 +93,7 @@ public: buf.read(c); } - void insertResultInto(AggregateDataPtr, IColumn & to) const override + void insertResultInto(AggregateDataPtr, IColumn & to, Arena *) const override { to.insertDefault(); } diff --git a/src/AggregateFunctions/AggregateFunctionArgMinMax.h b/src/AggregateFunctions/AggregateFunctionArgMinMax.h index 9a0c428d75b..9470b1b8692 100644 --- a/src/AggregateFunctions/AggregateFunctionArgMinMax.h +++ b/src/AggregateFunctions/AggregateFunctionArgMinMax.h @@ -85,7 +85,7 @@ public: return Data::allocatesMemoryInArena(); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { 
this->data(place).result.insertResultInto(to); } diff --git a/src/AggregateFunctions/AggregateFunctionArray.h b/src/AggregateFunctions/AggregateFunctionArray.h index 4fe5e459ae1..24b07010707 100644 --- a/src/AggregateFunctions/AggregateFunctionArray.h +++ b/src/AggregateFunctions/AggregateFunctionArray.h @@ -119,9 +119,9 @@ public: nested_func->deserialize(place, buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { - nested_func->insertResultInto(place, to); + nested_func->insertResultInto(place, to, arena); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index d9ef8647b82..1f3426160cb 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -80,7 +80,7 @@ public: readBinary(this->data(place).denominator, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & column = static_cast(to); column.getData().push_back(this->data(place).template result()); diff --git a/src/AggregateFunctions/AggregateFunctionBitwise.h b/src/AggregateFunctions/AggregateFunctionBitwise.h index a4e5f7ddafa..6d9eb3c36e1 100644 --- a/src/AggregateFunctions/AggregateFunctionBitwise.h +++ b/src/AggregateFunctions/AggregateFunctionBitwise.h @@ -74,7 +74,7 @@ public: readBinary(this->data(place).value, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast &>(to).getData().push_back(this->data(place).value); } diff --git a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h index 81846db4bac..9ceb7976f4a 100644 --- a/src/AggregateFunctions/AggregateFunctionBoundingRatio.h +++ b/src/AggregateFunctions/AggregateFunctionBoundingRatio.h @@ -150,7 +150,7 @@ public: data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(getBoundingRatio(data(place))); } diff --git a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h index 1c397c26631..aa205a71c97 100644 --- a/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h +++ b/src/AggregateFunctions/AggregateFunctionCategoricalInformationValue.h @@ -119,8 +119,8 @@ public: void insertResultInto( AggregateDataPtr place, - IColumn & to - ) const override + IColumn & to, + Arena *) const override { auto & col = static_cast(to); auto & data_col = static_cast(col.getData()); diff --git a/src/AggregateFunctions/AggregateFunctionCount.h b/src/AggregateFunctions/AggregateFunctionCount.h index e54f014f7a4..51040bdcfad 100644 --- a/src/AggregateFunctions/AggregateFunctionCount.h +++ b/src/AggregateFunctions/AggregateFunctionCount.h @@ -57,7 +57,7 @@ public: readVarUInt(data(place).count, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(data(place).count); } @@ 
-114,7 +114,7 @@ public: readVarUInt(data(place).count, buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(data(place).count); } diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h index 72099a33cfd..5c663bb6441 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.h +++ b/src/AggregateFunctions/AggregateFunctionDistinct.h @@ -32,7 +32,6 @@ protected: static constexpr size_t prefix_size = sizeof(Data); AggregateFunctionPtr nested_func; size_t num_arguments; - AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept { @@ -103,43 +102,37 @@ public: AggregateFunctionDistinctSingleNumericData, AggregateFunctionDistinctSingleNumericImpl>(nested, arguments) {} - void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override + void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override { const auto & vec = assert_cast &>(*columns[0]).getData(); - if (this->data(place).value.insert(vec[row_num]).second) - this->nested_func->add(this->getNestedPlace(place), columns, row_num, arena); + this->data(place).value.insert(vec[row_num]); } - void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override { - auto & cur_set = this->data(place).value; - auto & rhs_set = this->data(rhs).value; - - auto arguments = this->argument_types[0]->createColumn(); - for (auto & elem : rhs_set) - if (cur_set.insert(elem.getValue()).second) - arguments->insert(elem.getValue()); - - const auto * arguments_ptr = arguments.get(); - if (!arguments->empty()) - this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); + this->data(place).value.merge(this->data(rhs).value); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { this->data(place).value.write(buf); - this->nested_func->serialize(this->getNestedPlace(place), buf); } - void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).value.read(buf); - this->nested_func->deserialize(this->getNestedPlace(place), buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { - this->nested_func->insertResultInto(this->getNestedPlace(place), to); + const auto & set = this->data(place).value; + auto arguments = this->argument_types[0]->createColumn(); + for (const auto & elem : set) + arguments->insert(elem.getValue()); + + const auto * arguments_ptr = arguments.get(); + this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); + this->nested_func->insertResultInto(this->getNestedPlace(place), to, arena); } }; @@ -170,38 +163,25 @@ public: bool inserted; auto key_holder = getKeyHolder(*columns[0], row_num, *arena); set.emplace(key_holder, it, inserted); - if (inserted) - this->nested_func->add(this->getNestedPlace(place), columns, row_num, arena); } void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override { auto & cur_set = 
this->data(place).value; - auto & rhs_set = this->data(rhs).value; + const auto & rhs_set = this->data(rhs).value; Data::Set::LookupResult it; bool inserted; - auto arguments = this->argument_types[0]->createColumn(); - for (auto & elem : rhs_set) - { + for (const auto & elem : rhs_set) cur_set.emplace(ArenaKeyHolder{elem.getValue(), *arena}, it, inserted); - if (inserted) - deserializeAndInsert(elem.getValue(), *arguments); - } - - const auto * arguments_ptr = arguments.get(); - if (!arguments->empty()) - this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); } void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override { - auto & set = this->data(place).value; + const auto & set = this->data(place).value; writeVarUInt(set.size(), buf); for (const auto & elem : set) writeStringBinary(elem.getValue(), buf); - - this->nested_func->serialize(this->getNestedPlace(place), buf); } void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override @@ -211,13 +191,18 @@ public: readVarUInt(size, buf); for (size_t i = 0; i < size; ++i) set.insert(readStringBinaryInto(*arena, buf)); - - this->nested_func->deserialize(this->getNestedPlace(place), buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { - this->nested_func->insertResultInto(this->getNestedPlace(place), to); + const auto & set = this->data(place).value; + auto arguments = this->argument_types[0]->createColumn(); + for (const auto & elem : set) + deserializeAndInsert(elem.getValue(), *arguments); + + const auto * arguments_ptr = arguments.get(); + this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena); + this->nested_func->insertResultInto(this->getNestedPlace(place), to, arena); } }; diff --git a/src/AggregateFunctions/AggregateFunctionEntropy.h b/src/AggregateFunctions/AggregateFunctionEntropy.h index ff233a5ac93..656aca43f60 100644 --- a/src/AggregateFunctions/AggregateFunctionEntropy.h +++ b/src/AggregateFunctions/AggregateFunctionEntropy.h @@ -132,7 +132,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & column = assert_cast &>(to); column.getData().push_back(this->data(place).get()); diff --git a/src/AggregateFunctions/AggregateFunctionForEach.h b/src/AggregateFunctions/AggregateFunctionForEach.h index 23a3487de47..19f2994d3f1 100644 --- a/src/AggregateFunctions/AggregateFunctionForEach.h +++ b/src/AggregateFunctions/AggregateFunctionForEach.h @@ -225,7 +225,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { AggregateFunctionForEachData & state = data(place); @@ -236,7 +236,7 @@ public: char * nested_state = state.array_of_aggregate_datas; for (size_t i = 0; i < state.dynamic_array_size; ++i) { - nested_func->insertResultInto(nested_state, elems_to); + nested_func->insertResultInto(nested_state, elems_to, arena); nested_state += nested_size_of_data; } diff --git a/src/AggregateFunctions/AggregateFunctionGroupArray.h b/src/AggregateFunctions/AggregateFunctionGroupArray.h index b76efd9f6c2..f3d31eb599b 100644 --- 
a/src/AggregateFunctions/AggregateFunctionGroupArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArray.h @@ -282,7 +282,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { const auto & value = this->data(place).value; size_t size = value.size(); @@ -600,7 +600,7 @@ public: // if constexpr (Trait::sampler == Sampler::DETERMINATOR) } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & column_array = assert_cast(to); @@ -815,7 +815,7 @@ public: data(place).last = prev; } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & column_array = assert_cast(to); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h index 0eec38c51a7..d84c99aec57 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayInsertAt.h @@ -179,7 +179,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { ColumnArray & to_array = assert_cast(to); IColumn & to_data = to_array.getData(); diff --git a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h index 8f93a7eb25a..19562b37a12 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h +++ b/src/AggregateFunctions/AggregateFunctionGroupArrayMoving.h @@ -158,7 +158,7 @@ public: this->data(place).sum = value.back(); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { const auto & data = this->data(place); size_t size = data.value.size(); diff --git a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h index 766479cc08d..a6470aa6943 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupBitmap.h +++ b/src/AggregateFunctions/AggregateFunctionGroupBitmap.h @@ -48,7 +48,7 @@ public: this->data(place).rbs.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast &>(to).getData().push_back(this->data(place).rbs.size()); } @@ -113,7 +113,7 @@ public: this->data(place).rbs.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast &>(to).getData().push_back(this->data(place).rbs.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h index b6683567404..2ee9d0f6e1c 100644 --- a/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h +++ b/src/AggregateFunctions/AggregateFunctionGroupUniqArray.h @@ -98,7 +98,7 @@ public: this->data(place).value.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn 
& to, Arena *) const override { ColumnArray & arr_to = assert_cast(to); ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); @@ -218,7 +218,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { ColumnArray & arr_to = assert_cast(to); ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); @@ -231,6 +231,7 @@ public: deserializeAndInsert(elem.getValue(), data_to); } }; + #undef AGGREGATE_FUNCTION_GROUP_ARRAY_UNIQ_MAX_SIZE } diff --git a/src/AggregateFunctions/AggregateFunctionHistogram.h b/src/AggregateFunctions/AggregateFunctionHistogram.h index 8eaa42fdac4..bc9c95ecf2a 100644 --- a/src/AggregateFunctions/AggregateFunctionHistogram.h +++ b/src/AggregateFunctions/AggregateFunctionHistogram.h @@ -353,7 +353,7 @@ public: this->data(place).read(buf, max_bins); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & data = this->data(place); diff --git a/src/AggregateFunctions/AggregateFunctionIf.h b/src/AggregateFunctions/AggregateFunctionIf.h index bf4f0b24de3..f04450c9142 100644 --- a/src/AggregateFunctions/AggregateFunctionIf.h +++ b/src/AggregateFunctions/AggregateFunctionIf.h @@ -95,9 +95,9 @@ public: nested_func->deserialize(place, buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { - nested_func->insertResultInto(place, to); + nested_func->insertResultInto(place, to, arena); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionMLMethod.h b/src/AggregateFunctions/AggregateFunctionMLMethod.h index a11ca9032a5..8a93b66ab3b 100644 --- a/src/AggregateFunctions/AggregateFunctionMLMethod.h +++ b/src/AggregateFunctions/AggregateFunctionMLMethod.h @@ -388,7 +388,7 @@ public: /** This function is called if aggregate function without State modifier is selected in a query. 
* Inserts all weights of the model into the column 'to', so user may use such information if needed */ - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { this->data(place).returnWeights(to); } diff --git a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h index 050c5fd78ea..b8a4dd63eea 100644 --- a/src/AggregateFunctions/AggregateFunctionMaxIntersections.h +++ b/src/AggregateFunctions/AggregateFunctionMaxIntersections.h @@ -129,7 +129,7 @@ public: buf.read(reinterpret_cast(value.data()), size * sizeof(value[0])); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { Int64 current_intersections = 0; Int64 max_intersections = 0; diff --git a/src/AggregateFunctions/AggregateFunctionMerge.h b/src/AggregateFunctions/AggregateFunctionMerge.h index 51a3c11118f..066f7a762f8 100644 --- a/src/AggregateFunctions/AggregateFunctionMerge.h +++ b/src/AggregateFunctions/AggregateFunctionMerge.h @@ -93,9 +93,9 @@ public: nested_func->deserialize(place, buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { - nested_func->insertResultInto(place, to); + nested_func->insertResultInto(place, to, arena); } bool allocatesMemoryInArena() const override diff --git a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h index 69504f7b249..a21a64af9a4 100644 --- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h +++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h @@ -746,7 +746,7 @@ public: return Data::allocatesMemoryInArena(); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { this->data(place).insertResultInto(to); } diff --git a/src/AggregateFunctions/AggregateFunctionNothing.h b/src/AggregateFunctions/AggregateFunctionNothing.h index 511dbbecd38..af90dfb5179 100644 --- a/src/AggregateFunctions/AggregateFunctionNothing.h +++ b/src/AggregateFunctions/AggregateFunctionNothing.h @@ -67,7 +67,7 @@ public: { } - void insertResultInto(AggregateDataPtr, IColumn & to) const override + void insertResultInto(AggregateDataPtr, IColumn & to, Arena *) const override { to.insertDefault(); } diff --git a/src/AggregateFunctions/AggregateFunctionNull.h b/src/AggregateFunctions/AggregateFunctionNull.h index d6f0079232c..2f2c23fdc8b 100644 --- a/src/AggregateFunctions/AggregateFunctionNull.h +++ b/src/AggregateFunctions/AggregateFunctionNull.h @@ -150,14 +150,14 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override { if constexpr (result_is_nullable) { ColumnNullable & to_concrete = assert_cast(to); if (getFlag(place)) { - nested_function->insertResultInto(nestedPlace(place), to_concrete.getNestedColumn()); + nested_function->insertResultInto(nestedPlace(place), to_concrete.getNestedColumn(), arena); to_concrete.getNullMapData().push_back(0); } else @@ -167,7 +167,7 @@ public: } else { - nested_function->insertResultInto(nestedPlace(place), to); + nested_function->insertResultInto(nestedPlace(place), to, 
arena); } } diff --git a/src/AggregateFunctions/AggregateFunctionOrFill.h b/src/AggregateFunctions/AggregateFunctionOrFill.h index 1bbf2ea3135..333f07d5e33 100644 --- a/src/AggregateFunctions/AggregateFunctionOrFill.h +++ b/src/AggregateFunctions/AggregateFunctionOrFill.h @@ -148,7 +148,8 @@ public: void insertResultInto( AggregateDataPtr place, - IColumn & to) const override + IColumn & to, + Arena * arena) const override { if (place[size_of_data]) { @@ -157,20 +158,20 @@ public: // -OrNull if (inner_nullable) - nested_function->insertResultInto(place, to); + nested_function->insertResultInto(place, to, arena); else { ColumnNullable & col = typeid_cast(to); col.getNullMapColumn().insertDefault(); - nested_function->insertResultInto(place, col.getNestedColumn()); + nested_function->insertResultInto(place, col.getNestedColumn(), arena); } } else { // -OrDefault - nested_function->insertResultInto(place, to); + nested_function->insertResultInto(place, to, arena); } } else diff --git a/src/AggregateFunctions/AggregateFunctionQuantile.h b/src/AggregateFunctions/AggregateFunctionQuantile.h index 7bdfc13295c..536d9d5683f 100644 --- a/src/AggregateFunctions/AggregateFunctionQuantile.h +++ b/src/AggregateFunctions/AggregateFunctionQuantile.h @@ -138,7 +138,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { /// const_cast is required because some data structures apply finalizaton (like sorting) for obtain a result. auto & data = this->data(place); diff --git a/src/AggregateFunctions/AggregateFunctionResample.h b/src/AggregateFunctions/AggregateFunctionResample.h index 49cc312287e..043e094a688 100644 --- a/src/AggregateFunctions/AggregateFunctionResample.h +++ b/src/AggregateFunctions/AggregateFunctionResample.h @@ -174,13 +174,14 @@ public: void insertResultInto( AggregateDataPtr place, - IColumn & to) const override + IColumn & to, + Arena * arena) const override { auto & col = assert_cast(to); auto & col_offsets = assert_cast(col.getOffsetsColumn()); for (size_t i = 0; i < total; ++i) - nested_function->insertResultInto(place + i * size_of_data, col.getData()); + nested_function->insertResultInto(place + i * size_of_data, col.getData(), arena); col_offsets.getData().push_back(col.getData().size()); } diff --git a/src/AggregateFunctions/AggregateFunctionRetention.h b/src/AggregateFunctions/AggregateFunctionRetention.h index 3a76ba9f055..b742dcdf77f 100644 --- a/src/AggregateFunctions/AggregateFunctionRetention.h +++ b/src/AggregateFunctions/AggregateFunctionRetention.h @@ -123,7 +123,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & data_to = assert_cast(assert_cast(to).getData()).getData(); auto & offsets_to = assert_cast(to).getOffsets(); diff --git a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h index 416786f8fcb..79463e890e4 100644 --- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.h +++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.h @@ -560,7 +560,7 @@ public: DataTypePtr getReturnType() const override { return std::make_shared(); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena 
*) const override { this->data(place).sort(); @@ -588,7 +588,7 @@ public: DataTypePtr getReturnType() const override { return std::make_shared(); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { const_cast(this->data(place)).sort(); assert_cast(to).getData().push_back(count(place)); diff --git a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h index d1405172e27..8c029855a26 100644 --- a/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h +++ b/src/AggregateFunctions/AggregateFunctionSimpleLinearRegression.h @@ -170,8 +170,8 @@ public: void insertResultInto( AggregateDataPtr place, - IColumn & to - ) const override + IColumn & to, + Arena *) const override { Ret k = this->data(place).getK(); Ret b = this->data(place).getB(k); diff --git a/src/AggregateFunctions/AggregateFunctionState.h b/src/AggregateFunctions/AggregateFunctionState.h index 126d63573af..51a31677723 100644 --- a/src/AggregateFunctions/AggregateFunctionState.h +++ b/src/AggregateFunctions/AggregateFunctionState.h @@ -80,7 +80,7 @@ public: nested_func->deserialize(place, buf, arena); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(place); } diff --git a/src/AggregateFunctions/AggregateFunctionStatistics.h b/src/AggregateFunctions/AggregateFunctionStatistics.h index 7f6de43f5e1..b0ff57665da 100644 --- a/src/AggregateFunctions/AggregateFunctionStatistics.h +++ b/src/AggregateFunctions/AggregateFunctionStatistics.h @@ -143,7 +143,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { this->data(place).publish(to); } @@ -395,7 +395,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { this->data(place).publish(to); } diff --git a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h index 96c07cc3d41..7962453cb35 100644 --- a/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h +++ b/src/AggregateFunctions/AggregateFunctionStatisticsSimple.h @@ -455,7 +455,7 @@ public: this->data(place).read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { const auto & data = this->data(place); auto & dst = static_cast(to).getData(); diff --git a/src/AggregateFunctions/AggregateFunctionSum.h b/src/AggregateFunctions/AggregateFunctionSum.h index 9d3d559ecee..6f921dbb78b 100644 --- a/src/AggregateFunctions/AggregateFunctionSum.h +++ b/src/AggregateFunctions/AggregateFunctionSum.h @@ -305,7 +305,7 @@ public: this->data(place).read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { auto & column = static_cast(to); column.getData().push_back(this->data(place).get()); diff --git a/src/AggregateFunctions/AggregateFunctionSumMap.h 
b/src/AggregateFunctions/AggregateFunctionSumMap.h index e2aef611955..8209170791e 100644 --- a/src/AggregateFunctions/AggregateFunctionSumMap.h +++ b/src/AggregateFunctions/AggregateFunctionSumMap.h @@ -242,7 +242,7 @@ public: } } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { // Final step does compaction of keys that have zero values, this mutates the state auto & merged_maps = this->data(place).merged_maps; diff --git a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h index ad83324e483..3ec40455cf3 100644 --- a/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h +++ b/src/AggregateFunctions/AggregateFunctionTimeSeriesGroupSum.h @@ -253,7 +253,7 @@ public: void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override { this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { const auto & value = this->data(place).result; size_t size = value.size(); diff --git a/src/AggregateFunctions/AggregateFunctionTopK.h b/src/AggregateFunctions/AggregateFunctionTopK.h index 23eb0e7ff09..68317d0bdf0 100644 --- a/src/AggregateFunctions/AggregateFunctionTopK.h +++ b/src/AggregateFunctions/AggregateFunctionTopK.h @@ -79,7 +79,7 @@ public: set.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { ColumnArray & arr_to = assert_cast(to); ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); @@ -200,7 +200,7 @@ public: this->data(place).value.merge(this->data(rhs).value); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { ColumnArray & arr_to = assert_cast(to); ColumnArray::Offsets & offsets_to = arr_to.getOffsets(); diff --git a/src/AggregateFunctions/AggregateFunctionUniq.h b/src/AggregateFunctions/AggregateFunctionUniq.h index 1588611b8a2..fe0e96f036b 100644 --- a/src/AggregateFunctions/AggregateFunctionUniq.h +++ b/src/AggregateFunctions/AggregateFunctionUniq.h @@ -240,7 +240,7 @@ public: this->data(place).set.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(this->data(place).set.size()); } @@ -300,7 +300,7 @@ public: this->data(place).set.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(this->data(place).set.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionUniqCombined.h b/src/AggregateFunctions/AggregateFunctionUniqCombined.h index a92caa4a551..e34cc602ccd 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqCombined.h +++ b/src/AggregateFunctions/AggregateFunctionUniqCombined.h @@ -167,7 +167,7 @@ public: this->data(place).set.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { 
assert_cast(to).getData().push_back(this->data(place).set.size()); } @@ -229,7 +229,7 @@ public: this->data(place).set.read(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(this->data(place).set.size()); } diff --git a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h index 4c71215141c..2a48e0fb182 100644 --- a/src/AggregateFunctions/AggregateFunctionUniqUpTo.h +++ b/src/AggregateFunctions/AggregateFunctionUniqUpTo.h @@ -180,7 +180,7 @@ public: this->data(place).read(buf, threshold); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(this->data(place).size()); } @@ -242,7 +242,7 @@ public: this->data(place).read(buf, threshold); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(this->data(place).size()); } diff --git a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h index b5704203ade..3f41046c20e 100644 --- a/src/AggregateFunctions/AggregateFunctionWindowFunnel.h +++ b/src/AggregateFunctions/AggregateFunctionWindowFunnel.h @@ -280,7 +280,7 @@ public: this->data(place).deserialize(buf); } - void insertResultInto(AggregateDataPtr place, IColumn & to) const override + void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { assert_cast(to).getData().push_back(getEventLevel(this->data(place))); } diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index 439a5e07c2e..32dcce908c6 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -106,7 +106,7 @@ public: /// Inserts results into a column. /// This method must be called once, from single thread. /// After this method was called for state, you can't do anything with state but destroy. - virtual void insertResultInto(AggregateDataPtr place, IColumn & to) const = 0; + virtual void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const = 0; /// Used for machine learning methods. Predict result from trained model. /// Will insert result into `to` column for rows in range [offset, offset + limit). 
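The new Arena * parameter then has to be threaded through every finalization call site, as the remaining hunks show. Two conventions appear in this commit: callers that already own an arena for their aggregate states (Aggregator, arrayReduce, arrayReduceInRanges, runningAccumulate, ColumnAggregateFunction) pass it down, while the Summing and GraphiteRollup merge algorithms, which handle only simple aggregates, pass nullptr. A self-contained toy spelling out that contract; the names are invented for illustration:

    #include <cassert>

    struct Arena {};  // stand-in for DB::Arena

    struct Agg
    {
        bool finalization_allocates;  // e.g. true for a -Distinct-wrapped function

        int result(Arena * arena) const
        {
            // The contract the new parameter encodes: an arena must be present
            // whenever finalization may allocate.
            if (finalization_allocates)
                assert(arena != nullptr);
            return 42;
        }
    };

    int main()
    {
        Arena arena;
        Agg distinct_like{true};
        Agg plain_sum{false};
        distinct_like.result(&arena);  // Aggregator-style: pass the states' pool
        plain_sum.result(nullptr);     // SummingSorted-style: nullptr is fine
    }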
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index d4021b45f0e..3374d171059 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -135,7 +135,7 @@ MutableColumnPtr ColumnAggregateFunction::convertToValues(MutableColumnPtr colum res->reserve(data.size()); for (auto * val : data) - func->insertResultInto(val, *res); + func->insertResultInto(val, *res, &column_aggregate_func.createOrGetArena()); return res; } diff --git a/src/Functions/array/arrayReduce.cpp b/src/Functions/array/arrayReduce.cpp index 8d44acc82f5..5c9c9472e98 100644 --- a/src/Functions/array/arrayReduce.cpp +++ b/src/Functions/array/arrayReduce.cpp @@ -187,7 +187,7 @@ void FunctionArrayReduce::executeImpl(Block & block, const ColumnNumbers & argum for (size_t i = 0; i < input_rows_count; ++i) if (!res_col_aggregate_function) - agg_func.insertResultInto(places[i], res_col); + agg_func.insertResultInto(places[i], res_col, arena.get()); else res_col_aggregate_function->insertFrom(places[i]); block.getByPosition(result).column = std::move(result_holder); diff --git a/src/Functions/array/arrayReduceInRanges.cpp b/src/Functions/array/arrayReduceInRanges.cpp index 2dd0cd56343..5b594fdb621 100644 --- a/src/Functions/array/arrayReduceInRanges.cpp +++ b/src/Functions/array/arrayReduceInRanges.cpp @@ -376,7 +376,7 @@ void FunctionArrayReduceInRanges::executeImpl(Block & block, const ColumnNumbers } if (!res_col_aggregate_function) - agg_func.insertResultInto(place, result_data); + agg_func.insertResultInto(place, result_data, arena.get()); else res_col_aggregate_function->insertFrom(place); } diff --git a/src/Functions/runningAccumulate.cpp b/src/Functions/runningAccumulate.cpp index 275259e1209..bf109654bc2 100644 --- a/src/Functions/runningAccumulate.cpp +++ b/src/Functions/runningAccumulate.cpp @@ -124,7 +124,7 @@ public: } agg_func.merge(place.data(), state_to_add, arena.get()); - agg_func.insertResultInto(place.data(), result_column); + agg_func.insertResultInto(place.data(), result_column, arena.get()); ++row_number; } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 538a24fa997..5bd427b42cd 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -822,10 +822,11 @@ Block Aggregator::convertOneBucketToBlock( MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, + Arena * arena, bool final_) { convertToBlockImpl(method, method.data.impls[bucket], - key_columns, aggregate_columns, final_aggregate_columns, final_); + key_columns, aggregate_columns, final_aggregate_columns, arena, final_); }); block.info.bucket_num = bucket; @@ -983,6 +984,7 @@ void Aggregator::convertToBlockImpl( MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, + Arena * arena, bool final) const { if (data.empty()) @@ -992,7 +994,7 @@ void Aggregator::convertToBlockImpl( throw Exception{"Aggregate. Unexpected key columns size.", ErrorCodes::LOGICAL_ERROR}; if (final) - convertToBlockImplFinal(method, data, key_columns, final_aggregate_columns); + convertToBlockImplFinal(method, data, key_columns, final_aggregate_columns, arena); else convertToBlockImplNotFinal(method, data, key_columns, aggregate_columns); /// In order to release memory early. 
@@ -1003,7 +1005,8 @@ void Aggregator::convertToBlockImpl( template inline void Aggregator::insertAggregatesIntoColumns( Mapped & mapped, - MutableColumns & final_aggregate_columns) const + MutableColumns & final_aggregate_columns, + Arena * arena) const { /** Final values of aggregate functions are inserted to columns. * Then states of aggregate functions, that are not longer needed, are destroyed. @@ -1034,7 +1037,8 @@ inline void Aggregator::insertAggregatesIntoColumns( for (; insert_i < params.aggregates_size; ++insert_i) aggregate_functions[insert_i]->insertResultInto( mapped + offsets_of_aggregate_states[insert_i], - *final_aggregate_columns[insert_i]); + *final_aggregate_columns[insert_i], + arena); } catch (...) { @@ -1071,21 +1075,22 @@ void NO_INLINE Aggregator::convertToBlockImplFinal( Method & method, Table & data, MutableColumns & key_columns, - MutableColumns & final_aggregate_columns) const + MutableColumns & final_aggregate_columns, + Arena * arena) const { if constexpr (Method::low_cardinality_optimization) { if (data.hasNullKeyData()) { key_columns[0]->insertDefault(); - insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns); + insertAggregatesIntoColumns(data.getNullKeyData(), final_aggregate_columns, arena); } } data.forEachValue([&](const auto & key, auto & mapped) { method.insertKeyIntoColumns(key, key_columns, key_sizes); - insertAggregatesIntoColumns(mapped, final_aggregate_columns); + insertAggregatesIntoColumns(mapped, final_aggregate_columns, arena); }); } @@ -1174,7 +1179,7 @@ Block Aggregator::prepareBlockAndFill( } } - filler(key_columns, aggregate_columns_data, final_aggregate_columns, final); + filler(key_columns, aggregate_columns_data, final_aggregate_columns, data_variants.aggregates_pool, final); Block res = header.cloneEmpty(); @@ -1198,6 +1203,7 @@ Block Aggregator::prepareBlockAndFill( return res; } + void Aggregator::fillAggregateColumnsWithSingleKey( AggregatedDataVariants & data_variants, MutableColumns & final_aggregate_columns) @@ -1240,6 +1246,7 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, + Arena * arena, bool final_) { if (data_variants.type == AggregatedDataVariants::Type::without_key || params.overflow_row) @@ -1254,7 +1261,7 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va } else { - insertAggregatesIntoColumns(data, final_aggregate_columns); + insertAggregatesIntoColumns(data, final_aggregate_columns, arena); } if (params.overflow_row) @@ -1282,12 +1289,13 @@ Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_v MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, + Arena * arena, bool final_) { #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ - key_columns, aggregate_columns, final_aggregate_columns, final_); + key_columns, aggregate_columns, final_aggregate_columns, arena, final_); if (false) {} // NOLINT APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index 6d0eeee9014..6c55cb88781 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1164,19 +1164,22 @@ protected: MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & 
final_aggregate_columns, + Arena * arena, bool final) const; template <typename Mapped> void insertAggregatesIntoColumns( Mapped & mapped, - MutableColumns & final_aggregate_columns) const; + MutableColumns & final_aggregate_columns, + Arena * arena) const; template <typename Method, typename Table> void convertToBlockImplFinal( Method & method, Table & data, MutableColumns & key_columns, - MutableColumns & final_aggregate_columns) const; + MutableColumns & final_aggregate_columns, + Arena * arena) const; template <typename Method, typename Table> void convertToBlockImplNotFinal( diff --git a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp index be9bf3e354c..3214ca0b4cc 100644 --- a/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/AggregatingSortedAlgorithm.cpp @@ -223,7 +223,7 @@ void AggregatingSortedAlgorithm::AggregatingMergedData::finishGroup() /// Write the simple aggregation result for the current group. for (auto & desc : def.columns_to_simple_aggregate) { - desc.function->insertResultInto(desc.state.data(), *desc.column); + desc.function->insertResultInto(desc.state.data(), *desc.column, arena.get()); desc.destroyState(); } diff --git a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp index f26fe96876f..02e0746f09d 100644 --- a/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/GraphiteRollupSortedAlgorithm.cpp @@ -301,7 +301,7 @@ void GraphiteRollupSortedAlgorithm::GraphiteRollupMergedData::insertRow( const Graphite::AggregationPattern * aggregation_pattern = std::get<1>(current_rule); if (aggregate_state_created) { - aggregation_pattern->function->insertResultInto(place_for_aggregate_state.data(), *value_column); + aggregation_pattern->function->insertResultInto(place_for_aggregate_state.data(), *value_column, nullptr); aggregation_pattern->function->destroy(place_for_aggregate_state.data()); aggregate_state_created = false; } diff --git a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp index 89154044ae5..7d58c22702e 100644 --- a/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp +++ b/src/Processors/Merges/Algorithms/SummingSortedAlgorithm.cpp @@ -497,7 +497,7 @@ void SummingSortedAlgorithm::SummingMergedData::finishGroup() { try { - desc.function->insertResultInto(desc.state.data(), *desc.merged_column); + desc.function->insertResultInto(desc.state.data(), *desc.merged_column, nullptr); /// Update zero status of current row if (desc.column_numbers.size() == 1) diff --git a/tests/queries/0_stateless/01259_combinator_distinct.reference b/tests/queries/0_stateless/01259_combinator_distinct.reference index 83756ffdaa4..281250dedb6 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.reference +++ b/tests/queries/0_stateless/01259_combinator_distinct.reference @@ -1,6 +1,4 @@ -499500 +4999950000 78 [0,1,2,3,4,5,6,7,8,9,10,11,12] -[0,1,2,3,4,5,6,7,8,9,10,11,12] 20 -5.669227916063075e-17 diff --git a/tests/queries/0_stateless/01259_combinator_distinct.sql b/tests/queries/0_stateless/01259_combinator_distinct.sql index adfddeb34e4..1fef2f17008 100644 --- a/tests/queries/0_stateless/01259_combinator_distinct.sql +++ b/tests/queries/0_stateless/01259_combinator_distinct.sql @@ -1,6 +1,5 @@ -SELECT sum(DISTINCT x) FROM
(SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); -SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); -SELECT groupArray(DISTINCT x) FROM (SELECT number % 13 AS x FROM system.numbers_mt LIMIT 100000); -SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numbers_mt (100000); +SELECT sum(DISTINCT number) FROM numbers_mt(100000); +SELECT sum(DISTINCT number % 13) FROM numbers_mt(100000); +SELECT arraySort(groupArray(DISTINCT number % 13)) FROM numbers_mt(100000); +SELECT finalizeAggregation(countState(DISTINCT toString(number % 20))) FROM numbers_mt(100000); -- SELECT corrStableDistinct(DISTINCT x, y) FROM (SELECT number % 11 AS x, number % 13 AS y FROM system.numbers LIMIT 1000); From fb7f4c6369f52673378bd56cd3690ae256f03ea7 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Thu, 18 Jun 2020 02:42:40 +0300 Subject: [PATCH 129/318] fix build --- src/AggregateFunctions/AggregateFunctionDistinct.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp index 1661277d525..c77e977b0fa 100644 --- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp +++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp @@ -28,7 +28,10 @@ public: } AggregateFunctionPtr transformAggregateFunction( - const AggregateFunctionPtr & nested_function, const DataTypes & arguments, const Array &) const override + const AggregateFunctionPtr & nested_function, + const AggregateFunctionProperties &, + const DataTypes & arguments, + const Array &) const override { AggregateFunctionPtr res; if (arguments.size() == 1) From f4037b8f54840ca0b449e86e0bb575de9c01e1be Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 12:00:43 +0300 Subject: [PATCH 130/318] Fix build --- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 9f8859385d3..0e9a2bf7ec4 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -743,7 +743,7 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr Coordination::Stat metadata_stat; String metadata_str = zookeeper->get(zookeeper_prefix + "/metadata", &metadata_stat); auto metadata_from_zk = ReplicatedMergeTreeTableMetadata::parse(metadata_str); - old_metadata.checkEquals(metadata_from_zk, getColumns(), global_context); + old_metadata.checkEquals(metadata_from_zk, metadata_snapshot->getColumns(), global_context); Coordination::Stat columns_stat; auto columns_from_zk = ColumnsDescription::parse(zookeeper->get(zookeeper_prefix + "/columns", &columns_stat)); From 760e9a8488f0a5ee24a4ccce141593195c3ffcef Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 12:08:24 +0300 Subject: [PATCH 131/318] Fix crash --- src/Storages/StorageDistributed.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index f853a6a8673..53342c754ed 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -215,9 +215,14 @@ public: } }; -void replaceConstantExpressions(ASTPtr & node, const Context & context, const NamesAndTypesList & columns, ConstStoragePtr storage) +void replaceConstantExpressions( + ASTPtr & node, + const Context & context, + 
const NamesAndTypesList & columns, + ConstStoragePtr storage, + const StorageMetadataPtr & metadata_snapshot) { - auto syntax_result = SyntaxAnalyzer(context).analyze(node, columns, storage); + auto syntax_result = SyntaxAnalyzer(context).analyze(node, columns, storage, metadata_snapshot); Block block_with_constants = KeyCondition::getBlockWithConstants(node, syntax_result, context); InDepthNodeVisitor visitor(block_with_constants); @@ -777,7 +782,7 @@ ClusterPtr StorageDistributed::skipUnusedShards( condition_ast = select.prewhere() ? select.prewhere()->clone() : select.where()->clone(); } - replaceConstantExpressions(condition_ast, context, metadata_snapshot->getColumns().getAll(), shared_from_this()); + replaceConstantExpressions(condition_ast, context, metadata_snapshot->getColumns().getAll(), shared_from_this(), metadata_snapshot); const auto blocks = evaluateExpressionOverConstantCondition(condition_ast, sharding_key_expr); // Can't get definite answer if we can skip any shards From 35ce47951d7e7f2d9490b97af16899933af4285e Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 12:22:54 +0300 Subject: [PATCH 132/318] Fix storage merge --- src/Storages/StorageMerge.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 228cec99357..235f78505e0 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -170,9 +170,7 @@ Pipes StorageMerge::read( if (selected_tables.empty()) /// FIXME: do we support sampling in this case? return createSources( - metadata_snapshot, query_info, processed_stage, - max_block_size, header, {}, real_column_names, - modified_context, 0, has_table_virtual_column); + {}, query_info, processed_stage, max_block_size, header, {}, real_column_names, modified_context, 0, has_table_virtual_column); size_t tables_count = selected_tables.size(); Float64 num_streams_multiplier = std::min(unsigned(tables_count), std::max(1U, unsigned(context.getSettingsRef().max_streams_multiplier_for_merge_tables))); @@ -212,8 +210,10 @@ Pipes StorageMerge::read( if (query_info.query->as()->sampleSize() && !storage->supportsSampling()) throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED); + auto storage_metadata_snapshot = storage->getInMemoryMetadataPtr(); + auto source_pipes = createSources( - metadata_snapshot, query_info, processed_stage, + storage_metadata_snapshot, query_info, processed_stage, max_block_size, header, table, real_column_names, modified_context, current_streams, has_table_virtual_column); @@ -267,6 +267,7 @@ Pipes StorageMerge::createSources( if (real_column_names.empty()) real_column_names.push_back(ExpressionActions::getSmallestColumn(metadata_snapshot->getColumns().getAllPhysical())); + pipes = storage->read(real_column_names, metadata_snapshot, modified_query_info, *modified_context, processed_stage, max_block_size, UInt32(streams_num)); } else if (processed_stage > storage_stage) From aab4ce6394c9bd72622f29f749e5bb5f56a6851a Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 13:29:13 +0300 Subject: [PATCH 133/318] Truncate with metadata --- src/Interpreters/InterpreterDropQuery.cpp | 6 ++++-- src/Storages/IStorage.h | 6 +++++- src/Storages/StorageDistributed.cpp | 2 +- src/Storages/StorageDistributed.h | 2 +- src/Storages/StorageFile.cpp | 6 +++++- src/Storages/StorageFile.h | 6 +++++- src/Storages/StorageJoin.cpp | 5 ++--- src/Storages/StorageJoin.h | 2 +- 
src/Storages/StorageLog.cpp | 3 +-- src/Storages/StorageLog.h | 2 +- src/Storages/StorageMaterializedView.cpp | 2 +- src/Storages/StorageMaterializedView.h | 2 +- src/Storages/StorageMemory.cpp | 3 ++- src/Storages/StorageMemory.h | 4 ++-- src/Storages/StorageMergeTree.cpp | 2 +- src/Storages/StorageMergeTree.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 3 ++- src/Storages/StorageReplicatedMergeTree.h | 2 +- src/Storages/StorageSet.cpp | 3 +-- src/Storages/StorageSet.h | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageStripeLog.h | 2 +- src/Storages/StorageTinyLog.cpp | 4 ++-- src/Storages/StorageTinyLog.h | 2 +- 24 files changed, 44 insertions(+), 31 deletions(-) diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 5ffce2fc3ec..15f19b585de 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -105,8 +105,9 @@ BlockIO InterpreterDropQuery::executeToTable( table->checkTableCanBeDropped(); auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); /// Drop table data, don't touch metadata - table->truncate(query_ptr, context, table_lock); + table->truncate(query_ptr, metadata_snapshot, context, table_lock); } else if (query.kind == ASTDropQuery::Kind::Drop) { @@ -187,7 +188,8 @@ BlockIO InterpreterDropQuery::executeToTemporaryTable(const String & table_name, { auto table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Drop table data, don't touch metadata - table->truncate(query_ptr, context, table_lock); + auto metadata_snapshot = table->getInMemoryMetadataPtr(); + table->truncate(query_ptr, metadata_snapshot, context, table_lock); } else if (kind == ASTDropQuery::Kind::Drop) { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ba1945d5c79..ec13e26ff43 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -292,7 +292,11 @@ public: /** Clear the table data and leave it empty. * Must be called under lockForAlter. 
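 * The snapshot argument is the metadata the caller already holds: implementations
 * should read columns and paths from it instead of calling getInMemoryMetadataPtr()
 * again, so TRUNCATE acts on the same metadata version the caller captured.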
*/ - virtual void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) + virtual void truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /* metadata_snapshot */, + const Context & /* context */, + TableStructureWriteLockHolder &) { throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 53342c754ed..2e07a393b04 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -619,7 +619,7 @@ Strings StorageDistributed::getDataPaths() const return paths; } -void StorageDistributed::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) { std::lock_guard lock(cluster_nodes_mutex); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index aeb5f1875e9..c952ccde8ac 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -82,7 +82,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /// Removes temporary data in local filesystem. - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; void renameOnDisk(const String & new_path_to_table_data); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8083a8b2145..8fb09d0a41e 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -523,7 +523,11 @@ void StorageFile::rename(const String & new_path_to_table_data, const StorageID renameInMemory(new_table_id); } -void StorageFile::truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) +void StorageFile::truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /* metadata_snapshot */, + const Context & /* context */, + TableStructureWriteLockHolder &) { if (paths.size() != 1) throw Exception("Can't truncate table '" + getStorageID().getNameForLogs() + "' in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 65589d245b9..05b4d7aea8a 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -38,7 +38,11 @@ public: const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; - void truncate(const ASTPtr & /*query*/, const Context & /* context */, TableStructureWriteLockHolder &) override; + void truncate( + const ASTPtr & /*query*/, + const StorageMetadataPtr & /* metadata_snapshot */, + const Context & /* context */, + TableStructureWriteLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 300ab400a46..5000dcd8b18 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -64,10 +64,9 @@ StorageJoin::StorageJoin( } -void StorageJoin::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageJoin::truncate( + const ASTPtr &, const 
StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) { - /// TODO(alesap) FIXME - auto metadata_snapshot = getInMemoryMetadataPtr(); Poco::File(path).remove(true); Poco::File(path).createDirectories(); Poco::File(path + "tmp/").createDirectories(); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 40dbf1b44dd..4d4d1a81da2 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -27,7 +27,7 @@ class StorageJoin final : public ext::shared_ptr_helper, public Sto public: String getName() const override { return "Join"; } - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; /// Access the innards. HashJoinPtr & getJoin() { return join; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index e0953283a17..45d55938db3 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -535,11 +535,10 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID & renameInMemory(new_table_id); } -void StorageLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) { std::shared_lock lock(rwlock); - auto metadata_snapshot = getInMemoryMetadataPtr(); files.clear(); file_count = 0; loaded_marks = false; diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 90d0799e1a8..670e2777d44 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -39,7 +39,7 @@ public: CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 3d3137fe1a6..2c0d5727b31 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -173,7 +173,7 @@ void StorageMaterializedView::drop() executeDropQuery(ASTDropQuery::Kind::Drop, global_context, target_table_id); } -void StorageMaterializedView::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) { if (has_inner_table) executeDropQuery(ASTDropQuery::Kind::Truncate, global_context, target_table_id); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 6f462c2cccc..e2111a15f5c 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -37,7 +37,7 @@ public: void drop() override; - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; bool optimize( const ASTPtr & query, diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index f9ef3cfcc98..3bae29ac96c 100644 --- 
a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -148,7 +148,8 @@ void StorageMemory::drop() data.clear(); } -void StorageMemory::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageMemory::truncate( + const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) { std::lock_guard lock(mutex); data.clear(); diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 3c583533462..842c7dc3790 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -37,11 +37,11 @@ public: size_t max_block_size, unsigned num_streams) override; - BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; + BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const Context & context) override; void drop() override; - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; std::optional totalRows() const override; std::optional totalBytes() const override; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 40b3aeffb8a..45f8ecf0ef9 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -231,7 +231,7 @@ void StorageMergeTree::drop() dropAllData(); } -void StorageMergeTree::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) { { /// Asks to complete merges and does not allow them to start. diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 69ee6714164..cf3eccc0c0b 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -75,7 +75,7 @@ public: CancellationCode killMutation(const String & mutation_id) override; void drop() override; - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; void alter(const AlterCommands & commands, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0e9a2bf7ec4..2e64c54112f 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -4011,7 +4011,8 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt } -void StorageReplicatedMergeTree::truncate(const ASTPtr & query, const Context & query_context, TableStructureWriteLockHolder & table_lock) +void StorageReplicatedMergeTree::truncate( + const ASTPtr & query, const StorageMetadataPtr &, const Context & query_context, TableStructureWriteLockHolder & table_lock) { table_lock.release(); /// Truncate is done asynchronously. 
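The remaining overrides in this patch follow the same shape. For reference, a minimal sketch of an implementation after the change (StorageExample and resetDataFromSnapshot are illustrative names, not identifiers from this series):

void StorageExample::truncate(
    const ASTPtr & /*query*/,
    const StorageMetadataPtr & metadata_snapshot,
    const Context & /*context*/,
    TableStructureWriteLockHolder &)
{
    /// Wipe on-disk data, then rebuild per-column bookkeeping from the snapshot
    /// captured by the caller, instead of re-reading metadata under a new lock.
    resetDataFromSnapshot(metadata_snapshot->getColumns().getAllPhysical());  /// illustrative helper
}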
diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c98fcb0ae3d..c1ba737d849 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -120,7 +120,7 @@ public: */ void drop() override; - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 235251c0761..93b288569c2 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -142,9 +142,8 @@ void StorageSet::finishInsert() { set->finishInsert(); } size_t StorageSet::getSize() const { return set->getTotalRowCount(); } -void StorageSet::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) { - auto metadata_snapshot = getInMemoryMetadataPtr(); Poco::File(path).remove(true); Poco::File(path).createDirectories(); Poco::File(path + "tmp/").createDirectories(); diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index b7785aadc6a..2685fa26ba6 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -67,7 +67,7 @@ public: /// Access the insides. SetPtr & getSet() { return set; } - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; private: SetPtr set; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 407c9b164ff..4b95a389f2c 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -326,7 +326,7 @@ CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, const Conte return file_checker.check(); } -void StorageStripeLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) { std::shared_lock lock(rwlock); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index d06758a60e8..381be7762df 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -42,7 +42,7 @@ public: Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; protected: StorageStripeLog( diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 4578a82f650..4beb44405d7 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -429,10 +429,10 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context return file_checker.check(); } -void StorageTinyLog::truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageTinyLog::truncate( + const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) { std::unique_lock 
lock(rwlock); - auto metadata_snapshot = getInMemoryMetadataPtr(); disk->clearDirectory(table_path); diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index a55bf6d0dcf..ae124e5e958 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -41,7 +41,7 @@ public: Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } - void truncate(const ASTPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; void drop() override; From 4de5331b0df17163e05fe11893ea4c2da35ad985 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 13:39:33 +0300 Subject: [PATCH 134/318] Fix SystemTables --- src/Storages/System/StorageSystemTables.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index 84635acb887..df8df75ad6d 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -267,7 +267,6 @@ protected: throw; } } - auto metadata_snapshot = table->getInMemoryMetadataPtr(); ++rows_count; @@ -362,10 +361,14 @@ protected: else src_index += 2; + StorageMetadataPtr metadata_snapshot; + if (table != nullptr) + metadata_snapshot = table->getInMemoryMetadataPtr(); + ASTPtr expression_ptr; if (columns_mask[src_index++]) { - assert(table != nullptr); + assert(metadata_snapshot != nullptr); if ((expression_ptr = metadata_snapshot->getPartitionKeyAST())) res_columns[res_index++]->insert(queryToString(expression_ptr)); else @@ -374,7 +377,7 @@ protected: if (columns_mask[src_index++]) { - assert(table != nullptr); + assert(metadata_snapshot != nullptr); if ((expression_ptr = metadata_snapshot->getSortingKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else @@ -383,7 +386,7 @@ protected: if (columns_mask[src_index++]) { - assert(table != nullptr); + assert(metadata_snapshot != nullptr); if ((expression_ptr = metadata_snapshot->getPrimaryKey().expression_list_ast)) res_columns[res_index++]->insert(queryToString(expression_ptr)); else @@ -392,7 +395,7 @@ protected: if (columns_mask[src_index++]) { - assert(table != nullptr); + assert(metadata_snapshot != nullptr); if ((expression_ptr = metadata_snapshot->getSamplingKeyAST())) res_columns[res_index++]->insert(queryToString(expression_ptr)); else @@ -401,7 +404,7 @@ protected: if (columns_mask[src_index++]) { - assert(table != nullptr); + assert(metadata_snapshot != nullptr); auto policy = table->getStoragePolicy(); if (policy) res_columns[res_index++]->insert(policy->getName()); From d4c49816ab140f1ca0d73c173d1cd1a62b8003fd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 14:02:31 +0300 Subject: [PATCH 135/318] Really atomic metadata --- src/DataStreams/InputStreamFromASTInsertQuery.h | 2 +- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/JoinedTables.h | 2 +- src/Interpreters/SyntaxAnalyzer.h | 2 +- src/Storages/IStorage.h | 15 ++++++++------- src/Storages/MergeTree/MergeTreeWhereOptimizer.h | 2 +- src/Storages/StorageInMemoryMetadata.h | 2 +- 7 files changed, 14 insertions(+), 13 deletions(-) diff --git a/src/DataStreams/InputStreamFromASTInsertQuery.h b/src/DataStreams/InputStreamFromASTInsertQuery.h index 0604f011e28..d4c6443c77d 100644 --- a/src/DataStreams/InputStreamFromASTInsertQuery.h +++ 
b/src/DataStreams/InputStreamFromASTInsertQuery.h @@ -12,7 +12,7 @@ namespace DB struct BlockIO; class Context; struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; /** Prepares an input stream which produces the data contained in the INSERT query * The head of the inserted data may be stored in the INSERT AST directly diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index cd0b837b4ec..31939f5016b 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -32,7 +32,7 @@ class ASTSelectQuery; struct ASTTablesInSelectQueryElement; struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; /// Create columns in block or return false if not possible bool sanitizeBlock(Block & block); diff --git a/src/Interpreters/JoinedTables.h b/src/Interpreters/JoinedTables.h index cff86c5a535..f150de83a94 100644 --- a/src/Interpreters/JoinedTables.h +++ b/src/Interpreters/JoinedTables.h @@ -14,7 +14,7 @@ class ASTSelectQuery; class TableJoin; struct SelectQueryOptions; struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; /// Joined tables' columns resolver. /// We want to get each table structure at most once per table occurrence. Or even better once per table. diff --git a/src/Interpreters/SyntaxAnalyzer.h b/src/Interpreters/SyntaxAnalyzer.h index 4308b70c45a..dd3c49a0f1a 100644 --- a/src/Interpreters/SyntaxAnalyzer.h +++ b/src/Interpreters/SyntaxAnalyzer.h @@ -17,7 +17,7 @@ struct Settings; struct SelectQueryOptions; using Scalars = std::map<String, Block>; struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; struct SyntaxAnalyzerResult { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index ec13e26ff43..375ab90aee4 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -82,7 +82,7 @@ public: IStorage() = delete; /// Storage fields should be initialized in separate methods like setColumns /// or setTableTTLs. - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_shared<StorageInMemoryMetadata>()) {} //-V730 + explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_unique<StorageInMemoryMetadata>()) {} //-V730 virtual ~IStorage() = default; IStorage(const IStorage &) = delete; @@ -137,9 +137,12 @@ public: public: /// thread-unsafe part. lockStructure must be acquired - StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata; } - StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata; } - void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata = std::make_shared<StorageInMemoryMetadata>(metadata_); } + StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata.get(); } + StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata.get(); } + void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) + { + metadata.set(std::make_unique<StorageInMemoryMetadata>(metadata_)); + } /// Return list of virtual columns (like _part, _table, etc).
In the vast @@ -165,9 +168,7 @@ private: StorageID storage_id; mutable std::mutex id_mutex; - /// TODO (alesap) just use multiversion for atomic metadata - mutable std::mutex ttl_mutex; - StorageMetadataPtr metadata; + MultiVersionStorageMetadataPtr metadata; private: RWLockImpl::LockHolder tryLockTimed( const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; diff --git a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 866d0a8754e..cb2f8939cb5 100644 --- a/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -17,7 +17,7 @@ class ASTSelectQuery; class ASTFunction; class MergeTreeData; struct StorageInMemoryMetadata; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; /** Identifies WHERE expressions that can be placed in PREWHERE by calculating respective * sizes of columns used in particular expression and identifying "good" conditions of diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index e4755bb0464..bda48bc19cb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -194,7 +194,7 @@ struct StorageInMemoryMetadata void check(const Block & block, bool need_all = false) const; }; -using StorageMetadataPtr = std::shared_ptr<StorageInMemoryMetadata>; +using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>; using MultiVersionStorageMetadataPtr = MultiVersion<StorageInMemoryMetadata>; } From 1a69c3234ae26dd158d1102adf7223f2dd8ff0d3 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 14:09:55 +0300 Subject: [PATCH 136/318] Fix style --- src/Storages/IStorage.cpp | 7 ------- src/Storages/StorageInMemoryMetadata.cpp | 5 ----- 2 files changed, 12 deletions(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 8ee9561466a..3a4559f94dc 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -20,13 +20,6 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int COLUMN_QUERIED_MORE_THAN_ONCE; - extern const int DUPLICATE_COLUMN; - extern const int EMPTY_LIST_OF_COLUMNS_PASSED; - extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; - extern const int NO_SUCH_COLUMN_IN_TABLE; - extern const int NOT_FOUND_COLUMN_IN_BLOCK; - extern const int TYPE_MISMATCH; extern const int TABLE_IS_DROPPED; extern const int NOT_IMPLEMENTED; extern const int DEADLOCK_AVOIDED; } diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index a394e196eac..81d1f387424 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -10,17 +10,12 @@ namespace DB { namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int COLUMN_QUERIED_MORE_THAN_ONCE; extern const int DUPLICATE_COLUMN; - extern const int EMPTY_LIST_OF_COLUMNS_PASSED; extern const int EMPTY_LIST_OF_COLUMNS_QUERIED; extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TYPE_MISMATCH; } From c8a58299ac20beb96f17dabb340524bb40dfb789 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 14:42:48 +0300 Subject: [PATCH 137/318] Fix storage buffer metadata --- src/Storages/StorageBuffer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageBuffer.cpp
b/src/Storages/StorageBuffer.cpp index 065dfaa27bf..88619f5bc42 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -179,7 +179,7 @@ Pipes StorageBuffer::read( if (dst_has_same_structure) { if (query_info.order_optimizer) - query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination, metadata_snapshot); + query_info.input_order_info = query_info.order_optimizer->getInputOrder(destination, destination_metadata_snapshot); /// The destination table has the same structure of the requested columns and we can simply read blocks from there. pipes_from_dst = destination->read( From d79982f4973ccf8f0a9cc6198f23ec1f150494d6 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 19:10:47 +0300 Subject: [PATCH 138/318] Better locks in Storages --- src/Core/iostream_debug_helpers.cpp | 2 +- src/Core/iostream_debug_helpers.h | 3 - src/DataStreams/IBlockInputStream.h | 4 +- src/DataStreams/IBlockOutputStream.h | 4 +- .../PushingToViewsBlockOutputStream.cpp | 5 +- src/Databases/DatabaseMySQL.cpp | 2 +- src/Databases/DatabaseOnDisk.cpp | 2 +- src/Databases/IDatabase.h | 1 - src/Functions/FunctionJoinGet.cpp | 3 +- src/Functions/FunctionJoinGet.h | 4 +- src/Interpreters/InterpreterAlterQuery.cpp | 10 +-- src/Interpreters/InterpreterCreateQuery.cpp | 5 +- src/Interpreters/InterpreterDescribeQuery.cpp | 3 +- src/Interpreters/InterpreterDropQuery.cpp | 4 +- src/Interpreters/InterpreterInsertQuery.cpp | 3 +- src/Interpreters/InterpreterSelectQuery.cpp | 5 +- src/Interpreters/InterpreterSelectQuery.h | 2 +- src/Interpreters/MutationsInterpreter.cpp | 4 +- src/Interpreters/MutationsInterpreter.h | 4 +- src/Processors/Pipe.h | 6 +- src/Processors/QueryPipeline.h | 9 ++- src/Storages/IStorage.cpp | 37 +++------- src/Storages/IStorage.h | 39 +++-------- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 6 +- src/Storages/MergeTree/MergeTreeData.h | 10 +-- .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +- .../MergeTree/MergeTreeDataMergerMutator.h | 4 +- .../ReplicatedMergeTreeCleanupThread.cpp | 4 +- .../ReplicatedMergeTreePartCheckThread.cpp | 3 +- src/Storages/ReadInOrderOptimizer.cpp | 4 +- src/Storages/StorageBuffer.cpp | 7 +- src/Storages/StorageBuffer.h | 2 +- src/Storages/StorageDistributed.cpp | 5 +- src/Storages/StorageDistributed.h | 4 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageFile.h | 2 +- src/Storages/StorageJoin.cpp | 2 +- src/Storages/StorageJoin.h | 2 +- src/Storages/StorageLog.cpp | 2 +- src/Storages/StorageLog.h | 2 +- src/Storages/StorageMaterializedView.cpp | 11 ++- src/Storages/StorageMaterializedView.h | 4 +- src/Storages/StorageMemory.cpp | 2 +- src/Storages/StorageMemory.h | 2 +- src/Storages/StorageMerge.cpp | 7 +- src/Storages/StorageMerge.h | 4 +- src/Storages/StorageMergeTree.cpp | 42 ++++------- src/Storages/StorageMergeTree.h | 4 +- src/Storages/StorageNull.cpp | 4 +- src/Storages/StorageNull.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 69 ++++++++----------- src/Storages/StorageReplicatedMergeTree.h | 6 +- src/Storages/StorageSet.cpp | 2 +- src/Storages/StorageSet.h | 2 +- src/Storages/StorageStripeLog.cpp | 2 +- src/Storages/StorageStripeLog.h | 2 +- src/Storages/StorageTinyLog.cpp | 2 +- src/Storages/StorageTinyLog.h | 2 +- src/Storages/System/StorageSystemColumns.cpp | 4 +- .../System/StorageSystemPartsBase.cpp | 2 +- src/Storages/System/StorageSystemPartsBase.h | 2 +- src/Storages/System/StorageSystemTables.cpp | 5 +- 
src/Storages/TableStructureLockHolder.h | 37 ++-------- 64 files changed, 162 insertions(+), 287 deletions(-) diff --git a/src/Core/iostream_debug_helpers.cpp b/src/Core/iostream_debug_helpers.cpp index 8683bb14db6..08477770c53 100644 --- a/src/Core/iostream_debug_helpers.cpp +++ b/src/Core/iostream_debug_helpers.cpp @@ -53,7 +53,7 @@ std::ostream & operator<<(std::ostream & stream, const IStorage & what) return stream; } -std::ostream & operator<<(std::ostream & stream, const TableStructureReadLock &) +std::ostream & operator<<(std::ostream & stream, const TableLockHolder &) { stream << "TableStructureReadLock()"; return stream; diff --git a/src/Core/iostream_debug_helpers.h b/src/Core/iostream_debug_helpers.h index b9e5efa5d95..8abffd4fe58 100644 --- a/src/Core/iostream_debug_helpers.h +++ b/src/Core/iostream_debug_helpers.h @@ -22,9 +22,6 @@ std::ostream & operator<<(std::ostream & stream, const IDataType & what); class IStorage; std::ostream & operator<<(std::ostream & stream, const IStorage & what); -class TableStructureReadLock; -std::ostream & operator<<(std::ostream & stream, const TableStructureReadLock & what); - class IFunctionOverloadResolver; std::ostream & operator<<(std::ostream & stream, const IFunctionOverloadResolver & what); diff --git a/src/DataStreams/IBlockInputStream.h b/src/DataStreams/IBlockInputStream.h index 66f3e68d601..68850a822e8 100644 --- a/src/DataStreams/IBlockInputStream.h +++ b/src/DataStreams/IBlockInputStream.h @@ -109,7 +109,7 @@ public: size_t checkDepth(size_t max_depth) const { return checkDepthImpl(max_depth, max_depth); } /// Do not allow to change the table while the blocks stream and its children are alive. - void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); } + void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } /// Get information about execution speed. const BlockStreamProfileInfo & getProfileInfo() const { return info; } @@ -229,7 +229,7 @@ public: protected: /// Order is important: `table_locks` must be destroyed after `children` so that tables from /// which child streams read are protected by the locks during the lifetime of the child streams. - std::vector table_locks; + std::vector table_locks; BlockInputStreams children; std::shared_mutex children_mutex; diff --git a/src/DataStreams/IBlockOutputStream.h b/src/DataStreams/IBlockOutputStream.h index 060438ba457..bb62d0183f9 100644 --- a/src/DataStreams/IBlockOutputStream.h +++ b/src/DataStreams/IBlockOutputStream.h @@ -61,10 +61,10 @@ public: /** Don't let to alter table while instance of stream is alive. */ - void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); } + void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } private: - std::vector table_locks; + std::vector table_locks; }; } diff --git a/src/DataStreams/PushingToViewsBlockOutputStream.cpp b/src/DataStreams/PushingToViewsBlockOutputStream.cpp index 72de6b889f1..2d2d678bff6 100644 --- a/src/DataStreams/PushingToViewsBlockOutputStream.cpp +++ b/src/DataStreams/PushingToViewsBlockOutputStream.cpp @@ -33,7 +33,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( * but it's clear that here is not the best place for this functionality. 
*/ addTableLock( - storage->lockStructureForShare(true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout)); + storage->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout)); /// If the "root" table deduplactes blocks, there are no need to make deduplication for children /// Moreover, deduplication for AggregatingMergeTree children could produce false positives due to low size of inserting blocks @@ -74,8 +74,7 @@ PushingToViewsBlockOutputStream::PushingToViewsBlockOutputStream( if (auto * materialized_view = dynamic_cast(dependent_table.get())) { addTableLock( - materialized_view->lockStructureForShare( - true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout)); + materialized_view->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout)); StoragePtr inner_table = materialized_view->getTargetTable(); auto inner_table_id = inner_table->getStorageID(); diff --git a/src/Databases/DatabaseMySQL.cpp b/src/Databases/DatabaseMySQL.cpp index a73fbafb7f5..6e5837257f0 100644 --- a/src/Databases/DatabaseMySQL.cpp +++ b/src/Databases/DatabaseMySQL.cpp @@ -362,7 +362,7 @@ void DatabaseMySQL::cleanOutdatedTables() ++iterator; else { - const auto table_lock = (*iterator)->lockAlterIntention(RWLockImpl::NO_QUERY, lock_acquire_timeout); + const auto table_lock = (*iterator)->lockExclusively(RWLockImpl::NO_QUERY, lock_acquire_timeout); (*iterator)->shutdown(); (*iterator)->is_dropped = true; diff --git a/src/Databases/DatabaseOnDisk.cpp b/src/Databases/DatabaseOnDisk.cpp index 364c9d50c48..0a16b6eacff 100644 --- a/src/Databases/DatabaseOnDisk.cpp +++ b/src/Databases/DatabaseOnDisk.cpp @@ -266,7 +266,7 @@ void DatabaseOnDisk::renameTable( } auto table_data_relative_path = getTableDataPath(table_name); - TableStructureWriteLockHolder table_lock; + TableExclusiveLockHolder table_lock; String table_metadata_path; ASTPtr attach_query; /// DatabaseLazy::detachTable may return nullptr even if table exists, so we need tryGetTable for this case. diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index b9a7a907f73..3d8d5c74ceb 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -22,7 +22,6 @@ class Context; struct Settings; struct ConstraintsDescription; struct IndicesDescription; -struct TableStructureWriteLockHolder; class ASTCreateQuery; using Dictionaries = std::vector; diff --git a/src/Functions/FunctionJoinGet.cpp b/src/Functions/FunctionJoinGet.cpp index a4569684e7f..a33b70684a5 100644 --- a/src/Functions/FunctionJoinGet.cpp +++ b/src/Functions/FunctionJoinGet.cpp @@ -67,8 +67,7 @@ FunctionBaseImplPtr JoinGetOverloadResolver::build(const ColumnsWithTyp auto join = storage_join->getJoin(); DataTypes data_types(arguments.size()); - auto table_lock = storage_join->lockStructureForShare( - false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto table_lock = storage_join->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); for (size_t i = 0; i < arguments.size(); ++i) data_types[i] = arguments[i].type; diff --git a/src/Functions/FunctionJoinGet.h b/src/Functions/FunctionJoinGet.h index f233ccd8a4f..af95686c207 100644 --- a/src/Functions/FunctionJoinGet.h +++ b/src/Functions/FunctionJoinGet.h @@ -37,7 +37,7 @@ class FunctionJoinGet final : public IFunctionBaseImpl public: static constexpr auto name = or_null ? 
"joinGetOrNull" : "joinGet"; - FunctionJoinGet(TableStructureReadLockHolder table_lock_, StoragePtr storage_join_, + FunctionJoinGet(TableLockHolder table_lock_, StoragePtr storage_join_, HashJoinPtr join_, String attr_name_, DataTypes argument_types_, DataTypePtr return_type_) : table_lock(std::move(table_lock_)) @@ -57,7 +57,7 @@ public: ExecutableFunctionImplPtr prepare(const Block & sample_block, const ColumnNumbers & arguments, size_t result) const override; private: - TableStructureReadLockHolder table_lock; + TableLockHolder table_lock; StoragePtr storage_join; HashJoinPtr join; const String attr_name; diff --git a/src/Interpreters/InterpreterAlterQuery.cpp b/src/Interpreters/InterpreterAlterQuery.cpp index 958291d5882..61277b8160c 100644 --- a/src/Interpreters/InterpreterAlterQuery.cpp +++ b/src/Interpreters/InterpreterAlterQuery.cpp @@ -43,6 +43,7 @@ BlockIO InterpreterAlterQuery::execute() context.checkAccess(getRequiredAccess()); auto table_id = context.resolveStorageID(alter, Context::ResolveOrdinary); StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context); + auto alter_lock = table->lockForAlter(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); /// Add default database to table identifiers that we can encounter in e.g. default expressions, @@ -83,10 +84,7 @@ BlockIO InterpreterAlterQuery::execute() if (!mutation_commands.empty()) { - auto table_lock_holder = table->lockStructureForShare( - false /* because mutation is executed asyncronously */, - context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - MutationsInterpreter(table, metadata_snapshot, mutation_commands, context, false).validate(table_lock_holder); + MutationsInterpreter(table, metadata_snapshot, mutation_commands, context, false).validate(); table->mutate(mutation_commands, context); } @@ -112,13 +110,11 @@ BlockIO InterpreterAlterQuery::execute() if (!alter_commands.empty()) { - auto table_lock_holder = table->lockAlterIntention( - context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); StorageInMemoryMetadata metadata = table->getInMemoryMetadata(); alter_commands.validate(metadata, context); alter_commands.prepare(metadata); table->checkAlterIsPossible(alter_commands, context.getSettingsRef()); - table->alter(alter_commands, context, table_lock_holder); + table->alter(alter_commands, context, alter_lock); } return {}; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index bb82c94a764..3e09d728c4c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -405,7 +405,7 @@ ConstraintsDescription InterpreterCreateQuery::getConstraintsDescription(const A InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(ASTCreateQuery & create) const { TableProperties properties; - TableStructureReadLockHolder as_storage_lock; + TableLockHolder as_storage_lock; if (create.columns_list) { @@ -428,8 +428,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::setProperties(AS StoragePtr as_storage = DatabaseCatalog::instance().getTable({as_database_name, create.as_table}, context); /// as_storage->getColumns() and setEngine(...) must be called under structure lock of other_table for CREATE ... AS other_table. 
- as_storage_lock = as_storage->lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + as_storage_lock = as_storage->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto as_storage_metadata = as_storage->getInMemoryMetadataPtr(); properties.columns = as_storage_metadata->getColumns(); diff --git a/src/Interpreters/InterpreterDescribeQuery.cpp b/src/Interpreters/InterpreterDescribeQuery.cpp index 535a4280b45..94fa748ea15 100644 --- a/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/src/Interpreters/InterpreterDescribeQuery.cpp @@ -89,8 +89,7 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() table = DatabaseCatalog::instance().getTable(table_id, context); } - auto table_lock = table->lockStructureForShare( - false, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); columns = metadata_snapshot->getColumns(); } diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp index 15f19b585de..e6853a8af4c 100644 --- a/src/Interpreters/InterpreterDropQuery.cpp +++ b/src/Interpreters/InterpreterDropQuery.cpp @@ -93,7 +93,7 @@ BlockIO InterpreterDropQuery::executeToTable( { context.checkAccess(table->isView() ? AccessType::DROP_VIEW : AccessType::DROP_TABLE, table_id); table->shutdown(); - TableStructureWriteLockHolder table_lock; + TableExclusiveLockHolder table_lock; if (database->getEngineName() != "Atomic") table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); /// Drop table from memory, don't touch data and metadata @@ -116,7 +116,7 @@ BlockIO InterpreterDropQuery::executeToTable( table->shutdown(); - TableStructureWriteLockHolder table_lock; + TableExclusiveLockHolder table_lock; if (database->getEngineName() != "Atomic") table_lock = table->lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); diff --git a/src/Interpreters/InterpreterInsertQuery.cpp b/src/Interpreters/InterpreterInsertQuery.cpp index a39e8961970..554907d37d3 100644 --- a/src/Interpreters/InterpreterInsertQuery.cpp +++ b/src/Interpreters/InterpreterInsertQuery.cpp @@ -118,8 +118,7 @@ BlockIO InterpreterInsertQuery::execute() BlockIO res; StoragePtr table = getTable(query); - auto table_lock = table->lockStructureForShare( - true, context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto table_lock = table->lockForShare(context.getInitialQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = table->getInMemoryMetadataPtr(); auto query_sample_block = getSampleBlock(query, table, metadata_snapshot); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index d832bcb7dc0..e0d5adf92b8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -255,8 +255,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - table_lock = storage->lockStructureForShare( - false, context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); + table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); table_id = storage->getStorageID(); if (metadata_snapshot == nullptr) 
metadata_snapshot = storage->getInMemoryMetadataPtr(); @@ -277,7 +276,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Rewritten with subquery. Free storage locks here. storage = {}; - table_lock.release(); + table_lock.reset(); table_id = StorageID::createEmpty(); } } diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index 2f0faa2ba72..e274175eb30 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -185,7 +185,7 @@ private: /// Table from where to read data, if not subquery. StoragePtr storage; StorageID table_id = StorageID::createEmpty(); /// Will be initialized if storage is not nullptr - TableStructureReadLockHolder table_lock; + TableLockHolder table_lock; /// Used when we read from prepared input, not table or subquery. BlockInputStreamPtr input; diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3ad813a15b7..51b0cf92484 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -671,7 +671,7 @@ BlockInputStreamPtr MutationsInterpreter::addStreamsForLaterStages(const std::ve return in; } -void MutationsInterpreter::validate(TableStructureReadLockHolder &) +void MutationsInterpreter::validate() { const Settings & settings = context.getSettingsRef(); @@ -696,7 +696,7 @@ void MutationsInterpreter::validate(TableStructureReadLockHolder &) addStreamsForLaterStages(stages, in)->getHeader(); } -BlockInputStreamPtr MutationsInterpreter::execute(TableStructureReadLockHolder &) +BlockInputStreamPtr MutationsInterpreter::execute() { if (!can_execute) throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 3c3175c1856..894d135a099 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -32,12 +32,12 @@ public: const Context & context_, bool can_execute_); - void validate(TableStructureReadLockHolder & table_lock_holder); + void validate(); size_t evaluateCommandsSize(); /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices. - BlockInputStreamPtr execute(TableStructureReadLockHolder & table_lock_holder); + BlockInputStreamPtr execute(); /// Only changed columns. const Block & getUpdatedHeader() const; diff --git a/src/Processors/Pipe.h b/src/Processors/Pipe.h index ec5514915a7..085016c3588 100644 --- a/src/Processors/Pipe.h +++ b/src/Processors/Pipe.h @@ -62,12 +62,12 @@ public: /// Do not allow to change the table while the processors of pipe are alive. /// TODO: move it to pipeline. - void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); } + void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } /// This methods are from QueryPipeline. Needed to make conversion from pipeline to pipe possible. 
void addInterpreterContext(std::shared_ptr context) { interpreter_context.emplace_back(std::move(context)); } void addStorageHolder(StoragePtr storage) { storage_holders.emplace_back(std::move(storage)); } - const std::vector & getTableLocks() const { return table_locks; } + const std::vector & getTableLocks() const { return table_locks; } const std::vector> & getContexts() const { return interpreter_context; } const std::vector & getStorageHolders() const { return storage_holders; } @@ -80,7 +80,7 @@ private: /// It is the max number of processors which can be executed in parallel for each step. See QueryPipeline::Streams. size_t max_parallel_streams = 0; - std::vector table_locks; + std::vector table_locks; /// Some processors may implicitly use Context or temporary Storage created by Interpreter. /// But lifetime of Streams is not nested in lifetime of Interpreters, so we have to store it here, diff --git a/src/Processors/QueryPipeline.h b/src/Processors/QueryPipeline.h index 129b7f5ae3c..9f632a7180a 100644 --- a/src/Processors/QueryPipeline.h +++ b/src/Processors/QueryPipeline.h @@ -7,14 +7,13 @@ #include #include +#include namespace DB { -class TableStructureReadLock; -using TableStructureReadLockPtr = std::shared_ptr; -using TableStructureReadLocks = std::vector; +using TableLockHolders = std::vector; class Context; class IOutputFormat; @@ -146,7 +145,7 @@ public: const Block & getHeader() const { return current_header; } - void addTableLock(const TableStructureReadLockHolder & lock) { table_locks.push_back(lock); } + void addTableLock(const TableLockHolder & lock) { table_locks.push_back(lock); } void addInterpreterContext(std::shared_ptr context) { interpreter_context.emplace_back(std::move(context)); } void addStorageHolder(StoragePtr storage) { storage_holders.emplace_back(std::move(storage)); } @@ -180,7 +179,7 @@ private: /// because QueryPipeline is alive until query is finished. std::vector> interpreter_context; std::vector storage_holders; - TableStructureReadLocks table_locks; + TableLockHolders table_locks; /// Common header for each stream. 
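
Pipe and QueryPipeline keep vectors of these holders because processors can outlive the interpreter that created them; whoever builds a pipe parks the share lock (and the storage pointer) inside it. A hedged sketch of the wiring, assuming the post-patch Pipe API (SourceFromSingleChunk stands in for any source processor):

    Pipe makePipe(const StoragePtr & storage, const Context & context, Block header, Chunk chunk)
    {
        Pipe pipe(std::make_shared<SourceFromSingleChunk>(std::move(header), std::move(chunk)));
        pipe.addTableLock(storage->lockForShare(
            context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout));
        pipe.addStorageHolder(storage); // keep the IStorage object itself alive as well
        return pipe;
    }
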
Block current_header; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 3a4559f94dc..42224ec01ac 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -47,58 +47,43 @@ RWLockImpl::LockHolder IStorage::tryLockTimed( return lock_holder; } -TableStructureReadLockHolder IStorage::lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout) +TableLockHolder IStorage::lockForShare(const String & query_id, const SettingSeconds & acquire_timeout) { - TableStructureReadLockHolder result; - if (will_add_new_data) - result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Read, query_id, acquire_timeout); - result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Read, query_id, acquire_timeout); + TableLockHolder result = tryLockTimed(drop_lock, RWLockImpl::Read, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + return result; } -TableStructureWriteLockHolder IStorage::lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout) +TableLockHolder IStorage::lockForAlter(const String & query_id, const SettingSeconds & acquire_timeout) { - TableStructureWriteLockHolder result; - result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout); + TableLockHolder result = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); + return result; } -void IStorage::lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout) -{ - if (!lock_holder.alter_intention_lock) - throw Exception("Alter intention lock for table " + getStorageID().getNameForLogs() + " was not taken. 
This is a bug.", ErrorCodes::LOGICAL_ERROR); - if (!lock_holder.new_data_structure_lock) - lock_holder.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout); - lock_holder.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout); -} - -TableStructureWriteLockHolder IStorage::lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout) +TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout) { - TableStructureWriteLockHolder result; - result.alter_intention_lock = tryLockTimed(alter_intention_lock, RWLockImpl::Write, query_id, acquire_timeout); + TableExclusiveLockHolder result; + result.alter_lock = tryLockTimed(alter_lock, RWLockImpl::Write, query_id, acquire_timeout); if (is_dropped) throw Exception("Table is dropped", ErrorCodes::TABLE_IS_DROPPED); - result.new_data_structure_lock = tryLockTimed(new_data_structure_lock, RWLockImpl::Write, query_id, acquire_timeout); - result.structure_lock = tryLockTimed(structure_lock, RWLockImpl::Write, query_id, acquire_timeout); + result.drop_lock = tryLockTimed(drop_lock, RWLockImpl::Write, query_id, acquire_timeout); return result; } void IStorage::alter( - const AlterCommands & params, - const Context & context, - TableStructureWriteLockHolder & table_lock_holder) + const AlterCommands & params, const Context & context, TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); params.apply(new_metadata, context); diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 375ab90aee4..1309b727a74 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -135,7 +135,7 @@ public: using ColumnSizeByName = std::unordered_map; virtual ColumnSizeByName getColumnSizes() const { return {}; } -public: /// thread-unsafe part. lockStructure must be acquired +public: StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata.get(); } StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata.get(); } @@ -174,21 +174,11 @@ private: const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const SettingSeconds & acquire_timeout) const; public: - /// Acquire this lock if you need the table structure to remain constant during the execution of - /// the query. If will_add_new_data is true, this means that the query will add new data to the table - /// (INSERT or a parts merge). - TableStructureReadLockHolder lockStructureForShare(bool will_add_new_data, const String & query_id, const SettingSeconds & acquire_timeout); + TableLockHolder lockForShare(const String & query_id, const SettingSeconds & acquire_timeout); - /// Acquire this lock at the start of ALTER to lock out other ALTERs and make sure that only you - /// can modify the table structure. It can later be upgraded to the exclusive lock. - TableStructureWriteLockHolder lockAlterIntention(const String & query_id, const SettingSeconds & acquire_timeout); + TableLockHolder lockForAlter(const String & query_id, const SettingSeconds & acquire_timeout); - /// Upgrade alter intention lock to the full exclusive structure lock. This is done by ALTER queries - /// to ensure that no other query uses the table structure and it can be safely changed. 
- void lockStructureExclusively(TableStructureWriteLockHolder & lock_holder, const String & query_id, const SettingSeconds & acquire_timeout); - - /// Acquire the full exclusive lock immediately. No other queries can run concurrently. - TableStructureWriteLockHolder lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout); + TableExclusiveLockHolder lockExclusively(const String & query_id, const SettingSeconds & acquire_timeout); /** Returns stage to which query is going to be processed in read() function. * (Normally, the function only reads the columns from the list, but in other cases, @@ -297,7 +287,7 @@ public: const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot */, const Context & /* context */, - TableStructureWriteLockHolder &) + TableExclusiveLockHolder &) { throw Exception("Truncate is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } @@ -323,7 +313,7 @@ public: * This method must fully execute the ALTER query, taking care of the locks itself. * To update the table metadata on disk, this method should call InterpreterAlterQuery::updateMetadata-> */ - virtual void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder); + virtual void alter(const AlterCommands & params, const Context & context, TableLockHolder & alter_lock_holder); /** Checks that alter commands can be applied to storage. For example, columns can be modified, * or primary key can be changes, etc. @@ -441,22 +431,9 @@ public: } private: - /// You always need to take the next three locks in this order. + mutable RWLock alter_lock = RWLockImpl::create(); - /// If you hold this lock exclusively, you can be sure that no other structure modifying queries - /// (e.g. ALTER, DROP) are concurrently executing. But queries that only read table structure - /// (e.g. SELECT, INSERT) can continue to execute. - mutable RWLock alter_intention_lock = RWLockImpl::create(); - - /// It is taken for share for the entire INSERT query and the entire merge of the parts (for MergeTree). - /// ALTER COLUMN queries acquire an exclusive lock to ensure that no new parts with the old structure - /// are added to the table and thus the set of parts to modify doesn't change. - mutable RWLock new_data_structure_lock = RWLockImpl::create(); - - /// Lock for the table column structure (names, types, etc.) and data path. - /// It is taken in exclusive mode by queries that modify them (e.g. RENAME, ALTER and DROP) - /// and in share mode by other queries. 
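
For intuition, the RWLockImpl underneath behaves like a fair shared mutex keyed by query id. A standalone sketch; the getLock name and the timeout type are assumptions based on the tryLockTimed helper above:

    auto rwlock = RWLockImpl::create();
    auto r1 = rwlock->getLock(RWLockImpl::Read, "query-1", std::chrono::milliseconds(1000));
    auto r2 = rwlock->getLock(RWLockImpl::Read, "query-2", std::chrono::milliseconds(1000));
    // Readers coexist; a Write request would wait until r1 and r2 are gone.
    r1.reset();
    r2.reset();
    auto w = rwlock->getLock(RWLockImpl::Write, "query-3", std::chrono::milliseconds(1000));
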
- mutable RWLock structure_lock = RWLockImpl::create(); + mutable RWLock drop_lock = RWLockImpl::create(); }; } diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index ac6bd48f534..0abb01d7dc7 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -514,7 +514,7 @@ void StorageLiveView::drop() void StorageLiveView::refresh(const Context & context) { - auto alter_lock = lockAlterIntention(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto alter_lock = lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); { std::lock_guard lock(mutex); if (getNewBlocks()) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 115e0b78bf0..0ed4e98e864 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1436,7 +1436,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeData::createPart( void MergeTreeData::changeSettings( const ASTPtr & new_settings, - TableStructureWriteLockHolder & /* table_lock_holder */) + TableLockHolder & /* table_lock_holder */) { if (new_settings) { @@ -1481,7 +1481,7 @@ void MergeTreeData::changeSettings( } } -void MergeTreeData::freezeAll(const String & with_name, const Context & context, TableStructureReadLockHolder &) +void MergeTreeData::freezeAll(const String & with_name, const Context & context, TableLockHolder &) { freezePartitionsByMatcher([] (const DataPartPtr &){ return true; }, with_name, context); } @@ -2289,7 +2289,7 @@ void MergeTreeData::removePartContributionToColumnSizes(const DataPartPtr & part } -void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & with_name, const Context & context, TableStructureReadLockHolder &) +void MergeTreeData::freezePartition(const ASTPtr & partition_ast, const String & with_name, const Context & context, TableLockHolder &) { std::optional prefix; String partition_id; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index af6bee4936c..ca6928cbb01 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -477,7 +477,7 @@ public: /// Delete all directories which names begin with "tmp" /// Set non-negative parameter value to override MergeTreeSettings temporary_directories_lifetime - /// Must be called with locked lockStructureForShare(). + /// Must be called with locked lockForShare(). void clearOldTemporaryDirectories(ssize_t custom_directories_lifetime_seconds = -1); /// After the call to dropAllData() no method can be called. @@ -489,7 +489,7 @@ public: /// Moves the entire data directory. /// Flushes the uncompressed blocks cache and the marks cache. - /// Must be called with locked lockStructureForAlter(). + /// Must be called with locked lockForShare(). void rename(const String & new_table_path, const StorageID & new_table_id) override; /// Check if the ALTER can be performed: @@ -502,10 +502,10 @@ public: /// Change MergeTreeSettings void changeSettings( const ASTPtr & new_settings, - TableStructureWriteLockHolder & table_lock_holder); + TableLockHolder & table_lock_holder); /// Freezes all parts. 
- void freezeAll(const String & with_name, const Context & context, TableStructureReadLockHolder & table_lock_holder); + void freezeAll(const String & with_name, const Context & context, TableLockHolder & table_lock_holder); /// Should be called if part data is suspected to be corrupted. void reportBrokenPart(const String & name) const @@ -527,7 +527,7 @@ public: * Backup is created in directory clickhouse_dir/shadow/i/, where i - incremental number, * or if 'with_name' is specified - backup is created in directory with specified name. */ - void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context, TableStructureReadLockHolder & table_lock_holder); + void freezePartition(const ASTPtr & partition, const String & with_name, const Context & context, TableLockHolder & table_lock_holder); public: diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 732c97c6dcc..d52154002fe 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -581,7 +581,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, MergeList::Entry & merge_entry, - TableStructureReadLockHolder &, + TableLockHolder &, time_t time_of_merge, const ReservationPtr & space_reservation, bool deduplicate, @@ -995,7 +995,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor time_t time_of_mutation, const Context & context, const ReservationPtr & space_reservation, - TableStructureReadLockHolder & table_lock_holder) + TableLockHolder &) { checkOperationIsNotCanceled(merge_entry); @@ -1046,7 +1046,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mutatePartToTempor if (!for_interpreter.empty()) { interpreter.emplace(storage_from_source_part, metadata_snapshot, for_interpreter, context_for_reading, true); - in = interpreter->execute(table_lock_holder); + in = interpreter->execute(); updated_header = interpreter->getUpdatedHeader(); in->setProgressCallback(MergeProgressCallback(merge_entry, watch_prev_elapsed, stage_progress)); } diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 7828f79ea33..d62587bef5f 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -107,7 +107,7 @@ public: const FutureMergedMutatedPart & future_part, const StorageMetadataPtr & metadata_snapshot, MergeListEntry & merge_entry, - TableStructureReadLockHolder & table_lock_holder, + TableLockHolder & table_lock_holder, time_t time_of_merge, const ReservationPtr & space_reservation, bool deduplicate, @@ -122,7 +122,7 @@ public: time_t time_of_mutation, const Context & context, const ReservationPtr & space_reservation, - TableStructureReadLockHolder & table_lock_holder); + TableLockHolder & table_lock_holder); MergeTreeData::DataPartPtr renameMergedTemporaryPart( MergeTreeData::MutableDataPartPtr & new_data_part, diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 0870c0fdf72..f7fa957e997 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -58,9 +58,7 @@ void ReplicatedMergeTreeCleanupThread::iterate() 
storage.clearOldPartsAndRemoveFromZK(); { - /// TODO: Implement tryLockStructureForShare. - auto lock = storage.lockStructureForShare( - false, RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + auto lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); storage.clearOldTemporaryDirectories(); } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp index 0d824fa2dd8..75a3c463061 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreePartCheckThread.cpp @@ -201,8 +201,7 @@ CheckResult ReplicatedMergeTreePartCheckThread::checkPart(const String & part_na { auto zookeeper = storage.getZooKeeper(); - auto table_lock = storage.lockStructureForShare( - false, RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); + auto table_lock = storage.lockForShare(RWLockImpl::NO_QUERY, storage.getSettings()->lock_acquire_timeout_for_background_operations); auto local_part_header = ReplicatedMergeTreePartHeader::fromColumnsAndChecksums( part->getColumns(), part->checksums); diff --git a/src/Storages/ReadInOrderOptimizer.cpp b/src/Storages/ReadInOrderOptimizer.cpp index a6cc6211788..bc220bc33ce 100644 --- a/src/Storages/ReadInOrderOptimizer.cpp +++ b/src/Storages/ReadInOrderOptimizer.cpp @@ -33,13 +33,13 @@ ReadInOrderOptimizer::ReadInOrderOptimizer( InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) const { Names sorting_key_columns; - if (const auto * merge_tree = dynamic_cast(storage.get())) + if (dynamic_cast(storage.get())) { if (!metadata_snapshot->hasSortingKey()) return {}; sorting_key_columns = metadata_snapshot->getSortingKeyColumns(); } - else if (const auto * part = dynamic_cast(storage.get())) + else if (dynamic_cast(storage.get())) { if (!metadata_snapshot->hasSortingKey()) return {}; diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index 88619f5bc42..b4d6b66ebe7 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -163,8 +163,7 @@ Pipes StorageBuffer::read( if (destination.get() == this) throw Exception("Destination table is myself. 
Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - auto destination_lock = destination->lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto destination_lock = destination->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto destination_metadata_snapshot = destination->getInMemoryMetadataPtr(); @@ -804,10 +803,8 @@ std::optional StorageBuffer::totalBytes() const return bytes; } -void StorageBuffer::alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) +void StorageBuffer::alter(const AlterCommands & params, const Context & context, TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); auto metadata_snapshot = getInMemoryMetadataPtr(); diff --git a/src/Storages/StorageBuffer.h b/src/Storages/StorageBuffer.h index ceedbd25a0c..e168f79293e 100644 --- a/src/Storages/StorageBuffer.h +++ b/src/Storages/StorageBuffer.h @@ -89,7 +89,7 @@ public: void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; /// The structure of the subordinate table is not checked and does not change. - void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override; std::optional totalRows() const override; std::optional totalBytes() const override; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 2e07a393b04..9c20e3f8e11 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -564,9 +564,8 @@ void StorageDistributed::checkAlterIsPossible(const AlterCommands & commands, co } } -void StorageDistributed::alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) +void StorageDistributed::alter(const AlterCommands & params, const Context & context, TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); checkAlterIsPossible(params, context.getSettingsRef()); @@ -619,7 +618,7 @@ Strings StorageDistributed::getDataPaths() const return paths; } -void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageDistributed::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { std::lock_guard lock(cluster_nodes_mutex); diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index c952ccde8ac..006f2bb580a 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -82,7 +82,7 @@ public: BlockOutputStreamPtr write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) override; /// Removes temporary data in local filesystem. 
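
Taken together, the call-site churn in this patch reduces to two mechanical rewrites. A before/after summary (a sketch, not an exhaustive list):

    // Before:
    //     auto rl = t->lockStructureForShare(false /*or true*/, query_id, timeout);
    //     auto wl = t->lockAlterIntention(query_id, timeout);
    //     t->lockStructureExclusively(wl, query_id, timeout);  // upgrade step
    // After:
    //     auto rl = t->lockForShare(query_id, timeout);         // bool argument is gone
    //     auto wl = t->lockForAlter(query_id, timeout);         // no upgrade step exists
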
- void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; void renameOnDisk(const String & new_path_to_table_data); @@ -91,7 +91,7 @@ public: /// in the sub-tables, you need to manually add and delete columns /// the structure of the sub-table is not checked - void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override; void startup() override; void shutdown() override; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8fb09d0a41e..4867a0bc215 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -527,7 +527,7 @@ void StorageFile::truncate( const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot */, const Context & /* context */, - TableStructureWriteLockHolder &) + TableExclusiveLockHolder &) { if (paths.size() != 1) throw Exception("Can't truncate table '" + getStorageID().getNameForLogs() + "' in readonly mode", ErrorCodes::DATABASE_ACCESS_DENIED); diff --git a/src/Storages/StorageFile.h b/src/Storages/StorageFile.h index 05b4d7aea8a..2c4a43eb979 100644 --- a/src/Storages/StorageFile.h +++ b/src/Storages/StorageFile.h @@ -42,7 +42,7 @@ public: const ASTPtr & /*query*/, const StorageMetadataPtr & /* metadata_snapshot */, const Context & /* context */, - TableStructureWriteLockHolder &) override; + TableExclusiveLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 5000dcd8b18..21e4370c28b 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -65,7 +65,7 @@ StorageJoin::StorageJoin( void StorageJoin::truncate( - const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) + const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder&) { Poco::File(path).remove(true); Poco::File(path).createDirectories(); diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 4d4d1a81da2..fb8ffc1c353 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -27,7 +27,7 @@ class StorageJoin final : public ext::shared_ptr_helper, public Sto public: String getName() const override { return "Join"; } - void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; /// Access the innards. 
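
For a storage author, the new truncate() shape is what the StorageJoin and StorageFile hunks show; a sketch modeled on them, with StorageExample and its path member made up:

    void StorageExample::truncate(
        const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &)
    {
        // The holder guarantees exclusivity: alter_lock and drop_lock are held in Write mode.
        Poco::File(path).remove(true);
        Poco::File(path).createDirectories();
    }
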
HashJoinPtr & getJoin() { return join; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 45d55938db3..542fb507d83 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -535,7 +535,7 @@ void StorageLog::rename(const String & new_path_to_table_data, const StorageID & renameInMemory(new_table_id); } -void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) +void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { std::shared_lock lock(rwlock); diff --git a/src/Storages/StorageLog.h b/src/Storages/StorageLog.h index 670e2777d44..d020f906609 100644 --- a/src/Storages/StorageLog.h +++ b/src/Storages/StorageLog.h @@ -39,7 +39,7 @@ public: CheckResults checkData(const ASTPtr & /* query */, const Context & /* context */) override; - void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 2c0d5727b31..976b3c80dec 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -116,8 +116,7 @@ Pipes StorageMaterializedView::read( const unsigned num_streams) { auto storage = getTargetTable(); - auto lock = storage->lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock = storage->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); if (query_info.order_optimizer) @@ -134,8 +133,7 @@ Pipes StorageMaterializedView::read( BlockOutputStreamPtr StorageMaterializedView::write(const ASTPtr & query, const StorageMetadataPtr & /*metadata_snapshot*/, const Context & context) { auto storage = getTargetTable(); - auto lock = storage->lockStructureForShare( - true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock = storage->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto metadata_snapshot = storage->getInMemoryMetadataPtr(); auto stream = storage->write(query, metadata_snapshot, context); @@ -173,7 +171,7 @@ void StorageMaterializedView::drop() executeDropQuery(ASTDropQuery::Kind::Drop, global_context, target_table_id); } -void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageMaterializedView::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { if (has_inner_table) executeDropQuery(ASTDropQuery::Kind::Truncate, global_context, target_table_id); @@ -204,9 +202,8 @@ bool StorageMaterializedView::optimize( void StorageMaterializedView::alter( const AlterCommands & params, const Context & context, - TableStructureWriteLockHolder & table_lock_holder) + TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); 
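
    // (Sketch, not part of this diff.) All of these alter() overrides now share one
    // contract: the explicit lockStructureExclusively() upgrade is gone, and the
    // method trusts its caller to already hold alter_lock. A minimal override under
    // the new contract, with StorageExample hypothetical:
    //
    //     void StorageExample::alter(const AlterCommands & params, const Context & context,
    //                                TableLockHolder & /*alter_lock_holder*/)
    //     {
    //         auto table_id = getStorageID();
    //         StorageInMemoryMetadata new_metadata = getInMemoryMetadata();
    //         params.apply(new_metadata, context);
    //         DatabaseCatalog::instance().getDatabase(table_id.database_name)
    //             ->alterTable(context, table_id, new_metadata);
    //         setInMemoryMetadata(new_metadata);
    //     }
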
StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index e2111a15f5c..f45d9203bad 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -37,7 +37,7 @@ public: void drop() override; - void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; bool optimize( const ASTPtr & query, @@ -47,7 +47,7 @@ public: bool deduplicate, const Context & context) override; - void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override; void checkAlterIsPossible(const AlterCommands & commands, const Settings & settings) const override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 3bae29ac96c..05b37ecf32e 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -149,7 +149,7 @@ void StorageMemory::drop() } void StorageMemory::truncate( - const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) + const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { std::lock_guard lock(mutex); data.clear(); diff --git a/src/Storages/StorageMemory.h b/src/Storages/StorageMemory.h index 842c7dc3790..851fe7fc70a 100644 --- a/src/Storages/StorageMemory.h +++ b/src/Storages/StorageMemory.h @@ -41,7 +41,7 @@ public: void drop() override; - void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; std::optional totalRows() const override; std::optional totalBytes() const override; diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 235f78505e0..f4030ed573f 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -333,7 +333,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables(const String const auto & table = iterator->table(); if (table && table.get() != this) selected_tables.emplace_back( - table, table->lockStructureForShare(false, query_id, settings.lock_acquire_timeout), iterator->name()); + table, table->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name()); iterator->next(); } @@ -362,7 +362,7 @@ StorageMerge::StorageListWithLocks StorageMerge::getSelectedTables( if (storage.get() != this) { selected_tables.emplace_back( - storage, storage->lockStructureForShare(false, query_id, settings.lock_acquire_timeout), iterator->name()); + storage, storage->lockForShare(query_id, settings.lock_acquire_timeout), iterator->name()); virtual_column->insert(iterator->name()); } @@ -405,9 +405,8 @@ void StorageMerge::checkAlterIsPossible(const AlterCommands & commands, const Se } void StorageMerge::alter( - const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) + const AlterCommands & params, const Context & context, TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); 
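
    // (Sketch, not part of this diff.) The caller side presumably pairs with these
    // overrides like so, with InterpreterAlterQuery assumed to be the caller:
    //
    //     auto alter_lock = table->lockForAlter(context.getCurrentQueryId(),
    //                                           context.getSettingsRef().lock_acquire_timeout);
    //     table->alter(commands, context, alter_lock);  // runs under alter_lock only;
    //     // concurrent SELECT/INSERT continue: they take drop_lock, not alter_lock
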
StorageInMemoryMetadata storage_metadata = getInMemoryMetadata(); diff --git a/src/Storages/StorageMerge.h b/src/Storages/StorageMerge.h index 1ad22869e39..f2af25d3f3d 100644 --- a/src/Storages/StorageMerge.h +++ b/src/Storages/StorageMerge.h @@ -42,7 +42,7 @@ public: /// you need to add and remove columns in the sub-tables manually /// the structure of sub-tables is not checked - void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override; bool mayBenefitFromIndexForIn( const ASTPtr & left_in_operand, const Context & query_context, const StorageMetadataPtr & metadata_snapshot) const override; @@ -52,7 +52,7 @@ private: OptimizedRegularExpression table_name_regexp; Context global_context; - using StorageWithLockAndName = std::tuple; + using StorageWithLockAndName = std::tuple; using StorageListWithLocks = std::list; StorageListWithLocks getSelectedTables(const String & query_id, const Settings & settings) const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 45f8ecf0ef9..9042afe1b2d 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -231,7 +231,7 @@ void StorageMergeTree::drop() dropAllData(); } -void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { { /// Asks to complete merges and does not allow them to start. @@ -254,7 +254,7 @@ void StorageMergeTree::truncate(const ASTPtr &, const StorageMetadataPtr &, cons void StorageMergeTree::alter( const AlterCommands & commands, const Context & context, - TableStructureWriteLockHolder & table_lock_holder) + TableLockHolder & table_lock_holder) { auto table_id = getStorageID(); @@ -268,8 +268,6 @@ void StorageMergeTree::alter( /// This alter can be performed at new_metadata level only if (commands.isSettingsAlter()) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(new_metadata.settings_changes, table_lock_holder); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); @@ -277,10 +275,6 @@ void StorageMergeTree::alter( else { { - /// TODO (relax this lock and remove this action lock) - auto merges_block = getActionLock(ActionLocks::PartsMerge); - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. 
setProperties(new_metadata, old_metadata); @@ -290,9 +284,6 @@ void StorageMergeTree::alter( if (!maybe_mutation_commands.empty()) mutation_version = startMutation(maybe_mutation_commands, mutation_file_name); - /// We release all locks except alter_intention_lock which allows - /// to execute alter queries sequentially - table_lock_holder.releaseAllExceptAlterIntention(); } /// Always execute required mutations synchronously, because alters @@ -591,8 +582,7 @@ bool StorageMergeTree::merge( bool deduplicate, String * out_disable_reason) { - auto table_lock_holder = lockStructureForShare( - true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); auto metadata_snapshot = getInMemoryMetadataPtr(); FutureMergedMutatedPart future_part; @@ -740,8 +730,7 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::movePartsTask() bool StorageMergeTree::tryMutatePart() { - auto table_lock_holder = lockStructureForShare( - true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); size_t max_ast_elements = global_context.getSettingsRef().max_expanded_ast_elements; @@ -876,13 +865,8 @@ BackgroundProcessingPoolTaskResult StorageMergeTree::mergeMutateTask() /// Clear old parts. It is unnecessary to do it more than once a second. if (auto lock = time_after_previous_cleanup.compareAndRestartDeferred(1)) { - { - /// TODO: Implement tryLockStructureForShare. - auto lock_structure = lockStructureForShare( - false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); - clearOldPartsFromFilesystem(); - clearOldTemporaryDirectories(); - } + clearOldPartsFromFilesystem(); + clearOldTemporaryDirectories(); clearOldMutations(); } @@ -1078,16 +1062,14 @@ void StorageMergeTree::alterPartition( case PartitionCommand::FREEZE_PARTITION: { - auto lock = lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); freezePartition(command.partition, command.with_name, context, lock); } break; case PartitionCommand::FREEZE_ALL_PARTITIONS: { - auto lock = lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); freezeAll(command.with_name, context, lock); } break; @@ -1156,8 +1138,8 @@ void StorageMergeTree::attachPartition( void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, const ASTPtr & partition, bool replace, const Context & context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock1 = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = source_table->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); auto 
my_metadata_snapshot = getInMemoryMetadataPtr(); @@ -1229,8 +1211,8 @@ void StorageMergeTree::replacePartitionFrom(const StoragePtr & source_table, con void StorageMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & context) { - auto lock1 = lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto lock2 = dest_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock1 = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index cf3eccc0c0b..9a45fd285dc 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -75,9 +75,9 @@ public: CancellationCode killMutation(const String & mutation_id) override; void drop() override; - void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; - void alter(const AlterCommands & commands, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & commands, const Context & context, TableLockHolder & table_lock_holder) override; void checkTableCanBeDropped() const override; diff --git a/src/Storages/StorageNull.cpp b/src/Storages/StorageNull.cpp index 7589c4b44dc..499f7329cd9 100644 --- a/src/Storages/StorageNull.cpp +++ b/src/Storages/StorageNull.cpp @@ -45,10 +45,8 @@ void StorageNull::checkAlterIsPossible(const AlterCommands & commands, const Set } -void StorageNull::alter( - const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) +void StorageNull::alter(const AlterCommands & params, const Context & context, TableLockHolder &) { - lockStructureExclusively(table_lock_holder, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index 6bd102bdcda..e79174c2565 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -44,7 +44,7 @@ public: void checkAlterIsPossible(const AlterCommands & commands, const Settings & /* settings */) const override; - void alter(const AlterCommands & params, const Context & context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override; std::optional totalRows() const override { diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 2e64c54112f..b62f6bbd198 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1309,8 +1309,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry) ReservationPtr reserved_space = reserveSpacePreferringTTLRules(estimated_space_for_merge, ttl_infos, time(nullptr), max_volume_index); - auto table_lock = lockStructureForShare( - false, 
RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); + auto table_lock = lockForShare(RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); FutureMergedMutatedPart future_merged_part(parts, entry.new_part_type); @@ -1436,8 +1435,8 @@ bool StorageReplicatedMergeTree::tryExecutePartMutation(const StorageReplicatedM /// Can throw an exception. ReservationPtr reserved_space = reserveSpace(estimated_space_for_result, source_part->volume); - auto table_lock = lockStructureForShare( - false, RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); + auto table_lock = lockForShare( + RWLockImpl::NO_QUERY, storage_settings_ptr->lock_acquire_timeout_for_background_operations); StorageMetadataPtr metadata_snapshot = getInMemoryMetadataPtr(); MutableDataPartPtr new_part; @@ -1793,8 +1792,8 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) PartDescriptions parts_to_add; DataPartsVector parts_to_remove; - auto table_lock_holder_dst_table = lockStructureForShare( - false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto table_lock_holder_dst_table = lockForShare( + RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); auto dst_metadata_snapshot = getInMemoryMetadataPtr(); for (size_t i = 0; i < entry_replace.new_part_names.size(); ++i) @@ -1833,7 +1832,7 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) } StoragePtr source_table; - TableStructureReadLockHolder table_lock_holder_src_table; + TableLockHolder table_lock_holder_src_table; StorageID source_table_id{entry_replace.from_database, entry_replace.from_table}; auto clone_data_parts_from_source_table = [&] () -> size_t @@ -1857,11 +1856,11 @@ bool StorageReplicatedMergeTree::executeReplaceRange(const LogEntry & entry) return 0; } - table_lock_holder_src_table = source_table->lockStructureForShare( - false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + table_lock_holder_src_table = source_table->lockForShare( + RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); - DataPartStates valid_states{MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, - MergeTreeDataPartState::Outdated}; + DataPartStates valid_states{ + MergeTreeDataPartState::PreCommitted, MergeTreeDataPartState::Committed, MergeTreeDataPartState::Outdated}; size_t num_clonable_parts = 0; for (PartDescriptionPtr & part_desc : parts_to_add) @@ -3092,10 +3091,9 @@ bool StorageReplicatedMergeTree::fetchPart(const String & part_name, const Strin LOG_DEBUG(log, "Fetching part {} from {}", part_name, source_replica_path); - TableStructureReadLockHolder table_lock_holder; + TableLockHolder table_lock_holder; if (!to_detached) - table_lock_holder = lockStructureForShare( - true, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + table_lock_holder = lockForShare(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); /// Logging Stopwatch stopwatch; @@ -3636,10 +3634,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer zookeeper->multi(requests); { - /// TODO (relax this lock and remove this action locks) - auto merges_block = getActionLock(ActionLocks::PartsMerge); - auto fetchers_block = 
getActionLock(ActionLocks::PartsFetch); - auto table_lock = lockExclusively(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto alter_lock = lockForAlter(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); @@ -3658,7 +3653,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer void StorageReplicatedMergeTree::alter( - const AlterCommands & params, const Context & query_context, TableStructureWriteLockHolder & table_lock_holder) + const AlterCommands & params, const Context & query_context, TableLockHolder & table_lock_holder) { assertNotReadonly(); @@ -3666,8 +3661,6 @@ void StorageReplicatedMergeTree::alter( if (params.isSettingsAlter()) { - lockStructureExclusively( - table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// We don't replicate storage_settings_ptr ALTER. It's local operation. /// Also we don't upgrade alter lock to table structure lock. StorageInMemoryMetadata future_metadata = getInMemoryMetadata(); @@ -3732,8 +3725,6 @@ void StorageReplicatedMergeTree::alter( if (ast_to_str(current_metadata->settings_changes) != ast_to_str(future_metadata.settings_changes)) { - lockStructureExclusively( - table_lock_holder, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); /// Just change settings StorageInMemoryMetadata metadata_copy = *current_metadata; metadata_copy.settings_changes = future_metadata.settings_changes; @@ -3824,7 +3815,7 @@ void StorageReplicatedMergeTree::alter( } - table_lock_holder.release(); + table_lock_holder.reset(); std::vector unwaited; if (query_context.getSettingsRef().replication_alter_partitions_sync == 2) @@ -3908,16 +3899,14 @@ void StorageReplicatedMergeTree::alterPartition( case PartitionCommand::FREEZE_PARTITION: { - auto lock = lockStructureForShare( - false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); + auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); freezePartition(command.partition, command.with_name, query_context, lock); } break; case PartitionCommand::FREEZE_ALL_PARTITIONS: { - auto lock = lockStructureForShare( - false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); + auto lock = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); freezeAll(command.with_name, query_context, lock); } break; @@ -4012,7 +4001,7 @@ void StorageReplicatedMergeTree::dropPartition(const ASTPtr & query, const ASTPt void StorageReplicatedMergeTree::truncate( - const ASTPtr & query, const StorageMetadataPtr &, const Context & query_context, TableStructureWriteLockHolder & table_lock) + const ASTPtr & query, const StorageMetadataPtr &, const Context & query_context, TableExclusiveLockHolder & table_lock) { table_lock.release(); /// Truncate is done asynchronously. 
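
The replace/move-partition hunks further down follow one cross-table pattern: take plain share locks on both tables, commit the part swap, then drop the locks early when waiting on other replicas. Sketched as it would read inside a MergeTree member function:

    auto lock1 = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
    auto lock2 = source_table->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout);
    // ... clone parts from the source table and commit them here ...
    lock2.reset(); // release before waiting on other replicas, so they are free
    lock1.reset(); // to take their own locks on these tables
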
@@ -4925,10 +4914,8 @@ CancellationCode StorageReplicatedMergeTree::killMutation(const String & mutatio void StorageReplicatedMergeTree::clearOldPartsAndRemoveFromZK() { - /// Critical section is not required (since grabOldParts() returns unique part set on each call) - - auto table_lock = lockStructureForShare( - false, RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto table_lock = lockForShare( + RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); auto zookeeper = getZooKeeper(); DataPartsVector parts = grabOldParts(); @@ -5219,8 +5206,8 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ const Context & context) { /// First argument is true, because we possibly will add new data to current table. - auto lock1 = lockStructureForShare(true, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); - auto lock2 = source_table->lockStructureForShare(false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock1 = lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto lock2 = source_table->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); auto source_metadata_snapshot = source_table->getInMemoryMetadataPtr(); auto metadata_snapshot = getInMemoryMetadataPtr(); @@ -5397,16 +5384,16 @@ void StorageReplicatedMergeTree::replacePartitionFrom(const StoragePtr & source_ /// If necessary, wait until the operation is performed on all replicas. if (context.getSettingsRef().replication_alter_partitions_sync > 1) { - lock2.release(); - lock1.release(); + lock2.reset(); + lock1.reset(); waitForAllReplicasToProcessLogEntry(entry); } } void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_table, const ASTPtr & partition, const Context & query_context) { - auto lock1 = lockStructureForShare(false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); - auto lock2 = dest_table->lockStructureForShare(false, query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); + auto lock1 = lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); + auto lock2 = dest_table->lockForShare(query_context.getCurrentQueryId(), query_context.getSettingsRef().lock_acquire_timeout); auto dest_table_storage = std::dynamic_pointer_cast(dest_table); if (!dest_table_storage) @@ -5583,7 +5570,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta if (query_context.getSettingsRef().replication_alter_partitions_sync > 1) { - lock2.release(); + lock2.reset(); dest_table_storage->waitForAllReplicasToProcessLogEntry(entry); } @@ -5600,7 +5587,7 @@ void StorageReplicatedMergeTree::movePartitionToTable(const StoragePtr & dest_ta if (query_context.getSettingsRef().replication_alter_partitions_sync > 1) { - lock1.release(); + lock1.reset(); waitForAllReplicasToProcessLogEntry(entry_delete); } diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index c1ba737d849..e340de88749 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -103,7 +103,7 @@ public: bool optimize(const ASTPtr & query, const StorageMetadataPtr & metadata_snapshot, const ASTPtr & partition, bool final, bool deduplicate, const Context & query_context) override; - void alter(const 
AlterCommands & params, const Context & query_context, TableStructureWriteLockHolder & table_lock_holder) override; + void alter(const AlterCommands & params, const Context & query_context, TableLockHolder & table_lock_holder) override; void alterPartition( const ASTPtr & query, @@ -120,7 +120,7 @@ public: */ void drop() override; - void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) override; void rename(const String & new_path_to_table_data, const StorageID & new_table_id) override; @@ -315,7 +315,7 @@ private: void checkTableStructure(const String & zookeeper_prefix, const StorageMetadataPtr & metadata_snapshot); /// A part of ALTER: apply metadata changes only (data parts are altered separately). - /// Must be called under IStorage::lockStructureForAlter() lock. + /// Must be called under IStorage::lockForAlter() lock. void setTableStructure(ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff); /** Check that the set of parts corresponds to that in ZK (/replicas/me/parts/). diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 93b288569c2..58d5226c91a 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -142,7 +142,7 @@ void StorageSet::finishInsert() { set->finishInsert(); } size_t StorageSet::getSize() const { return set->getTotalRowCount(); } -void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) +void StorageSet::truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { Poco::File(path).remove(true); Poco::File(path).createDirectories(); diff --git a/src/Storages/StorageSet.h b/src/Storages/StorageSet.h index 2685fa26ba6..de7c65bbc3e 100644 --- a/src/Storages/StorageSet.h +++ b/src/Storages/StorageSet.h @@ -67,7 +67,7 @@ public: /// Access the insides. 
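
The system-table hunks that follow keep their defensive shape: the lock is taken per table inside the iteration, and a table dropped mid-scan is skipped instead of failing the whole query. The pattern, sketched (the loop shape and the TABLE_IS_DROPPED check are assumptions based on the surrounding code):

    for (const auto & [table_name, storage] : storages) // hypothetical container
    {
        TableLockHolder table_lock;
        try
        {
            table_lock = storage->lockForShare(query_id, lock_acquire_timeout);
        }
        catch (const Exception & e)
        {
            if (e.code() == ErrorCodes::TABLE_IS_DROPPED)
                continue; /// Vanished between listing and locking; skip it.
            throw;
        }
        // ... emit rows for this table; the lock pins it for the duration ...
    }
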
SetPtr & getSet() { return set; } - void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; private: SetPtr set; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 4b95a389f2c..3086e971121 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -326,7 +326,7 @@ CheckResults StorageStripeLog::checkData(const ASTPtr & /* query */, const Conte return file_checker.check(); } -void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) +void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder &) { std::shared_lock lock(rwlock); diff --git a/src/Storages/StorageStripeLog.h b/src/Storages/StorageStripeLog.h index 381be7762df..dfdf4c381b6 100644 --- a/src/Storages/StorageStripeLog.h +++ b/src/Storages/StorageStripeLog.h @@ -42,7 +42,7 @@ public: Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } - void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr &, const Context &, TableExclusiveLockHolder&) override; protected: StorageStripeLog( diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 4beb44405d7..7a399f35c9c 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -430,7 +430,7 @@ CheckResults StorageTinyLog::checkData(const ASTPtr & /* query */, const Context } void StorageTinyLog::truncate( - const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) + const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) { std::unique_lock lock(rwlock); diff --git a/src/Storages/StorageTinyLog.h b/src/Storages/StorageTinyLog.h index ae124e5e958..60dacf6e162 100644 --- a/src/Storages/StorageTinyLog.h +++ b/src/Storages/StorageTinyLog.h @@ -41,7 +41,7 @@ public: Strings getDataPaths() const override { return {DB::fullPath(disk, table_path)}; } - void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableStructureWriteLockHolder &) override; + void truncate(const ASTPtr &, const StorageMetadataPtr & metadata_snapshot, const Context &, TableExclusiveLockHolder &) override; void drop() override; diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 319ef257d6d..85d0f679708 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -103,11 +103,11 @@ protected: { StoragePtr storage = storages.at(std::make_pair(database_name, table_name)); - TableStructureReadLockHolder table_lock; + TableLockHolder table_lock; try { - table_lock = storage->lockStructureForShare(false, query_id, lock_acquire_timeout); + table_lock = storage->lockForShare(query_id, lock_acquire_timeout); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index b998b60c02d..b48f8a3cb6b 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp 
@@ -196,7 +196,7 @@ StoragesInfo StoragesInfoStream::next() try { /// For table not to be dropped and set of columns to remain constant. - info.table_lock = info.storage->lockStructureForShare(false, query_id, settings.lock_acquire_timeout); + info.table_lock = info.storage->lockForShare(query_id, settings.lock_acquire_timeout); } catch (const Exception & e) { diff --git a/src/Storages/System/StorageSystemPartsBase.h b/src/Storages/System/StorageSystemPartsBase.h index 8af1f46d8a7..56c9a8fb0d0 100644 --- a/src/Storages/System/StorageSystemPartsBase.h +++ b/src/Storages/System/StorageSystemPartsBase.h @@ -14,7 +14,7 @@ class Context; struct StoragesInfo { StoragePtr storage = nullptr; - TableStructureReadLockHolder table_lock; + TableLockHolder table_lock; String database; String table; diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp index df8df75ad6d..b7f029945d8 100644 --- a/src/Storages/System/StorageSystemTables.cpp +++ b/src/Storages/System/StorageSystemTables.cpp @@ -245,7 +245,7 @@ protected: continue; StoragePtr table = nullptr; - TableStructureReadLockHolder lock; + TableLockHolder lock; if (need_lock_structure) { @@ -257,8 +257,7 @@ protected: } try { - lock = table->lockStructureForShare( - false, context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + lock = table->lockForShare(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); } catch (const Exception & e) { diff --git a/src/Storages/TableStructureLockHolder.h b/src/Storages/TableStructureLockHolder.h index b5fc0c620ad..946f9ee545c 100644 --- a/src/Storages/TableStructureLockHolder.h +++ b/src/Storages/TableStructureLockHolder.h @@ -5,44 +5,17 @@ namespace DB { -/// Structs that hold table structure (columns, their types, default values etc.) locks when executing queries. -/// See IStorage::lock* methods for comments. - -struct TableStructureWriteLockHolder +struct TableExclusiveLockHolder { - void release() - { - *this = TableStructureWriteLockHolder(); - } - - void releaseAllExceptAlterIntention() - { - new_data_structure_lock.reset(); - structure_lock.reset(); - } + void release() { *this = TableExclusiveLockHolder(); } private: friend class IStorage; /// Order is important. - RWLockImpl::LockHolder alter_intention_lock; - RWLockImpl::LockHolder new_data_structure_lock; - RWLockImpl::LockHolder structure_lock; -}; - -struct TableStructureReadLockHolder -{ - void release() - { - *this = TableStructureReadLockHolder(); - } - -private: - friend class IStorage; - - /// Order is important. - RWLockImpl::LockHolder new_data_structure_lock; - RWLockImpl::LockHolder structure_lock; + RWLockImpl::LockHolder alter_lock; + RWLockImpl::LockHolder drop_lock; }; +using TableLockHolder = RWLockImpl::LockHolder; } From 54e5fe7dbc6336da462812549ae2cad1911cbf66 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 19:19:40 +0300 Subject: [PATCH 139/318] Less locks --- src/Storages/StorageMergeTree.cpp | 2 -- src/Storages/StorageReplicatedMergeTree.cpp | 14 +++++--------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9042afe1b2d..16e921f5294 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1086,8 +1086,6 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, cons /// Asks to complete merges and does not allow them to start. 
/// This protects against "revival" of data for a removed partition after completion of merge. auto merge_blocker = merger_mutator.merges_blocker.cancel(); - /// Waits for completion of merge and does not start new ones. - auto lock = lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); String partition_id = getPartitionIDFromQuery(partition, context); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b62f6bbd198..c1eb8183a32 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3633,17 +3633,13 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer zookeeper->multi(requests); - { - auto alter_lock = lockForAlter(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); - LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); + setTableStructure(std::move(columns_from_entry), metadata_diff); + metadata_version = entry.alter_version; - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); - setTableStructure(std::move(columns_from_entry), metadata_diff); - metadata_version = entry.alter_version; - - LOG_INFO(log, "Applied changes to the metadata of the table. Current metadata version: {}", metadata_version); - } + LOG_INFO(log, "Applied changes to the metadata of the table. Current metadata version: {}", metadata_version); /// This transaction may not happen, but it's OK, because on the next retry we will eventually create/update this node zookeeper->createOrUpdate(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent); From b3ee8967dce4f9b6e9e99e447ff3b63a4b2d7e63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 19:28:20 +0300 Subject: [PATCH 140/318] Fix style --- src/Storages/IStorage.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 42224ec01ac..919464a6a5d 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -19,7 +19,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int TABLE_IS_DROPPED; extern const int NOT_IMPLEMENTED; extern const int DEADLOCK_AVOIDED; From e888dafdc25f9a498feefdc3eec2776c93731892 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 20:03:42 +0300 Subject: [PATCH 141/318] Remove unused method --- src/Storages/MergeTree/MergeTreeData.cpp | 11 ----------- src/Storages/MergeTree/MergeTreeData.h | 1 - src/Storages/StorageMergeTree.cpp | 1 - src/Storages/StorageReplicatedMergeTree.cpp | 7 +++---- 4 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0ed4e98e864..779a6a7ebea 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -183,9 +183,6 @@ MergeTreeData::MergeTreeData( throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); } - - setTTLExpressions(metadata_, metadata_); - /// format_file always contained on any data path PathWithDisk version_file; /// Creating directories, if not 
exist. @@ -516,14 +513,6 @@ void MergeTreeData::checkTTLExpressions(const StorageInMemoryMetadata & new_meta } } -/// Todo replace columns with TTL for columns -void MergeTreeData::setTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) -{ - checkTTLExpressions(new_metadata, old_metadata); - //setColumnTTLs(new_metadata.column_ttls_by_name); - //setTableTTLs(new_metadata.table_ttl); -} - void MergeTreeData::checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const { diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index ca6928cbb01..2e6c0bfc903 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -792,7 +792,6 @@ protected: void initPartitionKey(const KeyDescription & new_partition_key); void checkTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) const; - void setTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata); void checkStoragePolicy(const StoragePolicyPtr & new_storage_policy) const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 16e921f5294..324c61ae419 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -278,7 +278,6 @@ void StorageMergeTree::alter( changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. setProperties(new_metadata, old_metadata); - setTTLExpressions(new_metadata, old_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c1eb8183a32..352e0cbe802 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -816,13 +816,12 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column } } - auto table_id = getStorageID(); - DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, new_metadata); - /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. 
setProperties(new_metadata, old_metadata); - setTTLExpressions(new_metadata, old_metadata); + + auto table_id = getStorageID(); + DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(global_context, table_id, new_metadata); } From c666763cc05048477fea6099c0b20280bc3946cd Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 20:09:06 +0300 Subject: [PATCH 142/318] Remove unused method better --- src/Storages/MergeTree/MergeTreeData.cpp | 3 +++ src/Storages/StorageMergeTree.cpp | 1 + src/Storages/StorageReplicatedMergeTree.cpp | 1 + 3 files changed, 5 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 779a6a7ebea..72937bd7102 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -183,6 +183,9 @@ MergeTreeData::MergeTreeData( throw Exception("Sampling expression must be present in the primary key", ErrorCodes::BAD_ARGUMENTS); } + + checkTTLExpressions(metadata_, metadata_); + /// format_file always contained on any data path PathWithDisk version_file; /// Creating directories, if not exist. diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 324c61ae419..1c497a6b62c 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -277,6 +277,7 @@ void StorageMergeTree::alter( { changeSettings(new_metadata.settings_changes, table_lock_holder); /// Reinitialize primary key because primary key column types might have changed. + checkTTLExpressions(new_metadata, old_metadata); setProperties(new_metadata, old_metadata); DatabaseCatalog::instance().getDatabase(table_id.database_name)->alterTable(context, table_id, new_metadata); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 352e0cbe802..30b2644fd04 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -818,6 +818,7 @@ void StorageReplicatedMergeTree::setTableStructure(ColumnsDescription new_column /// Even if the primary/sorting keys didn't change we must reinitialize it /// because primary key column types might have changed. + checkTTLExpressions(new_metadata, old_metadata); setProperties(new_metadata, old_metadata); auto table_id = getStorageID(); From 8b131e2079c07b8d5c46cc758055e46d8e029d61 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 20:19:11 +0300 Subject: [PATCH 143/318] Remove int contention --- src/Storages/StorageReplicatedMergeTree.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 30b2644fd04..c1534d9eed6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3633,13 +3633,16 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer zookeeper->multi(requests); - LOG_INFO(log, "Metadata changed in ZooKeeper. Applying changes locally."); + { + auto alter_lock = lockExclusively(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + LOG_INFO(log, "Metadata changed in ZooKeeper. 
Applying changes locally."); - auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); - setTableStructure(std::move(columns_from_entry), metadata_diff); - metadata_version = entry.alter_version; + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); + setTableStructure(std::move(columns_from_entry), metadata_diff); + metadata_version = entry.alter_version; - LOG_INFO(log, "Applied changes to the metadata of the table. Current metadata version: {}", metadata_version); + LOG_INFO(log, "Applied changes to the metadata of the table. Current metadata version: {}", metadata_version); + } /// This transaction may not happen, but it's OK, because on the next retry we will eventually create/update this node zookeeper->createOrUpdate(replica_path + "/metadata_version", std::to_string(metadata_version), zkutil::CreateMode::Persistent); From 041533eae204a2bfc478ed551e3554032d940ef4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 16 Jun 2020 21:49:04 +0300 Subject: [PATCH 144/318] Disable optimize_skip_unused_shards if sharding_key has non-deterministic func Example of such functions is rand() And this patch disables only optimize_skip_unused_shards, i.e. INSERT code path does not changed, so it will work as before. --- src/Storages/StorageDistributed.cpp | 18 +++++++++++++++++- src/Storages/StorageDistributed.h | 1 + ...01071_force_optimize_skip_unused_shards.sql | 6 ++++++ ...nused_shards_no_non_deterministic.reference | 0 ...skip_unused_shards_no_non_deterministic.sql | 10 ++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference create mode 100644 tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 77ed0470d4a..d434aa4b0b9 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -188,6 +189,18 @@ ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, con return ExpressionAnalyzer(query, syntax_result, context).getActions(project); } +bool isExpressionActionsDeterministics(const ExpressionActionsPtr & actions) +{ + for (const auto & action : actions->getActions()) + { + if (action.type != ExpressionAction::APPLY_FUNCTION) + continue; + if (!action.function_base->isDeterministic()) + return false; + } + return true; +} + class ReplacingConstantExpressionsMatcher { public: @@ -292,6 +305,7 @@ StorageDistributed::StorageDistributed( { sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, getColumns().getAllPhysical(), false); sharding_key_column_name = sharding_key_->getColumnName(); + sharding_key_is_deterministic = isExpressionActionsDeterministics(sharding_key_expr); } if (!relative_data_path.empty()) @@ -687,7 +701,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons ClusterPtr cluster = getCluster(); const Settings & settings = context.getSettingsRef(); - if (has_sharding_key) + if (has_sharding_key && sharding_key_is_deterministic) { ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, context); if (optimized) @@ -700,6 +714,8 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons 
std::stringstream exception_message; if (!has_sharding_key) exception_message << "No sharding key"; + else if (!sharding_key_is_deterministic) + exception_message << "Sharding key is not deterministic"; else exception_message << "Sharding key " << sharding_key_column_name << " is not used"; diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index ecd2b17b48e..02da81a1172 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -142,6 +142,7 @@ public: const String cluster_name; bool has_sharding_key; + bool sharding_key_is_deterministic = false; ExpressionActionsPtr sharding_key_expr; String sharding_key_column_name; diff --git a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql index dbbc2c735ce..98878f4fdd8 100644 --- a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql @@ -24,6 +24,12 @@ set force_optimize_skip_unused_shards=1; select * from dist_01071; -- { serverError 507 } set force_optimize_skip_unused_shards=2; select * from dist_01071; -- { serverError 507 } +drop table if exists dist_01071; + +-- non-deterministic function (e.g. rand()) +create table dist_01071 as data_01071 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01071, key + rand()); +set force_optimize_skip_unused_shards=1; +select * from dist_01071 where key = 0; -- { serverError 507 } drop table if exists data_01071; drop table if exists dist_01071; diff --git a/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql new file mode 100644 index 00000000000..ca58f7be94c --- /dev/null +++ b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql @@ -0,0 +1,10 @@ +drop table if exists data_01320; +drop table if exists dist_01320; + +create table data_01320 (key Int) Engine=Null(); +-- non-deterministic function (e.g. rand()) +create table dist_01320 as data_01320 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01320, key + rand()); + +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=1; +select * from dist_01320 where key = 0; -- { serverError 507 } From 65072473284812856e032eded7c8a356563dee7c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 16 Jun 2020 22:02:06 +0300 Subject: [PATCH 145/318] optimize_skip_unused_shards=2 will disable it for nested distributed queries P.S. Looks like settings can be converted between SettingUInt64 and SettingBool without breaking binary protocol.
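To make the new semantics concrete, here is a minimal sketch (hypothetical table names; test_cluster_two_shards is one of the standard test clusters): with the value 2, shard pruning still applies to the outer Distributed table, but the setting is reset before the query is forwarded, so the nested Distributed table reads all of its shards.

    -- hypothetical illustration, not part of this patch
    create table data_sketch (key Int, sub_key Int) Engine=Null();
    create table dist_inner as data_sketch Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_sketch, sub_key);
    create table dist_outer as data_sketch Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_inner, key);

    set optimize_skip_unused_shards = 2;
    select * from dist_outer where key = 0; -- dist_outer is pruned to one shard; dist_inner is not pruned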
FWIW, maybe it is a good idea to change the semantics of the settings as follows (but I guess that changing the semantics is not a good idea; better to add new settings and deprecate the old ones): - optimize_skip_unused_shards -- accept nesting level on which the optimization will work - force_skip_optimize_shards_nesting -- accept nesting level on which the optimization will work --- docs/en/operations/settings/settings.md | 3 ++- docs/ru/operations/settings/settings.md | 3 ++- src/Core/Settings.h | 2 +- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++++++++ ...optimize_skip_unused_shards_no_nested.reference | 0 ...01319_optimize_skip_unused_shards_no_nested.sql | 14 ++++++++++++++ 6 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference create mode 100644 tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 237058f1b83..89a3e60b6e7 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1121,7 +1121,8 @@ Enables or disables skipping of unused shards for [SELECT](../../sql-reference/s Possible values: - 0 — Disabled. -- 1 — Enabled. +- 1 — Enabled, including nested `Distributed()` tables. +- 2 — Enabled, excluding nested `Distributed()` tables. Default value: 0 diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 5e34affcaac..05492700ee7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1032,7 +1032,8 @@ ClickHouse генерирует исключение Возможные значения: - 0 — Выключена. -- 1 — Включена. +- 1 — Включена, включая вложенные `Distributed` таблицы. +- 2 — Включена, исключая вложенные `Distributed` таблицы. Значение по умолчанию: 0 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1f3a8f42400..daef73a002f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -121,7 +121,7 @@ struct Settings : public SettingsCollection \ M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \ M(SettingBool, parallel_distributed_insert_select, false, "If true, distributed insert select query in the same cluster will be processed on local tables on every shard", 0) \ - M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ + M(SettingUInt64, optimize_skip_unused_shards, 0, "Assumes that data is distributed by sharding_key.
Optimization to skip unused shards if SELECT query filters by sharding_key (1 - also applies to nested Distributed tables, 2 - disabled for nested Distributed tables).", 0) \ M(SettingBool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \ M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw).", 0) \ M(SettingBool, force_optimize_skip_unused_shards_no_nested, false, "Do not apply force_optimize_skip_unused_shards for nested Distributed tables.", 0) \ diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index fa977249eaa..64aae175598 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -17,6 +17,8 @@ namespace ClusterProxy Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings) { + static const UInt64 OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED = 2; + Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -34,6 +36,12 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.force_optimize_skip_unused_shards.changed = false; } + if (settings.optimize_skip_unused_shards == OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED) + { + new_settings.optimize_skip_unused_shards = 0; + new_settings.optimize_skip_unused_shards.changed = false; + } + Context new_context(context); new_context.setSettings(new_settings); diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql new file mode 100644 index 00000000000..293ab42dcf4 --- /dev/null +++ b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql @@ -0,0 +1,14 @@ +drop table if exists data_01319; +drop table if exists dist_01319; +drop table if exists dist_layer_01319; + +create table data_01319 (key Int, sub_key Int) Engine=Null(); + +set force_optimize_skip_unused_shards=2; +set optimize_skip_unused_shards=1; + +create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key%2); +create table dist_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01319, key%2); +select * from dist_01319 where key = 1; -- { serverError 507 } +set optimize_skip_unused_shards=2; -- no nested +select * from dist_01319 where key = 1; From d34e6217bcf325f5f2273c079d4b1b9d3ac87c0f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:39 +0300 Subject: [PATCH 146/318] Add logging of adjusting conditional settings for distributed queries --- src/Interpreters/ClusterProxy/executeQuery.cpp | 14 +++++++++++--- src/Interpreters/ClusterProxy/executeQuery.h | 4 ++-- src/Storages/StorageDistributed.cpp | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 64aae175598..38ad60f30bf 100644 ---
a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -15,7 +15,7 @@ namespace DB namespace ClusterProxy { -Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings) +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log) { static const UInt64 OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED = 2; @@ -34,12 +34,18 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin { new_settings.force_optimize_skip_unused_shards = 0; new_settings.force_optimize_skip_unused_shards.changed = false; + + if (log) + LOG_TRACE(log, "Disabling force_optimize_skip_unused_shards (due to force_optimize_skip_unused_shards_no_nested)"); } if (settings.optimize_skip_unused_shards == OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED) { new_settings.optimize_skip_unused_shards = 0; new_settings.optimize_skip_unused_shards.changed = false; + + if (log) + LOG_TRACE(log, "Disabling optimize_skip_unused_shards (due to optimize_skip_unused_shards=2)"); } Context new_context(context); @@ -49,14 +55,16 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } Pipes executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, + IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info) { + assert(log); + Pipes res; const std::string query = queryToString(query_ast); - Context new_context = removeUserRestrictionsFromSettings(context, settings); + Context new_context = removeUserRestrictionsFromSettings(context, settings, log); ThrottlerPtr user_level_throttler; if (auto * process_list_element = context.getProcessListElement()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index fed8b83db03..dcbbe0c7e95 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -21,13 +21,13 @@ class IStreamFactory; /// removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.) /// from settings and creates new context with them -Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings); +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log = nullptr); /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read. /// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// (currently SELECT, DESCRIBE). 
Pipes executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, + IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d434aa4b0b9..201aeb7273b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -519,8 +519,8 @@ Pipes StorageDistributed::read( : ClusterProxy::SelectStreamFactory( header, processed_stage, StorageID{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables()); - return ClusterProxy::executeQuery( - select_stream_factory, cluster, modified_query_ast, context, context.getSettingsRef(), query_info); + return ClusterProxy::executeQuery(select_stream_factory, cluster, log, + modified_query_ast, context, context.getSettingsRef(), query_info); } From 724c09a22c75bded4f043ac6d7e2616d70b54307 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:39 +0300 Subject: [PATCH 147/318] Add missing DROP TABLE in 01319_mv_constants_bug --- tests/queries/0_stateless/01319_mv_constants_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01319_mv_constants_bug.sql b/tests/queries/0_stateless/01319_mv_constants_bug.sql index 975a33d7b71..191183ab286 100644 --- a/tests/queries/0_stateless/01319_mv_constants_bug.sql +++ b/tests/queries/0_stateless/01319_mv_constants_bug.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS distributed_table_1; DROP TABLE IF EXISTS distributed_table_2; DROP TABLE IF EXISTS local_table_1; DROP TABLE IF EXISTS local_table_2; +DROP TABLE IF EXISTS local_table_merged; CREATE TABLE local_table_1 (id String) ENGINE = MergeTree ORDER BY (id); CREATE TABLE local_table_2(id String) ENGINE = MergeTree ORDER BY (id); From 0e218b0f15a502ed5c95499e13fa89f8d52fa006 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:40 +0300 Subject: [PATCH 148/318] Improve 01319_optimize_skip_unused_shards_no_nested Before, there was no check that optimize_skip_unused_shards was working for the first level; use a cluster with an unavailable shard to guarantee this.
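The trick, sketched below with hypothetical names (test_unavailable_shard is a test cluster containing one shard that no connection can be made to): a query routed through such a cluster can only succeed if first-level pruning actually removed the dead shard, so a regression in the optimization shows up as a hard connection error instead of passing silently.

    -- hypothetical illustration of the technique used in the test below
    create table data_sketch (key Int) Engine=Null();
    create table dist_sketch as data_sketch Engine=Distributed(test_unavailable_shard, currentDatabase(), data_sketch, key);

    set optimize_skip_unused_shards = 1;
    select * from dist_sketch where key = 0; -- succeeds only if the unavailable shard is pruned away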
--- .../01319_optimize_skip_unused_shards_no_nested.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql index 293ab42dcf4..6bf8e17a56c 100644 --- a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql +++ b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql @@ -7,8 +7,10 @@ create table data_01319 (key Int, sub_key Int) Engine=Null(); set force_optimize_skip_unused_shards=2; set optimize_skip_unused_shards=1; -create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key%2); -create table dist_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01319, key%2); +create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key); +-- test_unavailable_shard here to check that optimize_skip_unused_shards always +-- removes some nodes from the cluster at the first nesting level +create table dist_01319 as data_01319 Engine=Distributed(test_unavailable_shard, currentDatabase(), dist_layer_01319, key+1); select * from dist_01319 where key = 1; -- { serverError 507 } set optimize_skip_unused_shards=2; -- no nested select * from dist_01319 where key = 1; From b3ee8967dce4f9b6e9e99e447ff3b63a4b2d7e63 Mon Sep 17 00:00:00 2001 From: alesapin Date: Thu, 18 Jun 2020 19:28:20 +0300 Subject: [PATCH 149/318] Fix locks --- src/Storages/LiveView/StorageLiveView.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 0abb01d7dc7..f4f3c6b8642 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -514,7 +514,7 @@ void StorageLiveView::drop() void StorageLiveView::refresh(const Context & context) { - auto alter_lock = lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); + auto table_lock = lockExclusively(context.getCurrentQueryId(), context.getSettingsRef().lock_acquire_timeout); { std::lock_guard lock(mutex); if (getNewBlocks()) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c1534d9eed6..c256a79dfe3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3634,7 +3634,7 @@ bool StorageReplicatedMergeTree::executeMetadataAlter(const StorageReplicatedMer zookeeper->multi(requests); { - auto alter_lock = lockExclusively(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); + auto lock = lockForAlter(RWLockImpl::NO_QUERY, getSettings()->lock_acquire_timeout_for_background_operations); LOG_INFO(log, "Metadata changed in ZooKeeper.
Applying changes locally."); auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, getInMemoryMetadataPtr()).checkAndFindDiff(metadata_from_entry); From 016ee6316faa9bf1523e1a1f65a0aa3ece449234 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 19 Jun 2020 15:05:29 +0300 Subject: [PATCH 150/318] Add missed check --- src/Storages/StorageInMemoryMetadata.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index 81d1f387424..c33361ec7bf 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -16,6 +16,7 @@ namespace ErrorCodes extern const int NO_SUCH_COLUMN_IN_TABLE; extern const int NOT_FOUND_COLUMN_IN_BLOCK; extern const int TYPE_MISMATCH; + extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } @@ -69,6 +70,8 @@ StorageInMemoryMetadata & StorageInMemoryMetadata::operator=(const StorageInMemo void StorageInMemoryMetadata::setColumns(ColumnsDescription columns_) { + if (columns_.getAllPhysical().empty()) + throw Exception("Empty list of columns passed", ErrorCodes::EMPTY_LIST_OF_COLUMNS_PASSED); columns = std::move(columns_); } From 3c47faa9daa9ac8bc9445ac914d6a7d266d5e239 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 19 Jun 2020 18:14:08 +0300 Subject: [PATCH 151/318] Move partition key initialization into registerMergeTree --- src/Storages/IStorage.h | 2 ++ src/Storages/MergeTree/MergeTreeData.cpp | 6 +----- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/MergeTree/registerStorageMergeTree.cpp | 6 ++++++ src/Storages/StorageInMemoryMetadata.h | 3 --- 5 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 1309b727a74..eb69264c6c8 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -138,7 +138,9 @@ public: public: StorageInMemoryMetadata getInMemoryMetadata() const { return *metadata.get(); } + StorageMetadataPtr getInMemoryMetadataPtr() const { return metadata.get(); } + void setInMemoryMetadata(const StorageInMemoryMetadata & metadata_) { metadata.set(std::make_unique(metadata_)); diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 72937bd7102..dfc7636b3e4 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -118,7 +118,7 @@ const char * DELETE_ON_DESTROY_MARKER_PATH = "delete-on-destroy.txt"; MergeTreeData::MergeTreeData( const StorageID & table_id_, const String & relative_data_path_, - StorageInMemoryMetadata metadata_, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, @@ -143,15 +143,11 @@ MergeTreeData::MergeTreeData( throw Exception("MergeTree storages require data path", ErrorCodes::INCORRECT_FILE_NAME); MergeTreeDataFormatVersion min_format_version(0); - /// TODO(alesap) Move to register methods if (!date_column_name.empty()) { try { - auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); - metadata_.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata_.columns, global_context); initPartitionKey(metadata_.partition_key); - if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 2e6c0bfc903..c6721658d78 100644 --- 
a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -324,7 +324,7 @@ public: /// attach - whether the existing table is attached or the new table is created. MergeTreeData(const StorageID & table_id_, const String & relative_data_path_, - StorageInMemoryMetadata metadata_, + const StorageInMemoryMetadata & metadata_, Context & context_, const String & date_column_name, const MergingParams & merging_params_, diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 98884de985e..1ecac8f413d 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -570,6 +570,12 @@ static StoragePtr create(const StorageFactory::Arguments & args) throw Exception( "Date column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); + + auto partition_by_ast = makeASTFunction("toYYYYMM", std::make_shared(date_column_name)); + + metadata.partition_key = KeyDescription::getKeyFromAST(partition_by_ast, metadata.columns, args.context); + + ++arg_num; /// If there is an expression for sampling diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index bda48bc19cb..756b11150fb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -14,9 +14,6 @@ namespace DB { -/// Structure represent table metadata stored in memory. -/// Only one storage engine support all fields -- MergeTree. -/// Complete table AST can be recreated from this struct. struct StorageInMemoryMetadata { /// Columns of table with their names, types, From a2e7e9f232edbb4e09a24a962b9aef2f2b8b9236 Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 19 Jun 2020 18:21:48 +0300 Subject: [PATCH 152/318] Remove unused constructor and rename method --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeData.h | 2 +- src/Storages/StorageInMemoryMetadata.cpp | 11 ----------- src/Storages/StorageInMemoryMetadata.h | 1 - 4 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index dfc7636b3e4..f1567a1d18d 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -147,7 +147,7 @@ MergeTreeData::MergeTreeData( { try { - initPartitionKey(metadata_.partition_key); + checkPartitionKeyAndInitMinMax(metadata_.partition_key); if (minmax_idx_date_column_pos == -1) throw Exception("Could not find Date column", ErrorCodes::BAD_TYPE_OF_FIELD); } @@ -161,7 +161,7 @@ MergeTreeData::MergeTreeData( else { is_custom_partitioned = true; - initPartitionKey(metadata_.partition_key); + checkPartitionKeyAndInitMinMax(metadata_.partition_key); min_format_version = MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING; } @@ -415,7 +415,7 @@ ExpressionActionsPtr MergeTreeData::getSortingKeyAndSkipIndicesExpression(const } -void MergeTreeData::initPartitionKey(const KeyDescription & new_partition_key) +void MergeTreeData::checkPartitionKeyAndInitMinMax(const KeyDescription & new_partition_key) { if (new_partition_key.expression_list_ast->children.empty()) return; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index c6721658d78..1b970d470ba 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -789,7 +789,7 @@ protected: 
void setProperties(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata, bool attach = false); - void initPartitionKey(const KeyDescription & new_partition_key); + void checkPartitionKeyAndInitMinMax(const KeyDescription & new_partition_key); void checkTTLExpressions(const StorageInMemoryMetadata & new_metadata, const StorageInMemoryMetadata & old_metadata) const; diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index c33361ec7bf..e39dff6472e 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -19,17 +19,6 @@ namespace ErrorCodes extern const int EMPTY_LIST_OF_COLUMNS_PASSED; } - -StorageInMemoryMetadata::StorageInMemoryMetadata( - const ColumnsDescription & columns_, - const IndicesDescription & secondary_indices_, - const ConstraintsDescription & constraints_) - : columns(columns_) - , secondary_indices(secondary_indices_) - , constraints(constraints_) -{ -} - StorageInMemoryMetadata::StorageInMemoryMetadata(const StorageInMemoryMetadata & other) : columns(other.columns) , secondary_indices(other.secondary_indices) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 756b11150fb..83f9180dcbb 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -42,7 +42,6 @@ struct StorageInMemoryMetadata SelectQueryDescription select; StorageInMemoryMetadata() = default; - StorageInMemoryMetadata(const ColumnsDescription & columns_, const IndicesDescription & secondary_indices_, const ConstraintsDescription & constraints_); StorageInMemoryMetadata(const StorageInMemoryMetadata & other); StorageInMemoryMetadata & operator=(const StorageInMemoryMetadata & other); From c9fa5d2ec3e1ad7612702049e048a0193fdd991c Mon Sep 17 00:00:00 2001 From: alesapin Date: Fri, 19 Jun 2020 18:39:41 +0300 Subject: [PATCH 153/318] Better naming --- src/Storages/IStorage.h | 3 +-- src/Storages/Kafka/StorageKafka.cpp | 6 +++--- src/Storages/LiveView/StorageLiveView.cpp | 6 +++--- src/Storages/StorageBuffer.cpp | 8 ++++---- src/Storages/StorageDictionary.cpp | 6 +++--- src/Storages/StorageDistributed.cpp | 10 +++++----- src/Storages/StorageFile.cpp | 14 +++++++------- src/Storages/StorageGenerateRandom.cpp | 6 +++--- src/Storages/StorageHDFS.cpp | 8 ++++---- src/Storages/StorageInput.cpp | 6 +++--- src/Storages/StorageLog.cpp | 10 +++++----- src/Storages/StorageMaterializedView.cpp | 8 ++++---- src/Storages/StorageMemory.cpp | 8 ++++---- src/Storages/StorageMerge.cpp | 6 +++--- src/Storages/StorageMySQL.cpp | 8 ++++---- src/Storages/StorageS3.cpp | 8 ++++---- src/Storages/StorageSet.cpp | 8 ++++---- src/Storages/StorageStripeLog.cpp | 8 ++++---- src/Storages/StorageTinyLog.cpp | 10 +++++----- src/Storages/StorageURL.cpp | 8 ++++---- src/Storages/StorageValues.cpp | 6 +++--- src/Storages/StorageView.cpp | 8 ++++---- src/Storages/System/StorageSystemColumns.cpp | 6 +++--- src/Storages/System/StorageSystemDetachedParts.cpp | 6 +++--- src/Storages/System/StorageSystemDisks.cpp | 6 +++--- src/Storages/System/StorageSystemNumbers.cpp | 6 +++--- src/Storages/System/StorageSystemOne.cpp | 6 +++--- src/Storages/System/StorageSystemPartsBase.cpp | 6 +++--- src/Storages/System/StorageSystemReplicas.cpp | 6 +++--- .../System/StorageSystemStoragePolicies.cpp | 6 +++--- src/Storages/System/StorageSystemTables.cpp | 6 +++--- src/Storages/System/StorageSystemZeros.cpp | 6 +++--- 32 files changed, 114 insertions(+), 115 
deletions(-) diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index eb69264c6c8..e980bad889c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -80,8 +80,7 @@ class IStorage : public std::enable_shared_from_this, public TypePromo { public: IStorage() = delete; - /// Storage fields should be initialized in separate methods like setColumns - /// or setTableTTLs. + /// Storage metadata can be set separately in setInMemoryMetadata method explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_unique()) {} //-V730 virtual ~IStorage() = default; diff --git a/src/Storages/Kafka/StorageKafka.cpp b/src/Storages/Kafka/StorageKafka.cpp index b46cf0579ec..e0949cd9420 100644 --- a/src/Storages/Kafka/StorageKafka.cpp +++ b/src/Storages/Kafka/StorageKafka.cpp @@ -137,9 +137,9 @@ StorageKafka::StorageKafka( , intermediate_commit(kafka_settings->kafka_commit_every_batch.value) , settings_adjustments(createSettingsAdjustments()) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); task = global_context.getSchedulePool().createTask(log->name(), [this]{ threadFunc(); }); task->deactivate(); diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index f4f3c6b8642..efd0a71b18f 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -255,9 +255,9 @@ StorageLiveView::StorageLiveView( live_view_context = std::make_unique(global_context); live_view_context->makeQueryContext(); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index b4d6b66ebe7..bd3945c26e0 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -77,10 +77,10 @@ StorageBuffer::StorageBuffer( , log(&Poco::Logger::get("StorageBuffer (" + table_id_.getFullTableName() + ")")) , bg_pool(global_context.getBufferFlushSchedulePool()) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageDictionary.cpp b/src/Storages/StorageDictionary.cpp index 25126ad951d..83a093d5635 100644 --- a/src/Storages/StorageDictionary.cpp +++ b/src/Storages/StorageDictionary.cpp @@ -100,9 +100,9 @@ StorageDictionary::StorageDictionary( : IStorage(table_id_) , dictionary_name(dictionary_name_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription{getNamesAndTypes(dictionary_structure_)}); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 9c20e3f8e11..bf9e7f126c5 100644 
--- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -290,14 +290,14 @@ StorageDistributed::StorageDistributed( , storage_policy(storage_policy_) , relative_data_path(relative_data_path_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); if (sharding_key_) { - sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, metadata_.getColumns().getAllPhysical(), false); + sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, storage_metadata.getColumns().getAllPhysical(), false); sharding_key_column_name = sharding_key_->getColumnName(); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 4867a0bc215..c7671fd8759 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -167,9 +167,9 @@ StorageFile::StorageFile(const std::string & table_path_, const std::string & us Block header = StorageDistributedDirectoryMonitor::createStreamFromFile(first_path)->getHeader(); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(ColumnsDescription(header.getNamesAndTypesList())); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(ColumnsDescription(header.getNamesAndTypesList())); + setInMemoryMetadata(storage_metadata); } } } @@ -191,12 +191,12 @@ StorageFile::StorageFile(CommonArguments args) , compression_method(args.compression_method) , base_path(args.context.getPath()) { - StorageInMemoryMetadata metadata_; + StorageInMemoryMetadata storage_metadata; if (args.format_name != "Distributed") - metadata_.setColumns(args.columns); + storage_metadata.setColumns(args.columns); - metadata_.setConstraints(args.constraints); - setInMemoryMetadata(metadata_); + storage_metadata.setConstraints(args.constraints); + setInMemoryMetadata(storage_metadata); } class StorageFileSource : public SourceWithProgress diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index dad323f7b72..6d923f7678c 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -388,9 +388,9 @@ StorageGenerateRandom::StorageGenerateRandom(const StorageID & table_id_, const : IStorage(table_id_), max_array_length(max_array_length_), max_string_length(max_string_length_) { random_seed = random_seed_ ? 
sipHash64(*random_seed_) : randomSeed(); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageHDFS.cpp b/src/Storages/StorageHDFS.cpp index ee5a426cedc..ce492017a09 100644 --- a/src/Storages/StorageHDFS.cpp +++ b/src/Storages/StorageHDFS.cpp @@ -50,10 +50,10 @@ StorageHDFS::StorageHDFS(const String & uri_, { context.getRemoteHostFilter().checkURL(Poco::URI(uri)); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); } namespace diff --git a/src/Storages/StorageInput.cpp b/src/Storages/StorageInput.cpp index 4430fb11186..dc8d7ec1581 100644 --- a/src/Storages/StorageInput.cpp +++ b/src/Storages/StorageInput.cpp @@ -21,9 +21,9 @@ namespace ErrorCodes StorageInput::StorageInput(const StorageID & table_id, const ColumnsDescription & columns_) : IStorage(table_id) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 542fb507d83..a655373ce6e 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -434,10 +434,10 @@ StorageLog::StorageLog( , max_compress_block_size(max_compress_block_size_) , file_checker(disk, table_path + "sizes.json") { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); @@ -445,7 +445,7 @@ StorageLog::StorageLog( /// create directories if they do not exist disk->createDirectories(table_path); - for (const auto & column : metadata_.getColumns().getAllPhysical()) + for (const auto & column : storage_metadata.getColumns().getAllPhysical()) addFiles(column.name, *column.type); marks_file_path = table_path + DBMS_STORAGE_LOG_MARKS_FILE_NAME; diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 976b3c80dec..9e6245c3ced 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -50,8 +50,8 @@ StorageMaterializedView::StorageMaterializedView( bool attach_) : IStorage(table_id_), global_context(local_context.getGlobalContext()) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); if (!query.select) throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); @@ -67,8 +67,8 @@ StorageMaterializedView::StorageMaterializedView( throw Exception("UNION is not supported for MATERIALIZED VIEW", ErrorCodes::QUERY_IS_NOT_SUPPORTED_IN_MATERIALIZED_VIEW); auto select = 
SelectQueryDescription::getSelectQueryFromASTForMatView(query.select->clone(), local_context); - metadata_.setSelectQuery(select); - setInMemoryMetadata(metadata_); + storage_metadata.setSelectQuery(select); + setInMemoryMetadata(storage_metadata); if (!has_inner_table) target_table_id = query.to_table_id; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 05b37ecf32e..c77fddb2372 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -94,10 +94,10 @@ private: StorageMemory::StorageMemory(const StorageID & table_id_, ColumnsDescription columns_description_, ConstraintsDescription constraints_) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(std::move(columns_description_)); - metadata_.setConstraints(std::move(constraints_)); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(std::move(columns_description_)); + storage_metadata.setConstraints(std::move(constraints_)); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index f4030ed573f..8a28387d24d 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -51,9 +51,9 @@ StorageMerge::StorageMerge( , table_name_regexp(table_name_regexp_) , global_context(context_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } template diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 3e9b48e976b..919acd79fdd 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -56,10 +56,10 @@ StorageMySQL::StorageMySQL( , pool(std::move(pool_)) , global_context(context_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index 7f237fd551f..f8d3de3a238 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -206,10 +206,10 @@ StorageS3::StorageS3( , compression_method(compression_method_) { context_global.getRemoteHostFilter().checkURL(uri_.uri); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); auto settings = context_.getStorageS3Settings().getSettings(uri.endpoint); Aws::Auth::AWSCredentials credentials(access_key_id_, secret_access_key_); diff --git a/src/Storages/StorageSet.cpp b/src/Storages/StorageSet.cpp index 58d5226c91a..f2946afbbfd 100644 --- a/src/Storages/StorageSet.cpp +++ b/src/Storages/StorageSet.cpp @@ -105,10 +105,10 @@ StorageSetOrJoinBase::StorageSetOrJoinBase( const Context & context_) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + 
storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index 3086e971121..f773ab2ca1e 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -237,10 +237,10 @@ StorageStripeLog::StorageStripeLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageStripeLog")) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 7a399f35c9c..9f19f44d16b 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -339,10 +339,10 @@ StorageTinyLog::StorageTinyLog( , file_checker(disk, table_path + "sizes.json") , log(&Poco::Logger::get("StorageTinyLog")) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); if (relative_path_.empty()) throw Exception("Storage " + getName() + " requires data path", ErrorCodes::INCORRECT_FILE_NAME); @@ -353,7 +353,7 @@ StorageTinyLog::StorageTinyLog( disk->createDirectories(table_path); } - for (const auto & col : metadata_.getColumns().getAllPhysical()) + for (const auto & col : storage_metadata.getColumns().getAllPhysical()) addFiles(col.name, *col.type); } diff --git a/src/Storages/StorageURL.cpp b/src/Storages/StorageURL.cpp index 802ad0571a8..fd9dde695ff 100644 --- a/src/Storages/StorageURL.cpp +++ b/src/Storages/StorageURL.cpp @@ -44,10 +44,10 @@ IStorageURLBase::IStorageURLBase( { context_global.getRemoteHostFilter().checkURL(uri); - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - metadata_.setConstraints(constraints_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); } namespace diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index 063cd3d5224..e99aeb52018 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -16,9 +16,9 @@ StorageValues::StorageValues( const NamesAndTypesList & virtuals_) : IStorage(table_id_), res_block(res_block_), virtuals(virtuals_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); - setInMemoryMetadata(metadata_); + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + setInMemoryMetadata(storage_metadata); } Pipes StorageValues::read( diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 006b1b3caec..21d353f6bed 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -38,8 +38,8 @@ StorageView::StorageView( const ColumnsDescription & columns_) : IStorage(table_id_) { - StorageInMemoryMetadata metadata_; - metadata_.setColumns(columns_); + 
StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(columns_);
 
     if (!query.select)
         throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY);
@@ -47,8 +47,8 @@ StorageView::StorageView(
     SelectQueryDescription description;
     description.inner_query = query.select->ptr();
 
-    metadata_.setSelectQuery(description);
-    setInMemoryMetadata(metadata_);
+    storage_metadata.setSelectQuery(description);
+    setInMemoryMetadata(storage_metadata);
 }
 
diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp
index 85d0f679708..6f5c8bc673c 100644
--- a/src/Storages/System/StorageSystemColumns.cpp
+++ b/src/Storages/System/StorageSystemColumns.cpp
@@ -26,8 +26,8 @@ namespace ErrorCodes
 StorageSystemColumns::StorageSystemColumns(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription(
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription(
     {
         { "database", std::make_shared<DataTypeString>() },
         { "table", std::make_shared<DataTypeString>() },
@@ -46,7 +46,7 @@ StorageSystemColumns::StorageSystemColumns(const std::string & name_)
         { "is_in_sampling_key", std::make_shared<DataTypeUInt8>() },
         { "compression_codec", std::make_shared<DataTypeString>() },
     }));
-    setInMemoryMetadata(metadata_);
+    setInMemoryMetadata(storage_metadata);
 }
 
diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp
index 7228651d140..c325df1251d 100644
--- a/src/Storages/System/StorageSystemDetachedParts.cpp
+++ b/src/Storages/System/StorageSystemDetachedParts.cpp
@@ -30,8 +30,8 @@ protected:
     explicit StorageSystemDetachedParts()
         : IStorage({"system", "detached_parts"})
     {
-        StorageInMemoryMetadata metadata_;
-        metadata_.setColumns(ColumnsDescription{{
+        StorageInMemoryMetadata storage_metadata;
+        storage_metadata.setColumns(ColumnsDescription{{
            {"database", std::make_shared<DataTypeString>()},
            {"table", std::make_shared<DataTypeString>()},
            {"partition_id", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>())},
@@ -42,7 +42,7 @@ protected:
            {"max_block_number", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeInt64>())},
            {"level", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt32>())}
         }});
-        setInMemoryMetadata(metadata_);
+        setInMemoryMetadata(storage_metadata);
     }
 
     Pipes read(
diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp
index fbcdd78988a..a270a96b8f7 100644
--- a/src/Storages/System/StorageSystemDisks.cpp
+++ b/src/Storages/System/StorageSystemDisks.cpp
@@ -14,8 +14,8 @@ namespace ErrorCodes
 StorageSystemDisks::StorageSystemDisks(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription(
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription(
     {
         {"name", std::make_shared<DataTypeString>()},
         {"path", std::make_shared<DataTypeString>()},
@@ -23,7 +23,7 @@ StorageSystemDisks::StorageSystemDisks(const std::string & name_)
         {"total_space", std::make_shared<DataTypeUInt64>()},
         {"keep_free_space", std::make_shared<DataTypeUInt64>()},
     }));
-    setInMemoryMetadata(metadata_);
+    setInMemoryMetadata(storage_metadata);
 }
 
 Pipes StorageSystemDisks::read(
diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp
index 50921c53fb6..404eb1af99c 100644
--- a/src/Storages/System/StorageSystemNumbers.cpp
+++ b/src/Storages/System/StorageSystemNumbers.cpp
@@ -118,9 +118,9 @@ private:
 StorageSystemNumbers::StorageSystemNumbers(const StorageID & table_id, bool multithreaded_, std::optional<UInt64> limit_, UInt64 offset_, bool even_distribution_)
     : IStorage(table_id), multithreaded(multithreaded_), even_distribution(even_distribution_), limit(limit_), offset(offset_)
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription({{"number", std::make_shared<DataTypeUInt64>()}}));
-    setInMemoryMetadata(metadata_);
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription({{"number", std::make_shared<DataTypeUInt64>()}}));
+    setInMemoryMetadata(storage_metadata);
 }
 
 Pipes StorageSystemNumbers::read(
diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp
index 20d61d5da1b..f6fba0d302c 100644
--- a/src/Storages/System/StorageSystemOne.cpp
+++ b/src/Storages/System/StorageSystemOne.cpp
@@ -14,9 +14,9 @@ namespace DB
 StorageSystemOne::StorageSystemOne(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription({{"dummy", std::make_shared<DataTypeUInt8>()}}));
-    setInMemoryMetadata(metadata_);
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription({{"dummy", std::make_shared<DataTypeUInt8>()}}));
+    setInMemoryMetadata(storage_metadata);
 }
 
diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp
index b48f8a3cb6b..928b146247d 100644
--- a/src/Storages/System/StorageSystemPartsBase.cpp
+++ b/src/Storages/System/StorageSystemPartsBase.cpp
@@ -278,9 +278,9 @@ StorageSystemPartsBase::StorageSystemPartsBase(std::string name_, NamesAndTypesL
     add_alias("bytes", "bytes_on_disk");
     add_alias("marks_size", "marks_bytes");
 
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(tmp_columns);
-    setInMemoryMetadata(metadata_);
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(tmp_columns);
+    setInMemoryMetadata(storage_metadata);
 }
 
 NamesAndTypesList StorageSystemPartsBase::getVirtuals() const
diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp
index f79e9138500..26076d09914 100644
--- a/src/Storages/System/StorageSystemReplicas.cpp
+++ b/src/Storages/System/StorageSystemReplicas.cpp
@@ -19,8 +19,8 @@ namespace DB
 StorageSystemReplicas::StorageSystemReplicas(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription({
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription({
         { "database", std::make_shared<DataTypeString>() },
         { "table", std::make_shared<DataTypeString>() },
         { "engine", std::make_shared<DataTypeString>() },
@@ -53,7 +53,7 @@ StorageSystemReplicas::StorageSystemReplicas(const std::string & name_)
         { "active_replicas", std::make_shared<DataTypeUInt8>() },
         { "zookeeper_exception", std::make_shared<DataTypeString>() },
     }));
-    setInMemoryMetadata(metadata_);
+    setInMemoryMetadata(storage_metadata);
 }
 
diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp
index a6092a28a47..e589955c861 100644
--- a/src/Storages/System/StorageSystemStoragePolicies.cpp
+++ b/src/Storages/System/StorageSystemStoragePolicies.cpp
@@ -17,8 +17,8 @@ namespace ErrorCodes
 StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(
         ColumnsDescription({
             {"policy_name", std::make_shared<DataTypeString>()},
             {"volume_name", std::make_shared<DataTypeString>()},
@@ -27,7 +27,7 @@ StorageSystemStoragePolicies::StorageSystemStoragePolicies(const std::string & n
             {"max_data_part_size", std::make_shared<DataTypeUInt64>()},
             {"move_factor", std::make_shared<DataTypeFloat32>()}
     }));
-    setInMemoryMetadata(metadata_);
+    setInMemoryMetadata(storage_metadata);
 }
 
 Pipes StorageSystemStoragePolicies::read(
diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp
index b7f029945d8..deb8f0551ea 100644
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@@ -33,8 +33,8 @@ namespace ErrorCodes
 StorageSystemTables::StorageSystemTables(const std::string & name_)
     : IStorage({"system", name_})
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription(
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription(
     {
         {"database", std::make_shared<DataTypeString>()},
         {"name", std::make_shared<DataTypeString>()},
@@ -56,7 +56,7 @@ StorageSystemTables::StorageSystemTables(const std::string & name_)
         {"total_rows", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>())},
         {"total_bytes", std::make_shared<DataTypeNullable>(std::make_shared<DataTypeUInt64>())},
     }));
-    setInMemoryMetadata(metadata_);
+    setInMemoryMetadata(storage_metadata);
 }
 
diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp
index d325840091e..9489ff249a5 100644
--- a/src/Storages/System/StorageSystemZeros.cpp
+++ b/src/Storages/System/StorageSystemZeros.cpp
@@ -84,9 +84,9 @@ private:
 StorageSystemZeros::StorageSystemZeros(const StorageID & table_id_, bool multithreaded_, std::optional<UInt64> limit_)
     : IStorage(table_id_), multithreaded(multithreaded_), limit(limit_)
 {
-    StorageInMemoryMetadata metadata_;
-    metadata_.setColumns(ColumnsDescription({{"zero", std::make_shared<DataTypeUInt8>()}}));
-    setInMemoryMetadata(metadata_);
+    StorageInMemoryMetadata storage_metadata;
+    storage_metadata.setColumns(ColumnsDescription({{"zero", std::make_shared<DataTypeUInt8>()}}));
+    setInMemoryMetadata(storage_metadata);
 }
 

From 4c0879ae300c40ca0d1aa20e5f4e4856fb96c401 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 19 Jun 2020 20:17:13 +0300
Subject: [PATCH 154/318] Better logging in storages

---
 src/Interpreters/InterpreterSelectQuery.cpp | 8 +++++---
 src/Storages/IStorage.h | 4 +++-
 src/Storages/Kafka/KafkaBlockInputStream.cpp | 4 ++--
 src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 2 +-
 src/Storages/MergeTree/MergeTreeReadPool.cpp | 2 +-
 .../MergeTree/MergeTreeReverseSelectProcessor.cpp | 2 +-
 src/Storages/MergeTree/MergeTreeSelectProcessor.cpp | 2 +-
 src/Storages/MergeTree/MergeTreeSequentialSource.cpp | 2 +-
 src/Storages/StorageBuffer.cpp | 3 +--
 src/Storages/StorageGenerateRandom.cpp | 2 +-
 src/Storages/StorageInMemoryMetadata.cpp | 9 +++++----
 src/Storages/StorageInMemoryMetadata.h | 4 ++--
 src/Storages/StorageJoin.cpp | 4 ++--
 src/Storages/StorageLog.cpp | 2 +-
 src/Storages/StorageMemory.cpp | 4 ++--
 src/Storages/StorageMerge.cpp | 4 ++--
 src/Storages/StorageMySQL.cpp | 2 +-
 src/Storages/StorageNull.h | 2 +-
 src/Storages/StorageStripeLog.cpp | 6 +++---
 src/Storages/StorageTinyLog.cpp | 2 +-
 src/Storages/StorageValues.cpp | 2 +-
 src/Storages/StorageView.cpp | 2 +-
 src/Storages/StorageXDBC.cpp | 4 ++--
 src/Storages/System/IStorageSystemOneBlock.h | 2 +-
 src/Storages/System/StorageSystemColumns.cpp | 2 +-
 src/Storages/System/StorageSystemDisks.cpp | 2 +-
 src/Storages/System/StorageSystemNumbers.cpp | 2 +-
 src/Storages/System/StorageSystemOne.cpp | 2 +-
 src/Storages/System/StorageSystemPartsBase.cpp | 2 +-
 src/Storages/System/StorageSystemReplicas.cpp | 2 +-
src/Storages/System/StorageSystemStoragePolicies.cpp | 2 +- src/Storages/System/StorageSystemTables.cpp | 2 +- src/Storages/System/StorageSystemZeros.cpp | 2 +- 33 files changed, 51 insertions(+), 47 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index e0d5adf92b8..187fdeb2b19 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -381,14 +381,15 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (storage) { - source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals()); + source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID()); /// Fix source_header for filter actions. if (row_policy_filter) { filter_info = std::make_shared(); filter_info->column_name = generateFilterActions(filter_info->actions, row_policy_filter, required_columns); - source_header = metadata_snapshot->getSampleBlockForColumns(filter_info->actions->getRequiredColumns(), storage->getVirtuals()); + source_header = metadata_snapshot->getSampleBlockForColumns( + filter_info->actions->getRequiredColumns(), storage->getVirtuals(), storage->getStorageID()); } } @@ -1344,7 +1345,8 @@ void InterpreterSelectQuery::executeFetchColumns( if (pipes.empty()) { - Pipe pipe(std::make_shared(metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals()))); + Pipe pipe(std::make_shared( + metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID()))); if (query_info.prewhere_info) { diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index e980bad889c..6abf310442e 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -81,7 +81,9 @@ class IStorage : public std::enable_shared_from_this, public TypePromo public: IStorage() = delete; /// Storage metadata can be set separately in setInMemoryMetadata method - explicit IStorage(StorageID storage_id_) : storage_id(std::move(storage_id_)), metadata(std::make_unique()) {} //-V730 + explicit IStorage(StorageID storage_id_) + : storage_id(std::move(storage_id_)) + , metadata(std::make_unique()) {} //-V730 virtual ~IStorage() = default; IStorage(const IStorage &) = delete; diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 847b0d915cd..9634cded7c8 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -27,7 +27,7 @@ KafkaBlockInputStream::KafkaBlockInputStream( , commit_in_suffix(commit_in_suffix_) , non_virtual_header(metadata_snapshot->getSampleBlockNonMaterialized()) , virtual_header(metadata_snapshot->getSampleBlockForColumns( - {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"}, storage.getVirtuals())) + {"_topic", "_key", "_offset", "_partition", "_timestamp", "_timestamp_ms", "_headers.name", "_headers.value"}, storage.getVirtuals(), storage.getStorageID())) { } @@ -44,7 +44,7 @@ KafkaBlockInputStream::~KafkaBlockInputStream() Block KafkaBlockInputStream::getHeader() const { - return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals()); + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()); } void KafkaBlockInputStream::readPrefixImpl() diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp 
b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 7f7fd203297..fa91a9190e5 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -224,7 +224,7 @@ Pipes MergeTreeDataSelectExecutor::readFromParts( std::multiset part_values = VirtualColumnUtils::extractSingleValueFromBlock(virtual_columns_block, "_part"); - metadata_snapshot->check(real_column_names, data.getVirtuals()); + metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID()); const Settings & settings = context.getSettingsRef(); const auto & primary_key = metadata_snapshot->getPrimaryKey(); diff --git a/src/Storages/MergeTree/MergeTreeReadPool.cpp b/src/Storages/MergeTree/MergeTreeReadPool.cpp index fdf3908d21e..d78f72d1dd0 100644 --- a/src/Storages/MergeTree/MergeTreeReadPool.cpp +++ b/src/Storages/MergeTree/MergeTreeReadPool.cpp @@ -150,7 +150,7 @@ MarkRanges MergeTreeReadPool::getRestMarks(const IMergeTreeDataPart & part, cons Block MergeTreeReadPool::getHeader() const { - return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals()); + return metadata_snapshot->getSampleBlockForColumns(column_names, data.getVirtuals(), data.getStorageID()); } void MergeTreeReadPool::profileFeedback(const ReadBufferFromFileBase::ProfileInfo info) diff --git a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp index b71c343614b..1e6352824ef 100644 --- a/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeReverseSelectProcessor.cpp @@ -50,7 +50,7 @@ MergeTreeReverseSelectProcessor::MergeTreeReverseSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - replaceTypes(metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals()), owned_data_part_), + replaceTypes(metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), owned_data_part_), storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, reader_settings_, use_uncompressed_cache_, virt_column_names_}, diff --git a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp index b46b414bfe8..a9ba6f7836b 100644 --- a/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp +++ b/src/Storages/MergeTree/MergeTreeSelectProcessor.cpp @@ -30,7 +30,7 @@ MergeTreeSelectProcessor::MergeTreeSelectProcessor( bool quiet) : MergeTreeBaseSelectProcessor{ - metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals()), + metadata_snapshot_->getSampleBlockForColumns(required_columns_, storage_.getVirtuals(), storage_.getStorageID()), storage_, metadata_snapshot_, prewhere_info_, max_block_size_rows_, preferred_block_size_bytes_, preferred_max_column_in_block_size_bytes_, reader_settings_, use_uncompressed_cache_, virt_column_names_}, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index f8e31db2b5a..edd63aadd29 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -17,7 +17,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) - : 
SourceWithProgress(metadata_snapshot_->getSampleBlockForColumns(columns_to_read_, storage_.getVirtuals())) + : SourceWithProgress(metadata_snapshot_->getSampleBlockForColumns(columns_to_read_, storage_.getVirtuals(), storage_.getStorageID())) , storage(storage_) , metadata_snapshot(metadata_snapshot_) , data_part(std::move(data_part_)) diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index bd3945c26e0..e0bd19feba9 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -90,7 +90,7 @@ class BufferSource : public SourceWithProgress public: BufferSource(const Names & column_names_, StorageBuffer::Buffer & buffer_, const StorageBuffer & storage, const StorageMetadataPtr & metadata_snapshot) : SourceWithProgress( - metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals())) + metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID())) , column_names(column_names_.begin(), column_names_.end()) , buffer(buffer_) {} @@ -468,7 +468,6 @@ bool StorageBuffer::mayBenefitFromIndexForIn( if (destination.get() == this) throw Exception("Destination table is myself. Read will cause infinite loop.", ErrorCodes::INFINITE_LOOP); - /// TODO alesap (check destination metadata) return destination->mayBenefitFromIndexForIn(left_in_operand, query_context, destination->getInMemoryMetadataPtr()); } diff --git a/src/Storages/StorageGenerateRandom.cpp b/src/Storages/StorageGenerateRandom.cpp index 6d923f7678c..1f227265d19 100644 --- a/src/Storages/StorageGenerateRandom.cpp +++ b/src/Storages/StorageGenerateRandom.cpp @@ -436,7 +436,7 @@ Pipes StorageGenerateRandom::read( size_t max_block_size, unsigned num_streams) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Pipes pipes; pipes.reserve(num_streams); diff --git a/src/Storages/StorageInMemoryMetadata.cpp b/src/Storages/StorageInMemoryMetadata.cpp index e39dff6472e..b7f4565a55a 100644 --- a/src/Storages/StorageInMemoryMetadata.cpp +++ b/src/Storages/StorageInMemoryMetadata.cpp @@ -249,7 +249,8 @@ Block StorageInMemoryMetadata::getSampleBlock() const return res; } -Block StorageInMemoryMetadata::getSampleBlockForColumns(const Names & column_names, const NamesAndTypesList & virtuals) const +Block StorageInMemoryMetadata::getSampleBlockForColumns( + const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const { Block res; @@ -274,7 +275,7 @@ Block StorageInMemoryMetadata::getSampleBlockForColumns(const Names & column_nam else { throw Exception( - "Column " + backQuote(name) + " not found in table " /*+ getStorageID().getNameForLogs() TODO(alesap)*/, + "Column " + backQuote(name) + " not found in table " + storage_id.getNameForLogs(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK); } } @@ -442,7 +443,7 @@ namespace } } -void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals) const +void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const { NamesAndTypesList available_columns = getColumns().getAllPhysical(); available_columns.insert(available_columns.end(), virtuals.begin(), virtuals.end()); @@ -459,7 +460,7 @@ void StorageInMemoryMetadata::check(const Names & column_names, const NamesAndTy { if (columns_map.end() == columns_map.find(name)) throw Exception( - "There is no column with name " + backQuote(name) + " in 
table " + /* TODO alesap getStorageID().getNameForLogs() +*/ ". There are columns: " + list_of_columns, + "There is no column with name " + backQuote(name) + " in table " + storage_id.getNameForLogs() + ". There are columns: " + list_of_columns, ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); if (unique_names.end() != unique_names.find(name)) diff --git a/src/Storages/StorageInMemoryMetadata.h b/src/Storages/StorageInMemoryMetadata.h index 83f9180dcbb..1d392c2d228 100644 --- a/src/Storages/StorageInMemoryMetadata.h +++ b/src/Storages/StorageInMemoryMetadata.h @@ -110,7 +110,7 @@ struct StorageInMemoryMetadata Block getSampleBlockNonMaterialized() const; /// ordinary. Block getSampleBlockWithVirtuals(const NamesAndTypesList & virtuals) const; /// ordinary + materialized + virtuals. Block getSampleBlockForColumns( - const Names & column_names, const NamesAndTypesList & virtuals) const; /// ordinary + materialized + aliases + virtuals. + const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; /// ordinary + materialized + aliases + virtuals. /// Returns structure with partition key. const KeyDescription & getPartitionKey() const; @@ -176,7 +176,7 @@ struct StorageInMemoryMetadata /// Verify that all the requested names are in the table and are set correctly: /// list of names is not empty and the names do not repeat. - void check(const Names & column_names, const NamesAndTypesList & virtuals) const; + void check(const Names & column_names, const NamesAndTypesList & virtuals, const StorageID & storage_id) const; /// Check that all the requested names are in the table and have the correct types. void check(const NamesAndTypesList & columns) const; diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 21e4370c28b..af1a8bf10d7 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -445,10 +445,10 @@ Pipes StorageJoin::read( size_t max_block_size, unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Pipes pipes; - pipes.emplace_back(std::make_shared(*join, max_block_size, metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); + pipes.emplace_back(std::make_shared(*join, max_block_size, metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()))); return pipes; } diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index a655373ce6e..39fa1d1af70 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -587,7 +587,7 @@ Pipes StorageLog::read( size_t max_block_size, unsigned num_streams) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); loadMarks(); NamesAndTypesList all_columns = Nested::collect(metadata_snapshot->getColumns().getAllPhysical().addTypes(column_names)); diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index c77fddb2372..44413caaa57 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -28,7 +28,7 @@ public: BlocksList::iterator end_, const StorageMemory & storage, const StorageMetadataPtr & metadata_snapshot) - : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals())) + : SourceWithProgress(metadata_snapshot->getSampleBlockForColumns(column_names_, storage.getVirtuals(), storage.getStorageID())) , 
column_names(std::move(column_names_)) , begin(begin_) , end(end_) @@ -110,7 +110,7 @@ Pipes StorageMemory::read( size_t /*max_block_size*/, unsigned num_streams) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); std::lock_guard lock(mutex); diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 8a28387d24d..0ef4e415ff3 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -426,7 +426,7 @@ Block StorageMerge::getQueryHeader( { case QueryProcessingStage::FetchColumns: { - Block header = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()); + Block header = metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); if (query_info.prewhere_info) { query_info.prewhere_info->prewhere_actions->execute(header); @@ -438,7 +438,7 @@ Block StorageMerge::getQueryHeader( case QueryProcessingStage::WithMergeableState: case QueryProcessingStage::Complete: return InterpreterSelectQuery( - query_info.query, context, std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals())), + query_info.query, context, std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID())), SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); } throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR); diff --git a/src/Storages/StorageMySQL.cpp b/src/Storages/StorageMySQL.cpp index 919acd79fdd..6f57ea196d0 100644 --- a/src/Storages/StorageMySQL.cpp +++ b/src/Storages/StorageMySQL.cpp @@ -72,7 +72,7 @@ Pipes StorageMySQL::read( size_t max_block_size_, unsigned) { - metadata_snapshot->check(column_names_, getVirtuals()); + metadata_snapshot->check(column_names_, getVirtuals(), getStorageID()); String query = transformQueryForExternalDatabase( query_info_, metadata_snapshot->getColumns().getOrdinary(), diff --git a/src/Storages/StorageNull.h b/src/Storages/StorageNull.h index e79174c2565..072a5e3bc32 100644 --- a/src/Storages/StorageNull.h +++ b/src/Storages/StorageNull.h @@ -33,7 +33,7 @@ public: { Pipes pipes; pipes.emplace_back( - std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()))); + std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()))); return pipes; } diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index f773ab2ca1e..e55cc190f80 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -60,7 +60,7 @@ public: IndexForNativeFormat::Blocks::const_iterator index_end) { if (index_begin == index_end) - return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals()); + return metadata_snapshot->getSampleBlockForColumns(column_names, storage.getVirtuals(), storage.getStorageID()); /// TODO: check if possible to always return storage.getSampleBlock() @@ -276,7 +276,7 @@ Pipes StorageStripeLog::read( { std::shared_lock lock(rwlock); - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); NameSet column_names_set(column_names.begin(), column_names.end()); @@ -285,7 +285,7 @@ Pipes StorageStripeLog::read( String index_file = table_path + "index.mrk"; if (!disk->exists(index_file)) { - pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, 
getVirtuals()))); + pipes.emplace_back(std::make_shared(metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()))); return pipes; } diff --git a/src/Storages/StorageTinyLog.cpp b/src/Storages/StorageTinyLog.cpp index 9f19f44d16b..ef8c30cacbe 100644 --- a/src/Storages/StorageTinyLog.cpp +++ b/src/Storages/StorageTinyLog.cpp @@ -404,7 +404,7 @@ Pipes StorageTinyLog::read( const size_t max_block_size, const unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Pipes pipes; diff --git a/src/Storages/StorageValues.cpp b/src/Storages/StorageValues.cpp index e99aeb52018..d5585edde3b 100644 --- a/src/Storages/StorageValues.cpp +++ b/src/Storages/StorageValues.cpp @@ -30,7 +30,7 @@ Pipes StorageValues::read( size_t /*max_block_size*/, unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Pipes pipes; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 21d353f6bed..6e66b770486 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -88,7 +88,7 @@ Pipes StorageView::read( { return std::make_shared( header, metadata_snapshot->getSampleBlockForColumns( - column_names, getVirtuals()), ConvertingTransform::MatchColumnsMode::Name); + column_names, getVirtuals(), getStorageID()), ConvertingTransform::MatchColumnsMode::Name); }); pipes = std::move(pipeline).getPipes(); diff --git a/src/Storages/StorageXDBC.cpp b/src/Storages/StorageXDBC.cpp index 05cf4ed5abf..fc4bbefe74c 100644 --- a/src/Storages/StorageXDBC.cpp +++ b/src/Storages/StorageXDBC.cpp @@ -94,7 +94,7 @@ Pipes StorageXDBC::read( size_t max_block_size, unsigned num_streams) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); bridge_helper->startBridgeSync(); return IStorageURLBase::read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); @@ -130,7 +130,7 @@ BlockOutputStreamPtr StorageXDBC::write(const ASTPtr & /*query*/, const StorageM Block StorageXDBC::getHeaderBlock(const Names & column_names, const StorageMetadataPtr & metadata_snapshot) const { - return metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals()); + return metadata_snapshot->getSampleBlockForColumns(column_names, getVirtuals(), getStorageID()); } std::string StorageXDBC::getName() const diff --git a/src/Storages/System/IStorageSystemOneBlock.h b/src/Storages/System/IStorageSystemOneBlock.h index b3a2a6fe53b..7c2ef85f158 100644 --- a/src/Storages/System/IStorageSystemOneBlock.h +++ b/src/Storages/System/IStorageSystemOneBlock.h @@ -37,7 +37,7 @@ public: size_t /*max_block_size*/, unsigned /*num_streams*/) override { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Block sample_block = metadata_snapshot->getSampleBlock(); MutableColumns res_columns = sample_block.cloneEmptyColumns(); diff --git a/src/Storages/System/StorageSystemColumns.cpp b/src/Storages/System/StorageSystemColumns.cpp index 6f5c8bc673c..beb01bc6192 100644 --- a/src/Storages/System/StorageSystemColumns.cpp +++ b/src/Storages/System/StorageSystemColumns.cpp @@ -249,7 +249,7 @@ Pipes StorageSystemColumns::read( const size_t max_block_size, const unsigned /*num_streams*/) { - 
metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); /// Create a mask of what columns are needed in the result. diff --git a/src/Storages/System/StorageSystemDisks.cpp b/src/Storages/System/StorageSystemDisks.cpp index a270a96b8f7..cf00bbb5254 100644 --- a/src/Storages/System/StorageSystemDisks.cpp +++ b/src/Storages/System/StorageSystemDisks.cpp @@ -35,7 +35,7 @@ Pipes StorageSystemDisks::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); MutableColumnPtr col_name = ColumnString::create(); MutableColumnPtr col_path = ColumnString::create(); diff --git a/src/Storages/System/StorageSystemNumbers.cpp b/src/Storages/System/StorageSystemNumbers.cpp index 404eb1af99c..c70446ddeba 100644 --- a/src/Storages/System/StorageSystemNumbers.cpp +++ b/src/Storages/System/StorageSystemNumbers.cpp @@ -132,7 +132,7 @@ Pipes StorageSystemNumbers::read( size_t max_block_size, unsigned num_streams) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); if (limit && *limit < max_block_size) { diff --git a/src/Storages/System/StorageSystemOne.cpp b/src/Storages/System/StorageSystemOne.cpp index f6fba0d302c..3329cbb035e 100644 --- a/src/Storages/System/StorageSystemOne.cpp +++ b/src/Storages/System/StorageSystemOne.cpp @@ -29,7 +29,7 @@ Pipes StorageSystemOne::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); Block header{ColumnWithTypeAndName( DataTypeUInt8().createColumn(), diff --git a/src/Storages/System/StorageSystemPartsBase.cpp b/src/Storages/System/StorageSystemPartsBase.cpp index 928b146247d..168b305605d 100644 --- a/src/Storages/System/StorageSystemPartsBase.cpp +++ b/src/Storages/System/StorageSystemPartsBase.cpp @@ -41,7 +41,7 @@ bool StorageSystemPartsBase::hasStateColumn(const Names & column_names, const St /// Do not check if only _state column is requested if (!(has_state_column && real_column_names.empty())) - metadata_snapshot->check(real_column_names, {}); + metadata_snapshot->check(real_column_names, {}, getStorageID()); return has_state_column; } diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 26076d09914..27a9cd0c4bb 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -66,7 +66,7 @@ Pipes StorageSystemReplicas::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); const auto access = context.getAccess(); const bool check_access_for_databases = !access->isGranted(AccessType::SHOW_TABLES); diff --git a/src/Storages/System/StorageSystemStoragePolicies.cpp b/src/Storages/System/StorageSystemStoragePolicies.cpp index e589955c861..ec771ec2421 100644 --- a/src/Storages/System/StorageSystemStoragePolicies.cpp +++ b/src/Storages/System/StorageSystemStoragePolicies.cpp @@ -39,7 +39,7 @@ Pipes StorageSystemStoragePolicies::read( const size_t /*max_block_size*/, const unsigned /*num_streams*/) { - metadata_snapshot->check(column_names, getVirtuals()); + 
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
 
     MutableColumnPtr col_policy_name = ColumnString::create();
     MutableColumnPtr col_volume_name = ColumnString::create();
diff --git a/src/Storages/System/StorageSystemTables.cpp b/src/Storages/System/StorageSystemTables.cpp
index deb8f0551ea..4b78416dabb 100644
--- a/src/Storages/System/StorageSystemTables.cpp
+++ b/src/Storages/System/StorageSystemTables.cpp
@@ -458,7 +458,7 @@ Pipes StorageSystemTables::read(
     const size_t max_block_size,
     const unsigned /*num_streams*/)
 {
-    metadata_snapshot->check(column_names, getVirtuals());
+    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
 
     /// Create a mask of what columns are needed in the result.
 
diff --git a/src/Storages/System/StorageSystemZeros.cpp b/src/Storages/System/StorageSystemZeros.cpp
index 9489ff249a5..3839439794b 100644
--- a/src/Storages/System/StorageSystemZeros.cpp
+++ b/src/Storages/System/StorageSystemZeros.cpp
@@ -99,7 +99,7 @@ Pipes StorageSystemZeros::read(
     size_t max_block_size,
     unsigned num_streams)
 {
-    metadata_snapshot->check(column_names, getVirtuals());
+    metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
 
     bool use_multiple_streams = multithreaded;
 
From 85070ea2fd5e6a42d6ef5c4d3ab3aff909ce9652 Mon Sep 17 00:00:00 2001
From: alesapin
Date: Fri, 19 Jun 2020 22:41:24 +0300
Subject: [PATCH 155/318] Remove unused variable

---
 src/Interpreters/InterpreterSelectQuery.cpp | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp
index 187fdeb2b19..c9715523174 100644
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@@ -497,9 +497,6 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
     bool second_stage = from_stage <= QueryProcessingStage::WithMergeableState
         && options.to_stage > QueryProcessingStage::WithMergeableState;
 
-    Names columns_required_for_sampling;
-    Names columns_required_for_;
-
     analysis_result = ExpressionAnalysisResult(
         *query_analyzer,
         metadata_snapshot,

From 85974dd699b53084bb1a140ddea1ec18494685c1 Mon Sep 17 00:00:00 2001
From: alexey-milovidov
Date: Fri, 19 Jun 2020 22:56:18 +0300
Subject: [PATCH 156/318] Update 00816_long_concurrent_alter_column.sh

---
 tests/queries/0_stateless/00816_long_concurrent_alter_column.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
index 3ed0c6e1a6a..965408065cf 100755
--- a/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
+++ b/tests/queries/0_stateless/00816_long_concurrent_alter_column.sh
@@ -59,7 +59,6 @@ wait
 
 echo "DROP TABLE concurrent_alter_column" | ${CLICKHOUSE_CLIENT}
 
-sleep 7
 
 # Check for deadlocks
 echo "SELECT * FROM system.processes WHERE query_id LIKE 'alter%'" | ${CLICKHOUSE_CLIENT}

From 7a76abeb2a0c55bb50d6b3ee87ba1c8732d361af Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Fri, 19 Jun 2020 23:13:07 +0300
Subject: [PATCH 157/318] distinct combinator for function of multiple arguments
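
Previously the -Distinct combinator only handled a single argument. Now it
works for any number of arguments: a single numeric argument is deduplicated
through a typed hash set, while every other case serializes the arguments of
a row into an arena and deduplicates them as one composite key, so the nested
function is fed each distinct tuple of arguments exactly once (see
AggregateFunctionDistinct::insertResultInto, which replays the set through
addBatchSinglePlace).

Example queries (illustrative only, with a hypothetical table t; the
committed tests live in tests/queries/0_stateless/01259_combinator_distinct.sql):

    SELECT sumDistinct(number % 13) FROM numbers(1000);  -- one argument
    SELECT corrDistinct(x, y) FROM t;                    -- two arguments: distinct (x, y) pairs
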
---
 .../AggregateFunctionDistinct.cpp | 19 +-
 .../AggregateFunctionDistinct.h | 312 ++++++++++--------
 src/AggregateFunctions/Helpers.h | 13 +
 src/AggregateFunctions/KeyHolderHelpers.h | 2 +-
 .../01259_combinator_distinct.reference | 7 +
 .../0_stateless/01259_combinator_distinct.sql | 6 +-
 6 files changed, 214 insertions(+), 145 deletions(-)

diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.cpp b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
index c77e977b0fa..1b1e0b872cf 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.cpp
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.cpp
@@ -36,21 +36,24 @@ public:
         AggregateFunctionPtr res;
         if (arguments.size() == 1)
         {
-            res = AggregateFunctionPtr(createWithNumericType<AggregateFunctionDistinctSingleNumericImpl>(*arguments[0], nested_function, arguments));
+            res.reset(createWithNumericType<
+                AggregateFunctionDistinct,
+                AggregateFunctionDistinctSingleNumericData>(*arguments[0], nested_function, arguments));
+
             if (res)
                 return res;
 
             if (arguments[0]->isValueUnambiguouslyRepresentedInContiguousMemoryRegion())
-                return std::make_shared<AggregateFunctionDistinctSingleGenericImpl<true>>(nested_function, arguments);
+                return std::make_shared<
+                    AggregateFunctionDistinct<
+                        AggregateFunctionDistinctSingleGenericData<true>>>(nested_function, arguments);
             else
-                return std::make_shared<AggregateFunctionDistinctSingleGenericImpl<false>>(nested_function, arguments);
+                return std::make_shared<
+                    AggregateFunctionDistinct<
+                        AggregateFunctionDistinctSingleGenericData<false>>>(nested_function, arguments);
         }
 
-        if (!res)
-            throw Exception("Illegal type " /* + argument_type->getName() + */
-                " of argument for aggregate function " + nested_function->getName() + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-
-        return res;
+        return std::make_shared<AggregateFunctionDistinct<AggregateFunctionDistinctMultipleGenericData>>(nested_function, arguments);
     }
 };
diff --git a/src/AggregateFunctions/AggregateFunctionDistinct.h b/src/AggregateFunctions/AggregateFunctionDistinct.h
index 5c663bb6441..cb5fd526f6d 100644
--- a/src/AggregateFunctions/AggregateFunctionDistinct.h
+++ b/src/AggregateFunctions/AggregateFunctionDistinct.h
@@ -6,8 +6,11 @@
 #include
 #include
 #include
-#include
+#include
+#include
+
+#include
 
 namespace DB
 {
@@ -17,21 +20,148 @@ namespace ErrorCodes
 {
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
 }
+
 template <typename T>
 struct AggregateFunctionDistinctSingleNumericData
 {
     /// When creating, the hash table must be small.
     using Set = HashSetWithStackMemory<T, DefaultHash<T>, 4>;
-    Set value;
+    using Self = AggregateFunctionDistinctSingleNumericData<T>;
+    Set set;
+
+    void add(const IColumn ** columns, size_t /* columns_num */, size_t row_num, Arena *)
+    {
+        const auto & vec = assert_cast<const ColumnVector<T> &>(*columns[0]).getData();
+        set.insert(vec[row_num]);
+    }
+
+    void merge(const Self & rhs, Arena *)
+    {
+        set.merge(rhs.set);
+    }
+
+    void serialize(WriteBuffer & buf) const
+    {
+        set.write(buf);
+    }
+
+    void deserialize(ReadBuffer & buf, Arena *)
+    {
+        set.read(buf);
+    }
+
+    MutableColumns getArguments(const DataTypes & argument_types) const
+    {
+        MutableColumns argument_columns;
+        argument_columns.emplace_back(argument_types[0]->createColumn());
+        for (const auto & elem : set)
+            argument_columns[0]->insert(elem.getValue());
+
+        return argument_columns;
+    }
 };
 
-template <typename Data, typename Derived>
-class AggregateFunctionDistinctBase : public IAggregateFunctionDataHelper<Data, Derived>
+struct AggregateFunctionDistinctGenericData
 {
-protected:
-    static constexpr size_t prefix_size = sizeof(Data);
+    /// When creating, the hash table must be small.
+    using Set = HashSetWithSavedHashWithStackMemory<StringRef, StringRefHash, 4>;
+    using Self = AggregateFunctionDistinctGenericData;
+    Set set;
+
+    void merge(const Self & rhs, Arena * arena)
+    {
+        Set::LookupResult it;
+        bool inserted;
+        for (const auto & elem : rhs.set)
+            set.emplace(ArenaKeyHolder{elem.getValue(), *arena}, it, inserted);
+    }
+
+    void serialize(WriteBuffer & buf) const
+    {
+        writeVarUInt(set.size(), buf);
+        for (const auto & elem : set)
+            writeStringBinary(elem.getValue(), buf);
+    }
+
+    void deserialize(ReadBuffer & buf, Arena * arena)
+    {
+        size_t size;
+        readVarUInt(size, buf);
+        for (size_t i = 0; i < size; ++i)
+            set.insert(readStringBinaryInto(*arena, buf));
+    }
+};
+
+template <bool is_plain_column>
+struct AggregateFunctionDistinctSingleGenericData : public AggregateFunctionDistinctGenericData
+{
+    void add(const IColumn ** columns, size_t /* columns_num */, size_t row_num, Arena * arena)
+    {
+        Set::LookupResult it;
+        bool inserted;
+        auto key_holder = getKeyHolder<is_plain_column>(*columns[0], row_num, *arena);
+        set.emplace(key_holder, it, inserted);
+    }
+
+    MutableColumns getArguments(const DataTypes & argument_types) const
+    {
+        MutableColumns argument_columns;
+        argument_columns.emplace_back(argument_types[0]->createColumn());
+        for (const auto & elem : set)
+            deserializeAndInsert<is_plain_column>(elem.getValue(), *argument_columns[0]);
+
+        return argument_columns;
+    }
+};
+
+struct AggregateFunctionDistinctMultipleGenericData : public AggregateFunctionDistinctGenericData
+{
+    void add(const IColumn ** columns, size_t columns_num, size_t row_num, Arena * arena)
+    {
+        const char * begin = nullptr;
+        StringRef value(begin, 0);
+        SipHash hash;
+        for (size_t i = 0; i < columns_num; ++i)
+        {
+            columns[i]->updateHashWithValue(row_num, hash);
+            auto cur_ref = columns[i]->serializeValueIntoArena(row_num, *arena, begin);
+            value.data = cur_ref.data - value.size;
+            value.size += cur_ref.size;
+        }
+
+        Set::LookupResult it;
+        bool inserted;
+        auto key_holder = SerializedKeyHolder{value, *arena};
+        set.emplace(key_holder, it, inserted);
+    }
+
+    MutableColumns getArguments(const DataTypes & argument_types) const
+    {
+        MutableColumns argument_columns(argument_types.size());
+        for (size_t i = 0; i < argument_types.size(); ++i)
+            argument_columns[i] = argument_types[i]->createColumn();
+
+        for (const auto & elem : set)
+        {
+            const char * begin = elem.getValue().data;
+            for (auto & column : argument_columns)
+                begin = column->deserializeAndInsertFromArena(begin);
+        }
+
+        return argument_columns;
+    }
+};
+
+/** Adaptor for aggregate functions.
+  * Adds the -Distinct suffix to an aggregate function.
+**/
+template <typename Data>
+class AggregateFunctionDistinct : public IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct<Data>>
+{
+private:
+    static constexpr auto prefix_size = sizeof(Data);
     AggregateFunctionPtr nested_func;
-    size_t num_arguments;
+    size_t arguments_num;
 
     AggregateDataPtr getNestedPlace(AggregateDataPtr place) const noexcept
     {
@@ -44,6 +174,46 @@ protected:
     }
 
 public:
+    AggregateFunctionDistinct(AggregateFunctionPtr nested_func_, const DataTypes & arguments)
+        : IAggregateFunctionDataHelper<Data, AggregateFunctionDistinct>(arguments, nested_func_->getParameters())
+        , nested_func(nested_func_)
+        , arguments_num(arguments.size())
+    {
+        if (arguments.empty())
+            throw Exception("Aggregate function " + getName() + " requires at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+    }
+
+    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
+    {
+        this->data(place).add(columns, arguments_num, row_num, arena);
+    }
+
+    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
+    {
+        this->data(place).merge(this->data(rhs), arena);
+    }
+
+    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
+    {
+        this->data(place).serialize(buf);
+    }
+
+    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
+    {
+        this->data(place).deserialize(buf, arena);
+    }
+
+    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
+    {
+        auto arguments = this->data(place).getArguments(this->argument_types);
+        ColumnRawPtrs arguments_raw(arguments.size());
+        for (size_t i = 0; i < arguments.size(); ++i)
+            arguments_raw[i] = arguments[i].get();
+
+        assert(!arguments.empty());
+        this->nested_func->addBatchSinglePlace(arguments[0]->size(), this->getNestedPlace(place), arguments_raw.data(), arena);
+        this->nested_func->insertResultInto(this->getNestedPlace(place), to, arena);
+    }
 
     size_t sizeOfData() const override
     {
@@ -76,134 +246,6 @@ public:
     {
         return true;
     }
-
-    AggregateFunctionDistinctBase(AggregateFunctionPtr nested, const DataTypes & arguments)
-        : IAggregateFunctionDataHelper<Data, Derived>(arguments, {})
-        , nested_func(nested), num_arguments(arguments.size())
-    {
-        if (arguments.empty())
-            throw Exception("Aggregate function " + getName() + " require at least one argument", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
-    }
-};
-
-
-/** Adaptor for aggregate functions.
- * Adding -Distinct suffix to aggregate function
-**/
-template <typename T>
-class AggregateFunctionDistinctSingleNumericImpl final
-    : public AggregateFunctionDistinctBase<AggregateFunctionDistinctSingleNumericData<T>,
-        AggregateFunctionDistinctSingleNumericImpl<T>>
-{
-public:
-
-    AggregateFunctionDistinctSingleNumericImpl(AggregateFunctionPtr nested, const DataTypes & arguments)
-        : AggregateFunctionDistinctBase<
-            AggregateFunctionDistinctSingleNumericData<T>,
-            AggregateFunctionDistinctSingleNumericImpl<T>>(nested, arguments) {}
-
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override
-    {
-        const auto & vec = assert_cast<const ColumnVector<T> &>(*columns[0]).getData();
-        this->data(place).value.insert(vec[row_num]);
-    }
-
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena *) const override
-    {
-        this->data(place).value.merge(this->data(rhs).value);
-    }
-
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
-    {
-        this->data(place).value.write(buf);
-    }
-
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override
-    {
-        this->data(place).value.read(buf);
-    }
-
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
-    {
-        const auto & set = this->data(place).value;
-        auto arguments = this->argument_types[0]->createColumn();
-        for (const auto & elem : set)
-            arguments->insert(elem.getValue());
-
-        const auto * arguments_ptr = arguments.get();
-        this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena);
-        this->nested_func->insertResultInto(this->getNestedPlace(place), to, arena);
-    }
-};
-
-struct AggregateFunctionDistinctSingleGenericData
-{
-    using Set = HashSetWithSavedHashWithStackMemory<StringRef, StringRefHash, 4>;
-    Set value;
-};
-
-template <bool is_plain_column>
-class AggregateFunctionDistinctSingleGenericImpl final
-    : public AggregateFunctionDistinctBase<AggregateFunctionDistinctSingleGenericData, AggregateFunctionDistinctSingleGenericImpl<is_plain_column>>
-{
-public:
-    using Data = AggregateFunctionDistinctSingleGenericData;
-
-    AggregateFunctionDistinctSingleGenericImpl(AggregateFunctionPtr nested, const DataTypes & arguments)
-        : AggregateFunctionDistinctBase<
-            AggregateFunctionDistinctSingleGenericData,
-            AggregateFunctionDistinctSingleGenericImpl<is_plain_column>>(nested, arguments) {}
-
-    void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena * arena) const override
-    {
-        auto & set = this->data(place).value;
-
-        Data::Set::LookupResult it;
-        bool inserted;
-        auto key_holder = getKeyHolder<is_plain_column>(*columns[0], row_num, *arena);
-        set.emplace(key_holder, it, inserted);
-    }
-
-    void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override
-    {
-        auto & cur_set = this->data(place).value;
-        const auto & rhs_set = this->data(rhs).value;
-
-        Data::Set::LookupResult it;
-        bool inserted;
-        for (const auto & elem : rhs_set)
-            cur_set.emplace(ArenaKeyHolder{elem.getValue(), *arena}, it, inserted);
-    }
-
-    void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override
-    {
-        const auto & set = this->data(place).value;
-        writeVarUInt(set.size(), buf);
-        for (const auto & elem : set)
-            writeStringBinary(elem.getValue(), buf);
-    }
-
-    void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena * arena) const override
-    {
-        auto & set = this->data(place).value;
-        size_t size;
-        readVarUInt(size, buf);
-        for (size_t i = 0; i < size; ++i)
-            set.insert(readStringBinaryInto(*arena, buf));
-    }
-
-    void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const override
-    {
-        const auto & set = this->data(place).value;
-        auto arguments = this->argument_types[0]->createColumn();
-        for (const auto & elem : set)
-            deserializeAndInsert<is_plain_column>(elem.getValue(), *arguments);
-
-        const auto * arguments_ptr = arguments.get();
-        this->nested_func->addBatchSinglePlace(arguments->size(), this->getNestedPlace(place), &arguments_ptr, arena);
-        this->nested_func->insertResultInto(this->getNestedPlace(place), to, arena);
-    }
-};
 
 }
diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h
index 6c03d25e0b1..bc24e53a763 100644
--- a/src/AggregateFunctions/Helpers.h
+++ b/src/AggregateFunctions/Helpers.h
@@ -33,6 +33,19 @@ static IAggregateFunction * createWithNumericType(const IDataType & argument_typ
     return nullptr;
 }
 
+template