From fa2afb292c1a2bbf08cb186edf2017f71922418a Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Mon, 16 May 2022 22:48:37 +0800
Subject: [PATCH 001/121] Improve performance of column replicate for the
 UInt32 type. In our benchmark unit tests this shows about a 2x performance
 boost.

Co-authored-by: vesslanjin
---
 src/Columns/ColumnVector.cpp | 104 +++++++++++++++++++++++++++++++++++
 src/Columns/ColumnVector.h   |   4 +-
 2 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index eca10049a0b..01f5c2d3782 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -401,6 +401,12 @@ ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
 template <typename T>
 ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
 {
+    #ifdef __SSE4_2__
+    if constexpr (std::is_same_v<T, UInt32>)
+    {
+        return replicateSSE2(offsets);
+    }
+    #endif
     const size_t size = data.size();
     if (size != offsets.size())
         throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
@@ -421,6 +427,104 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
     return res;
 }

+#ifdef __SSE4_2__
+template <typename T>
+ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
+{
+    const size_t size = data.size();
+    if (size != offsets.size())
+        throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH);
+
+    if (0 == size)
+        return this->create();
+
+    auto res = this->create(offsets.back());
+
+    auto it = res->getData().begin(); // NOLINT
+    ///column use paddedpodarray.Don't worry about the 4 conitnues op will out of range
+    if constexpr (std::is_same_v<T, UInt32>)
+    {
+        size_t prev_offset = 0;
+        int cp_begin = -1;
+        for (size_t i = 0; i < size; ++i)
+        {
+            size_t span = offsets[i] - prev_offset;
+            prev_offset = offsets[i];
+            if (span == 1)
+            {
+                if (cp_begin == -1)
+                    cp_begin = i;
+                continue;
+            }
+            ///data   : 11 22 33 44 55
+            ///offsets:  0  1  2  3  3
+            ///res    : 22 33 44
+            size_t cpsz = (!(cp_begin == -1)) * (i - cp_begin);
+            bool remain = (cpsz & 3);
+            size_t sse_cp_counter = (cpsz >> 2);
+            sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
+            auto it_tmp = it;
+            size_t data_start = cp_begin;
+            cp_begin = -1;
+            constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));
+            while (sse_cp_counter--)
+            {
+                __m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[data_start]));
+                auto cres = _mm_shuffle_epi32(cdata, msk_cp);
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
+                it_tmp += 4;
+                data_start += 4;
+            }
+            it += cpsz;
+            if (span == 0)
+            {
+                continue;
+            }
+            ///data   : 11 22 33
+            ///offsets:  0  0  4
+            ///res    : 33 33 33 33
+            size_t shuffle_sz = span;
+            bool shuffle_remain = (shuffle_sz & 3);
+            size_t sse_shuffle_counter = (shuffle_sz >> 2);
+            sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter);
+            it_tmp = it;
+            constexpr const int msk_shuffle = (_MM_SHUFFLE(0, 0, 0, 0));
+            __m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[i]));
+            while (sse_shuffle_counter--)
+            {
+                auto cres = _mm_shuffle_epi32(cdata, msk_shuffle);
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
+                it_tmp += 4;
+            }
+            it += shuffle_sz;
+        }
+        ///data   : 11 22 33 44 55
+        ///offsets:  1  2  3  4  5
+        ///res    : 11 22 33 44 55
+        if (cp_begin != -1)
+        {
+            size_t cpsz = (size - cp_begin);
+            bool remain = (cpsz & 3);
+            size_t sse_cp_counter = (cpsz >> 2);
+            sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
+            auto it_tmp = it;
+            size_t data_start = cp_begin;
+            constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));
+            while (sse_cp_counter--)
+            {
+                __m128i cdata = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&data[data_start]));
+                auto cres = _mm_shuffle_epi32(cdata, msk_cp);
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres);
+                it_tmp += 4;
+                data_start += 4;
+            }
+            it += cpsz;
+        }
+    }
+    return res;
+}
+#endif
+
 template <typename T>
 void ColumnVector<T>::gather(ColumnGathererStream & gatherer)
 {
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 62a0e3a1190..30cbff0570e 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -132,7 +132,9 @@ private:
     /// Sugar constructor.
     ColumnVector(std::initializer_list<T> il) : data{il} {}
-
+    #ifdef __SSE4_2__
+    ColumnPtr replicateSSE2(const IColumn::Offsets & offsets) const;
+    #endif
 public:
     bool isNumeric() const override { return is_arithmetic_v<T>; }

From 4b2a24b5ec27eb1728d2b009eedf9dedea04266a Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Mon, 16 May 2022 23:31:54 +0800
Subject: [PATCH 002/121] change comment format

---
 src/Columns/ColumnVector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 01f5c2d3782..1f7158e64d6 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -441,7 +441,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
     auto res = this->create(offsets.back());

     auto it = res->getData().begin(); // NOLINT
-    ///column use paddedpodarray.Don't worry about the 4 conitnues op will out of range
+    ///column using padded pod array. Don't worry about the 4 conitnues op will out of range
     if constexpr (std::is_same_v<T, UInt32>)
     {
         size_t prev_offset = 0;

From dbea0c122e9e65124e74b210d4c896c43d5111dd Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Wed, 18 May 2022 21:10:01 +0800
Subject: [PATCH 003/121] change macro from sse4.2 to sse2

---
 src/Columns/ColumnVector.cpp | 4 ++--
 src/Columns/ColumnVector.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 1f7158e64d6..97d184a5d61 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -401,7 +401,7 @@ ColumnPtr ColumnVector<T>::index(const IColumn & indexes, size_t limit) const
 template <typename T>
 ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
 {
-    #ifdef __SSE4_2__
+    #ifdef __SSE2__
     if constexpr (std::is_same_v<T, UInt32>)
     {
         return replicateSSE2(offsets);
@@ -427,7 +427,7 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
     return res;
 }

-#ifdef __SSE4_2__
+#ifdef __SSE2__
 template <typename T>
 ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
 {
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 30cbff0570e..6dcc0647781 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -132,7 +132,7 @@ private:
     /// Sugar constructor.
    ColumnVector(std::initializer_list<T> il) : data{il} {}
-    #ifdef __SSE4_2__
+    #ifdef __SSE2__
     ColumnPtr replicateSSE2(const IColumn::Offsets & offsets) const;
     #endif
 public:

From 44f2e3a06d09de37e0a0634b473e3e47a21b97e6 Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Thu, 19 May 2022 02:49:26 +0800
Subject: [PATCH 004/121] change comment

---
 src/Columns/ColumnVector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index d478d6ce8ee..b0a21b471f9 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -568,7 +568,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
     auto res = this->create(offsets.back());

     auto it = res->getData().begin(); // NOLINT
-    ///column using padded pod array. Don't worry about the 4 conitnues op will out of range
+    ///Column using padded pod array. Don't worry about the 4 conitnues op will out of range
     if constexpr (std::is_same_v<T, UInt32>)
     {
         size_t prev_offset = 0;

From 58518047fe1f30c97bbce60c9ef70deb867d7359 Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Thu, 19 May 2022 19:13:12 +0800
Subject: [PATCH 005/121] The iterator is not always a pointer type, so
 blindly replacing auto with the auto * that clang-tidy suggests is not
 always correct.

---
 src/Columns/ColumnVector.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index b0a21b471f9..c702b20105d 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -590,7 +590,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
             bool remain = (cpsz & 3);
             size_t sse_cp_counter = (cpsz >> 2);
             sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
-            auto it_tmp = it;
+            auto it_tmp = it; // NOLINT
             size_t data_start = cp_begin;
             cp_begin = -1;
             constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));
@@ -634,7 +634,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
             bool remain = (cpsz & 3);
             size_t sse_cp_counter = (cpsz >> 2);
             sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter);
-            auto it_tmp = it;
+            auto it_tmp = it; // NOLINT
             size_t data_start = cp_begin;
             constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0));

From ad1f0c025e5cad2b7dc9017c43da45e5563b69fc Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Fri, 20 May 2022 05:52:21 +0800
Subject: [PATCH 006/121] change some comment

---
 src/Columns/ColumnVector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index c702b20105d..21d84b85e7d 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -568,7 +568,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
     auto res = this->create(offsets.back());

     auto it = res->getData().begin(); // NOLINT
-    ///Column using padded pod array. Don't worry about the 4 conitnues op will out of range
+    ///Column uses padded pod array. 
Don't worry about the 4 conitnues op will out of range if constexpr (std::is_same_v) { size_t prev_offset = 0; From 69d50bf232e821971e46e02692a1a018b8d0cf6a Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Fri, 20 May 2022 06:33:47 +0800 Subject: [PATCH 007/121] change some comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 21d84b85e7d..f312aa7e59a 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -568,7 +568,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT - ///Column uses padded pod array. Don't worry about the 4 conitnues op will out of range + ///Column use padded pod array. Don't worry about the 4 conitnues op will out of range if constexpr (std::is_same_v) { size_t prev_offset = 0; From ce9305939bddeef4353e98793b16230957c1ad66 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Fri, 20 May 2022 06:42:22 +0800 Subject: [PATCH 008/121] change comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index f312aa7e59a..21d84b85e7d 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -568,7 +568,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT - ///Column use padded pod array. Don't worry about the 4 conitnues op will out of range + ///Column uses padded pod array. Don't worry about the 4 conitnues op will out of range if constexpr (std::is_same_v) { size_t prev_offset = 0; From 114125dde2a7a405b81a42d35fb05c726057ad48 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Fri, 20 May 2022 07:32:26 +0800 Subject: [PATCH 009/121] change comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 21d84b85e7d..f312aa7e59a 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -568,7 +568,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT - ///Column uses padded pod array. Don't worry about the 4 conitnues op will out of range + ///Column use padded pod array. Don't worry about the 4 conitnues op will out of range if constexpr (std::is_same_v) { size_t prev_offset = 0; From 59ecd3fa82f4e052e1b7549f0df5538451d84ae6 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Fri, 20 May 2022 18:20:36 +0800 Subject: [PATCH 010/121] change comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index f312aa7e59a..21d84b85e7d 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -568,7 +568,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT - ///Column use padded pod array. Don't worry about the 4 conitnues op will out of range + ///Column uses padded pod array. 
Don't worry about the 4 conitnues op will out of range
     if constexpr (std::is_same_v<T, UInt32>)
     {
         size_t prev_offset = 0;

From b5ddbf10d88dc541e3e424af60039029824ea22a Mon Sep 17 00:00:00 2001
From: zhao zhou
Date: Fri, 20 May 2022 22:41:47 +0800
Subject: [PATCH 011/121] change comment

---
 src/Columns/ColumnVector.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index 21d84b85e7d..483f0db19ed 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -568,7 +568,7 @@ ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
     auto res = this->create(offsets.back());

     auto it = res->getData().begin(); // NOLINT
-    ///Column uses padded pod array. Don't worry about the 4 conitnues op will out of range
+    ///Column used padded pod array. 
Don't worry about the 4 conitnue ops will out of range if constexpr (std::is_same_v) { size_t prev_offset = 0; From 50c02af959423905cca83e6e19a9d88c5970d183 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 May 2022 23:35:04 +0300 Subject: [PATCH 015/121] Make the code slightly more humane. --- src/Columns/ColumnVector.cpp | 184 ++++++++++++++++++----------------- 1 file changed, 93 insertions(+), 91 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 53695ffb167..54f4745bb5e 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -437,10 +437,10 @@ ColumnPtr ColumnVector::filter(const IColumn::Filter & filt, ssize_t result_s const T * data_pos = data.data(); /** A slightly more optimized version. - * Based on the assumption that often pieces of consecutive values - * completely pass or do not pass the filter. - * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. - */ + * Based on the assumption that often pieces of consecutive values + * completely pass or do not pass the filter. + * Therefore, we will optimistically check the parts of `SIMD_BYTES` values. + */ static constexpr size_t SIMD_BYTES = 64; const UInt8 * filt_end_aligned = filt_pos + size / SIMD_BYTES * SIMD_BYTES; @@ -528,12 +528,6 @@ ColumnPtr ColumnVector::index(const IColumn & indexes, size_t limit) const template ColumnPtr ColumnVector::replicate(const IColumn::Offsets & offsets) const { - #ifdef __SSE2__ - if constexpr (std::is_same_v) - { - return replicateSSE2(offsets); - } - #endif const size_t size = data.size(); if (size != offsets.size()) throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); @@ -541,6 +535,11 @@ ColumnPtr ColumnVector::replicate(const IColumn::Offsets & offsets) const if (0 == size) return this->create(); +#ifdef __SSE2__ + if constexpr (std::is_same_v) + return replicateSSE2(offsets); +#endif + auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT @@ -554,104 +553,107 @@ ColumnPtr ColumnVector::replicate(const IColumn::Offsets & offsets) const return res; } + #ifdef __SSE2__ + template ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const { - const size_t size = data.size(); - if (size != offsets.size()) - throw Exception("Size of offsets doesn't match size of column.", ErrorCodes::SIZES_OF_COLUMNS_DOESNT_MATCH); - - if (0 == size) - return this->create(); - auto res = this->create(offsets.back()); auto it = res->getData().begin(); // NOLINT - ///Column used padded pod array. Don't worry about the 4 conitnue ops will out of range - if constexpr (std::is_same_v) + + /// Column is using PaddedPODArray, so we don't have to worry about the 4 out of range elements. 
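+    /// A summary of the loop below (these comments describe the code that follows):
+    ///  - a run of offsets with span == 1 replicates each value exactly once, so the run is
+    ///    copied forward four UInt32 values at a time with unaligned 16-byte loads and stores;
+    ///  - a span greater than 1 repeats a single value, so that value is broadcast with
+    ///    _mm_shuffle_epi32 and stored four copies at a time; spans of 0 emit nothing.
+    /// Both paths round the store count up to a multiple of 4, so they may write up to three
+    /// elements past the logical end; the padding mentioned above absorbs that.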
+ + size_t prev_offset = 0; + int copy_begin = -1; + + for (size_t i = 0; i < size; ++i) { - size_t prev_offset = 0; - int cp_begin = -1; - for (size_t i = 0; i < size; ++i) + size_t span = offsets[i] - prev_offset; + prev_offset = offsets[i]; + if (span == 1) { - size_t span = offsets[i] - prev_offset; - prev_offset = offsets[i]; - if (span == 1) - { - if (cp_begin == -1) - cp_begin = i; - continue; - } - ///data : 11 22 33 44 55 - ///offsets: 0 1 2 3 3 - ///res: 22 33 44 - size_t cpsz = (!(cp_begin == -1)) * (i - cp_begin); - bool remain = (cpsz & 3); - size_t sse_cp_counter = (cpsz >> 2); - sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter); - auto it_tmp = it; // NOLINT - size_t data_start = cp_begin; - cp_begin = -1; - constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0)); - while (sse_cp_counter--) - { - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto cres = _mm_shuffle_epi32(cdata, msk_cp); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); - it_tmp += 4; - data_start += 4; - } - it += cpsz; - if (span == 0) - { - continue; - } - ///data : 11 22 33 - ///offsets: 0 0 4 - ///res: 33 33 33 33 - size_t shuffle_sz = span; - bool shuffle_remain = (shuffle_sz & 3); - size_t sse_shuffle_counter = (shuffle_sz >> 2); - sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); - it_tmp = it; - constexpr const int msk_shuffle = (_MM_SHUFFLE(0, 0, 0, 0)); - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[i])); - while (sse_shuffle_counter--) - { - auto cres = _mm_shuffle_epi32(cdata, msk_shuffle); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); - it_tmp += 4; - } - it += shuffle_sz; + if (copy_begin == -1) + copy_begin = i; + continue; } - ///data : 11 22 33 44 55 - ///offsets: 1 2 3 4 5 - ///res: 11 22 33 44 55 - if (cp_begin != -1) + + /// data : 11 22 33 44 55 + /// offsets: 0 1 2 3 3 + /// res: 22 33 44 + + size_t copy_size = (!(copy_begin == -1)) * (i - copy_begin); + bool remain = (copy_size & 3); + size_t sse_copy_counter = (copy_size >> 2); + sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); + auto it_tmp = it; // NOLINT + size_t data_start = copy_begin; + copy_begin = -1; + constexpr const int msk_cp = _MM_SHUFFLE(3, 2, 1, 0); + while (sse_copy_counter) { - size_t cpsz = (size - cp_begin); - bool remain = (cpsz & 3); - size_t sse_cp_counter = (cpsz >> 2); - sse_cp_counter = remain * (sse_cp_counter + 1) + (!remain) * (sse_cp_counter); - auto it_tmp = it; // NOLINT - size_t data_start = cp_begin; - constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0)); - while (sse_cp_counter--) - { - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto cres = _mm_shuffle_epi32(cdata, msk_cp); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); - it_tmp += 4; - data_start += 4; - } - it += cpsz; + __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto cres = _mm_shuffle_epi32(cdata, msk_cp); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + it_tmp += 4; + data_start += 4; + --sse_copy_counter; } + + it += copy_size; + if (span == 0) + continue; + + /// data : 11 22 33 + /// offsets: 0 0 4 + /// res: 33 33 33 33 + size_t shuffle_sz = span; + bool shuffle_remain = (shuffle_sz & 3); + size_t sse_shuffle_counter = (shuffle_sz >> 2); + sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); + it_tmp = it; + 
constexpr const int msk_shuffle = (_MM_SHUFFLE(0, 0, 0, 0)); + __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[i])); + while (sse_shuffle_counter) + { + auto cres = _mm_shuffle_epi32(cdata, msk_shuffle); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + it_tmp += 4; + --sse_shuffle_counter; + } + it += shuffle_sz; } + + /// data : 11 22 33 44 55 + /// offsets: 1 2 3 4 5 + /// res: 11 22 33 44 55 + if (copy_begin != -1) + { + size_t copy_size = (size - copy_begin); + bool remain = (copy_size & 3); + size_t sse_copy_counter = (copy_size >> 2); + sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); + auto it_tmp = it; // NOLINT + size_t data_start = copy_begin; + constexpr const int msk_cp = (_MM_SHUFFLE(3, 2, 1, 0)); + while (sse_copy_counter) + { + __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto cres = _mm_shuffle_epi32(cdata, msk_cp); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + it_tmp += 4; + data_start += 4; + --sse_copy_counter; + } + it += copy_size; + } + return res; } #endif + template void ColumnVector::gather(ColumnGathererStream & gatherer) { From fa032a76b61562b2b51a73ff2d0d78e4faa694de Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 May 2022 23:38:51 +0300 Subject: [PATCH 016/121] Make variable names less disgusting. --- src/Columns/ColumnVector.cpp | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 54f4745bb5e..d49b3e7f948 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -590,12 +590,12 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto it_tmp = it; // NOLINT size_t data_start = copy_begin; copy_begin = -1; - constexpr const int msk_cp = _MM_SHUFFLE(3, 2, 1, 0); + constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); while (sse_copy_counter) { - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto cres = _mm_shuffle_epi32(cdata, msk_cp); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); it_tmp += 4; data_start += 4; --sse_copy_counter; @@ -613,12 +613,12 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const size_t sse_shuffle_counter = (shuffle_sz >> 2); sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); it_tmp = it; - constexpr const int msk_shuffle = (_MM_SHUFFLE(0, 0, 0, 0)); - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[i])); + constexpr const int shuffle_mask = (_MM_SHUFFLE(0, 0, 0, 0)); + __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[i])); while (sse_shuffle_counter) { - auto cres = _mm_shuffle_epi32(cdata, msk_shuffle); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + auto copy_result = _mm_shuffle_epi32(data_to_copy, shuffle_mask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); it_tmp += 4; --sse_shuffle_counter; } @@ -636,12 +636,12 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); auto it_tmp = it; // NOLINT size_t data_start = copy_begin; - constexpr const int msk_cp = 
(_MM_SHUFFLE(3, 2, 1, 0)); + constexpr const int copy_mask = (_MM_SHUFFLE(3, 2, 1, 0)); while (sse_copy_counter) { - __m128i cdata = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto cres = _mm_shuffle_epi32(cdata, msk_cp); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), cres); + __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); it_tmp += 4; data_start += 4; --sse_copy_counter; From d36ec5e34d272203edb10b57b1fe6e6be5645e0d Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 21 May 2022 23:39:34 +0300 Subject: [PATCH 017/121] There are so many disgusting names. --- src/Columns/ColumnVector.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index d49b3e7f948..2f52fa6aa85 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -608,9 +608,9 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// data : 11 22 33 /// offsets: 0 0 4 /// res: 33 33 33 33 - size_t shuffle_sz = span; - bool shuffle_remain = (shuffle_sz & 3); - size_t sse_shuffle_counter = (shuffle_sz >> 2); + size_t shuffle_size = span; + bool shuffle_remain = (shuffle_size & 3); + size_t sse_shuffle_counter = (shuffle_size >> 2); sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); it_tmp = it; constexpr const int shuffle_mask = (_MM_SHUFFLE(0, 0, 0, 0)); @@ -622,7 +622,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const it_tmp += 4; --sse_shuffle_counter; } - it += shuffle_sz; + it += shuffle_size; } /// data : 11 22 33 44 55 From a1a0d5b07565f9b1d0ff57d1dbf2b973b53f72ca Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Sun, 22 May 2022 14:22:54 +0800 Subject: [PATCH 018/121] fix compile error --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 2f52fa6aa85..7e2748c472c 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -567,7 +567,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const size_t prev_offset = 0; int copy_begin = -1; - + size_t size = offsets.size(); for (size_t i = 0; i < size; ++i) { size_t span = offsets[i] - prev_offset; From eff6adbaa4aa17f47ac23b680ec4550dfc5ab0f9 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Sun, 22 May 2022 15:50:47 +0800 Subject: [PATCH 019/121] fix offset type issue --- src/Columns/ColumnVector.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 7e2748c472c..3a9ccafb57f 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -565,8 +565,8 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// Column is using PaddedPODArray, so we don't have to worry about the 4 out of range elements. 
- size_t prev_offset = 0; - int copy_begin = -1; + IColumn::Offset prev_offset = 0; + std::optional copy_begin; size_t size = offsets.size(); for (size_t i = 0; i < size; ++i) { @@ -574,7 +574,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const prev_offset = offsets[i]; if (span == 1) { - if (copy_begin == -1) + if (!copy_begin) copy_begin = i; continue; } @@ -583,13 +583,13 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// offsets: 0 1 2 3 3 /// res: 22 33 44 - size_t copy_size = (!(copy_begin == -1)) * (i - copy_begin); + size_t copy_size = (static_cast(copy_begin)) * (i - (*copy_begin)); bool remain = (copy_size & 3); size_t sse_copy_counter = (copy_size >> 2); sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); auto it_tmp = it; // NOLINT - size_t data_start = copy_begin; - copy_begin = -1; + size_t data_start = (static_cast(copy_begin))*(*copy_begin); + copy_begin.reset(); constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); while (sse_copy_counter) { @@ -628,14 +628,14 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// data : 11 22 33 44 55 /// offsets: 1 2 3 4 5 /// res: 11 22 33 44 55 - if (copy_begin != -1) + if (copy_begin) { - size_t copy_size = (size - copy_begin); + size_t copy_size = (size - (*copy_begin)); bool remain = (copy_size & 3); size_t sse_copy_counter = (copy_size >> 2); sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); auto it_tmp = it; // NOLINT - size_t data_start = copy_begin; + size_t data_start = *copy_begin; constexpr const int copy_mask = (_MM_SHUFFLE(3, 2, 1, 0)); while (sse_copy_counter) { From a6347dcea626fa21f26cdfd38055efa25acaba14 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Mon, 23 May 2022 01:11:01 +0800 Subject: [PATCH 020/121] fix deference null optional value issue. Dereference NUll optioanl value in debug mode will trigger about signal. 
--- src/Columns/ColumnVector.cpp | 53 +++++++++++++++++++----------------- 1 file changed, 28 insertions(+), 25 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 3a9ccafb57f..5d84cbfb782 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -574,34 +574,37 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const prev_offset = offsets[i]; if (span == 1) { - if (!copy_begin) - copy_begin = i; - continue; + if (!copy_begin) + copy_begin = i; + continue; } /// data : 11 22 33 44 55 /// offsets: 0 1 2 3 3 /// res: 22 33 44 + if(copy_begin) + { + size_t copy_size = i - (*copy_begin); + bool remain = (copy_size & 3); + size_t sse_copy_counter = (copy_size >> 2); + sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); + auto it_tmp = it; // NOLINT + size_t data_start = *copy_begin; + copy_begin.reset(); + constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); + while (sse_copy_counter) + { + __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); + it_tmp += 4; + data_start += 4; + --sse_copy_counter; + } - size_t copy_size = (static_cast(copy_begin)) * (i - (*copy_begin)); - bool remain = (copy_size & 3); - size_t sse_copy_counter = (copy_size >> 2); - sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); - auto it_tmp = it; // NOLINT - size_t data_start = (static_cast(copy_begin))*(*copy_begin); - copy_begin.reset(); - constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); - while (sse_copy_counter) - { - __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); - it_tmp += 4; - data_start += 4; - --sse_copy_counter; - } + it += copy_size; + } - it += copy_size; if (span == 0) continue; @@ -612,13 +615,13 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const bool shuffle_remain = (shuffle_size & 3); size_t sse_shuffle_counter = (shuffle_size >> 2); sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); - it_tmp = it; + auto it_tmp = it; // NOLINT constexpr const int shuffle_mask = (_MM_SHUFFLE(0, 0, 0, 0)); - __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[i])); + __m128i data_to_shuffle = _mm_loadu_si128(reinterpret_cast(&data[i])); + auto shuffle_result = _mm_shuffle_epi32(data_to_shuffle, shuffle_mask); while (sse_shuffle_counter) { - auto copy_result = _mm_shuffle_epi32(data_to_copy, shuffle_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), shuffle_result); it_tmp += 4; --sse_shuffle_counter; } From a3d6cb2e58ed30f83fdcf7a09b490768d541fd25 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Mon, 23 May 2022 01:36:59 +0800 Subject: [PATCH 021/121] fix style issue --- src/Columns/ColumnVector.cpp | 52 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 5d84cbfb782..7920124d4f2 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -565,7 +565,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// 
Column is using PaddedPODArray, so we don't have to worry about the 4 out of range elements. - IColumn::Offset prev_offset = 0; + IColumn::Offset prev_offset = 0; std::optional copy_begin; size_t size = offsets.size(); for (size_t i = 0; i < size; ++i) @@ -574,36 +574,36 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const prev_offset = offsets[i]; if (span == 1) { - if (!copy_begin) - copy_begin = i; - continue; + if (!copy_begin) + copy_begin = i; + continue; } /// data : 11 22 33 44 55 /// offsets: 0 1 2 3 3 /// res: 22 33 44 - if(copy_begin) - { - size_t copy_size = i - (*copy_begin); - bool remain = (copy_size & 3); - size_t sse_copy_counter = (copy_size >> 2); - sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); - auto it_tmp = it; // NOLINT - size_t data_start = *copy_begin; - copy_begin.reset(); - constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); - while (sse_copy_counter) - { - __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); - it_tmp += 4; - data_start += 4; - --sse_copy_counter; - } + if (copy_begin) + { + size_t copy_size = i - (*copy_begin); + bool remain = (copy_size & 3); + size_t sse_copy_counter = (copy_size >> 2); + sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); + auto it_tmp = it; // NOLINT + size_t data_start = (*copy_begin); + copy_begin.reset(); + constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); + while (sse_copy_counter) + { + __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); + auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); + _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); + it_tmp += 4; + data_start += 4; + --sse_copy_counter; + } - it += copy_size; - } + it += copy_size; + } if (span == 0) continue; @@ -618,7 +618,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const auto it_tmp = it; // NOLINT constexpr const int shuffle_mask = (_MM_SHUFFLE(0, 0, 0, 0)); __m128i data_to_shuffle = _mm_loadu_si128(reinterpret_cast(&data[i])); - auto shuffle_result = _mm_shuffle_epi32(data_to_shuffle, shuffle_mask); + auto shuffle_result = _mm_shuffle_epi32(data_to_shuffle, shuffle_mask); while (sse_shuffle_counter) { _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), shuffle_result); From 5162e87dd2005e6d75159d0ab773d35c8c192d13 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Tue, 24 May 2022 10:22:29 +0800 Subject: [PATCH 022/121] remove parentheses --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 7920124d4f2..920fb12124c 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -589,7 +589,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const size_t sse_copy_counter = (copy_size >> 2); sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); auto it_tmp = it; // NOLINT - size_t data_start = (*copy_begin); + size_t data_start = *copy_begin; copy_begin.reset(); constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); while (sse_copy_counter) From 2ff2c5547969fd058d3634de931134741da1ef6c Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Thu, 26 May 2022 21:11:56 +0800 Subject: [PATCH 023/121] change some comment --- 
src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 920fb12124c..b659e50426f 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -581,7 +581,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const /// data : 11 22 33 44 55 /// offsets: 0 1 2 3 3 - /// res: 22 33 44 + /// res: 22 33 44 if (copy_begin) { size_t copy_size = i - (*copy_begin); From 781701b223ece9ab16e1a09a089545e64f471af6 Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Thu, 26 May 2022 22:23:22 +0800 Subject: [PATCH 024/121] change comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index b659e50426f..7ab2a7e387a 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -580,7 +580,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const } /// data : 11 22 33 44 55 - /// offsets: 0 1 2 3 3 + /// offsets: 0 1 2 3 3 /// res: 22 33 44 if (copy_begin) { From bef9971890522dde57756938fd79cc4196cab53b Mon Sep 17 00:00:00 2001 From: zhao zhou Date: Tue, 31 May 2022 21:13:02 +0800 Subject: [PATCH 025/121] change comment --- src/Columns/ColumnVector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 7ab2a7e387a..b659e50426f 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -580,7 +580,7 @@ ColumnPtr ColumnVector::replicateSSE2(const IColumn::Offsets & offsets) const } /// data : 11 22 33 44 55 - /// offsets: 0 1 2 3 3 + /// offsets: 0 1 2 3 3 /// res: 22 33 44 if (copy_begin) { From 4f13521aa6de2fc6813214307da5e4c83ded9ea5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 15 Jun 2022 14:50:47 +0800 Subject: [PATCH 026/121] struct type support for storage hive --- .../hive_server/prepare_hive_data.sh | 5 +++ src/DataTypes/NestedUtils.cpp | 22 +++++++++++-- src/Storages/Hive/StorageHive.cpp | 21 +++++++++--- src/Storages/Hive/StorageHive.h | 2 ++ tests/integration/test_hive_query/test.py | 33 +++++++++++++++++++ 5 files changed, 75 insertions(+), 8 deletions(-) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index 8126b975612..cee5581e2db 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -8,3 +8,8 @@ hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo pa hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" + +hive -e "CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" + +hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3)); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3));" + diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 20aae08f363..db66d4d979b 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -17,7 +17,6 @@ #include - namespace DB { @@ -76,8 +75,7 @@ Block flatten(const Block & block) for (const auto & elem : block) { - const DataTypeArray * type_arr = typeid_cast(elem.type.get()); - if (type_arr) + if (const DataTypeArray * type_arr = typeid_cast(elem.type.get())) { const DataTypeTuple * type_tuple = typeid_cast(type_arr->getNestedType().get()); if (type_tuple && type_tuple->haveExplicitNames()) @@ -114,6 +112,24 @@ Block flatten(const Block & block) else res.insert(elem); } + else if (const DataTypeTuple * type_tuple = typeid_cast(elem.type.get())) + { + if (type_tuple->haveExplicitNames()) + { + const DataTypes & element_types = type_tuple->getElements(); + const Strings & names = type_tuple->getElementNames(); + const ColumnTuple * column_tuple = typeid_cast(elem.column.get()); + size_t tuple_size = column_tuple->tupleSize(); + for (size_t i = 0; i < tuple_size; ++i) + { + const auto & element_column = column_tuple->getColumn(i); + String nested_name = concatenateName(elem.name, names[i]); + res.insert(ColumnWithTypeAndName(element_column.getPtr(), element_types[i], nested_name)); + } + } + else + res.insert(elem); + } else res.insert(elem); } diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 6046dd58677..bc65ab2fd97 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -37,6 +37,8 @@ #include #include #include +#include +#include namespace DB { @@ -563,8 +565,8 @@ HiveFiles StorageHive::collectHiveFilesFromPartition( const ContextPtr & context_, PruneLevel prune_level) const { - LOG_DEBUG( - log, "Collect hive files from partition {}, prune_level:{}", boost::join(partition.values, ","), pruneLevelToString(prune_level)); + //LOG_DEBUG( + // log, "Collect hive files from partition {}, prune_level:{}", boost::join(partition.values, ","), pruneLevelToString(prune_level)); /// Skip partition "__HIVE_DEFAULT_PARTITION__" bool has_default_partition = false; @@ -766,8 +768,13 @@ Pipe StorageHive::read( sources_info->hive_metastore_client = hive_metastore_client; sources_info->partition_name_types = partition_name_types; - Block sample_block; const auto header_block = storage_snapshot->metadata->getSampleBlock(); + bool support_subset_columns = 
supportsSubsetOfColumns(); + Block flatten_block; + if (support_subset_columns) + flatten_block = Nested::flatten(header_block); + + Block sample_block; for (const auto & column : column_names) { if (header_block.has(column)) @@ -775,13 +782,17 @@ Pipe StorageHive::read( sample_block.insert(header_block.getByName(column)); continue; } - + else if (support_subset_columns && flatten_block.has(column)) + { + sample_block.insert(flatten_block.getByName(column)); + continue; + } if (column == "_path") sources_info->need_path_column = true; if (column == "_file") sources_info->need_file_column = true; } - + LOG_TRACE(&Poco::Logger::get("StorageHive"), "sample_block={}", sample_block.dumpNames()); if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h index d92d2dbd745..2f69ae00b6e 100644 --- a/src/Storages/Hive/StorageHive.h +++ b/src/Storages/Hive/StorageHive.h @@ -42,6 +42,8 @@ public: String getName() const override { return "Hive"; } bool supportsIndexForIn() const override { return true; } + + bool supportsSubcolumns() const override { return true; } bool mayBenefitFromIndexForIn( const ASTPtr & /* left_in_operand */, ContextPtr /* query_context */, diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index fd4d91d6f78..00aaaec7afe 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -400,3 +400,36 @@ def test_cache_dir_use(started_cluster): ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] ) assert result0 != "0" and result1 != "0" + +def test_cache_dir_use(started_cluster): + node = started_cluster.instances["h0_0_0"] + result0 = node.exec_in_container( + ["bash", "-c", "ls /tmp/clickhouse_local_cache | wc -l"] + ) + result1 = node.exec_in_container( + ["bash", "-c", "ls /tmp/clickhouse_local_cache1 | wc -l"] + ) + assert result0 != "0" and result1 != "0" + +def test_hive_struct_type(started_cluster): + node = started_cluster.instances["h0_0_0"] + result = node.query( + """ + CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32), `day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day) + """ + ) + result = node.query( + """ + SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 + """ + ) + expected_result = """1 2 3 4 5 6.11 7.22 8 2022-02-20 14:47:04 2022-02-20 hello world hello world hello world true [1,2,3] ['hello world','hello world'] [1.1,1.2] {'a':100,'b':200,'c':300} {'a':'aa','b':'bb','c':'cc'} {'a':111.1,'b':222.2,'c':333.3} ('aaa',200,333.3) 2022-02-20""" + assert result.strip() == expected_result + + result = node.query( + """ + SELECT day, f_struct.a FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 + """ + ) + expected_result = """2022-02-20 aaa""" + assert result.strip() == expected_result From 
655e42c9bcd9c4fe409f0d72f30b6697f2a57eeb Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 16 Jun 2022 09:44:41 +0800 Subject: [PATCH 027/121] remove trace logs --- src/Storages/Hive/StorageHive.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index bc65ab2fd97..0135da97445 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -792,7 +792,7 @@ Pipe StorageHive::read( if (column == "_file") sources_info->need_file_column = true; } - LOG_TRACE(&Poco::Logger::get("StorageHive"), "sample_block={}", sample_block.dumpNames()); + if (num_streams > sources_info->hive_files.size()) num_streams = sources_info->hive_files.size(); From e115e3f7311400d0e3c1af2b4dfb13bd3ad617f5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 16 Jun 2022 09:53:04 +0800 Subject: [PATCH 028/121] remove unused header --- src/Storages/Hive/StorageHive.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index 0135da97445..ab2b1ef7a09 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -38,7 +38,6 @@ #include #include #include -#include namespace DB { From 35d534c213faf5abde296dc041737d8970d0d2f3 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Thu, 16 Jun 2022 16:16:34 +0800 Subject: [PATCH 029/121] nested struct in struct --- .../hive_server/prepare_hive_data.sh | 4 +- src/DataTypes/NestedUtils.cpp | 68 +++++++++++++++- src/DataTypes/NestedUtils.h | 13 +++ .../Formats/Impl/ArrowColumnToCHColumn.cpp | 81 ++++++++++--------- src/Storages/Hive/StorageHive.cpp | 24 ++++-- tests/integration/test_hive_query/test.py | 6 +- 6 files changed, 145 insertions(+), 51 deletions(-) diff --git a/docker/test/integration/hive_server/prepare_hive_data.sh b/docker/test/integration/hive_server/prepare_hive_data.sh index cee5581e2db..39d435eb05a 100755 --- a/docker/test/integration/hive_server/prepare_hive_data.sh +++ b/docker/test/integration/hive_server/prepare_hive_data.sh @@ -9,7 +9,7 @@ hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo pa hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" -hive -e "CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" +hive -e "CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, 
f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" -hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3)); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3));" +hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index db66d4d979b..0df664ad408 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -259,7 +260,72 @@ std::unordered_set getAllTableNames(const Block & block, bool to_lower_c } return nested_table_names; } - } +NestedColumnExtractHelper::NestedColumnExtractHelper(const Block & block_, bool case_insentive_) + : block(block_) + , case_insentive(case_insentive_) +{} + +std::optional NestedColumnExtractHelper::extractColumn(const String & column_name) +{ + if (block.has(column_name, case_insentive)) + return {block.getByName(column_name, case_insentive)}; + + auto nested_names = Nested::splitName(column_name); + if (case_insentive) + { + boost::to_lower(nested_names.first); + boost::to_lower(nested_names.second); + } + if (!block.has(nested_names.first, case_insentive)) + return {}; + + if (!nested_tables.contains(nested_names.first)) + { + ColumnsWithTypeAndName columns = {block.getByName(nested_names.first, case_insentive)}; + nested_tables[nested_names.first] = std::make_shared(Nested::flatten(columns)); + } + + return extractColumn(column_name, nested_names.first, 
nested_names.second); +} + +std::optional NestedColumnExtractHelper::extractColumn( + const String & original_column_name, const String & column_name_prefix, const String & column_name_suffix) +{ + auto table_iter = nested_tables.find(column_name_prefix); + if (table_iter == nested_tables.end()) + { + return {}; + } + + auto & nested_table = table_iter->second; + auto nested_names = Nested::splitName(column_name_suffix); + auto new_column_name_prefix = Nested::concatenateName(column_name_prefix, nested_names.first); + if (nested_names.second.empty()) + { + if (nested_table->has(new_column_name_prefix, case_insentive)) + { + ColumnWithTypeAndName column = nested_table->getByName(new_column_name_prefix, case_insentive); + if (case_insentive) + column.name = original_column_name; + return {column}; + } + else + { + return {}; + } + } + + if (!nested_table->has(new_column_name_prefix, case_insentive)) + { + return {}; + } + + ColumnsWithTypeAndName columns = {nested_table->getByName(new_column_name_prefix, case_insentive)}; + Block sub_block(columns); + nested_tables[new_column_name_prefix] = std::make_shared(Nested::flatten(sub_block)); + return extractColumn(original_column_name, new_column_name_prefix, nested_names.second); + +} } diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h index f6dc42d5c58..39f73b65100 100644 --- a/src/DataTypes/NestedUtils.h +++ b/src/DataTypes/NestedUtils.h @@ -35,4 +35,17 @@ namespace Nested std::unordered_set getAllTableNames(const Block & block, bool to_lower_case = false); } +class NestedColumnExtractHelper +{ +public: + explicit NestedColumnExtractHelper(const Block & block_, bool case_insentive_); + std::optional extractColumn(const String & column_name); +private: + std::optional + extractColumn(const String & original_column_name, const String & column_name_prefix, const String & column_name_suffix); + const Block & block; + bool case_insentive; + std::map nested_tables; +}; + } diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index c792d828e44..da89d6f7321 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -1,4 +1,6 @@ #include "ArrowColumnToCHColumn.h" +#include +#include #if USE_ARROW || USE_ORC || USE_PARQUET @@ -602,8 +604,8 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & Columns columns_list; UInt64 num_rows = name_to_column_ptr.begin()->second->length(); - columns_list.reserve(header.rows()); - std::unordered_map nested_tables; + columns_list.reserve(header.columns()); + std::unordered_map>> nested_tables; bool skipped = false; for (size_t column_i = 0, columns = header.columns(); column_i < columns; ++column_i) { @@ -613,55 +615,57 @@ void ArrowColumnToCHColumn::arrowColumnsToCHChunk(Chunk & res, NameToColumnPtr & if (case_insensitive_matching) boost::to_lower(search_column_name); - bool read_from_nested = false; - String nested_table_name = Nested::extractTableName(header_column.name); - String search_nested_table_name = nested_table_name; - if (case_insensitive_matching) - boost::to_lower(search_nested_table_name); - + ColumnWithTypeAndName column; if (!name_to_column_ptr.contains(search_column_name)) { + bool read_from_nested = false; /// Check if it's a column from nested table. 
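+            /// For example (illustrative, matching the integration test below): a request for
+            /// "f_struct.a" is served by reading the whole "f_struct" tuple column from Arrow,
+            /// flattening it, and extracting the "a" subcolumn.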
- if (import_nested && name_to_column_ptr.contains(search_nested_table_name)) + if (import_nested) { - if (!nested_tables.contains(search_nested_table_name)) + String nested_table_name = Nested::extractTableName(header_column.name); + String search_nested_table_name = nested_table_name; + if (case_insensitive_matching) + boost::to_lower(search_nested_table_name); + if (name_to_column_ptr.contains(search_nested_table_name)) { - std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; - ColumnsWithTypeAndName cols - = {readColumnFromArrowColumn(arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; - Block block(cols); - nested_tables[search_nested_table_name] = std::make_shared(Nested::flatten(block)); + if (!nested_tables.contains(search_nested_table_name)) + { + std::shared_ptr arrow_column = name_to_column_ptr[search_nested_table_name]; + ColumnsWithTypeAndName cols = {readColumnFromArrowColumn( + arrow_column, nested_table_name, format_name, false, dictionary_values, true, true, false, skipped)}; + BlockPtr block_ptr = std::make_shared(cols); + auto column_extractor = std::make_shared(*block_ptr, case_insensitive_matching); + nested_tables[search_nested_table_name] = {block_ptr, column_extractor}; + } + auto nested_column = nested_tables[search_nested_table_name].second->extractColumn(search_column_name); + if (nested_column) + { + column = *nested_column; + if (case_insensitive_matching) + column.name = header_column.name; + read_from_nested = true; + } } - - read_from_nested = nested_tables[search_nested_table_name]->has(header_column.name, case_insensitive_matching); } - if (!read_from_nested) { if (!allow_missing_columns) throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; - - ColumnWithTypeAndName column; - column.name = header_column.name; - column.type = header_column.type; - column.column = header_column.column->cloneResized(num_rows); - columns_list.push_back(std::move(column.column)); - continue; + else + { + column.name = header_column.name; + column.type = header_column.type; + column.column = header_column.column->cloneResized(num_rows); + columns_list.push_back(std::move(column.column)); + continue; + } } } - - - ColumnWithTypeAndName column; - if (read_from_nested) - { - column = nested_tables[search_nested_table_name]->getByName(header_column.name, case_insensitive_matching); - if (case_insensitive_matching) - column.name = header_column.name; - } else { auto arrow_column = name_to_column_ptr[search_column_name]; - column = readColumnFromArrowColumn(arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped); + column = readColumnFromArrowColumn( + arrow_column, header_column.name, format_name, false, dictionary_values, true, true, false, skipped); } try @@ -689,17 +693,16 @@ std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema { std::vector missing_columns; auto block_from_arrow = arrowSchemaToCHHeader(schema, format_name, false, &header, case_insensitive_matching); - auto flatten_block_from_arrow = Nested::flatten(block_from_arrow); + NestedColumnExtractHelper nested_columns_extractor(block_from_arrow, case_insensitive_matching); for (size_t i = 0, columns = header.columns(); i < columns; ++i) { const auto & header_column = header.getByPosition(i); bool read_from_nested = false; - String nested_table_name = Nested::extractTableName(header_column.name); if 
(!block_from_arrow.has(header_column.name, case_insensitive_matching)) { - if (import_nested && block_from_arrow.has(nested_table_name, case_insensitive_matching)) - read_from_nested = flatten_block_from_arrow.has(header_column.name, case_insensitive_matching); + if (import_nested && nested_columns_extractor.extractColumn(header_column.name)) + read_from_nested = true; if (!read_from_nested) { diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp index ab2b1ef7a09..9dbfd1119dc 100644 --- a/src/Storages/Hive/StorageHive.cpp +++ b/src/Storages/Hive/StorageHive.cpp @@ -1,3 +1,4 @@ +#include #include #if USE_HIVE @@ -769,11 +770,18 @@ Pipe StorageHive::read( const auto header_block = storage_snapshot->metadata->getSampleBlock(); bool support_subset_columns = supportsSubsetOfColumns(); - Block flatten_block; - if (support_subset_columns) - flatten_block = Nested::flatten(header_block); + auto settings = context_->getSettingsRef(); + auto case_insensitive_matching = [&]() -> bool + { + if (format_name == "Parquet") + return settings.input_format_parquet_case_insensitive_column_matching; + else if (format_name == "ORC") + return settings.input_format_orc_case_insensitive_column_matching; + return false; + }; Block sample_block; + NestedColumnExtractHelper nested_columns_extractor(header_block, case_insensitive_matching()); for (const auto & column : column_names) { if (header_block.has(column)) @@ -781,10 +789,14 @@ Pipe StorageHive::read( sample_block.insert(header_block.getByName(column)); continue; } - else if (support_subset_columns && flatten_block.has(column)) + else if (support_subset_columns) { - sample_block.insert(flatten_block.getByName(column)); - continue; + auto subset_column = nested_columns_extractor.extractColumn(column); + if (subset_column) + { + sample_block.insert(*subset_column); + continue; + } } if (column == "_path") sources_info->need_path_column = true; diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 00aaaec7afe..4bac09320ce 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -415,7 +415,7 @@ def test_hive_struct_type(started_cluster): node = started_cluster.instances["h0_0_0"] result = node.query( """ - CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32), `day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day) + CREATE TABLE IF NOT EXISTS default.test_hive_types (`f_tinyint` Int8, `f_smallint` Int16, `f_int` Int32, `f_integer` Int32, `f_bigint` Int64, `f_float` Float32, `f_double` Float64, `f_decimal` Float64, `f_timestamp` DateTime, `f_date` Date, `f_string` String, `f_varchar` String, `f_char` String, `f_bool` Boolean, `f_array_int` Array(Int32), `f_array_string` Array(String), `f_array_float` Array(Float32), `f_map_int` Map(String, Int32), `f_map_string` Map(String, String), `f_map_float` Map(String, Float32), `f_struct` Tuple(a String, b Int32, c Float32, d Tuple(x Int32, y String)), 
`day` String) ENGINE = Hive('thrift://hivetest:9083', 'test', 'test_hive_types') PARTITION BY (day)
         """
     )
     result = node.query(
@@ -423,7 +423,7 @@
 SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1
         """
     )
-    expected_result = """1	2	3	4	5	6.11	7.22	8	2022-02-20 14:47:04	2022-02-20	hello world	hello world	hello world	true	[1,2,3]	['hello world','hello world']	[1.1,1.2]	{'a':100,'b':200,'c':300}	{'a':'aa','b':'bb','c':'cc'}	{'a':111.1,'b':222.2,'c':333.3}	('aaa',200,333.3)	2022-02-20"""
+    expected_result = """1	2	3	4	5	6.11	7.22	8	2022-02-20 14:47:04	2022-02-20	hello world	hello world	hello world	true	[1,2,3]	['hello world','hello world']	[1.1,1.2]	{'a':100,'b':200,'c':300}	{'a':'aa','b':'bb','c':'cc'}	{'a':111.1,'b':222.2,'c':333.3}	('aaa',200,333.3,(10,'xyz'))	2022-02-20"""
     assert result.strip() == expected_result

     result = node.query(
@@ -431,5 +431,5 @@
 SELECT day, f_struct.a FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1
         """
     )
-    expected_result = """2022-02-20 aaa"""
+    expected_result = """2022-02-20 aaa 10"""
     assert result.strip() == expected_result

From a8b17fec848a698dc8d0f6886b041089c991a923 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 16 Jun 2022 16:53:11 +0800
Subject: [PATCH 030/121] fixed a bug: use supportsSubcolumns() instead of
 supportsSubsetOfColumns() in StorageHive::read

---
 src/Storages/Hive/StorageHive.cpp | 2 +-
 src/Storages/Hive/StorageHive.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp
index 9dbfd1119dc..60936f6a3f4 100644
--- a/src/Storages/Hive/StorageHive.cpp
+++ b/src/Storages/Hive/StorageHive.cpp
@@ -769,7 +769,7 @@ Pipe StorageHive::read(
     sources_info->partition_name_types = partition_name_types;

     const auto header_block = storage_snapshot->metadata->getSampleBlock();
-    bool support_subset_columns = supportsSubsetOfColumns();
+    bool support_subset_columns = supportsSubcolumns();

     auto settings = context_->getSettingsRef();
     auto case_insensitive_matching = [&]() -> bool
diff --git a/src/Storages/Hive/StorageHive.h b/src/Storages/Hive/StorageHive.h
index 2f69ae00b6e..fd806553a86 100644
--- a/src/Storages/Hive/StorageHive.h
+++ b/src/Storages/Hive/StorageHive.h
@@ -44,6 +44,7 @@ public:
     bool supportsIndexForIn() const override { return true; }

     bool supportsSubcolumns() const override { return true; }
+
     bool mayBenefitFromIndexForIn(
         const ASTPtr & /* left_in_operand */,
         ContextPtr /* query_context */,

From 9753ea91575e306ee8729ec533ed93abc0adc261 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 16 Jun 2022 19:27:30 +0800
Subject: [PATCH 031/121] fixed a bug in TranslateQualifiedNamesMatcher to
 handle nested tuple fields with table-qualified names

---
 src/Interpreters/IdentifierSemantic.cpp | 7 +++
 src/Interpreters/IdentifierSemantic.h | 1 +
 .../TranslateQualifiedNamesVisitor.cpp | 46 +++++++++----------
 ...ranslate_qualified_names_matcher.reference | 2 +
 ...heck_sranslate_qualified_names_matcher.sql | 10 ++++
 5 files changed, 42 insertions(+), 24 deletions(-)
 create mode 100644 tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference
 create mode 100644 tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql

diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp
index dbfdba77619..14cb43e2a2e 100644
--- a/src/Interpreters/IdentifierSemantic.cpp
+++
b/src/Interpreters/IdentifierSemantic.cpp @@ -188,6 +188,13 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const return canReferColumnToTable(identifier, table_with_columns.table); } +std::optional IdentifierSemantic::getColumnNamePart(const ASTIdentifier & node, size_t pos) +{ + if (pos >= node.name_parts.size()) + return {}; + return node.name_parts[pos]; +} + /// Strip qualifications from left side of column name. /// Example: 'database.table.name' -> 'name'. void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 5dc828c36ba..622813a8abe 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -40,6 +40,7 @@ struct IdentifierSemantic /// @returns name for column identifiers static std::optional getColumnName(const ASTIdentifier & node); static std::optional getColumnName(const ASTPtr & ast); + static std::optional getColumnNamePart(const ASTIdentifier & node, size_t pos); /// @returns name for 'not a column' identifiers static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 3129f9d7fe2..91d9ca082d5 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -33,35 +33,33 @@ namespace ErrorCodes bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const { const auto & table = tables[table_pos].table; - auto nested1 = IdentifierSemantic::extractNestedName(identifier, table.table); - auto nested2 = IdentifierSemantic::extractNestedName(identifier, table.alias); - - const String & short_name = identifier.shortName(); const auto & columns = tables[table_pos].columns; + if (columns.empty()) + return false; + auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); + size_t to_strip = 0; + switch (match) + { + case IdentifierSemantic::ColumnMatch::TableName: + case IdentifierSemantic::ColumnMatch::AliasedTableName: + case IdentifierSemantic::ColumnMatch::TableAlias: + to_strip = 1; + break; + case IdentifierSemantic::ColumnMatch::DBAndTable: + to_strip = 2; + break; + default: + break; + } + const auto & column_name = IdentifierSemantic::getColumnNamePart(identifier, to_strip); + if (!column_name) + return true; for (const auto & column : columns) { - const String & known_name = column.name; - if (short_name == known_name) - return false; - if (nested1 && *nested1 == known_name) - return false; - if (nested2 && *nested2 == known_name) + if (*column_name == column.name) return false; } - - const auto & hidden_columns = tables[table_pos].hidden_columns; - for (const auto & column : hidden_columns) - { - const String & known_name = column.name; - if (short_name == known_name) - return false; - if (nested1 && *nested1 == known_name) - return false; - if (nested2 && *nested2 == known_name) - return false; - } - - return !columns.empty(); + return true; } bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) diff --git a/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference b/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference new file mode 100644 index 
00000000000..c094c553f81 --- /dev/null +++ b/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference @@ -0,0 +1,2 @@ +12 +12 diff --git a/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql b/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql new file mode 100644 index 00000000000..f2b5c372ba7 --- /dev/null +++ b/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql @@ -0,0 +1,10 @@ +CREATE TABLE nested_name_tuples +( + `a` Tuple(x String, y Tuple(i Int32, j String)) +) +ENGINE = Memory; + +INSERT INTO nested_name_tuples VALUS(('asd', (12, 'ddd'))); + +SELECT t.a.y.i FROM nested_name_tuples as t; +SELECT nested_name_tuples.a.y.i FROM nested_name_tuples as t; From 285bb44b2542a16a36e6bcd1496a13682066b7fb Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 09:33:16 +0800 Subject: [PATCH 032/121] nothing --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 91d9ca082d5..a1432b10b3c 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -36,6 +36,7 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const const auto & columns = tables[table_pos].columns; if (columns.empty()) return false; + auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); size_t to_strip = 0; switch (match) From 8c629085e485f15a221f2e443f3b89e95bc0d9e0 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 09:36:59 +0800 Subject: [PATCH 033/121] simplified code --- src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp index da89d6f7321..305009a070f 100644 --- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp +++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp @@ -698,18 +698,13 @@ std::vector ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema for (size_t i = 0, columns = header.columns(); i < columns; ++i) { const auto & header_column = header.getByPosition(i); - bool read_from_nested = false; if (!block_from_arrow.has(header_column.name, case_insensitive_matching)) { - if (import_nested && nested_columns_extractor.extractColumn(header_column.name)) - read_from_nested = true; - - if (!read_from_nested) + if (!import_nested || !nested_columns_extractor.extractColumn(header_column.name)) { if (!allow_missing_columns) throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name}; - - missing_columns.push_back(i); + missing_columns.push_back(i); } } } From 68c588585187f90e5c920abafb1ddfa0f2f132aa Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 12:19:25 +0800 Subject: [PATCH 034/121] fixed bugs --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index a1432b10b3c..9a24aed9814 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -19,7 +19,6 @@ #include #include - namespace DB { @@ -55,11 +54,19 @@ bool 
TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const const auto & column_name = IdentifierSemantic::getColumnNamePart(identifier, to_strip); if (!column_name) return true; + for (const auto & column : columns) { if (*column_name == column.name) return false; } + const auto & hidden_columns = tables[table_pos].hidden_columns; + for (const auto & column : hidden_columns) + { + const String & known_name = column.name; + if (*column_name == known_name) + return false; + } return true; } From 55757dfb34bb0c27fd7d960912925cca0aae42e5 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 15:13:31 +0800 Subject: [PATCH 035/121] fixed bugs --- src/Interpreters/IdentifierSemantic.cpp | 34 ++++++++--- src/Interpreters/IdentifierSemantic.h | 3 +- .../TranslateQualifiedNamesVisitor.cpp | 59 ++++++++++++------- .../TranslateQualifiedNamesVisitor.h | 2 + 4 files changed, 68 insertions(+), 30 deletions(-) diff --git a/src/Interpreters/IdentifierSemantic.cpp b/src/Interpreters/IdentifierSemantic.cpp index 14cb43e2a2e..f0658cb7c9b 100644 --- a/src/Interpreters/IdentifierSemantic.cpp +++ b/src/Interpreters/IdentifierSemantic.cpp @@ -142,6 +142,33 @@ std::optional IdentifierSemantic::extractNestedName(const ASTIdentifier return {}; } +String IdentifierSemantic::extractNestedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & table) +{ + auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); + size_t to_strip = 0; + switch (match) + { + case IdentifierSemantic::ColumnMatch::TableName: + case IdentifierSemantic::ColumnMatch::AliasedTableName: + case IdentifierSemantic::ColumnMatch::TableAlias: + to_strip = 1; + break; + case IdentifierSemantic::ColumnMatch::DBAndTable: + to_strip = 2; + break; + default: + break; + } + String res; + for (size_t i = to_strip, sz = identifier.name_parts.size(); i < sz; ++i) + { + if (!res.empty()) + res += "."; + res += identifier.name_parts[i]; + } + return res; +} + bool IdentifierSemantic::doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table) { size_t num_components = identifier.name_parts.size(); @@ -188,13 +215,6 @@ IdentifierSemantic::ColumnMatch IdentifierSemantic::canReferColumnToTable(const return canReferColumnToTable(identifier, table_with_columns.table); } -std::optional IdentifierSemantic::getColumnNamePart(const ASTIdentifier & node, size_t pos) -{ - if (pos >= node.name_parts.size()) - return {}; - return node.name_parts[pos]; -} - /// Strip qualifications from left side of column name. /// Example: 'database.table.name' -> 'name'. 
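/// Likewise 'table.name' -> 'name' and 'alias.name' -> 'name', depending on how the identifier matched the table.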
void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) diff --git a/src/Interpreters/IdentifierSemantic.h b/src/Interpreters/IdentifierSemantic.h index 622813a8abe..c082e83b75c 100644 --- a/src/Interpreters/IdentifierSemantic.h +++ b/src/Interpreters/IdentifierSemantic.h @@ -40,11 +40,12 @@ struct IdentifierSemantic /// @returns name for column identifiers static std::optional getColumnName(const ASTIdentifier & node); static std::optional getColumnName(const ASTPtr & ast); - static std::optional getColumnNamePart(const ASTIdentifier & node, size_t pos); /// @returns name for 'not a column' identifiers static std::optional extractNestedName(const ASTIdentifier & identifier, const String & table_name); + static String extractNestedName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & table); + static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static ColumnMatch canReferColumnToTable(const ASTIdentifier & identifier, const TableWithColumnNamesAndTypes & table_with_columns); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 9a24aed9814..ac060a48270 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -1,3 +1,4 @@ +#include #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include @@ -19,6 +21,7 @@ #include #include + namespace DB { @@ -28,43 +31,55 @@ namespace ErrorCodes extern const int UNSUPPORTED_JOIN_KEYS; extern const int LOGICAL_ERROR; } +bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const String & name, const String & column_name, DataTypePtr column_type) +{ + if (name.size() < column_name.size()) + return false; + + if (std::strncmp(name.data(), column_name.data(), column_name.size()) != 0) + return false; + + if (name.size() == column_name.size()) + return true; + /// In case the type is named tuple, check the name recursively. 
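+    /// e.g. name = "a.y.i", column_name = "a", type Tuple(x String, y Tuple(i Int32, j String)):
+    /// the prefix "a" matches, so recurse into the tuple elements with the remaining path "y.i".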
+ if (const DataTypeTuple * type_tuple = typeid_cast(column_type.get())) + { + if (type_tuple->haveExplicitNames() && name.at(column_name.size()) == '.') + { + const Strings & names = type_tuple->getElementNames(); + const DataTypes & element_types = type_tuple->getElements(); + for (size_t i = 0; i < names.size(); ++i) + { + if (matchColumnName(name.substr(column_name.size() + 1, name.size() - column_name.size()), names[i], element_types[i])) + { + return true; + } + } + } + } + + return false; +} bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const { const auto & table = tables[table_pos].table; const auto & columns = tables[table_pos].columns; if (columns.empty()) return false; - - auto match = IdentifierSemantic::canReferColumnToTable(identifier, table); - size_t to_strip = 0; - switch (match) - { - case IdentifierSemantic::ColumnMatch::TableName: - case IdentifierSemantic::ColumnMatch::AliasedTableName: - case IdentifierSemantic::ColumnMatch::TableAlias: - to_strip = 1; - break; - case IdentifierSemantic::ColumnMatch::DBAndTable: - to_strip = 2; - break; - default: - break; - } - const auto & column_name = IdentifierSemantic::getColumnNamePart(identifier, to_strip); - if (!column_name) - return true; + + // Remove database and table name from the identifier'name + auto full_name = IdentifierSemantic::extractNestedName(identifier, table); for (const auto & column : columns) { - if (*column_name == column.name) + if (matchColumnName(full_name, column.name, column.type)) return false; } const auto & hidden_columns = tables[table_pos].hidden_columns; for (const auto & column : hidden_columns) { - const String & known_name = column.name; - if (*column_name == known_name) + if (matchColumnName(full_name, column.name, column.type)) return false; } return true; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 9c46d926eca..b1d4d94d01c 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -39,6 +39,7 @@ public: bool hasTable() const { return !tables.empty(); } bool processAsterisks() const { return hasTable() && has_columns; } bool unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const; + static bool matchColumnName(const String & name, const String & column_name, DataTypePtr column_type); }; static void visit(ASTPtr & ast, Data & data); @@ -53,6 +54,7 @@ private: static void visit(ASTFunction &, const ASTPtr &, Data &); static void extractJoinUsingColumns(ASTPtr ast, Data & data); + }; /// Visits AST for names qualification. 
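To make the new matching rule concrete, here is a minimal self-contained sketch of the same recursive prefix-matching idea. ToyType below is an illustrative stand-in for DataTypeTuple, not the real ClickHouse API; the sketch only mirrors the shape of matchColumnName above:

#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

/// Toy stand-in for a named Tuple data type (illustration only).
struct ToyType
{
    std::vector<std::string> names;                 /// element names; empty for a scalar
    std::vector<std::shared_ptr<ToyType>> elements; /// element types, parallel to names
};

/// True if `name` equals `column_name` or is a dotted path into its named-tuple elements.
static bool matchColumnName(const std::string & name, const std::string & column_name, const ToyType & type)
{
    if (name.size() < column_name.size())
        return false;
    if (std::strncmp(name.data(), column_name.data(), column_name.size()) != 0)
        return false;
    if (name.size() == column_name.size())
        return true;
    if (name[column_name.size()] != '.')
        return false;

    /// Recurse into the tuple elements with the remaining path.
    const std::string sub_name = name.substr(column_name.size() + 1);
    for (size_t i = 0; i < type.names.size(); ++i)
        if (matchColumnName(sub_name, type.names[i], *type.elements[i]))
            return true;
    return false;
}

int main()
{
    /// Model the column from the new test: a Tuple(x String, y Tuple(i Int32, j String)).
    auto scalar = std::make_shared<ToyType>();
    auto y = std::make_shared<ToyType>();
    y->names = {"i", "j"};
    y->elements = {scalar, scalar};

    ToyType a;
    a.names = {"x", "y"};
    a.elements = {scalar, y};

    std::cout << matchColumnName("a.y.i", "a", a) << '\n'; /// prints 1
    std::cout << matchColumnName("a.z", "a", a) << '\n';   /// prints 0
}

The design point: a qualified identifier such as t.a.y.i can be validated against the table's columns by peeling one name part at a time, without materializing every flattened subcolumn name up front.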
From 14609e043ceaf84c88fa733896ef8b851436077b Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 15:15:09 +0800 Subject: [PATCH 036/121] fixed style --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index ac060a48270..60753dcc1f5 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -35,10 +35,10 @@ bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const String & name, { if (name.size() < column_name.size()) return false; - + if (std::strncmp(name.data(), column_name.data(), column_name.size()) != 0) return false; - + if (name.size() == column_name.size()) return true; @@ -67,7 +67,7 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const const auto & columns = tables[table_pos].columns; if (columns.empty()) return false; - + // Remove database and table name from the identifier'name auto full_name = IdentifierSemantic::extractNestedName(identifier, table); From 4a0a6e8cde7efb460810c17abb5c2cd9caa154bc Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 15:17:56 +0800 Subject: [PATCH 037/121] small optimization --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 60753dcc1f5..9292ef1353e 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -49,9 +49,10 @@ bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const String & name, { const Strings & names = type_tuple->getElementNames(); const DataTypes & element_types = type_tuple->getElements(); + String sub_name = name.substr(column_name.size() + 1, name.size() - column_name.size()); for (size_t i = 0; i < names.size(); ++i) { - if (matchColumnName(name.substr(column_name.size() + 1, name.size() - column_name.size()), names[i], element_types[i])) + if (matchColumnName(sub_name, names[i], element_types[i])) { return true; } From 308ca625181a4c8cc044250c977939a434c5265b Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 15:37:54 +0800 Subject: [PATCH 038/121] fixed bugs --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 9292ef1353e..fd71dc01595 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -66,8 +66,6 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const { const auto & table = tables[table_pos].table; const auto & columns = tables[table_pos].columns; - if (columns.empty()) - return false; // Remove database and table name from the identifier'name auto full_name = IdentifierSemantic::extractNestedName(identifier, table); @@ -83,7 +81,7 @@ bool TranslateQualifiedNamesMatcher::Data::unknownColumn(size_t table_pos, const if (matchColumnName(full_name, column.name, column.type)) return false; } - return true; + return !columns.empty(); } bool TranslateQualifiedNamesMatcher::needChildVisit(ASTPtr & node, const ASTPtr & child) From 3d3c044d572be17197caaf007e05efd7818fff3b Mon 
Sep 17 00:00:00 2001 From: lgbo-ustc Date: Fri, 17 Jun 2022 16:39:28 +0800 Subject: [PATCH 039/121] rename files --- ... => 02337_check_translate_qualified_names_matcher.reference} | 0 ...er.sql => 02337_check_translate_qualified_names_matcher.sql} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename tests/queries/0_stateless/{02337_check_sranslate_qualified_names_matcher.reference => 02337_check_translate_qualified_names_matcher.reference} (100%) rename tests/queries/0_stateless/{02337_check_sranslate_qualified_names_matcher.sql => 02337_check_translate_qualified_names_matcher.sql} (77%) diff --git a/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.reference similarity index 100% rename from tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.reference rename to tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.reference diff --git a/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql similarity index 77% rename from tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql rename to tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql index f2b5c372ba7..09ab591f98a 100644 --- a/tests/queries/0_stateless/02337_check_sranslate_qualified_names_matcher.sql +++ b/tests/queries/0_stateless/02337_check_translate_qualified_names_matcher.sql @@ -4,7 +4,7 @@ CREATE TABLE nested_name_tuples ) ENGINE = Memory; -INSERT INTO nested_name_tuples VALUS(('asd', (12, 'ddd'))); +INSERT INTO nested_name_tuples VALUES(('asd', (12, 'ddd'))); SELECT t.a.y.i FROM nested_name_tuples as t; SELECT nested_name_tuples.a.y.i FROM nested_name_tuples as t; From c13bf03fe066950a48d85625d55d7d0552fad636 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 20 Jun 2022 09:56:21 +0800 Subject: [PATCH 040/121] fixed code style --- src/DataTypes/NestedUtils.cpp | 4 ++-- tests/integration/test_hive_query/test.py | 12 +++++++----- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp index 0df664ad408..2e429bcff10 100644 --- a/src/DataTypes/NestedUtils.cpp +++ b/src/DataTypes/NestedUtils.cpp @@ -311,7 +311,7 @@ std::optional NestedColumnExtractHelper::extractColumn( column.name = original_column_name; return {column}; } - else + else { return {}; } @@ -325,7 +325,7 @@ std::optional NestedColumnExtractHelper::extractColumn( ColumnsWithTypeAndName columns = {nested_table->getByName(new_column_name_prefix, case_insentive)}; Block sub_block(columns); nested_tables[new_column_name_prefix] = std::make_shared(Nested::flatten(sub_block)); - return extractColumn(original_column_name, new_column_name_prefix, nested_names.second); + return extractColumn(original_column_name, new_column_name_prefix, nested_names.second); } } diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 4bac09320ce..538c99d1a65 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -401,6 +401,7 @@ def test_cache_dir_use(started_cluster): ) assert result0 != "0" and result1 != "0" + def test_cache_dir_use(started_cluster): node = started_cluster.instances["h0_0_0"] result0 = node.exec_in_container( @@ -411,6 +412,7 @@ def test_cache_dir_use(started_cluster): ) assert result0 != "0" and result1 != "0" + 
 def test_hive_struct_type(started_cluster):
     node = started_cluster.instances["h0_0_0"]
     result = node.query(
@@ -419,17 +421,17 @@ def test_hive_struct_type(started_cluster):
         """
     )
     result = node.query(
-    """
+        """
 SELECT * FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1
-    """
+        """
     )
-    expected_result = """1	2	3	4	5	6.11	7.22	8	2022-02-20 14:47:04	2022-02-20	hello world	hello world	hello world	true	[1,2,3]	['hello world','hello world']	[1.1,1.2]	{'a':100,'b':200,'c':300}	{'a':'aa','b':'bb','c':'cc'}	{'a':111.1,'b':222.2,'c':333.3}	('aaa',200,333.3,(10,'xyz'))	2022-02-20"""
+    expected_result = """1	2	3	4	5	6.11	7.22	8	2022-02-20 14:47:04	2022-02-20	hello world	hello world	hello world	true	[1,2,3]	['hello world','hello world']	[1.1,1.2]	{'a':100,'b':200,'c':300}	{'a':'aa','b':'bb','c':'cc'}	{'a':111.1,'b':222.2,'c':333.3}	('aaa',200,333.3,(10,'xyz'))	2022-02-20"""
     assert result.strip() == expected_result

     result = node.query(
-    """
+        """
 SELECT day, f_struct.a FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1
-    """
+        """
     )
     expected_result = """2022-02-20	aaa	10"""
     assert result.strip() == expected_result

From fc641d9ce49bbd0e256ddb45d01cee02405f2f2d Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Tue, 21 Jun 2022 11:29:55 +0800
Subject: [PATCH 041/121] handle const columns in Nested::flatten; document
 NestedColumnExtractHelper

---
 src/DataTypes/NestedUtils.cpp | 9 +++++++--
 src/DataTypes/NestedUtils.h | 8 ++++++--
 src/Storages/Hive/StorageHive.cpp | 6 +++---
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp
index 2e429bcff10..9cab49a509d 100644
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@@ -4,6 +4,7 @@
 #include
 #include
 #include
+#include "Columns/IColumn.h"

 #include
 #include
@@ -119,7 +120,11 @@ Block flatten(const Block & block)
         {
             const DataTypes & element_types = type_tuple->getElements();
             const Strings & names = type_tuple->getElementNames();
-            const ColumnTuple * column_tuple = typeid_cast(elem.column.get());
+            const ColumnTuple * column_tuple;
+            if (isColumnConst(*elem.column))
+                column_tuple = typeid_cast(&assert_cast(*elem.column).getDataColumn());
+            else
+                column_tuple = typeid_cast(elem.column.get());
             size_t tuple_size = column_tuple->tupleSize();
             for (size_t i = 0; i < tuple_size; ++i)
             {
@@ -306,7 +311,7 @@ std::optional NestedColumnExtractHelper::extractColumn(
     {
         if (nested_table->has(new_column_name_prefix, case_insentive))
         {
-            ColumnWithTypeAndName column = nested_table->getByName(new_column_name_prefix, case_insentive);
+            ColumnWithTypeAndName column = *nested_table->findByName(new_column_name_prefix, case_insentive);
             if (case_insentive)
                 column.name = original_column_name;
             return {column};
diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h
index 39f73b65100..9473d30497a 100644
--- a/src/DataTypes/NestedUtils.h
+++ b/src/DataTypes/NestedUtils.h
@@ -18,8 +18,9 @@ namespace Nested
     /// Returns the prefix of the name to the first '.'. Or the name is unchanged if there is no dot.
     std::string extractTableName(const std::string & nested_name);

-    /// Replace Array(Tuple(...)) columns to a multiple of Array columns in a form of `column_name.element_name`.
-    /// only for named tuples that actually represent Nested structures.
+    /// Flatten a column of nested type into multiple columns:
+    /// 1) For a named Tuple t Tuple(x ..., y ..., ...), replace it with columns t.x ..., t.y ..., ...
+    /// 2) For an Array of named Tuple, e.g. a Array(Tuple(x ..., y ..., ...)), replace it with multiple Array columns: a.x ..., a.y ..., ...
     Block flatten(const Block & block);

     /// Collect Array columns in a form of `column_name.element_name` to single Array(Tuple(...)) column.
@@ -35,6 +36,9 @@ namespace Nested
     std::unordered_set getAllTableNames(const Block & block, bool to_lower_case = false);
 }

+/// Use this class to extract element columns from columns of nested type in a block, e.g. named Tuple.
+/// It can extract a column from a multiply nested type column, e.g. a named Tuple inside a named Tuple.
+/// Keeps some intermediate data to avoid rebuilding it multiple times.
 class NestedColumnExtractHelper
 {
 public:
diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp
index 60936f6a3f4..6d298c0033c 100644
--- a/src/Storages/Hive/StorageHive.cpp
+++ b/src/Storages/Hive/StorageHive.cpp
@@ -565,8 +565,8 @@ HiveFiles StorageHive::collectHiveFilesFromPartition(
     const ContextPtr & context_,
     PruneLevel prune_level) const
 {
-    //LOG_DEBUG(
-    //    log, "Collect hive files from partition {}, prune_level:{}", boost::join(partition.values, ","), pruneLevelToString(prune_level));
+    LOG_DEBUG(
+        log, "Collect hive files from partition {}, prune_level:{}", boost::join(partition.values, ","), pruneLevelToString(prune_level));

     /// Skip partition "__HIVE_DEFAULT_PARTITION__"
     bool has_default_partition = false;
@@ -794,7 +794,7 @@ Pipe StorageHive::read(
             auto subset_column = nested_columns_extractor.extractColumn(column);
             if (subset_column)
             {
-                sample_block.insert(*subset_column);
+                sample_block.insert(std::move(*subset_column));
                 continue;
             }
         }

From 52db1b35a10f744839311f18b2927221bc0118d6 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Wed, 22 Jun 2022 16:25:57 +0000
Subject: [PATCH 042/121] improve performance of insertion to columns of type
 JSON

---
 src/AggregateFunctions/Helpers.h | 28 ---
 src/Columns/ColumnObject.cpp | 30 +--
 src/Columns/ColumnObject.h | 2 +
 src/DataTypes/FieldToDataType.cpp | 79 ++++---
 src/DataTypes/FieldToDataType.h | 10 +-
 src/DataTypes/IDataType.h | 27 +++
 src/DataTypes/ObjectUtils.cpp | 2 +-
 .../Serializations/SerializationObject.cpp | 16 +-
 src/DataTypes/getLeastSupertype.cpp | 201 +++++++++---------
 src/DataTypes/getLeastSupertype.h | 29 ++-
 src/Interpreters/RowRefs.cpp | 1 -
 src/Storages/MergeTree/KeyCondition.cpp | 2 +-
 tests/performance/json_type.xml | 17 ++
 .../01825_type_json_parallel_insert.reference | 2 +-
 .../01825_type_json_parallel_insert.sql | 4 +-
 15 files changed, 251 insertions(+), 199 deletions(-)
 create mode 100644 tests/performance/json_type.xml

diff --git a/src/AggregateFunctions/Helpers.h b/src/AggregateFunctions/Helpers.h
index 77660c54d32..6e140f4b9cf 100644
--- a/src/AggregateFunctions/Helpers.h
+++ b/src/AggregateFunctions/Helpers.h
@@ -3,34 +3,6 @@
 #include
 #include

-#define FOR_BASIC_NUMERIC_TYPES(M) \
-    M(UInt8) \
-    M(UInt16) \
-    M(UInt32) \
-    M(UInt64) \
-    M(Int8) \
-    M(Int16) \
-    M(Int32) \
-    M(Int64) \
-    M(Float32) \
-    M(Float64)
-
-#define FOR_NUMERIC_TYPES(M) \
-    M(UInt8) \
-    M(UInt16) \
-    M(UInt32) \
-    M(UInt64) \
-    M(UInt128) \
-    M(UInt256) \
-    M(Int8) \
-    M(Int16) \
-    M(Int32) \
-    M(Int64) \
-    M(Int128) \
-    M(Int256) \
-    M(Float32) \
-    M(Float64)
-
 namespace DB
 {
 struct Settings;
diff --git a/src/Columns/ColumnObject.cpp b/src/Columns/ColumnObject.cpp
index d3e68273d03..b52a5aab256 100644
--- a/src/Columns/ColumnObject.cpp
+++ b/src/Columns/ColumnObject.cpp
@@ -138,7 +138,7 @@
type_indexes.insert(TypeToTypeIndex>); } - DataTypePtr getScalarType() const { return getLeastSupertype(type_indexes, true); } + DataTypePtr getScalarType() const { return getLeastSupertypeOrString(type_indexes); } bool haveNulls() const { return have_nulls; } bool needConvertField() const { return field_types.size() > 1; } @@ -167,6 +167,7 @@ FieldInfo getFieldInfo(const Field & field) ColumnObject::Subcolumn::Subcolumn(MutableColumnPtr && data_, bool is_nullable_) : least_common_type(getDataTypeByColumn(*data_)) , is_nullable(is_nullable_) + , num_rows(data_->size()) { data.push_back(std::move(data_)); } @@ -176,15 +177,13 @@ ColumnObject::Subcolumn::Subcolumn( : least_common_type(std::make_shared()) , is_nullable(is_nullable_) , num_of_defaults_in_prefix(size_) + , num_rows(size_) { } size_t ColumnObject::Subcolumn::size() const { - size_t res = num_of_defaults_in_prefix; - for (const auto & part : data) - res += part->size(); - return res; + return num_rows; } size_t ColumnObject::Subcolumn::byteSize() const @@ -321,7 +320,7 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) { if (isConversionRequiredBetweenIntegers(*base_type, *least_common_base_type)) { - base_type = getLeastSupertype(DataTypes{std::move(base_type), least_common_base_type}, true); + base_type = getLeastSupertypeOrString(DataTypes{std::move(base_type), least_common_base_type}); type_changed = true; if (!least_common_base_type->equals(*base_type)) addNewColumnPart(createArrayOfType(std::move(base_type), value_dim)); @@ -332,12 +331,14 @@ void ColumnObject::Subcolumn::insert(Field field, FieldInfo info) field = convertFieldToTypeOrThrow(field, *least_common_type.get()); data.back()->insert(field); + ++num_rows; } void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t start, size_t length) { assert(start + length <= src.size()); size_t end = start + length; + num_rows += length; if (data.empty()) { @@ -345,7 +346,7 @@ void ColumnObject::Subcolumn::insertRangeFrom(const Subcolumn & src, size_t star } else if (!least_common_type.get()->equals(*src.getLeastCommonType())) { - auto new_least_common_type = getLeastSupertype(DataTypes{least_common_type.get(), src.getLeastCommonType()}, true); + auto new_least_common_type = getLeastSupertypeOrString(DataTypes{least_common_type.get(), src.getLeastCommonType()}); if (!new_least_common_type->equals(*least_common_type.get())) addNewColumnPart(std::move(new_least_common_type)); } @@ -487,6 +488,8 @@ void ColumnObject::Subcolumn::insertDefault() ++num_of_defaults_in_prefix; else data.back()->insertDefault(); + + ++num_rows; } void ColumnObject::Subcolumn::insertManyDefaults(size_t length) @@ -495,12 +498,15 @@ void ColumnObject::Subcolumn::insertManyDefaults(size_t length) num_of_defaults_in_prefix += length; else data.back()->insertManyDefaults(length); + + num_rows += length; } void ColumnObject::Subcolumn::popBack(size_t n) { assert(n <= size()); + num_rows -= n; size_t num_removed = 0; for (auto it = data.rbegin(); it != data.rend(); ++it) { @@ -559,15 +565,11 @@ ColumnObject::Subcolumn ColumnObject::Subcolumn::recreateWithDefaultValues(const if (is_nullable) scalar_type = makeNullable(scalar_type); - Subcolumn new_subcolumn; + Subcolumn new_subcolumn(*this); new_subcolumn.least_common_type = LeastCommonType{createArrayOfType(scalar_type, field_info.num_dimensions)}; - new_subcolumn.is_nullable = is_nullable; - new_subcolumn.num_of_defaults_in_prefix = num_of_defaults_in_prefix; - new_subcolumn.data.reserve(data.size()); - for (const auto & 
part : data) - new_subcolumn.data.push_back(recreateColumnWithDefaultValues( - part, scalar_type, field_info.num_dimensions)); + for (auto & part : new_subcolumn.data) + part = recreateColumnWithDefaultValues(part, scalar_type, field_info.num_dimensions); return new_subcolumn; } diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index 89e42183ea0..4dc5bb5ce24 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -146,6 +146,8 @@ public: /// least common type and we count number of defaults in prefix, /// which will be converted to the default type of final common type. size_t num_of_defaults_in_prefix = 0; + + size_t num_rows = 0; }; using Subcolumns = SubcolumnsTree; diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 283d1b1e41a..00b4665af94 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -22,13 +22,14 @@ namespace ErrorCodes extern const int EMPTY_DATA_PASSED; } - -DataTypePtr FieldToDataType::operator() (const Null &) const +template +DataTypePtr FieldToDataType::operator() (const Null &) const { return std::make_shared(std::make_shared()); } -DataTypePtr FieldToDataType::operator() (const UInt64 & x) const +template +DataTypePtr FieldToDataType::operator() (const UInt64 & x) const { if (x <= std::numeric_limits::max()) return std::make_shared(); if (x <= std::numeric_limits::max()) return std::make_shared(); @@ -36,7 +37,8 @@ DataTypePtr FieldToDataType::operator() (const UInt64 & x) const return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const Int64 & x) const +template +DataTypePtr FieldToDataType::operator() (const Int64 & x) const { if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); if (x <= std::numeric_limits::max() && x >= std::numeric_limits::min()) return std::make_shared(); @@ -44,77 +46,90 @@ DataTypePtr FieldToDataType::operator() (const Int64 & x) const return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const Float64 &) const +template +DataTypePtr FieldToDataType::operator() (const Float64 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const UInt128 &) const +template +DataTypePtr FieldToDataType::operator() (const UInt128 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const Int128 &) const +template +DataTypePtr FieldToDataType::operator() (const Int128 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const UInt256 &) const +template +DataTypePtr FieldToDataType::operator() (const UInt256 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const Int256 &) const +template +DataTypePtr FieldToDataType::operator() (const Int256 &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const UUID &) const +template +DataTypePtr FieldToDataType::operator() (const UUID &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const String &) const +template +DataTypePtr FieldToDataType::operator() (const String &) const { return std::make_shared(); } -DataTypePtr FieldToDataType::operator() (const DecimalField & x) const +template +DataTypePtr FieldToDataType::operator() (const DecimalField & x) const { using Type = DataTypeDecimal; return std::make_shared(Type::maxPrecision(), x.getScale()); } -DataTypePtr FieldToDataType::operator() (const DecimalField & x) const +template +DataTypePtr 
FieldToDataType::operator() (const DecimalField & x) const { using Type = DataTypeDecimal; return std::make_shared(Type::maxPrecision(), x.getScale()); } -DataTypePtr FieldToDataType::operator() (const DecimalField & x) const +template +DataTypePtr FieldToDataType::operator() (const DecimalField & x) const { using Type = DataTypeDecimal; return std::make_shared(Type::maxPrecision(), x.getScale()); } -DataTypePtr FieldToDataType::operator() (const DecimalField & x) const +template +DataTypePtr FieldToDataType::operator() (const DecimalField & x) const { using Type = DataTypeDecimal; return std::make_shared(Type::maxPrecision(), x.getScale()); } -DataTypePtr FieldToDataType::operator() (const Array & x) const +template +DataTypePtr FieldToDataType::operator() (const Array & x) const { DataTypes element_types; element_types.reserve(x.size()); for (const Field & elem : x) - element_types.emplace_back(applyVisitor(FieldToDataType(allow_convertion_to_string), elem)); + element_types.emplace_back(applyVisitor(*this, elem)); - return std::make_shared(getLeastSupertype(element_types, allow_convertion_to_string)); + return std::make_shared(getLeastSupertype(element_types)); } -DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const +template +DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const { if (tuple.empty()) throw Exception("Cannot infer type of an empty tuple", ErrorCodes::EMPTY_DATA_PASSED); @@ -123,12 +138,13 @@ DataTypePtr FieldToDataType::operator() (const Tuple & tuple) const element_types.reserve(tuple.size()); for (const auto & element : tuple) - element_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), element)); + element_types.push_back(applyVisitor(*this, element)); return std::make_shared(element_types); } -DataTypePtr FieldToDataType::operator() (const Map & map) const +template +DataTypePtr FieldToDataType::operator() (const Map & map) const { DataTypes key_types; DataTypes value_types; @@ -139,30 +155,37 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const { const auto & tuple = elem.safeGet(); assert(tuple.size() == 2); - key_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), tuple[0])); - value_types.push_back(applyVisitor(FieldToDataType(allow_convertion_to_string), tuple[1])); + key_types.push_back(applyVisitor(*this, tuple[0])); + value_types.push_back(applyVisitor(*this, tuple[1])); } return std::make_shared( - getLeastSupertype(key_types, allow_convertion_to_string), - getLeastSupertype(value_types, allow_convertion_to_string)); + getLeastSupertype(key_types), + getLeastSupertype(value_types)); } -DataTypePtr FieldToDataType::operator() (const Object &) const +template +DataTypePtr FieldToDataType::operator() (const Object &) const { /// TODO: Do we need different parameters for type Object? 
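    /// For now this always infers DataTypeObject("json", /*is_nullable=*/ false), as returned below.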
return std::make_shared("json", false); } -DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const +template +DataTypePtr FieldToDataType::operator() (const AggregateFunctionStateData & x) const { const auto & name = static_cast(x).name; return DataTypeFactory::instance().get(name); } -DataTypePtr FieldToDataType::operator()(const bool &) const +template +DataTypePtr FieldToDataType::operator()(const bool &) const { return DataTypeFactory::instance().get("Bool"); } +template class FieldToDataType; +template class FieldToDataType; +template class FieldToDataType; + } diff --git a/src/DataTypes/FieldToDataType.h b/src/DataTypes/FieldToDataType.h index 1922ac8b746..5e66fe420ad 100644 --- a/src/DataTypes/FieldToDataType.h +++ b/src/DataTypes/FieldToDataType.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -17,14 +18,10 @@ using DataTypePtr = std::shared_ptr; * Note that you still have to convert Field to corresponding data type before inserting to columns * (for example, this is necessary to convert elements of Array to common type). */ +template class FieldToDataType : public StaticVisitor { public: - FieldToDataType(bool allow_convertion_to_string_ = false) - : allow_convertion_to_string(allow_convertion_to_string_) - { - } - DataTypePtr operator() (const Null & x) const; DataTypePtr operator() (const UInt64 & x) const; DataTypePtr operator() (const UInt128 & x) const; @@ -45,9 +42,6 @@ public: DataTypePtr operator() (const UInt256 & x) const; DataTypePtr operator() (const Int256 & x) const; DataTypePtr operator() (const bool & x) const; - -private: - bool allow_convertion_to_string; }; } diff --git a/src/DataTypes/IDataType.h b/src/DataTypes/IDataType.h index 420ef61a13f..fce8906abe5 100644 --- a/src/DataTypes/IDataType.h +++ b/src/DataTypes/IDataType.h @@ -565,4 +565,31 @@ class DataTypeEnum; template inline constexpr bool IsDataTypeEnum> = true; +#define FOR_BASIC_NUMERIC_TYPES(M) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(Int8) \ + M(Int16) \ + M(Int32) \ + M(Int64) \ + M(Float32) \ + M(Float64) + +#define FOR_NUMERIC_TYPES(M) \ + M(UInt8) \ + M(UInt16) \ + M(UInt32) \ + M(UInt64) \ + M(UInt128) \ + M(UInt256) \ + M(Int8) \ + M(Int16) \ + M(Int32) \ + M(Int64) \ + M(Int128) \ + M(Int256) \ + M(Float32) \ + M(Float64) } diff --git a/src/DataTypes/ObjectUtils.cpp b/src/DataTypes/ObjectUtils.cpp index df639ae7aab..b81c8c7a033 100644 --- a/src/DataTypes/ObjectUtils.cpp +++ b/src/DataTypes/ObjectUtils.cpp @@ -261,7 +261,7 @@ DataTypePtr getLeastCommonTypeForObject(const DataTypes & types, bool check_ambi key.getPath(), subtypes[0]->getName(), subtypes[i]->getName()); tuple_paths.emplace_back(key); - tuple_types.emplace_back(getLeastSupertype(subtypes, /*allow_conversion_to_string=*/ true)); + tuple_types.emplace_back(getLeastSupertypeOrString(subtypes)); } if (tuple_paths.empty()) diff --git a/src/DataTypes/Serializations/SerializationObject.cpp b/src/DataTypes/Serializations/SerializationObject.cpp index 85831df271a..cf49fa8798d 100644 --- a/src/DataTypes/Serializations/SerializationObject.cpp +++ b/src/DataTypes/Serializations/SerializationObject.cpp @@ -61,29 +61,23 @@ void SerializationObject::deserializeTextImpl(IColumn & column, Reader & auto & [paths, values] = *result; assert(paths.size() == values.size()); - HashSet paths_set; - size_t column_size = column_object.size(); - + size_t old_column_size = column_object.size(); for (size_t i = 0; i < paths.size(); ++i) { auto field_info = getFieldInfo(values[i]); if 
(isNothing(field_info.scalar_type)) continue; - if (!paths_set.insert(paths[i].getPath()).second) - throw Exception(ErrorCodes::INCORRECT_DATA, - "Object has ambiguous path: {}", paths[i].getPath()); - if (!column_object.hasSubcolumn(paths[i])) { if (paths[i].hasNested()) - column_object.addNestedSubcolumn(paths[i], field_info, column_size); + column_object.addNestedSubcolumn(paths[i], field_info, old_column_size); else - column_object.addSubcolumn(paths[i], column_size); + column_object.addSubcolumn(paths[i], old_column_size); } auto & subcolumn = column_object.getSubcolumn(paths[i]); - assert(subcolumn.size() == column_size); + assert(subcolumn.size() == old_column_size); subcolumn.insert(std::move(values[i]), std::move(field_info)); } @@ -92,7 +86,7 @@ void SerializationObject::deserializeTextImpl(IColumn & column, Reader & const auto & subcolumns = column_object.getSubcolumns(); for (const auto & entry : subcolumns) { - if (!paths_set.has(entry->path.getPath())) + if (entry->data.size() == old_column_size) { bool inserted = column_object.tryInsertDefaultFromNested(entry); if (!inserted) diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 14cfafb1ef3..8c6dba5a339 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -55,16 +55,24 @@ String getExceptionMessagePrefix(const DataTypes & types) return res.str(); } -DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_string) +template +DataTypePtr throwOrReturn(const DataTypes & types, std::string_view message_suffix, int error_code) { - auto throw_or_return = [&](std::string_view message, int error_code) - { - if (allow_conversion_to_string) - return std::make_shared(); + if constexpr (on_error == LeastSupertypeOnError::String) + return std::make_shared(); - throw Exception(String(message), error_code); - }; + if constexpr (on_error == LeastSupertypeOnError::Null) + return nullptr; + if (message_suffix.empty()) + throw Exception(error_code, getExceptionMessagePrefix(types)); + + throw Exception(error_code, "{} {}", getExceptionMessagePrefix(types), message_suffix); +} + +template +DataTypePtr getNumericType(const TypeIndexSet & types) +{ bool all_numbers = true; size_t max_bits_of_signed_integer = 0; @@ -107,14 +115,14 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ maximize(max_mantissa_bits_of_floating, 24); else if (type == TypeIndex::Float64) maximize(max_mantissa_bits_of_floating, 53); - else + else if (type != TypeIndex::Nothing) all_numbers = false; } if (max_bits_of_signed_integer || max_bits_of_unsigned_integer || max_mantissa_bits_of_floating) { if (!all_numbers) - return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE); + return throwOrReturn(types, "because some of them are numbers and some of them are not", ErrorCodes::NO_COMMON_TYPE); /// If there are signed and unsigned types of same bit-width, the result must be signed number with at least one more bit. /// Example, common of Int32, UInt32 = Int64. 
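/// Another illustrative case: common of UInt64 and Int8 needs 65 bits of signed range,
/// so the integer-widening ladder below resolves it to Int128.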
@@ -129,10 +137,9 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ if (min_bit_width_of_integer != 64) ++min_bit_width_of_integer; else - return throw_or_return( - getExceptionMessagePrefix(types) - + " because some of them are signed integers and some are unsigned integers," - " but there is no signed integer type, that can exactly represent all required unsigned integer values", + return throwOrReturn(types, + "because some of them are signed integers and some are unsigned integers," + " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE); } @@ -145,8 +152,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ else if (min_mantissa_bits <= 53) return std::make_shared(); else - return throw_or_return(getExceptionMessagePrefix(types) - + " because some of them are integers and some are floating point," + return throwOrReturn(types, + " because some of them are integers and some are floating point," " but there is no floating point type, that can exactly represent all required integers", ErrorCodes::NO_COMMON_TYPE); } @@ -166,8 +173,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ else if (min_bit_width_of_integer <= 256) return std::make_shared(); else - return throw_or_return(getExceptionMessagePrefix(types) - + " because some of them are signed integers and some are unsigned integers," + throwOrReturn(types, + " because some of them are signed integers and some are unsigned integers," " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE); } @@ -186,9 +193,8 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ else if (min_bit_width_of_integer <= 256) return std::make_shared(); else - return throw_or_return("Logical error: " + getExceptionMessagePrefix(types) - + " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE); - + throwOrReturn(types, + " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE); } } @@ -197,16 +203,9 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_ } -DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string) +template +DataTypePtr getLeastSupertype(const DataTypes & types) { - auto throw_or_return = [&](std::string_view message, int error_code) - { - if (allow_conversion_to_string) - return std::make_shared(); - - throw Exception(String(message), error_code); - }; - /// Trivial cases if (types.empty()) @@ -243,7 +242,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_ non_nothing_types.emplace_back(type); if (non_nothing_types.size() < types.size()) - return getLeastSupertype(non_nothing_types, allow_conversion_to_string); + return getLeastSupertype(non_nothing_types); } /// For Arrays @@ -268,9 +267,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_ if (have_array) { if (!all_arrays) - return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE); + return throwOrReturn(types, "because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE); - return std::make_shared(getLeastSupertype(nested_types, 
@@ -197,16 +203,9 @@ DataTypePtr getNumericType(const TypeIndexSet & types, bool allow_conversion_to_
     }
 
-DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string)
+template <LeastSupertypeOnError on_error>
+DataTypePtr getLeastSupertype(const DataTypes & types)
 {
-    auto throw_or_return = [&](std::string_view message, int error_code)
-    {
-        if (allow_conversion_to_string)
-            return std::make_shared<DataTypeString>();
-
-        throw Exception(String(message), error_code);
-    };
-
     /// Trivial cases
 
     if (types.empty())
@@ -243,7 +242,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
             non_nothing_types.emplace_back(type);
 
         if (non_nothing_types.size() < types.size())
-            return getLeastSupertype(non_nothing_types, allow_conversion_to_string);
+            return getLeastSupertype<on_error>(non_nothing_types);
     }
 
     /// For Arrays
@@ -268,9 +267,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         if (have_array)
         {
             if (!all_arrays)
-                return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because some of them are Array and some of them are not", ErrorCodes::NO_COMMON_TYPE);
 
-            return std::make_shared<DataTypeArray>(getLeastSupertype(nested_types, allow_conversion_to_string));
+            return std::make_shared<DataTypeArray>(getLeastSupertype<on_error>(nested_types));
         }
     }
 
@@ -294,7 +293,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
                     nested_types[elem_idx].reserve(types.size());
             }
             else if (tuple_size != type_tuple->getElements().size())
-                return throw_or_return(getExceptionMessagePrefix(types) + " because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because Tuples have different sizes", ErrorCodes::NO_COMMON_TYPE);
 
             have_tuple = true;
 
@@ -308,11 +307,11 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         if (have_tuple)
         {
             if (!all_tuples)
-                return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because some of them are Tuple and some of them are not", ErrorCodes::NO_COMMON_TYPE);
 
             DataTypes common_tuple_types(tuple_size);
             for (size_t elem_idx = 0; elem_idx < tuple_size; ++elem_idx)
-                common_tuple_types[elem_idx] = getLeastSupertype(nested_types[elem_idx], allow_conversion_to_string);
+                common_tuple_types[elem_idx] = getLeastSupertype<on_error>(nested_types[elem_idx]);
 
             return std::make_shared<DataTypeTuple>(common_tuple_types);
         }
@@ -342,11 +341,11 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         if (have_maps)
         {
             if (!all_maps)
-                return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because some of them are Maps and some of them are not", ErrorCodes::NO_COMMON_TYPE);
 
             return std::make_shared<DataTypeMap>(
-                getLeastSupertype(key_types, allow_conversion_to_string),
-                getLeastSupertype(value_types, allow_conversion_to_string));
+                getLeastSupertype<on_error>(key_types),
+                getLeastSupertype<on_error>(value_types));
         }
     }
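For the composite types handled above, the supertype is computed recursively over the nested types. An editorial sketch (assuming the ClickHouse source tree; `composite_example` is an illustrative wrapper):

    #include <DataTypes/DataTypeArray.h>
    #include <DataTypes/DataTypesNumber.h>
    #include <DataTypes/getLeastSupertype.h>

    using namespace DB;

    void composite_example()
    {
        /// Array(UInt8) + Array(Int8) -> Array(Int16): the element types are
        /// combined by a recursive getLeastSupertype call.
        auto t = getLeastSupertype(DataTypes{
            std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt8>()),
            std::make_shared<DataTypeArray>(std::make_shared<DataTypeInt8>())});
        /// t->getName() == "Array(Int16)"

        /// An Array mixed with a non-Array has no supertype; depending on the
        /// on_error mode this throws, yields String, or yields nullptr.
    }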
@@ -377,9 +376,9 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         if (have_low_cardinality)
         {
             if (have_not_low_cardinality)
-                return getLeastSupertype(nested_types, allow_conversion_to_string);
+                return getLeastSupertype<on_error>(nested_types);
             else
-                return std::make_shared<DataTypeLowCardinality>(getLeastSupertype(nested_types, allow_conversion_to_string));
+                return std::make_shared<DataTypeLowCardinality>(getLeastSupertype<on_error>(nested_types));
         }
     }
 
@@ -405,7 +404,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
 
         if (have_nullable)
         {
-            return std::make_shared<DataTypeNullable>(getLeastSupertype(nested_types, allow_conversion_to_string));
+            return std::make_shared<DataTypeNullable>(getLeastSupertype<on_error>(nested_types));
         }
     }
 
@@ -425,7 +424,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         {
             bool all_strings = type_ids.size() == (have_string + have_fixed_string);
             if (!all_strings)
-                return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because some of them are String/FixedString and some of them are not", ErrorCodes::NO_COMMON_TYPE);
 
             return std::make_shared<DataTypeString>();
         }
@@ -442,8 +441,8 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
         {
             bool all_date_or_datetime = type_ids.size() == (have_date + have_date32 + have_datetime + have_datetime64);
             if (!all_date_or_datetime)
-                return throw_or_return(getExceptionMessagePrefix(types)
-                    + " because some of them are Date/Date32/DateTime/DateTime64 and some of them are not",
+                return throwOrReturn<on_error>(types,
+                    "because some of them are Date/Date32/DateTime/DateTime64 and some of them are not",
                     ErrorCodes::NO_COMMON_TYPE);
 
             if (have_datetime64 == 0 && have_date32 == 0)
@@ -520,8 +519,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
             }
 
             if (num_supported != type_ids.size())
-                return throw_or_return(getExceptionMessagePrefix(types) + " because some of them have no lossless conversion to Decimal",
-                    ErrorCodes::NO_COMMON_TYPE);
+                return throwOrReturn<on_error>(types, "because some of them have no lossless conversion to Decimal", ErrorCodes::NO_COMMON_TYPE);
 
             UInt32 max_scale = 0;
             for (const auto & type : types)
@@ -543,7 +541,7 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
             }
 
             if (min_precision > DataTypeDecimal::maxPrecision())
-                return throw_or_return(getExceptionMessagePrefix(types) + " because the least supertype is Decimal("
+                return throwOrReturn<on_error>(types, "because the least supertype is Decimal("
                     + toString(min_precision) + ',' + toString(max_scale) + ')',
                     ErrorCodes::NO_COMMON_TYPE);
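The Decimal branch picks the smallest Decimal whose precision covers both the largest scale and the largest number of integer digits. A worked editorial example (assuming the ClickHouse source tree; `decimal_example` is an illustrative wrapper):

    #include <DataTypes/DataTypesDecimal.h>
    #include <DataTypes/getLeastSupertype.h>

    using namespace DB;

    void decimal_example()
    {
        /// Decimal(9, 2) keeps 7 integer digits, Decimal(18, 6) keeps 12;
        /// max_scale = 6, so min_precision = 12 + 6 = 18 -> Decimal(18, 6).
        auto t = tryGetLeastSupertype(DataTypes{
            std::make_shared<DataTypeDecimal<Decimal32>>(9, 2),
            std::make_shared<DataTypeDecimal<Decimal64>>(18, 6)});
        /// t->getName() == "Decimal(18, 6)"
    }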
@@ -557,68 +555,77 @@ DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_
 
     /// For numeric types, the most complicated part.
     {
-        auto numeric_type = getNumericType(type_ids, allow_conversion_to_string);
+        auto numeric_type = getNumericType<on_error>(type_ids);
         if (numeric_type)
             return numeric_type;
     }
 
     /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
-    return throw_or_return(getExceptionMessagePrefix(types), ErrorCodes::NO_COMMON_TYPE);
+    return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
 }
 
-DataTypePtr getLeastSupertype(const TypeIndexSet & types, bool allow_conversion_to_string)
+DataTypePtr getLeastSupertypeOrString(const DataTypes & types)
 {
-    auto throw_or_return = [&](std::string_view message, int error_code)
-    {
-        if (allow_conversion_to_string)
-            return std::make_shared<DataTypeString>();
-
-        throw Exception(String(message), error_code);
-    };
-
-    TypeIndexSet types_set;
-    for (const auto & type : types)
-    {
-        if (WhichDataType(type).isNothing())
-            continue;
-
-        if (!WhichDataType(type).isSimple())
-            throw Exception(ErrorCodes::NO_COMMON_TYPE,
-                "Cannot get common type by type ids with parametric type {}", typeToString(type));
-
-        types_set.insert(type);
-    }
-
-    if (types_set.empty())
-        return std::make_shared<DataTypeNothing>();
-
-    if (types.contains(TypeIndex::String))
-    {
-        if (types.size() != 1)
-            return throw_or_return(getExceptionMessagePrefix(types) + " because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE);
-
-        return std::make_shared<DataTypeString>();
-    }
-
-    /// For numeric types, the most complicated part.
-    auto numeric_type = getNumericType(types, allow_conversion_to_string);
-    if (numeric_type)
-        return numeric_type;
-
-    /// All other data types (UUID, AggregateFunction, Enum...) are compatible only if they are the same (checked in trivial cases).
-    return throw_or_return(getExceptionMessagePrefix(types), ErrorCodes::NO_COMMON_TYPE);
+    return getLeastSupertype<LeastSupertypeOnError::String>(types);
 }
 
 DataTypePtr tryGetLeastSupertype(const DataTypes & types)
 {
-    try
-    {
-        return getLeastSupertype(types);
-    }
-    catch (...)
-    {
-        return nullptr;
-    }
+    return getLeastSupertype<LeastSupertypeOnError::Null>(types);
 }
 
+template <LeastSupertypeOnError on_error>
+DataTypePtr getLeastSupertype(const TypeIndexSet & types)
+{
+    if (types.empty())
+        return std::make_shared<DataTypeNothing>();
+
+    if (types.size() == 1)
+    {
+        WhichDataType which(*types.begin());
+        if (which.isNothing())
+            return std::make_shared<DataTypeNothing>();
+
+    #define DISPATCH(TYPE) \
+        if (which.idx == TypeIndex::TYPE) \
+            return std::make_shared<DataTypeNumber<TYPE>>();
+
+        FOR_NUMERIC_TYPES(DISPATCH)
+    #undef DISPATCH
+
+        if (which.isString())
+            return std::make_shared<DataTypeString>();
+
+        return throwOrReturn<on_error>(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE);
+    }
+
+    if (types.contains(TypeIndex::String))
+    {
+        bool only_string = types.size() == 2 && types.contains(TypeIndex::Nothing);
+        if (!only_string)
+            return throwOrReturn<on_error>(types, "because some of them are String and some of them are not", ErrorCodes::NO_COMMON_TYPE);
+
+        return std::make_shared<DataTypeString>();
+    }
+
+    auto numeric_type = getNumericType<on_error>(types);
+    if (numeric_type)
+        return numeric_type;
+
+    return throwOrReturn<on_error>(types, "", ErrorCodes::NO_COMMON_TYPE);
+}
+
+DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types)
+{
+    return getLeastSupertype<LeastSupertypeOnError::String>(types);
+}
+
+DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types)
+{
+    return getLeastSupertype<LeastSupertypeOnError::Null>(types);
+}
+
+template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const DataTypes & types);
+template DataTypePtr getLeastSupertype<LeastSupertypeOnError::Throw>(const TypeIndexSet & types);
+
 }
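The FOR_NUMERIC_TYPES(DISPATCH) block above expands to one branch per numeric type; for the first two unsigned types the expansion looks like this (an editorial illustration of the macro, not additional code in the patch):

    if (which.idx == TypeIndex::UInt8)
        return std::make_shared<DataTypeNumber<UInt8>>();
    if (which.idx == TypeIndex::UInt16)
        return std::make_shared<DataTypeNumber<UInt16>>();
    /// ...and so on for the remaining integer and floating point types.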
diff --git a/src/DataTypes/getLeastSupertype.h b/src/DataTypes/getLeastSupertype.h
index 5444bb34d06..2ef4a0e6850 100644
--- a/src/DataTypes/getLeastSupertype.h
+++ b/src/DataTypes/getLeastSupertype.h
@@ -1,24 +1,39 @@
 #pragma once
-
 #include
-
 namespace DB
 {
 
+enum class LeastSupertypeOnError
+{
+    Throw,
+    String,
+    Null,
+};
+
 /** Get data type that covers all possible values of passed data types.
- * If there is no such data type, throws an exception
- * or if 'allow_conversion_to_string' is true returns String as common type.
+ * If there is no such data type, throws an exception.
  *
 * Examples: least common supertype for UInt8, Int8 - Int16.
 * Examples: there is no least common supertype for Array(UInt8), Int8.
 */
-DataTypePtr getLeastSupertype(const DataTypes & types, bool allow_conversion_to_string = false);
+template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
+DataTypePtr getLeastSupertype(const DataTypes & types);
 
-using TypeIndexSet = std::unordered_set<TypeIndex>;
-DataTypePtr getLeastSupertype(const TypeIndexSet & types, bool allow_conversion_to_string = false);
+/// Same as above but return String type instead of throwing exception.
+/// All types can be casted to String, because they can be serialized to String.
+DataTypePtr getLeastSupertypeOrString(const DataTypes & types);
 
 /// Same as above but return nullptr instead of throwing exception.
 DataTypePtr tryGetLeastSupertype(const DataTypes & types);
 
+using TypeIndexSet = std::unordered_set<TypeIndex>;
+
+template <LeastSupertypeOnError on_error = LeastSupertypeOnError::Throw>
+DataTypePtr getLeastSupertype(const TypeIndexSet & types);
+
+DataTypePtr getLeastSupertypeOrString(const TypeIndexSet & types);
+
+DataTypePtr tryGetLeastSupertype(const TypeIndexSet & types);
+
 }
diff --git a/src/Interpreters/RowRefs.cpp b/src/Interpreters/RowRefs.cpp
index 97feed54c08..2a18c2c700a 100644
--- a/src/Interpreters/RowRefs.cpp
+++ b/src/Interpreters/RowRefs.cpp
@@ -1,7 +1,6 @@
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/src/Storages/MergeTree/KeyCondition.cpp b/src/Storages/MergeTree/KeyCondition.cpp
index f6baae723c9..40f23fe5294 100644
--- a/src/Storages/MergeTree/KeyCondition.cpp
+++ b/src/Storages/MergeTree/KeyCondition.cpp
@@ -1300,7 +1300,7 @@ bool KeyCondition::tryParseAtomFromAST(const ASTPtr & node, ContextPtr context,
     }
     else
     {
-        DataTypePtr common_type = tryGetLeastSupertype({key_expr_type_not_null, const_type});
+        DataTypePtr common_type = tryGetLeastSupertype(DataTypes{key_expr_type_not_null, const_type});
         if (!common_type)
             return false;
diff --git a/tests/performance/json_type.xml b/tests/performance/json_type.xml
new file mode 100644
index 00000000000..29e52f1e53f
--- /dev/null
+++ b/tests/performance/json_type.xml
@@ -0,0 +1,17 @@
+
+
+    1
+
+
+    CREATE TABLE t_json_1(data JSON) ENGINE = MergeTree ORDER BY tuple()
+    CREATE TABLE t_json_2(data JSON) ENGINE = MergeTree ORDER BY tuple()
+    CREATE TABLE t_json_3(data JSON) ENGINE = MergeTree ORDER BY tuple()
+
+    INSERT INTO t_json_1 SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers(200000)
+    INSERT INTO t_json_2 SELECT '{"col' || toString(number % 100) || '":' || toString(number) || '}' FROM numbers(100000)
+    INSERT INTO t_json_3 SELECT materialize('{"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}') FROM numbers_mt(100000)
+
+    DROP TABLE IF EXISTS t_json_1
+    DROP TABLE IF EXISTS t_json_2
+    DROP TABLE IF EXISTS t_json_3
+
diff --git a/tests/queries/0_stateless/01825_type_json_parallel_insert.reference b/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
index ac512064a43..158d61d46f7 100644
--- a/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
+++ b/tests/queries/0_stateless/01825_type_json_parallel_insert.reference
@@ -1 +1 @@
-Tuple(k1 Int8, k2 String) 3000000
+Tuple(k1 Int8, k2 String) 500000
diff --git a/tests/queries/0_stateless/01825_type_json_parallel_insert.sql b/tests/queries/0_stateless/01825_type_json_parallel_insert.sql
index 93d1eecfbd7..e443c9455d5 100644
--- a/tests/queries/0_stateless/01825_type_json_parallel_insert.sql
+++ b/tests/queries/0_stateless/01825_type_json_parallel_insert.sql
@@ -1,10 +1,10 @@
 -- Tags: long, no-backward-compatibility-check:22.3.2.1
 DROP TABLE IF EXISTS t_json_parallel;
 
-SET allow_experimental_object_type = 1, max_insert_threads = 20, max_threads = 20;
+SET allow_experimental_object_type = 1, max_insert_threads = 20, max_threads = 20, min_insert_block_size_rows = 65536;
 CREATE TABLE t_json_parallel (data JSON) ENGINE = MergeTree ORDER BY tuple();
 
-INSERT INTO t_json_parallel SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers_mt(3000000);
+INSERT INTO t_json_parallel SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers_mt(500000);
 SELECT any(toTypeName(data)), count() FROM t_json_parallel;
 DROP TABLE t_json_parallel;
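The KeyCondition change above is the intended usage pattern for the Null mode: tryGetLeastSupertype replaces a try/catch around getLeastSupertype with a plain null check. An editorial sketch (`haveCommonType`, `left_type` and `right_type` are hypothetical names):

    #include <DataTypes/getLeastSupertype.h>

    using namespace DB;

    bool haveCommonType(const DataTypePtr & left_type, const DataTypePtr & right_type)
    {
        DataTypePtr common_type = tryGetLeastSupertype(DataTypes{left_type, right_type});
        if (!common_type)
            return false;  /// no common type: bail out instead of catching an exception
        return true;
    }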
From 31bf1203d337e3127a319671d704fb324b6cfb01 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 23 Jun 2022 15:57:19 +0800
Subject: [PATCH 043/121] update codes

---
 src/DataTypes/NestedUtils.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp
index 9cab49a509d..dbdba39fa84 100644
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@@ -121,7 +121,7 @@ Block flatten(const Block & block)
             const DataTypes & element_types = type_tuple->getElements();
             const Strings & names = type_tuple->getElementNames();
             const ColumnTuple * column_tuple;
-            if(isColumnConst(*elem.column))
+            if (isColumnConst(*elem.column))
                 column_tuple = typeid_cast<const ColumnTuple *>(&assert_cast<const ColumnConst &>(*elem.column).getDataColumn());
             else
                 column_tuple = typeid_cast<const ColumnTuple *>(elem.column.get());
@@ -309,12 +309,12 @@ std::optional<ColumnWithTypeAndName> NestedColumnExtractHelper::extractColumn(
     auto new_column_name_prefix = Nested::concatenateName(column_name_prefix, nested_names.first);
     if (nested_names.second.empty())
     {
-        if (nested_table->has(new_column_name_prefix, case_insentive))
+        if (auto * column_ref = nested_table->findByName(new_column_name_prefix, case_insentive))
         {
-            ColumnWithTypeAndName column = *nested_table->findByName(new_column_name_prefix, case_insentive);
+            ColumnWithTypeAndName column = *column_ref;
             if (case_insentive)
                 column.name = original_column_name;
-            return {column};
+            return {std::move(column)};
         }
         else
         {
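The NestedColumnExtractHelper change in the patch above replaces a has() lookup followed by findByName() with a single findByName() whose result is tested against nullptr. The same pattern in isolation (an editorial sketch; `lookupOnce` is an illustrative helper):

    #include <optional>
    #include <Core/Block.h>

    using namespace DB;

    std::optional<ColumnWithTypeAndName> lookupOnce(const Block & block, const String & name)
    {
        /// One hash lookup instead of has(name) followed by findByName(name).
        if (const auto * column_ref = block.findByName(name))
            return {*column_ref};
        return {};
    }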
From 96e6f9a2d02ba6cf560703f73d6acc971b3dd445 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 23 Jun 2022 16:10:01 +0800
Subject: [PATCH 044/121] fixed code style

---
 src/DataTypes/NestedUtils.h                           | 4 ++--
 src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/src/DataTypes/NestedUtils.h b/src/DataTypes/NestedUtils.h
index 9473d30497a..e7cda541f47 100644
--- a/src/DataTypes/NestedUtils.h
+++ b/src/DataTypes/NestedUtils.h
@@ -36,9 +36,9 @@ namespace Nested
     std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
 }
 
-/// Use this class to extract element columns from columns of nested type in a block, e.g. named Tuple.
+/// Use this class to extract element columns from columns of nested type in a block, e.g. named Tuple.
 /// It can extract a column from a multiple nested type column, e.g. named Tuple in named Tuple
-/// Keeps some intermediate datas to avoid rebuild them multi-times.
+/// Keeps some intermediate data to avoid rebuilding it multiple times.
 class NestedColumnExtractHelper
 {
 public:
diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index 305009a070f..eefe4231b89 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -1,4 +1,3 @@
-#include "ArrowColumnToCHColumn.h"
 #include
 #include
 
@@ -704,7 +703,7 @@ std::vector<size_t> ArrowColumnToCHColumn::getMissingColumns(const arrow::Schema
             {
                 if (!allow_missing_columns)
                     throw Exception{ErrorCodes::THERE_IS_NO_COLUMN, "Column '{}' is not presented in input data.", header_column.name};
-                missing_columns.push_back(i);
+                missing_columns.push_back(i);
             }
         }
     }

From cd8e5c7c49d4246014ce4cf423d087278745cfd3 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Thu, 23 Jun 2022 17:43:54 +0800
Subject: [PATCH 045/121] update headers

---
 src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp | 3 +--
 src/Storages/Hive/StorageHive.cpp                     | 1 -
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
index eefe4231b89..f922ff048b2 100644
--- a/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
+++ b/src/Processors/Formats/Impl/ArrowColumnToCHColumn.cpp
@@ -1,5 +1,4 @@
-#include
-#include
+#include "ArrowColumnToCHColumn.h"
 
 #if USE_ARROW || USE_ORC || USE_PARQUET
 
diff --git a/src/Storages/Hive/StorageHive.cpp b/src/Storages/Hive/StorageHive.cpp
index 6d298c0033c..b717d373598 100644
--- a/src/Storages/Hive/StorageHive.cpp
+++ b/src/Storages/Hive/StorageHive.cpp
@@ -1,4 +1,3 @@
-#include
 #include
 
 #if USE_HIVE

From 0b2ec429063ca2663214013d2c3c5e479a956369 Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 23 Jun 2022 11:26:06 +0000
Subject: [PATCH 046/121] fix build

---
 src/Columns/tests/gtest_column_object.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/Columns/tests/gtest_column_object.cpp b/src/Columns/tests/gtest_column_object.cpp
index d5e58e5fce2..e1ad949f6a8 100644
--- a/src/Columns/tests/gtest_column_object.cpp
+++ b/src/Columns/tests/gtest_column_object.cpp
@@ -89,7 +89,7 @@ TEST(ColumnObject, InsertRangeFrom)
     const auto & type_dst = subcolumn_dst.getLeastCommonType();
     const auto & type_src = subcolumn_src.getLeastCommonType();
-    auto type_res = getLeastSupertype(DataTypes{type_dst, type_src}, true);
+    auto type_res = getLeastSupertypeOrString(DataTypes{type_dst, type_src});
 
     size_t from = rng() % subcolumn_src.size();
     size_t to = rng() % subcolumn_src.size();

From 3e62d0fb8c3c74017725a82083678713fa648a6a Mon Sep 17 00:00:00 2001
From: Anton Popov
Date: Thu, 23 Jun 2022 11:31:39 +0000
Subject: [PATCH 047/121] fix test

---
 tests/performance/json_type.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/performance/json_type.xml b/tests/performance/json_type.xml
index 29e52f1e53f..ef11856df0b 100644
--- a/tests/performance/json_type.xml
+++ b/tests/performance/json_type.xml
@@ -1,6 +1,6 @@
 
-    1
+    1
 

From 2339906e2ae5ac7d5df4a8d31ec2b6ec4a9ebf54 Mon Sep 17 00:00:00 2001
From: Larry Luo
Date: Thu, 23 Jun 2022 11:30:00 -0700
Subject: [PATCH 048/121] Adding TLS V13 Test

---
 tests/integration/test_tlsv1_3/__init__.py    |   0
 .../test_tlsv1_3/certs/ca-cert.pem            |  32 +++
 .../test_tlsv1_3/certs/ca-cert.srl            |   1 +
 .../integration/test_tlsv1_3/certs/ca-key.pem |  52 ++++
 .../test_tlsv1_3/certs/client1-cert.pem       |  30 +++
 .../test_tlsv1_3/certs/client1-key.pem        |
52 ++++ .../test_tlsv1_3/certs/client1-req.pem | 27 ++ .../test_tlsv1_3/certs/client2-cert.pem | 30 +++ .../test_tlsv1_3/certs/client2-key.pem | 52 ++++ .../test_tlsv1_3/certs/client2-req.pem | 27 ++ .../test_tlsv1_3/certs/client3-cert.pem | 30 +++ .../test_tlsv1_3/certs/client3-key.pem | 52 ++++ .../test_tlsv1_3/certs/client3-req.pem | 27 ++ .../test_tlsv1_3/certs/dhparam4096.pem | 13 + .../test_tlsv1_3/certs/generate_certs.sh | 23 ++ .../test_tlsv1_3/certs/server-cert.pem | 31 +++ .../test_tlsv1_3/certs/server-ext.cnf | 1 + .../test_tlsv1_3/certs/server-key.pem | 52 ++++ .../test_tlsv1_3/certs/server-req.pem | 27 ++ .../test_tlsv1_3/certs/wrong-cert.pem | 32 +++ .../test_tlsv1_3/certs/wrong-key.pem | 52 ++++ .../test_tlsv1_3/configs/ssl_config.xml | 73 ++++++ .../configs/users_with_ssl_auth.xml | 22 ++ tests/integration/test_tlsv1_3/test.py | 236 ++++++++++++++++++ 24 files changed, 974 insertions(+) create mode 100644 tests/integration/test_tlsv1_3/__init__.py create mode 100644 tests/integration/test_tlsv1_3/certs/ca-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/ca-cert.srl create mode 100644 tests/integration/test_tlsv1_3/certs/ca-key.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client1-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client1-key.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client1-req.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client2-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client2-key.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client2-req.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client3-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client3-key.pem create mode 100644 tests/integration/test_tlsv1_3/certs/client3-req.pem create mode 100644 tests/integration/test_tlsv1_3/certs/dhparam4096.pem create mode 100755 tests/integration/test_tlsv1_3/certs/generate_certs.sh create mode 100644 tests/integration/test_tlsv1_3/certs/server-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/server-ext.cnf create mode 100644 tests/integration/test_tlsv1_3/certs/server-key.pem create mode 100644 tests/integration/test_tlsv1_3/certs/server-req.pem create mode 100644 tests/integration/test_tlsv1_3/certs/wrong-cert.pem create mode 100644 tests/integration/test_tlsv1_3/certs/wrong-key.pem create mode 100644 tests/integration/test_tlsv1_3/configs/ssl_config.xml create mode 100644 tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml create mode 100644 tests/integration/test_tlsv1_3/test.py diff --git a/tests/integration/test_tlsv1_3/__init__.py b/tests/integration/test_tlsv1_3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_tlsv1_3/certs/ca-cert.pem b/tests/integration/test_tlsv1_3/certs/ca-cert.pem new file mode 100644 index 00000000000..293e1c7f564 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFhTCCA22gAwIBAgIUVRNcr0jCH3vSTxg8QYQH6CCtyF4wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwMjE4 +MDk0MzA2WhcNMzIwMjE2MDk0MzA2WjBSMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQsw +CQYDVQQDDAJjYTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALwojNvu +fXQYQ4tucqNOEDHf2sNgxwxqY6QdtJ+zNfVjsK4I3Vqo8TtzxfDYGolkYem/bYJM 
+xQar9ehUm9ok/0kJgIo8vDXxxDJtvjz5Fd5oFWJLMxojLE9NSa0A4m18jGfbFNsF +XoU0njiInyzNaU9d4bMpaweseCZdt9Y4LR93FkuhSU/v18lWQPob8SSIij059IZP +sEUxpDOTxclAmG/Knd/6v3ecVFiQgexZM0gCtf7kcw41mxsAaP/mOexodIZDR70Y +LYjL7R2ZGhpClfQc8SO5NSpfEqsfreDX7XoaCTsy7/rqr3Nfiby6sc//awG0Ww/f +FRf2+2BU2xEwOVa3i5wU5raYY6eqFLK9q9c2IWPSqYzAmvhK2pqWQ/iaCU/Q89ow +SbKudJTLK8Y6v9LW4Q8ZLZF+CzS5cI+QEfIYqTLFdInH1BLoxx7cymEv07CDkcTo +2WtV8GdMph2P3U/9NoXQDonjCSj0lQUjgUdcrBPaIIVbIn6/5vfw8LQa8PoGDhIx +AYQkqPR+LHxCqIMzdqKZ+OXD/HPhiigpxLhF7mVRLvvoyrOZVJbcu1qmgCcQw0IE +fWzvWne+9cYC9lgt8+/k6d6B1uhYsIwwhgoj0dffFjc0sF6zfceGK+H1K2JCE0aY +zT1HlvSoZdA7lEs5xbGJnkBHqlOvQ63ynXCzAgMBAAGjUzBRMB0GA1UdDgQWBBTn +AtgFU20JF7kTZCKlY7/hi0kYRzAfBgNVHSMEGDAWgBTnAtgFU20JF7kTZCKlY7/h +i0kYRzAPBgNVHRMBAf8EBTADAQH/MA0GCSqGSIb3DQEBCwUAA4ICAQCpiWgJ1XUw +a8Bdeznsa57oy+5mqQZWpRVkzTQRHEGV850OGh7WQ6u9kVAHefaHH9hsVxyggton +6/MDsu4KL5jqKmJaIAepPIOw6DTc2zs044I7W/rxRp+w1hL2TS+EahMrSPwdzCcl +NNAM0dXocGylf6qwwMqiYAR1K3UIrlyq4QTr1oEPIqJBkDg1JDYrt4T2DroPjW20 +5hlCQ/tft5ddGL0EFEaKWwAcPFm7jAwJiz2eUqmT6PcmaZ24qPn5RXVkaBAkrSga +1WgM8r3LGu2EKhdiDc5hRJKjS8RZyLvZNNzlL3+N42nGmGZkND5bV6u82OD+qn17 +LRZOt0Cr70HqszSYk/67ijjaa4n/fuuAqorV+yYB8accRXtoi00nxykT+H+yI1rD +swvcrfDvhUgY5zmunWyQUYh0q/2Hj75GbLup3Cd0B4MrBwqyCqcEugM4OSf6aRMr +e/vjeggTVPN08xE1LUkugalx0B0aoO6qFahJ2CmkAcYLLlS2N+F7TMuPavc0kVxD +I3qA5G9zvNCliSLX2+kM+LzslI8+pP/A98bvh6nW4HtZkI0jq1ks7XR0GeOhCI8E +0l/YuElxxgKhN4INKhhMoDKqPib4z8gbmkenR2CenQCpfLMIrhTXZgtw+gvEgpIE +/QK97G8XPqga6zn471wrYJnuyJli+sP7aw== +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/ca-cert.srl b/tests/integration/test_tlsv1_3/certs/ca-cert.srl new file mode 100644 index 00000000000..c02cd0a4526 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-cert.srl @@ -0,0 +1 @@ +05F10C67567FE30795D77AF2540F6AC8D4CF2461 diff --git a/tests/integration/test_tlsv1_3/certs/ca-key.pem b/tests/integration/test_tlsv1_3/certs/ca-key.pem new file mode 100644 index 00000000000..e85dca8553e --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/ca-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJRAIBADANBgkqhkiG9w0BAQEFAASCCS4wggkqAgEAAoICAQC8KIzb7n10GEOL +bnKjThAx39rDYMcMamOkHbSfszX1Y7CuCN1aqPE7c8Xw2BqJZGHpv22CTMUGq/Xo +VJvaJP9JCYCKPLw18cQybb48+RXeaBViSzMaIyxPTUmtAOJtfIxn2xTbBV6FNJ44 +iJ8szWlPXeGzKWsHrHgmXbfWOC0fdxZLoUlP79fJVkD6G/EkiIo9OfSGT7BFMaQz +k8XJQJhvyp3f+r93nFRYkIHsWTNIArX+5HMONZsbAGj/5jnsaHSGQ0e9GC2Iy+0d +mRoaQpX0HPEjuTUqXxKrH63g1+16Ggk7Mu/66q9zX4m8urHP/2sBtFsP3xUX9vtg +VNsRMDlWt4ucFOa2mGOnqhSyvavXNiFj0qmMwJr4StqalkP4mglP0PPaMEmyrnSU +yyvGOr/S1uEPGS2Rfgs0uXCPkBHyGKkyxXSJx9QS6Mce3MphL9Owg5HE6NlrVfBn +TKYdj91P/TaF0A6J4wko9JUFI4FHXKwT2iCFWyJ+v+b38PC0GvD6Bg4SMQGEJKj0 +fix8QqiDM3aimfjlw/xz4YooKcS4Re5lUS776MqzmVSW3LtapoAnEMNCBH1s71p3 +vvXGAvZYLfPv5OnegdboWLCMMIYKI9HX3xY3NLBes33Hhivh9StiQhNGmM09R5b0 +qGXQO5RLOcWxiZ5AR6pTr0Ot8p1wswIDAQABAoICAQCO/c4Wccb7TFlAhD4wpumd +zX5GDq0WXV+94CldWGdARnOFvwzhkhRJ1zDtWH3KPfQ/HJBPfqIY8OQfnPUYMhej +3MnHxGJQKJyuqkHxumYJMFZX7cg3K9XHqne8NzjcddOKNa9Cx3DOkG9RjVpSRQSs +IS+d5XMGUOa6WWyVKvn3uJvD/B1n12DJDHiy2jtHRVCxOPMAg1z1KMWdwMaFrEZs +ZrHV/ow1jSN4btGd2SgkqJLA08IwYUKvoX8qQj9wzu0G/+hr5wzrsfZQEQMKQ+IL +s1b6jAzAV6IrVBbjEZXSviiXyZ0gteuCJW/acpMg+/3JPNQbWrCAFt1wluwowto/ +JAFIvlh29hfE5c+HEMpQNa0tdj7jepBn/0YEbgwpayMikKiLZXEpgheWCGypAQWp +Hm+N0Ym7HSGe82obxi8EjKRnNwFUtotWzUBKeo9aFwPZHLFlspljd+5ynDvKqXnk +txYZj6K3TtMs30HAG6fqxSPyiZ5W+5yF7nt6qLODs6m4Os+lrk1GnoqC0/uLMzIU +CRJKulrJOK4/Z2tPn9IAhcREbS4oROUeNqqo0Cfs3ssvkV7JTHF4IsKhCmElMmGa +bevOI+pvdjfECShy0Jnbtni6ece/II4/edfUp9kWN45xZLpzDjfqCVD66JS9g6ZU 
+i/EVll+d5zaI2TzzwZgHUQKCAQEA3d8siwXbq7x0cAB013+tvkvGMJ2EuS1TWdLk +a2P6CAnlZMWvv2cPSd2WpimHjqKxrbn6VE79mOc2l9Y1NOUUWWZATrhN7V8xMapQ +0YiYCHeaMERUAUKdzCgRN2/mRbZCBzpPBbWbb6NtKfRFJsD9zAe2JBwDVh9hvAL8 +YVBoczrEfj1ILnmtPhAJVI6s6rDsA4MgKjLs0Tt7Cc7rQxqNSpHEvwv1yLQmjp0N +L5b1TEt7fqVJ9dirykJquBYEKf55Z1qZhQzmnbu9OPnzeqGDakl5F/UsXDB5Bokp +ilcV+nFbh175Q+gTEhaSacGW8gzRw6j18PuciBjeWVEM5hhxOwKCAQEA2RnRMjv9 +46jQarJTFbIHg1SqrR87GSLnt6672M5TX9frzxMCuVDjKgdecstvLjm6X+/cPQKT +Q3javJnJXT4cx//1J7RLO6ZBVSCZf3//XntdHdFVJf5ySQtK+MJyfxjpzP6KBPfb +WPrva8p29ejbBdtsOT0M6gY5tPfadU2XEaf+BoyX9NUmu1U46Iqi+eCOjR+GVvhP +pJzGgLeOsaRVCfc9I7XPoVu3AEx5Kt55yRYm4fyGPsAd+mRDbIXMXdL0k8CfWWDr +8TT5rqKI+gFPFQCwToBW3DwHIGY+3RmoXFfQ0IJaKwOk4AB7m6HC3mv1crtjTFSM +9p74oQzNX7UG6QKCAQBEs2cygRTdH5SaXbnQRKvC4emzggLn5/4IMUIjcqioNpA+ +XOwngzz7rU6JkxBzfTMxTQYTdwYVg3qnF2AQSeK8L+o3teADYVd1PnyZ9QbGkGpB +CddNMJh17+4s0UxnR6E4Zbi0VuCTd/JEbGvBLT8pHzYqBjaOQ1dbBT2q0GAXVhoj +0Mv6ABlBv2t0MF2gqjnaeI7MIkqsGxPlHJpChAU+EtbuJUDs7cOGo2DC3KaGAlVy +CLJXGslO7rPm3oJZkn97HlWtGiqKquhTrSnUThDIJ4oEfhlHTocbG/ut53tZuiIS +T7k1arYFAtJBRv17Y7bMNBQ7k12L0s9+rpck5GqjAoIBAQCVBPSkj6tZbpII+viu +5rHjguVYyhwtx9jYK1eDnTR7kGGrlPgErjIPslkxYNSjHTsCCUnakv70jGtQlBs1 +JqJo4hesNkSB4D/uJ99VNk3a08D566uP1dUqsFa44/flp/ssG/gvKtbkf/KBwcrg +RwK4RYJG09IefUF1J8BLToQIuZBTfIP9qaXZZskWTbtK28ndsqrq3a0FaBuVVOnc +o9k/avcLoQuxTZwS12tAcs+TqOHtswGO5x5stg/V2Q2LxXbeSJTYq/+oZN2R8r0l +JmrbFsruR4fXylh189jouWjoYdrSlPdBmVG99HbkQCfbtq0XIOsrBMpxqnMtUPVT +4ZWpAoIBAQCrao4XHpRM3KsfPqdkg0vqFDBA+LzKlWu1fl8w5TGpFK8H1tv5kHNv +h0XmeU5cXwiweri3KjQz7h+qVBHZeAvyrEoxJQdImax+5OUy7lysDs+SL02gLZb3 +Z7g+u4Buwx+cv4O7hwUEDDk/5X3NBFk7/iwztKUtM+Fl8jt9K3K24s87xXp9YevI +UEawden9jVcuXQjEUrwz8cjoz/y25vK5pQ6k82PVImkMZK99/PmgmGyOl7hdRA3F +ff0Kb8pRGmV/cWRKzHaC8QchW8jdU2EGxMkrFl1DvoVKLbyDf1glRncKP9iozHAR ++s184IJCUvyMxH83uKKAiBGaDRC+Lbm7 +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-cert.pem b/tests/integration/test_tlsv1_3/certs/client1-cert.pem new file mode 100644 index 00000000000..bd6eea62094 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRfMA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMBU0fao +RrITeF4kpN81p7qirX/Gc56+Cux6u7RF1O6WU9v+V5jLw8chQZ87z4QSrFiT1ZnT +pwWYPwJ+pDk6AWEoiKuOaceOh0bjZCuxADHs+qQrye5D8GXvyFvWE2cT1pD5JNEZ +DSl2YHqNs4uTGRP9BP817iRDcuvdxpanaWxfXGfehJRMiEVgKDs+RUpoW4aVNivI +InrUWc4RXXkzaJKqhpCU3jAJBV4jSD5ZnA8PUfcoAj6z6T3I6phuDfRP5ldA3br8 +yg0hCB7Y5QrO5lRAgEoIuNnC+U6/AIwWPI36Rjiwg3EUwI/BIiL4AWjzkjSdr0mn +zyHPRk4pcn01T0GTpQi6tfZZpumDD3LkPuEy9svMpJ8ntqDnAsIJVjbg1S60hHes +yYHoQw1HxU0vrncxwcQkVaPLx0uGlioaLlvu83AVnWXbylZXsV/pLy6dE3H51GBF +DX3Zj6nkuJitk8/hNp440/Lve7SaKFPo5NdH+8ACWGdFdz3zxgPuhBDoxEeqj4c1 +FQA1ABXx2akW3lQ5VxTAg5AYORvVhJTozosr+Kn3MlRdZjl94tnVByD8MGLLE0C4 +L/qXR/IlbkOCz5LHapdC5j62ZEBwiElmMO/tMGl4ORV9tdTBrRZ9DMmKek2E8Qwz +y770PGkhp1cTzZt6UfZEympowmfjtiZfHIq1AgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAHwRpqnpcD3EW588GSDZhZrVf3nS9M06ljQGtDUqNSI4XJp1cVT1sMaa4LjM +cWgHtayFw+jbDLwioXHjMlV+8tERH+0x+qsADG349caDYT/OF13v/jyuboUZ9AqE +KpfOQH7jCLU7rEbEl6kvT3F3xaHJg8mE7msyRFfguB2JrqZkKIj4HANxJUJo4PwB +5bq9nE3AVNAgUeQEwfu0r5SjroNpcHfm7xWqMK2mDMCsy/DvI7n97Q7vZajcTT0x 
+UXfgx+3CLEvLMpa2myE5OIMOeLzfZwxrxyNH7BdZsROnkGv1cX+9HZpYcue/UDxp +P2OApbTuZKaTJOyMADc17s0seE0DTAHnHAWrJwVhf8wYKKtEs+i+Sw5LNSkh5fgS +hTzGF93yClDYzWEqMSKhKPeimtpz4ZBNuGf471KbpVbUKJJvJmOxqoZ5S0kpFILL +YMALf652uf5or5d0cDNvcJTwvMi6evchIV17d/jH+MxyJQs9VCkMpJxFbMrXb3YB +b57K3Z25P6w3Qfj4zuKQFANari7Gs6qSiaUBiEhEdTQlGspkq+FLndtX818sbMk5 +LAK6JaUH0ywV2jn5XSW0irQLDXqb6Q0bSyw6pdpDjk0o4UW67JCE4kGagRDnfSqL +ZODvO/dEtVLyAsjmOx8MkqLyseI7VESVd8eiJAyL0sifh+/E +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-key.pem b/tests/integration/test_tlsv1_3/certs/client1-key.pem new file mode 100644 index 00000000000..8bc1e656566 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDAVNH2qEayE3he +JKTfNae6oq1/xnOevgrseru0RdTullPb/leYy8PHIUGfO8+EEqxYk9WZ06cFmD8C +fqQ5OgFhKIirjmnHjodG42QrsQAx7PqkK8nuQ/Bl78hb1hNnE9aQ+STRGQ0pdmB6 +jbOLkxkT/QT/Ne4kQ3Lr3caWp2lsX1xn3oSUTIhFYCg7PkVKaFuGlTYryCJ61FnO +EV15M2iSqoaQlN4wCQVeI0g+WZwPD1H3KAI+s+k9yOqYbg30T+ZXQN26/MoNIQge +2OUKzuZUQIBKCLjZwvlOvwCMFjyN+kY4sINxFMCPwSIi+AFo85I0na9Jp88hz0ZO +KXJ9NU9Bk6UIurX2Wabpgw9y5D7hMvbLzKSfJ7ag5wLCCVY24NUutIR3rMmB6EMN +R8VNL653McHEJFWjy8dLhpYqGi5b7vNwFZ1l28pWV7Ff6S8unRNx+dRgRQ192Y+p +5LiYrZPP4TaeONPy73u0mihT6OTXR/vAAlhnRXc988YD7oQQ6MRHqo+HNRUANQAV +8dmpFt5UOVcUwIOQGDkb1YSU6M6LK/ip9zJUXWY5feLZ1Qcg/DBiyxNAuC/6l0fy +JW5Dgs+Sx2qXQuY+tmRAcIhJZjDv7TBpeDkVfbXUwa0WfQzJinpNhPEMM8u+9Dxp +IadXE82belH2RMpqaMJn47YmXxyKtQIDAQABAoICAAEBsKOg19XgwjWD7ZT5e+o/ +JbdQe5RuHDKGperYnres871oBF9ZWan2I5jIwFpJmrtP8sM+V1ZxKItDzGo8QnuW +sbhsI2OW/GBDmmecIosgWWN4kzL7CgwOiDbq1OkqMmpJ04aAohAAfZrGmRT27R+s +qFUJnDh2XeicHYj2UVfu29XzVTBNgj0StsMwnT45c5ktuL3b60pHSD0K3DlhKn/y +AohJLyyDL5MBjkQ9RdLSWrR3ciOP332iSpAHq20G6ga04TQ0VH5jGN7IddJrqMry +F3nLt+Pz4EgoOcGB8Ekx8SIk0ltKJ4PZF+uk7qT0+WPrG1rAVRYxNoX8M4wyNjr4 +TcAZsV2DnGdnp+2u0SSzMczeop5hPTJKxaLaPw1JOoIk5fqW94MbEHqGnEXEIN+D +OWeUKWZ/B1YubavOeR+c3STZrh2SgmhKk6g5NMFlfnyvolPu47H8NOrewOhVG+TZ +gsQoGxSyOXwZTQ/Jd6Yg9lek8nKJBc4Res7ia/x3H+gjjRoNFI+L2HQnWztx5YMZ +H9M6hcpclZubO/w4iLq9OB2QUHn7aIT3lWRV/xS0Yh2zGCufasaMA1KSKC5zq0Fk +gCzAkYDq/ymrJs3LQQ0wegKd1akL4z5fxmXTn2v2BGoEd52uuxhL0mM/9zzRxdR2 +IsOgAym+siLXMCHTDbdVAoIBAQDuMcea66WKidS+A9frCEsabYccKzrdMEhs6Mle +orFieMC+3ZpzFIBkXPZ522I+M4nIdBKuRw9PnYTE5t30euOj60Oq905j2a+Ho4ki +kW6dC+tNDF49Hqxn9e99xbvTUi97dREcERlHA+AnRektEciyD17bi88aUy9w83Mw +G5Z+ej+9o40w8+TDopE2SIJhUAHR6LOAMq1v5y1lmTn0sbTuxZFLA0qWX9aGLi+T +4RD0MzJAtKJDbr3yPTLHAXmaMSKHhWYYgWTH9iwEhGQAm5VJy3oNJUkM7ej7Yfs7 +aTDOk61egCKhEHdWavP68MqmNOPHgnq4/edmvQnhfKtI8SMnAoIBAQDOtWDi/OnU +ZjZPnmJwwoPuXe6IjYg47bFRGv94xEpSesCAYdXNaNLPl0f/Ut9y3nXr+j+XqJWo +UqtRGFu2i9lUK3cu90GLXEaLbYWGcgL8YnJu0senLxkqxPWcGxoKmbo3xMjqk/pF +EVZ5e1qqVTlrB4q7QWmLKrS8YlcaTnChPeSBRFfryg/xvQ11Hxtq89SKkTH4ps16 +0KtiCxvfQHVASyRLIKLdyabPInB+yP3Fsn4BIx8jGtOQ/OCY01TXq9OyaRu2hJTk +qsjOLnqf6huM2so3X0Tw8AdgNoF96JJvfhwiPI5CSo9UKjhuvus1Ip5ZFFNo4Ngy +n3Zlgp1HxZzDAoIBAQC9ffqmo3sxqI8Hj3UxdIqS/rlyzm1o0+V6RwMT92gYx6nG +7fLWRGQT8+TdcotIoqWlQ7oszTlABDdAkc3XlgANQre1hkLlqqM6y/3n8zzFUVsj +E4jRJNrRZdTeAPV4mzRNCgfPhUbPuSSU+cgT48b+6L10+VeMQMtIF1T226uw+L5G +tps3a3/9pxHQ1oRquESKYo6SmT5i/M2fuvNhWBJxtdjtjTPER4AZhRqykWV0cFo1 +Ib7I2Ivh74+6w9Ciux4WJCjhq+aqMYw5F72awitU5rw1QwlHcOldO0irrfZ3EQLm +YBesfLYDmNh6NR9ydDcVXBcXnl593DvFF/IH+FYXAoIBAQCQZydLCzHy3oC8eEH+ +0fRGljooDO+IDYzcwwaLgF0HZ5eJWE97EuqKeP2kAWn2HjC07Hp2YSBDmZTyrxiK +2wG1CjRVjAeu6oShrJ4mAQnS9JdKkldFlOJ4/WUza79yflgX05IkRcIFdAo8DY+W +BLl66qbhD95CiU//dpew2fFWwx0ZrPvazar7zn1TP6rwuWvWbX5CXYyYaqP/dxE+ 
+khIXGyc8kI0WcWPlugJqn9CgxoO+GaIL7Ra1Z+MjACd6DyBxt3nTtKUrZZ+oYdHq +Wypp6QJxUk2gH56XeRxXMBz0ZF4VEMa0ys98FY6c1yULVqbWRhvK3aBLJRkZ6vgj +BorvAoIBAASy89mnP7d9jY7pSg/8znsUF8fQwKpRJZKS+8xgbzsZP+zT7CjxCbPL +xcNK0fl6pRBv+gyIM013R7J1uvZJ3W6rspVxlXOvofvwYSuLOjwsZA26RM8s7Do5 +e62Bg7PUHbbaD+C8HzbJlyXeQ++oddWPbIkxJMwhP1Uvy3wA6c7E7w/UACZvv20J +KriU33QmW/o0YpOX8xBVwgsCld+IfUIYm1S1mpU6k3oUfGIA5iyKx1XLTMhlaYUG +dTdExwxQp73Jk585qWSpaiQ05OrgYyzZ8OHA2kRTPK+54HSwRfn6senf3TakZHBi +zjy/DZmOU/a/EiR7MCGg+jS1x9GBxOE= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client1-req.pem b/tests/integration/test_tlsv1_3/certs/client1-req.pem new file mode 100644 index 00000000000..b821609068b --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client1-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAMBU0faoRrITeF4k +pN81p7qirX/Gc56+Cux6u7RF1O6WU9v+V5jLw8chQZ87z4QSrFiT1ZnTpwWYPwJ+ +pDk6AWEoiKuOaceOh0bjZCuxADHs+qQrye5D8GXvyFvWE2cT1pD5JNEZDSl2YHqN +s4uTGRP9BP817iRDcuvdxpanaWxfXGfehJRMiEVgKDs+RUpoW4aVNivIInrUWc4R +XXkzaJKqhpCU3jAJBV4jSD5ZnA8PUfcoAj6z6T3I6phuDfRP5ldA3br8yg0hCB7Y +5QrO5lRAgEoIuNnC+U6/AIwWPI36Rjiwg3EUwI/BIiL4AWjzkjSdr0mnzyHPRk4p +cn01T0GTpQi6tfZZpumDD3LkPuEy9svMpJ8ntqDnAsIJVjbg1S60hHesyYHoQw1H +xU0vrncxwcQkVaPLx0uGlioaLlvu83AVnWXbylZXsV/pLy6dE3H51GBFDX3Zj6nk +uJitk8/hNp440/Lve7SaKFPo5NdH+8ACWGdFdz3zxgPuhBDoxEeqj4c1FQA1ABXx +2akW3lQ5VxTAg5AYORvVhJTozosr+Kn3MlRdZjl94tnVByD8MGLLE0C4L/qXR/Il +bkOCz5LHapdC5j62ZEBwiElmMO/tMGl4ORV9tdTBrRZ9DMmKek2E8Qwzy770PGkh +p1cTzZt6UfZEympowmfjtiZfHIq1AgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +fGx/D6rNeaVO/vSUGX5q1iJKd8Gnw+/8NRgbuvCDuDOSy8LyqnLmVntj8q9FHpJM +SRH3LnylMVFZdybso2ZbhR1UDReGvHCtKICG3LLP1uWwy5nS3mkGBHFm9COyFP21 +kWOit1+106gEhg2f/NXh31HFmh+myepLjPEj5KxvnQhQfaQESsDYDZAs6/qT1mqp +A7GixOXh7hIFBJ97cU7fKby0Wtv7GqKAYQkaf26ImoGijtMPIlzvwJboJWmOYzIH +zrOHqspFkJD8YvYOwLIKdahViqXU7POL9uRn0vFyaXVcyXRq83Pz+bPSW9AFYsYG +ukSZiJs1yCINZI/Mk1vlfaZWYPIbBkJZ0Ny0vw112dIEilWAkVdsmJyV95aBddQI +Md64CYWZbV5P7/0QOX+v2ZQpWVnaV0m07K6VVuTL3bw6BQ9fcj7vaql6wl8jl/9l +nEotaZiY1f1pUUko3XzXpZEFB1lGBHupuS/Plz8pfFefN/7sOZoWn1VhD9I1A8uh +b2mg6hyQ7pe2NrHOTY1+L1xxxKKHt01kvDhws09qxRXtNsLrL8tl94i1ndLjHIwD +/VRnVU04E/VoTKaEXuETLZwOZu8pLwdiejrWEAmtsbmmcKq/Bk42wa+Wrmge2Chs +V8EOAtq91AjUcQeh7s2fV6yWweMGm1J6pdkNWckCsUs= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-cert.pem b/tests/integration/test_tlsv1_3/certs/client2-cert.pem new file mode 100644 index 00000000000..886cc533fcc --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRgMA0GCSqGSIb3DQEBCwUAMFIx +CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOGIanwq +rZCqMT+ePwRkiQnD0gyVt5+kwkb8X+fdBJRF0kr70YfzMpKdZP4l4W6C0Jv/ysIH +usrI5pQxcFAIe/7DLW0JPkMLKgXsOtPNZPIkc7WYkq3cbzB0ZTsK8O3IYhwn0dAY +O49T//YqM3TLTFsG89B6uCEg7dQiP9hh6boic8M/WyAseOkJNfw+wYcTWhl1toKc +dLbo8ehESUtVhCOPVT602zBUYFkleqKPeHJ/gzl3/mTnqfeUBljGI2aXwOl7r6rI +D/or7wew2HZ81dTGDqB+yqUhBIVNseJPHOuKbke2E2qWVzAkRnX4b2ehsSaSknpC 
+KGWyLibaQyR0/Gt8Duu1XIsZKeFjCw27yogSTQ6xTUhLDF1anQyoJX9btSQZsTbD +3vtHbD1O07KSfiG0Z1p8LaR10RAFA7f3HLwwy6c9ExpGu5ED+co8aO5Xp5wysg8X +fYZYx4CaY3moQPJPDS6eOpUXd/6h27Fm34h9VdSj2p6j9JYsmTeEgb0x+JjAQyRS ++Koj/tbSbBqjbvO+FUaldRlHCHYCQTnjsSNBf7SxqE9lfgFitcgiHKSdD7QIfwNB +EK1o7L8OugC/SQtHGe3ngUGuNmHI9w6ItGuVqoJYP3Hwa6ClGmYlTRLoAj8NkBib +toxwGIspTlTzmmLXpqeZTPaA2K5eiq8O5DKvAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBALp4L1aky2jfgk18tney56sUL2Us2aHqyOz9LlowWFdNMtCKo0WKpZ1qXGfQ +92QE+zc/MEdmv3V/H1MmSr7trTq1u7E5vVVI9Lq2lNbRLDQLi1+qd9E7Kdl6Oxw/ +Ecc8oxIbg86p83HhzPfJG64m3x6S6m2c4sNrHRAO/gxxJex6ZSFfQwYJZFlcvvBX +CH70RBtBG/ggasVtwqBuuIRNJ2gAtiWG2RtyGlOjPiAg7nUQiYlXLHVOjvrKDvrI +KTjzRdEUMqKtIrNUBHSbWZlxKZ2Ddavshg/0T0reAN/u5KTDxiGaQxlVEA7xfm+j +etqjzTz7LnKuRsA+Z8UUYaV6mKYfKObDoUs/12IomRCUTQi1K8MP3fGmmk+4Xiyu ++t15EqWJzhjuT2RjCAL47X6ksdOtonX9t29l6ykCvYpK1mlzG+EhqDyMIn62TNfx +OFjWwhIFgyEUWtwkihIKtv3ZVtrJVO/j+HCUfq+6IpjYHdlpdb4OaHgBtpokOtM8 +PmTHJbP2bxmNIMAU1WTfV+e/JkdTKHJclC5DTGF48yRgdKSOTq0G1eJYh4DhlEIM +vOw2rXeWR6VSkvA5vF7HANEptl1tkT3dsKR4BXkSIO16ldWBEHMM4UeXx85GGM0k +TRON4FWBMi6PXX6mrmPXcUW7AyKG2JL9gNlxRgWHVK7xmZyp +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-key.pem b/tests/integration/test_tlsv1_3/certs/client2-key.pem new file mode 100644 index 00000000000..462916c0670 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDhiGp8Kq2QqjE/ +nj8EZIkJw9IMlbefpMJG/F/n3QSURdJK+9GH8zKSnWT+JeFugtCb/8rCB7rKyOaU +MXBQCHv+wy1tCT5DCyoF7DrTzWTyJHO1mJKt3G8wdGU7CvDtyGIcJ9HQGDuPU//2 +KjN0y0xbBvPQerghIO3UIj/YYem6InPDP1sgLHjpCTX8PsGHE1oZdbaCnHS26PHo +RElLVYQjj1U+tNswVGBZJXqij3hyf4M5d/5k56n3lAZYxiNml8Dpe6+qyA/6K+8H +sNh2fNXUxg6gfsqlIQSFTbHiTxzrim5HthNqllcwJEZ1+G9nobEmkpJ6Qihlsi4m +2kMkdPxrfA7rtVyLGSnhYwsNu8qIEk0OsU1ISwxdWp0MqCV/W7UkGbE2w977R2w9 +TtOykn4htGdafC2kddEQBQO39xy8MMunPRMaRruRA/nKPGjuV6ecMrIPF32GWMeA +mmN5qEDyTw0unjqVF3f+oduxZt+IfVXUo9qeo/SWLJk3hIG9MfiYwEMkUviqI/7W +0mwao27zvhVGpXUZRwh2AkE547EjQX+0sahPZX4BYrXIIhyknQ+0CH8DQRCtaOy/ +DroAv0kLRxnt54FBrjZhyPcOiLRrlaqCWD9x8GugpRpmJU0S6AI/DZAYm7aMcBiL +KU5U85pi16anmUz2gNiuXoqvDuQyrwIDAQABAoICAHZuu3RuuOxB41DEGdWFsczV +7wS6zk1gKME8IGTS1GfEbpT/vd1FYaZKTtGDNOlieoehAGl5w6Zfb24ctBzjB7IV +7lHWy8JLJ4sqrQ2ySzM43yZac5QnMKBiTxJ9QV2sn5CnfG9pekVe2Af9yz2m0Hbw +pLIy72Q+NYXzYlGPwTwEgYPjTkgL8oZ1VssabWgwSl0aSng2DrhKhVXyHgcYZiaC +S0J9mKi9dkb5/ndFHfwKZ++Syp1UZhXjvp15lvd181DoqavmGTXHQmNog5NdJLDy +PJYdXu7t8sDJtwLfhpFOBXFU9MdBIZHfSr0CdAYYi710tMTM3wfgVIoEjcOkRzRx +36O66ehHfcyNsK52Z+DZ6uR4c+MOG0kzTiHQhyxjiu+3nYMGw1XdyE+k+eZDMPd3 +vTaR7kYOQvVvdOVAUuFZG9mK2p0mpofb9cFxFD0vJUqTYXxSdKUNIexR4mWQJw/h +rWOg/42GK4iLY2X6/CsDh6pTsM+HCzwmTGGkL54FvDsB2AhAhXPz/kGiBRTrh9/p +QBxacSPoqN+kF3u2qZRPEmjuimiW2AaXARbTABNSBQJIEmWzWOVdgUBVetGoN/ML +8mcYDmXhAc6F96eqPj0dX8cHfqYPguPhtzLj5V6XGym7hYQyOLBcE7tr2BcdjUfM +V6OFHsPNmsYWZ9F6zCv5AoIBAQD3M6gziCA0G0cG05ef0C3D9OVGWpHqr0yiR3MO +ZKsYbJJn4WOtWWvo8N5oqZBQ8VIoyGd1eiSIDuxXEWniFWjn57QN2nrDNTsEQPgk +HzomgFzuDZ7V4JsjJ9F2nAG5i2HoEwKNHdzfni6mhwGaapd+4GlET0jlC71p+h0X +CPsD6Jwabp6OUyT+xm8XW3mTWskBzKfq0OPbsdv8UB1dPt6jVrkjoe76TlTsWXWi +U9p9/h6kI984R9T10J61c21dokuL/KlHqb6TIQY3RcCgm2bfucmuawIq6vs1PBrK +VCvMX1BuTva9CYg/+hxm9Ky08jFWSCEEtzaORyN+4mmf4maFAoIBAQDpj1NoI7RP +mYqG9vHyXSDUUNbchpLOFKIaeh2DGk0sFmLi/obglsxOKu8K3r/EobNt+vpDTBxI +1EjPWdKuaXNYYjNjrVmPHdHPoHD8JmXzJDbZnXSylV9MVYSMNF+7BWUiPg3/QC7b +1a+ljJH/KEWFb0xrIfNPxVzyq8dyFOxcmLfRVLYlEW+fRYeaZ3QApxGi/BoYK8KN +vG8f/a8jpPwYCVa3JJ7/donEtsbxTkm66aacn8Vo2Y/tdo0nxyqC9PyBU+tV0u4w 
+aYtEZ28kpC9QheRx8D7WzhvsFc/KsshiB6jddjOVR6VgiUFCo+b/5PqpyZVTVrcs +tj8062A3KvyjAoIBAGRPn/eZS4gZcY8BmcuODKQx4j/UTNXw4KYRXE0A6LT2icqB +mZMkcDeMVpQeCqPt6SsHd4QiVmSnuZvzQwYtLe69BUGB4MMJ/LLTMl5mFZC+Efe/ +qy6bABkZ9VOuJr0GJGqqHCTrc0+CvudwbWQd0O/5XH4NtkTLqMcyaU+Jo2KIp5/K +N6kFcEO6fiX6RrFW665BP/p3XZ8u41fVorTN6EZb0LD26yTDWI64FpYSdN0fm4t7 +yv7ply9QwrZa6oxOaV2a345nASBvDDito2cI6IvstjyCy9RimiGWDEECOuup2deJ +T3KSRanAcnoM23Bpvz+F8XAacJb3ox2//qCUnIkCggEBAJHl2XllTF6pEFLs8giv +SjG26fFKE2yukPCvNb5O8MRIm68mxkSHjsqJoVeN/Act57MdI7ZkVgrcqTr15ljT +QJ2GgomSoS54tzbXB51Ls0XmamkYJezkyGobxbf7g42Fej6guwenJV5oJtfobs8Q +bhVDiF4oECDVrhFdYzKNhXT2ZWVbYIjZUnwQ5/t5Aorh0m+Ywgg1VcxKWLSIOR6w +ElZFhyjStIvqlXcPokjc2cvr5wtR9vRfa7wv4U9m59R0i0OSk6DCKc6OL9QkNNaT +xYasjR7rr6VpjSG2Il6BvhEWrdLh4qku30zlkKG7VzKk7Dyh0ykDM1u34NYC7tCn +hrcCggEBAO+Rnkk5eYYqGk/64+Qy5qA7djvvZ8AgihwJL3+ZUDSOxh0W+Er4NB6n +j0kI22N//D2j6hg93TNj9jI6lISfmY+TSikr/P+bQPGXl8wvekQxpjT5JhCYI93M +LXnSULuy7J1ujkMGdxEvfOTjvmD0ejtnuaGd+jM7hx4QNBbJj4VdV+r5BQOJAlfY +gk6n3RgAnu86szquWM6dObIz9BWtIcMVGlxA7yDmxjVDDHLwGpcwG+MTQRcHoeT6 +2+b7FtVN1NFLazfgPS3bxKs5jaUB+Ibm9BD8B7THviNikqRYqwoJMWpJgdWo/lOQ +X0ueOR40kfa077G7jNfb03qOPUR1mFw= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client2-req.pem b/tests/integration/test_tlsv1_3/certs/client2-req.pem new file mode 100644 index 00000000000..846f6db84dc --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client2-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MjCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAOGIanwqrZCqMT+e +PwRkiQnD0gyVt5+kwkb8X+fdBJRF0kr70YfzMpKdZP4l4W6C0Jv/ysIHusrI5pQx +cFAIe/7DLW0JPkMLKgXsOtPNZPIkc7WYkq3cbzB0ZTsK8O3IYhwn0dAYO49T//Yq +M3TLTFsG89B6uCEg7dQiP9hh6boic8M/WyAseOkJNfw+wYcTWhl1toKcdLbo8ehE +SUtVhCOPVT602zBUYFkleqKPeHJ/gzl3/mTnqfeUBljGI2aXwOl7r6rID/or7wew +2HZ81dTGDqB+yqUhBIVNseJPHOuKbke2E2qWVzAkRnX4b2ehsSaSknpCKGWyLiba +QyR0/Gt8Duu1XIsZKeFjCw27yogSTQ6xTUhLDF1anQyoJX9btSQZsTbD3vtHbD1O +07KSfiG0Z1p8LaR10RAFA7f3HLwwy6c9ExpGu5ED+co8aO5Xp5wysg8XfYZYx4Ca +Y3moQPJPDS6eOpUXd/6h27Fm34h9VdSj2p6j9JYsmTeEgb0x+JjAQyRS+Koj/tbS +bBqjbvO+FUaldRlHCHYCQTnjsSNBf7SxqE9lfgFitcgiHKSdD7QIfwNBEK1o7L8O +ugC/SQtHGe3ngUGuNmHI9w6ItGuVqoJYP3Hwa6ClGmYlTRLoAj8NkBibtoxwGIsp +TlTzmmLXpqeZTPaA2K5eiq8O5DKvAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +3DJlf7AkZklzzswgm487f+y2bB7IYr55JwENASDxQEOdVcdgLC3IWu3hLiFwdqac +0Sw2OHZuETwJiIX3fD+qUT6TgbsP21W7wEQ4jfKg/bsXFMbrvw/ILkOW2JLTH4Cc +9ylCN+46dQ9heATkiF/Co+uASz9IoSDdtoycA3BuKGBZI8VGa56QmJOOsMM5NgxT +RTh2r23tV4E8AGYj3HC+b1rzK1RTlsj/m5nM9Jv0/NqoV1cprS1ONr8CBhN0ttuA +WLrG+DUZTMJYFabqTptlgejQFhiFp5HT5A+eXgZ8uEUX1I3q5jq1BEWtLdmJNZ45 +QViSJOokH/+1kfRSWiAH7pdBz4URLBcsDhAag4J7kV38t7fgdaIizY8R2Ss82iEP +xqa4A0PA065wB44zng/VrPrHoH1YnGRugXEnrqgcipC0FxUl3oQjvwOSR/E7yFU0 +GIr1MpRcyrd0z4p16783qnMpE1Aa0msED2SBKIK13WcNY+CtDF/wO47ZNywl1hBo +VkM+ohPpmonaVXNGdpdoZpeGjkBUbqkn+so4aYkX/WuZ6vY2vwdV0prD1vdAFfD2 +AeJx5ypu5aeKn6nK0eMy6W/VEJx6RLCiYVOCIcssgy31rmk4iLQJP2StYVK2mZKp +5aSR4eTv1/XlMujq+ZqcuUqA1id9wP7908Xr0DzdNdA= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-cert.pem b/tests/integration/test_tlsv1_3/certs/client3-cert.pem new file mode 100644 index 00000000000..ce9a472cb9a --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-cert.pem @@ -0,0 +1,30 @@ +-----BEGIN CERTIFICATE----- +MIIFMDCCAxgCFAXxDGdWf+MHldd68lQPasjUzyRhMA0GCSqGSIb3DQEBCwUAMFIx 
+CzAJBgNVBAYTAlJVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRl +cm5ldCBXaWRnaXRzIFB0eSBMdGQxCzAJBgNVBAMMAmNhMB4XDTIyMDIxODA5NDMw +OVoXDTMyMDIxNjA5NDMwOVowVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUt +U3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UE +AwwHY2xpZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN8Bt8gv +50J66lQ+l/NUW+lqW4DesmSLv1BnjDd5SSA8tfczt999/l1epAGeEN/Pl4dAxXP/ +cxpx+J+xF6SKNxQ0RP+PHQMiDzCUgBq4OKs09kDQ/uvycUZlQuWPtR610TWjZR5r +VrNSwJQp3VGDdNyEbKj/yd6Yi5NC1iLuqPC20fw5/9BVTm1P2wWX7nv1AWs235s2 +yAG7pLNcgPiTfSmXyyT31YBjb9Onun7gv7exI/3K9mS+aWq6ci1xAXtykVCs551T +OQmDAUxda041YghEThO4MrZa6uSZqVwnoUcXTla+8biLYb3+9CnIjM5whAOTR+9r +jpsuuXEUOsrX9Mgb1HTS+ksmrA+Eka7MdVi60Hoon09uNvcTM8CSKNgnTzcPCM6t +J4NHDiimJM5WA/eY8i3NNCTa1HUGEeIK51UOdjIFKsvzG0TCI2FM7jQLJK5S38tI +deZ98iQbguVGhoCvRotLEAwW1M2rSOu7bxAZU4QJ93IuUfkLn2BipOuyuR55Z/6F +z5Jij/1lK2/pKWhntUHTIpG+bBHDF++0LN0aB29uIwYRkoz9JUgnNz4FDVbLvJ+z +5Ywr61t8AujZdfMZDpRYlzfWPGej8pm7/Eux5jgx/3jcLtqfqkfZLSuFjBKfkUU1 +eGsC80RupMJKIeppv541W6nQJlmJYKv7DCvrAgMBAAEwDQYJKoZIhvcNAQELBQAD +ggIBAD+YMVntBdeq7xJEL7xU4QEHzUGhDWodGMJfmswcxe7gf5Nztcq5YIug+akL +ewg0wzgCA5YGz00J92sKDF16RmYyPfkxmrCYdNGwISjNJyEEcPEVkdAzwILjv2Lq +0shFlSsf+Zp/M4XhHeirmzz/jJ9KHlzEYoCz1WOn+UGF12KgV2oQOamJSWOMCoMh +81oy90V5IlCBqnYfZCYj7cbYLBd5jZMZ+7lsVnxttzPTg1gIoP6vrLT32Ubnzx9N +IoAeiUg7az/fbnuOkJtu0cjz9aSdpjm2h2giyVAFJ8DkQ9C92tdr9DWZKn7rDO16 +TMdv0q8NFjRGhqdmqWUG6o2cUmQsJ/ZiIcHx5X1b7j7PYSS+ae9zi1tcpHAN6kCw +WHguIf5I8MIZxE741ZMBokFSIqd6Bh1EP/TUx1+g2a/nH3ZaNd4/KKADxfUU2Y58 +UwdKeX9YpcRz+NNO+1h3NoE1a/i0dhwiBf4OzBiV0WpAjQHT95IlQxTxfHFp42IH +GrbqIS3qK5DKlNFkBBk1beKxBGKmTH+Pw6fhjkuPYQzjmGo4xluivfeT8SiBT2iO +uIGLd+sitIooom0KEjHuHS9cdZ5XEPIUDAFhmIt7Y5K8J2fs+xtYzhibg3n0Q6qh +xTx7GzhTA1HSUE/467af5J3CSfpGAjZQZo/t2/A6tCumzk9F +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-key.pem b/tests/integration/test_tlsv1_3/certs/client3-key.pem new file mode 100644 index 00000000000..b7464eb2866 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQDfAbfIL+dCeupU +PpfzVFvpaluA3rJki79QZ4w3eUkgPLX3M7ffff5dXqQBnhDfz5eHQMVz/3Macfif +sRekijcUNET/jx0DIg8wlIAauDirNPZA0P7r8nFGZULlj7UetdE1o2Uea1azUsCU +Kd1Rg3TchGyo/8nemIuTQtYi7qjwttH8Of/QVU5tT9sFl+579QFrNt+bNsgBu6Sz +XID4k30pl8sk99WAY2/Tp7p+4L+3sSP9yvZkvmlqunItcQF7cpFQrOedUzkJgwFM +XWtONWIIRE4TuDK2WurkmalcJ6FHF05WvvG4i2G9/vQpyIzOcIQDk0fva46bLrlx +FDrK1/TIG9R00vpLJqwPhJGuzHVYutB6KJ9Pbjb3EzPAkijYJ083DwjOrSeDRw4o +piTOVgP3mPItzTQk2tR1BhHiCudVDnYyBSrL8xtEwiNhTO40CySuUt/LSHXmffIk +G4LlRoaAr0aLSxAMFtTNq0jru28QGVOECfdyLlH5C59gYqTrsrkeeWf+hc+SYo/9 +ZStv6SloZ7VB0yKRvmwRwxfvtCzdGgdvbiMGEZKM/SVIJzc+BQ1Wy7yfs+WMK+tb +fALo2XXzGQ6UWJc31jxno/KZu/xLseY4Mf943C7an6pH2S0rhYwSn5FFNXhrAvNE +bqTCSiHqab+eNVup0CZZiWCr+wwr6wIDAQABAoIB/0I0QFst3XnfA7H+4x1Z7e9d +o8yeUFeJJUK5eub9Grh3TY4VzICM5vbRId9ZDalj95gvom7NZ15yd1zxNhOi9LcK +zXERC4vikJ/bdix4hFpPXsvfP87MKtS7OyDriNmVIIbL+zkMpLCX4JQb2ZhZblgI ++DkztrpejxEoxmmYcI8Ft1Ep5sfyi1XoXx1J/YLPOZyarcdme/oHut2EmMUzA/VV +GvnemYOEAa7UHImOL1xZOlYd6wf9f04wC7Vx1v7PBFTu/9O04TnxqnEBStns/y11 +GbjA9k0ssI8tDxpMqZRxVtBp31jqCBpflhzRbPvca1SkZLavN6baODNZzhpqAkDX +3R4lU5C7wu4jtzydUyEsCFNdtkGKlxpZRbRZk+keUC+HeCmXPED7p9egwF6Zi8VI +oaXl1KvHZO2W5x/BV9I1taEPhmOuRR49KxkU4e+IjqaWYN1qsqYqCs/od22Rah72 +KT+thr0mdxC4lb+pvteafricUQuq/dSbEY/lva7PhPQRKVX/VxOaAxBnhA1LHVgZ +imsW8W3eOQYJbxniTrz9EblWAg4dCcupsjMDUDUyACB/E6isDtYU1J2im6p4gbqw +tXg3bRh7KruIHbPSJyrFm1uqe+v97TLhpwPHKCsxE4HiJgRzaQDRckLJQebqNp3Y 
+e7kLLjg6uGsjAl6OwKECggEBAP5bLGVrmBmAz8RYPnG1MQWlsFg/eIhMFCqMjT3P +swPUU2VJKC3TC3OwFLxlAr0lkXol+8L8aEvxGjHksleA+1z0lav43b1/2jKgLgI6 +Ym5BxMJa+sUJpI6K7CedJ6wf2ozbpVXazvNBZ3o2l0QbC/KpX886CZH9YJgn7N0M +TfPe9er5zmETdHGTWtA0sDI8fZ8XndKmnWG9KTQCGur6gemF8SKuzGv/BnL+BZnv +bDqSvyN8Wjk35KPNeKVW78ROxRuEdB5brryGk955hX50PRRoofW8GSmLJNKNYvIj +VRkKrDKpz8gW1C2/xa9j5tQkGRFMDAptmk+yvtmDxfZz38UCggEBAOByrXLMTcwR +bz4MYcSmEdLv2VA/bZ+y0kW0frUU5il2fyQseoFbunVbTDiXYf40uueMbOONZktM +w04CXKRaTbnS/s6SGU5VW19jv+xzwrzpB2Shm08APwgFnSw40bKCpN4ZWQbOyFVq +QIMXfA0+Go3zJz37MsSgY+mzhHp4WITobVFpdlhaLvrLPCB78uInZrFsvNN6NP+K +OIbOoTA9u+BP73THHkpQdrRJaJWowpqejz8kzQ/Xu0Xe6AG1EGVp39phKpWH9TPF +8xoxjbdIGPkzCzYO3hgz6PlnWVj8iyTxklnaUblqKkY2mOlMA00ujcdF3d3IHvaM +Xolej+XeZ+8CggEBAKeZDdzaE4Oic8RtXN/xwxZ0gYj0cYhlkNgkeqCi7dL1IepY +VQg0ypP1DwTADhjx2zTAOG7XgCWh/V+o0LaFv5sVclW5iuplhzHah9ZiAB+kaHCk +IB6a5vohoc/MZqqs5oXv6LZ0ke6JRxSpSezPYYUIg5/5Hvs6GF7J1/IjPG4XmLS2 +23zto8l+jdUpEnxXjXK5zf1SWdtgF/kz9ealH9rurd/ri7kRdn9oz+oJb6f8r8ND +GfQf1yDzr65KZXxVZt1l3llukemZR2/NZN/Y2bJL64QO6AmOrLmr/emMzHLOrH5J +lCbEnBR1C14xFpTsIDRchoaMh6RCJC0Q/e0Rlv0CggEAAOIysJsBS2ZeK75cvCtz +MoNjNZ+qTNClZ0TYotncNhmTUo8iRFQaHdAoMqjV5+xJOBQjcZni5zT8J9h2iOca +GzsraaDFnLtVSsDXxpSGFbxNHSZNuDfmB6AOCFiI6sz83Sr4YMB7pWpvqpRzFpJC +BIEKjIHqpz+CZS8hvGGw54UKuSFTJ/Hi8XXPXMlgIWfKTbSB4cs/XiorIsy5cbks +fiuSY8FM6zn53afUU5KAgZ9SLQt2CzPsNtAz1Z3i3KNYEEIFquUIIBYNaPL8/dW4 +03JR/vp8AVhi+Ghhv6nu2kxhKR1k6Pf0Bqa8X16/PJSMVlZ+Extwk8Pls2C97Ee9 +3QKCAQEAgjcbHKBjd7AeyNpPSzNpv81Rry5qqOc+Cxx8LtOHBl1wc5VB5FPxfbuX +MX2skvWPnokDoXcI1a1WQwdjaZUsSoqdeyPtw8pFWiNLJZkYImiP3zMCZXYUEkzk +3EXQZryWEqBYBqxlEvTyjbBmnrAwOPOUKARFi1l9JKJ4QpdELXo9Yl+w2IQEQ5N9 +jrSY7LwS/cb25rhEc6oh/89aY83HPyABh4lC9bsciXki54YIeS+y9ijN8yCRxikr +mVGfQ0Y/qcY9spAj05yr/vnlENBB5ohxwKKsemOnH93E2GFxc1dzmWCGvISjUduB +I68TOg71OfCKgfeixNgcOvQoN+xngA== +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/client3-req.pem b/tests/integration/test_tlsv1_3/certs/client3-req.pem new file mode 100644 index 00000000000..7b4445b3609 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/client3-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEnDCCAoQCAQAwVzELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEQMA4GA1UEAwwHY2xp +ZW50MzCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAN8Bt8gv50J66lQ+ +l/NUW+lqW4DesmSLv1BnjDd5SSA8tfczt999/l1epAGeEN/Pl4dAxXP/cxpx+J+x +F6SKNxQ0RP+PHQMiDzCUgBq4OKs09kDQ/uvycUZlQuWPtR610TWjZR5rVrNSwJQp +3VGDdNyEbKj/yd6Yi5NC1iLuqPC20fw5/9BVTm1P2wWX7nv1AWs235s2yAG7pLNc +gPiTfSmXyyT31YBjb9Onun7gv7exI/3K9mS+aWq6ci1xAXtykVCs551TOQmDAUxd +a041YghEThO4MrZa6uSZqVwnoUcXTla+8biLYb3+9CnIjM5whAOTR+9rjpsuuXEU +OsrX9Mgb1HTS+ksmrA+Eka7MdVi60Hoon09uNvcTM8CSKNgnTzcPCM6tJ4NHDiim +JM5WA/eY8i3NNCTa1HUGEeIK51UOdjIFKsvzG0TCI2FM7jQLJK5S38tIdeZ98iQb +guVGhoCvRotLEAwW1M2rSOu7bxAZU4QJ93IuUfkLn2BipOuyuR55Z/6Fz5Jij/1l +K2/pKWhntUHTIpG+bBHDF++0LN0aB29uIwYRkoz9JUgnNz4FDVbLvJ+z5Ywr61t8 +AujZdfMZDpRYlzfWPGej8pm7/Eux5jgx/3jcLtqfqkfZLSuFjBKfkUU1eGsC80Ru +pMJKIeppv541W6nQJlmJYKv7DCvrAgMBAAGgADANBgkqhkiG9w0BAQsFAAOCAgEA +Rggrols8hXGEcWeIEGn66kY9IVTzaTUf3oMfEbdf/2Q1QzHzmqp53yamHl5ioMgX +o5UBVxthgh1VOxkvCxIzlKDJprzVFkfwwc7h9c0HGt3No/ERobHDT6YRaGukAL5g +muIGBUseyBAOIfyqc5kbCRWfPrAOttAH4gd8XMBgO8XdfHAvyXBC8Ha55O6oriX9 +IAKL5+3nVJkBle+62OmROnstbcdKyK4UtOeki/6ptYVE0d9I+NfKjuk3eKtICW8Q +Pn3IEcNEZoFG2UQ19ENWwYEZyMZJt0aunqnm7L4RYiZT5w4meeendzXSKLKR6+Ye +ULt1sDRskgKoNRzmeCVzci05BG48jv/E7Az6aV/qhGiU2qIAPMdVXncWUhR3fj+E +CL/uLifOvfC6SnKw/7qQmgjUvEe4Duvi670a5QuImpm/mAIN22cXPc+QquSdR5xy 
+loz/o3JJQZemPAOM0CMIHZ+cGESxH30QCBNn5HfcOf5fRZVCss4Hl6JxHR2G4yN3 +RKEIUXR03qgSK91WHl3WvqwXgmIAiUuvPjo2i7kSuaUUHilZiXK1ngIqYfUTB5SQ +O8pG0fx3fbhVDA3RQfXeJE6FA2AyLvqOcsseRzvcQjQm4MU7p+RVaY17rI6/EkS8 +ac3E7BPwnXqSAkPSEgoiezv/Z0Hkmrcu6fIsUuf4ETU= +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/dhparam4096.pem b/tests/integration/test_tlsv1_3/certs/dhparam4096.pem new file mode 100644 index 00000000000..102b8dcc72c --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/dhparam4096.pem @@ -0,0 +1,13 @@ +-----BEGIN DH PARAMETERS----- +MIICCAKCAgEA/yTb5We6gyTktHTlb/PimPgJhvY3Spp9zzBO4I2r/f7p/llPLj7u ++VDW8s4Z9+UUVQKoVoU2NLcgbgIUWrkAKuBCqqxxh+/+0NdP/klkWUX084HBvT5e +Tofnv2JT4EB1ynlNCF1q7frF/ELNyPzOWzh2w14XwoWxb3ojrfwG7N9p7CQbSwjH +f1lDRbOcLX+n/pic4X42KqqXqsg6ehtwORz5kMlT3DTAGC7sfB6rL8Y8/GrPmTNV +wny+UdnTyku8+OJ/xhL1ERiOGMCcP5jhIU1Bq9Uf0ayp+3fJazPAyP5iUprwd3DF +9UvaEqIFeaknq5qX+aVf8G7GpCpIC14db6uEJCH/oMSGakJdC0jWZzN6EeJoUILY +I0K/+DA34/Yh7SAehqc2rAukiquCv59/Lm+FlZyIzjQoOtKI06oIjGr7kbS4lvgF +NbN7AXYaou5cJaffPmfgUuU1hw9gn2kYYMb7el63BBzIKX/ptWR/uJ59h05ivYGX +J5bok81H7gYvwHaXkKdQ2t3FoFJHAekKpraiqIW7qHE4O2lb3JOU9GvAQ1QLdNNw +CKJPFKBVes+YxmncJexxvyVXj1N9XXriOG949RwpLF8d85yx3eN+3cq5XJx65Rog +OknNaTV8uTrpX/WGcVylApshMy9+4LP352ZsmXDuP7yiBqlaxyb/KLMCAQI= +-----END DH PARAMETERS----- diff --git a/tests/integration/test_tlsv1_3/certs/generate_certs.sh b/tests/integration/test_tlsv1_3/certs/generate_certs.sh new file mode 100755 index 00000000000..d6126d361f5 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/generate_certs.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# 1. Generate CA's private key and self-signed certificate +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout ca-key.pem -out ca-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=ca" + +# 2. Generate server's private key and certificate signing request (CSR) +openssl req -newkey rsa:4096 -nodes -batch -keyout server-key.pem -out server-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=server" + +# 3. Use CA's private key to sign server's CSR and get back the signed certificate +openssl x509 -req -days 3650 -in server-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -extfile server-ext.cnf -out server-cert.pem + +# 4. Generate client's private key and certificate signing request (CSR) +openssl req -newkey rsa:4096 -nodes -batch -keyout client1-key.pem -out client1-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client1" +openssl req -newkey rsa:4096 -nodes -batch -keyout client2-key.pem -out client2-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client2" +openssl req -newkey rsa:4096 -nodes -batch -keyout client3-key.pem -out client3-req.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client3" + +# 5. Use CA's private key to sign client's CSR and get back the signed certificate +openssl x509 -req -days 3650 -in client1-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client1-cert.pem +openssl x509 -req -days 3650 -in client2-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client2-cert.pem +openssl x509 -req -days 3650 -in client3-req.pem -CA ca-cert.pem -CAkey ca-key.pem -CAcreateserial -out client3-cert.pem + +# 6. 
Generate one more self-signed certificate and private key for using as wrong certificate (because it's not signed by CA) +openssl req -newkey rsa:4096 -x509 -days 3650 -nodes -batch -keyout wrong-key.pem -out wrong-cert.pem -subj "/C=RU/ST=Some-State/O=Internet Widgits Pty Ltd/CN=client" diff --git a/tests/integration/test_tlsv1_3/certs/server-cert.pem b/tests/integration/test_tlsv1_3/certs/server-cert.pem new file mode 100644 index 00000000000..6f8e5a3c6b1 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-cert.pem @@ -0,0 +1,31 @@ +-----BEGIN CERTIFICATE----- +MIIFSTCCAzGgAwIBAgIUBfEMZ1Z/4weV13ryVA9qyNTPJF4wDQYJKoZIhvcNAQEL +BQAwUjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDELMAkGA1UEAwwCY2EwHhcNMjIwMjE4 +MDk0MzA2WhcNMzIwMjE2MDk0MzA2WjBWMQswCQYDVQQGEwJSVTETMBEGA1UECAwK +U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMQ8w +DQYDVQQDDAZzZXJ2ZXIwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAwggIKAoICAQC8 +jV8igQGgCvu/7BJDI5VQl43VGAFjH2Na/E9P4E5uwkSlJVED1WKvIlxRWhOaQOfC +587nZVhQtHpdbCvBdKrHml4SVbTchs5SN2kZsHeqaQzcGnejnczE0SYo4xNyniSv +GiQ1M8G3fiZNflEIPM/+Ob2oI3YnVWFGy0a5rQcHZWS45KuGILMP0aRHyzyh/31c +K3i2xA7A3V2jBNuD4kHG8TLgfDeoCecTI0iU/LJnDOolX5XdpyeoJ6YyYOGg3F9e +bRmbNlJN3Iky3Vzyc4jYG7y6f5DqfebYMW6hCvLpf9lN6/gPNOb2KjL3hvJ+hbj+ +b9EkVAzpw7mW1VHEy+WbtYMPoKy08JTc7zr1tv/vQGr3XExwlC9iixZXMaVt1kP1 +TEVHv2FiUOiZsVaqtoFpS/wBvKeQdkzNy+66pRpG9bLuOnL4hlz+rwHkdBmHGk+q +cXdwglqIDqXKlCpIMSkFPH1364KLdJ2qBgWWoWCJjUmgbrA8/LU6DX+GBbEiw45T +PQKP//RMkOrHOYRD33WTU0iKP61zn5+9RD5OLxEUOtCvL7AfB+jt4vYrMTT2U3Kl +OckWxNx55bYLdLfGKtepGV2r5xzce0UMbWQrXQRuka3a/j5VJUTuUgcwgd6FoP4N +4ObW2H1YEtE5M30xpa1kcqJ1RGEWagakISgn2Z3TywIDAQABoxMwETAPBgNVHREE +CDAGhwQKBaxNMA0GCSqGSIb3DQEBCwUAA4ICAQCE2eJVcvsMmJu6xAfoE6/u6BrD +opMicCtlC2qt0BgSIzzDQ/iWjnWKGM1C+pO+2G0WTczj7ugsxjPzhkyBpuEZaWt0 +9/tJTKIrgaRZvEe0ifsJxyqL5LJgfxK7TbDPcUBKr1v+TOxPVRq0FuG16x+yka4C +rwxfBHU43FmtEFfgu13r515F3ggXcdlojkce8ZKtTAGEcN0MpbJ6XS90BHU0sy5A +APTm0fR0vM3kg1nuBLbSGF5KfASdw13gb6QsDbll0IqK8LvXYiX5CaVfkAe/pFkO +/2iIxYW74yC2gV+DcFdRPVfFxSKrdg0tDER35OYg1/vXRjV5BWr1EjE3qjrCcUZy +rlF3fms7Arr20ka2nSa8avn4ALpyJZmKasoxNAAsxivingNVZkql48OqsJ3n0qGk +LI6Yu+UM/pc78a3NHsdsCbnf8qvae4oJa1kyiochJu+gUOzHvs4Ydti9iTQn2Byo +2A2LzyVPBmSOhzdQ7SwpvHA4A2ftao+dZoA/+o4rmBtbmgxjpBPyPJTN0ZfKlpKl +Oyi57ov+cJmZctSUbP3M11gBva7aYu1Rd7/eXeCEl1FHhmKL/Ee+UrNZLiwspb2E +Sa+pOHdJX8VgsIYXku2UKaGT2QFITxO7fnxghioxgsyCKrQ+m1gL9vgXj/gJu+48 +c+5CZ9SobLdMkVOtQQ== +-----END CERTIFICATE----- diff --git a/tests/integration/test_tlsv1_3/certs/server-ext.cnf b/tests/integration/test_tlsv1_3/certs/server-ext.cnf new file mode 100644 index 00000000000..83d9b03ccb7 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-ext.cnf @@ -0,0 +1 @@ +subjectAltName=IP:10.5.172.77 diff --git a/tests/integration/test_tlsv1_3/certs/server-key.pem b/tests/integration/test_tlsv1_3/certs/server-key.pem new file mode 100644 index 00000000000..065a2290749 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQC8jV8igQGgCvu/ +7BJDI5VQl43VGAFjH2Na/E9P4E5uwkSlJVED1WKvIlxRWhOaQOfC587nZVhQtHpd +bCvBdKrHml4SVbTchs5SN2kZsHeqaQzcGnejnczE0SYo4xNyniSvGiQ1M8G3fiZN +flEIPM/+Ob2oI3YnVWFGy0a5rQcHZWS45KuGILMP0aRHyzyh/31cK3i2xA7A3V2j +BNuD4kHG8TLgfDeoCecTI0iU/LJnDOolX5XdpyeoJ6YyYOGg3F9ebRmbNlJN3Iky +3Vzyc4jYG7y6f5DqfebYMW6hCvLpf9lN6/gPNOb2KjL3hvJ+hbj+b9EkVAzpw7mW +1VHEy+WbtYMPoKy08JTc7zr1tv/vQGr3XExwlC9iixZXMaVt1kP1TEVHv2FiUOiZ 
+sVaqtoFpS/wBvKeQdkzNy+66pRpG9bLuOnL4hlz+rwHkdBmHGk+qcXdwglqIDqXK +lCpIMSkFPH1364KLdJ2qBgWWoWCJjUmgbrA8/LU6DX+GBbEiw45TPQKP//RMkOrH +OYRD33WTU0iKP61zn5+9RD5OLxEUOtCvL7AfB+jt4vYrMTT2U3KlOckWxNx55bYL +dLfGKtepGV2r5xzce0UMbWQrXQRuka3a/j5VJUTuUgcwgd6FoP4N4ObW2H1YEtE5 +M30xpa1kcqJ1RGEWagakISgn2Z3TywIDAQABAoICAQC11lTwLp/Fm7IL9fvquc9P +CMmkv2DfGi80WO2YJ8ccM8gFyEYoP0rLgYSshAUxlvSr1+iG6grQ0izMGfzctcnZ +c3rTjco9fthNG9kFCFVvh536SqAkr5MCIH3/onZn7DGOmNRgZoikkEkaJP66xgME +tuS72W8iIcoNfw63FDIaJOONGCJ+2Nw3HkOjZVIVHRLlp5rkD5H218Vs6MtWlgY/ +eO9K5SC7sskhgL6HyGe40BCjeFpMh97L4Wj7XslZ3A0xQGAYervHES9TWX5A58EK +QT2yUkIMktzklE+PicKYA08rQa1Z5Pf0YOAELSWBdS7iWi3FLjXB35sE5rbT5puH +9hZXSDWLggbefuaUJyowDEZy2aHP5pvXKBDhEANRbU8VaDyHhlNLqVNquE5Cn4HO +zPeH+KLFbbABUok7yYZmIC9Bfn+rXvNzTX6A13AdJI/HcKA5RBGtpAY/168Pt/Aq +dzuqepu42rFAvS45RNevp72IIavx/QdAA1OTgKxh3c2Mf85pIXJ51aWWLnn+EZ5/ +EsE0crfwkuKJvjubNC4oOwMTFMIBI2WsjvaAw8pQw0Kb0ksExKd0wz9mKcqR/v0I +K9oYsaHkx5je0NOZds385+zCoQHYaw1aKUd7ZLqr5G/Nf/2TEYpMWco4ETA8lzu3 +Ty/8XkNw8jd4p+7bUuz1mQKCAQEA4MNU7GWDPwUKNNSz335nGH2oBvSGbYiwLcRM +D+x2+RTfOAFSSJ+Q5tQ+327ZkAB5dK2mxmDYKB+Ln1UBIneViUflkMyh4fuutIXI +wYo+BL71r89MqhRvvMK9hWnCGtJTJedf2iQENJzVn4J76BvTPRYywBv9pofPOlj1 +MtwwMA4CZAmQpCUaF5NQr4nliYx+slkcKwlm+cOxeZGa8mkNgQdmCcTZkRz6qsiR +vQDEDiS1+5lCJ6nWW4L2tOPejNN//hVlbPGMaA0oiu7I7w4aSxnTlLhDgJzJwmN8 +NFYl+u5AcPq9iRtBnzfPmd87S9bg10zcIiMKxw898sU24Pa0jQKCAQEA1sG5hO3c +4API//k7NEWXsx5Ns2JE/AV1LtmBgqXkn1DAJ+b6V1nIUppTs0zspEWrae9KrsAk +z47qIbPaTLHuptLrvEXk2LVfzcK32a7fXXDOB5KkBhzlJM1J3PTRQFR9lr7qX6vr +EDc4p7p55IDEGnJdXa7x+z56QjpAZaHlzexQxvoWWoLBkDuoT389sdU7CbgTa4A+ +CR6D6qKd6H6tfmv5sPlvp+aje+ObacP9I4WyVjscWkzBHxS3n/fTLjY6OFv+o8PM +TdytN4+HZnu4MDJlF3vx9P6CbnnVCaScXDxPGcoSJPcoEQqoyxuvUQLDUQkzWF14 +02EvXW0dbgiPtwKCAQA0EUwFD2ceHD7HClc4+QFNDR71rYPOsBGQKJ8uOSs+fHVR +dgznwf9BWf3OqNFBqLp6KxgtcJXihZxEpt6Ca416pesqZh1CSpmoPC3LmAjR9KLZ +vX4XEHDqG3roAx3yNLMKXtU3pYxL2+Eo+INXu8ptpkzPcCyMfX2mGKGEzLllCHnJ +TuXxAJ9QwtG4OIuyF5fqHPaHicAPMCRW80If0fJM57fdn3p/QWVYVupcDGdel2aJ +CHHo2lFMFcStFvShTwWhiLdcS4CpQhMYTETEDFJO/4aiNyV8D9Y1b/J/9U0LGlJX +Wd66elPzXGx9StdjtD2V4rpENjXy8zb4nHMgHkapAoIBACvvtmTbxTSPka/M7a/k +DQU4Te1FTZfCBhdvqG9yQTPW8Xk4aD82vyUnLbihJEj3d/pUWpMl/GH6eywp/59x +R8IZpOD/67HqaY9PJw4CGPClA4HJHoWho7/DwDjUXXsrzgXpSUoJgi3vHkgyfn2h +Wn2OqEtiX19niNvDzyj71mgq0Nvkjm42EiPQEL8y6QxY85spbc+wjQCQnayDWIsY +X6ZdsNfkMFPJe+j8x+77ie6ai8HYlhRjX59cPbUcnrf1oDOnnpEincnQPCAB3VG6 +PhSeOtBzKy1UZJr1kgBHDTZRoF1GWi/14NybsazcHSIVzp/lofuSJAYa+/XBPSQl +3EECggEBALSLZPdg13906LEyznYnjgq+nMh88usegvU9qsBAFExClLLfr6Ak77og +boNoOwbaFn+xiz5M8BTJIPizJcm5GjYaqg58zotTtG51h6VgMri+fb/BUpVr7p7n +aSq3kXDZlrwZnmooCT+KcGx++w2N2SYSyZX1TELt/dpfuWJvph+E37PkONEEiHPF +ZtSA/f9lpfP5/nx1pLmv4ksKdXqpz3/kNqaf9zbhQLgOm/VoBHL4NVPYRylGpCJb +R68/7yvHBd2EskZoJB53TlJmwu+fC6ee1UiG6aqTULfEsiGidi6jIt56Gz52ox66 +BHL/JsJ0Be5xM3V4x4PtihQ3Dw546FY= +-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/certs/server-req.pem b/tests/integration/test_tlsv1_3/certs/server-req.pem new file mode 100644 index 00000000000..be2f756cc7b --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/server-req.pem @@ -0,0 +1,27 @@ +-----BEGIN CERTIFICATE REQUEST----- +MIIEmzCCAoMCAQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUx +ITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGc2Vy +dmVyMIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEAvI1fIoEBoAr7v+wS +QyOVUJeN1RgBYx9jWvxPT+BObsJEpSVRA9ViryJcUVoTmkDnwufO52VYULR6XWwr +wXSqx5peElW03IbOUjdpGbB3qmkM3Bp3o53MxNEmKOMTcp4krxokNTPBt34mTX5R +CDzP/jm9qCN2J1VhRstGua0HB2VkuOSrhiCzD9GkR8s8of99XCt4tsQOwN1dowTb 
+g+JBxvEy4Hw3qAnnEyNIlPyyZwzqJV+V3acnqCemMmDhoNxfXm0ZmzZSTdyJMt1c +8nOI2Bu8un+Q6n3m2DFuoQry6X/ZTev4DzTm9ioy94byfoW4/m/RJFQM6cO5ltVR +xMvlm7WDD6CstPCU3O869bb/70Bq91xMcJQvYosWVzGlbdZD9UxFR79hYlDombFW +qraBaUv8AbynkHZMzcvuuqUaRvWy7jpy+IZc/q8B5HQZhxpPqnF3cIJaiA6lypQq +SDEpBTx9d+uCi3SdqgYFlqFgiY1JoG6wPPy1Og1/hgWxIsOOUz0Cj//0TJDqxzmE +Q991k1NIij+tc5+fvUQ+Ti8RFDrQry+wHwfo7eL2KzE09lNypTnJFsTceeW2C3S3 +xirXqRldq+cc3HtFDG1kK10EbpGt2v4+VSVE7lIHMIHehaD+DeDm1th9WBLROTN9 +MaWtZHKidURhFmoGpCEoJ9md08sCAwEAAaAAMA0GCSqGSIb3DQEBCwUAA4ICAQAb +FDegAoUBz9O4JR1u68IMnGkO5nINGAPQOqf9a2BxGujnSB7Lw6SHukjkUqqgnfQ0 +x/aWOI8JVAi/ptscojgMQUDsVNsij5v+jbJE+ZAobxnTmKP0wTc2ktpf4d8UMVc8 +gyM85jLHZ8caCcuy0D97W81vgIv33dNHWtP+sfbQhX9wJ2YQTahIC8NpuQfLAOUH +EFxWil0mfN+9vRQ1C5naKtvrOPqyM0RPrWiudIJ5QjI4aSXxUCupxxnaQMoI0Y50 +MvVVT3VwWgP+hL4b+yEJFHRpE7BwCZijsLIXkXmVZoveHhiSMYen1HWIP1VMDEHP +CUtG5UQcA78CBS8qg4nyFbDU4hWClAkAt96O8Y2epJYepIoYuBBSEfrgupESMLjS +E9Hfq/H6Ac/Q3zWa320udvA+ysfS8pagkoiH9+TarrsDjhxLjg2h2bGcXKlrsP1R +mRVZwfNOl3/ZNq5HBPb9Z5WXKvcsTCQAlnHJdaSmzdyArB0guwUHg8ZZNZqCdVgL +TPsfE84yI/HlwRfuQILfGxq99p/UYFwnee5CoM/PPvaAT+9z/lykMWZA7osuBcK6 +zP8XneGmZOkmez5+YJgSC0xeaDxr2R52eQXlQEJGDbFDtQap/X+cJDGyqmGnbhSu +6XkGy0l8mAkpcurMcy3wWf6+joskZAN4Joi4ZjKsQA== +-----END CERTIFICATE REQUEST----- diff --git a/tests/integration/test_tlsv1_3/certs/wrong-cert.pem b/tests/integration/test_tlsv1_3/certs/wrong-cert.pem new file mode 100644 index 00000000000..ef95a73deba --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/wrong-cert.pem @@ -0,0 +1,32 @@ +-----BEGIN CERTIFICATE----- +MIIFjTCCA3WgAwIBAgIUL2Y/QpwqqHyi43PwPeA6ygdPYK4wDQYJKoZIhvcNAQEL +BQAwVjELMAkGA1UEBhMCUlUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoM +GEludGVybmV0IFdpZGdpdHMgUHR5IEx0ZDEPMA0GA1UEAwwGY2xpZW50MB4XDTIy +MDIxODA5NDMxMFoXDTMyMDIxNjA5NDMxMFowVjELMAkGA1UEBhMCUlUxEzARBgNV +BAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5IEx0 +ZDEPMA0GA1UEAwwGY2xpZW50MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKC +AgEAxO2PSeaiNFMRRiFXpnMw07u6EIdEc1Jx3cPvZjEUg/pdEmMYkrSxr2MeqRkl +tWH8TcIIoiWDLIcM6IU0mF6a5ULu84hFb9b20qRG3wRNb5yO86HnoyzU99t98++a +9iaY1QAt03k8wq4jRjU2k/eoVSoLT5uVP5KxiNzdS2BTHFSsxrt/xcwdgkfJouHN +p+MYUekk6qaQy5fTqTpqdkgO2v/JoYCi0whBNj205d+WnS7xfeyVSJP1OJWHRZ7K +Y+LU6hz6wHIng4s/ag7VdAk0PArWs50BmH5g2zJfvt7VeTQebaJWUtSEY05odOqt +KZteUmmhxW/2M73wGVF3WAJCnaxypsjcmMZFCpMXpwyTFrqobvC3APl6SOP+Ev1M +LxhhCIDuLFu46P55KKEKjUCsYigd1VsHjjvoajGcqlPlMsVHJc9VChsQDz6agzDP +Fb/LyYbrDTTmsI57/s1jAZyemq2SEYPApJvcdZ/ucl741jI0671EZPlip9iUQgt3 +MHlc3t53/GtF2W6GF5Fogch7c+0c2BhMupAHAXwfLABvv5X8GDyjsNlwB6ea9jeC +Hw+0rEotZzCXId3daFytGNm1jI216kXLSbvz6uz1wMGS6Hrhk87whgvQ58RMNs1K +SGDFw1WFv+QZeTO7wqcn8Y/eqF7q9RBhOpPMJMX8Sx/UXuECAwEAAaNTMFEwHQYD +VR0OBBYEFCI7Iy7tY0D4HPa9BZCZxYuJ51mZMB8GA1UdIwQYMBaAFCI7Iy7tY0D4 +HPa9BZCZxYuJ51mZMA8GA1UdEwEB/wQFMAMBAf8wDQYJKoZIhvcNAQELBQADggIB +AIKYtBwTp3yvUGSXorV32dnU0Hp0MOie/itgx/la6b3h2bZSoCigKmcmvMaAaNzA +pxeYSsf5wPnONpWfo9hsGrUDMT4ETnXdzA1dbidIrhJbGsY8CN217Qt3YZWNWkrz +xLwxEwAovQZqnGDvtx+tRE8i6YJO6/kca+GB7liHFvUx8zaQ6gCwfloduG8rOAeq +noeCpW/zqYQSQGK35ntQ8MTTRbi7jMOTCikvRlldS73ODQcAR7jywgBYf/i8ANtz +NoWa4KbWuqKsQKMIGOi1fMLMaNlDSzJyw6UJ2GVCcL1NxkCZi0yudfAAxWlRis9G +zLjm7YdNBiC6RVZudGhvzjlsLZpE9DgiwXqcDv3Y1dpstD5ikrNhlQo6THH1YeFy +B8vjVGZZZu4B2JEo+QWH+zFGJosD66YoaKMVuwRPwoGDQoO0Pfbpq41A4KUhB3cf +X49/rbInqwsN5MuGp4l4+T7k7Wm0Y1Qo4FXDVbFxHvvniyHUsZk9Llzf5wBLl84m +xheUGgCHSflfXuuWi76yoADHCv+Eqi4/aLJmkUewKXJlm+XYs9bdBHUI+Y10KmhA +hgcHXF56L+N4mLRwUuLxa5qwQIqNX32+piQoO9opxnVKKCptpATLE30TOMLEXBlp +J+6b1e4BIasAAEGUhTgPj/SLL0u59Bv0K5SlSn7VZ0gI +-----END CERTIFICATE----- diff 
--git a/tests/integration/test_tlsv1_3/certs/wrong-key.pem b/tests/integration/test_tlsv1_3/certs/wrong-key.pem new file mode 100644 index 00000000000..b2213cd2675 --- /dev/null +++ b/tests/integration/test_tlsv1_3/certs/wrong-key.pem @@ -0,0 +1,52 @@ +-----BEGIN PRIVATE KEY----- +MIIJQwIBADANBgkqhkiG9w0BAQEFAASCCS0wggkpAgEAAoICAQDE7Y9J5qI0UxFG +IVemczDTu7oQh0RzUnHdw+9mMRSD+l0SYxiStLGvYx6pGSW1YfxNwgiiJYMshwzo +hTSYXprlQu7ziEVv1vbSpEbfBE1vnI7zoeejLNT3233z75r2JpjVAC3TeTzCriNG +NTaT96hVKgtPm5U/krGI3N1LYFMcVKzGu3/FzB2CR8mi4c2n4xhR6STqppDLl9Op +Omp2SA7a/8mhgKLTCEE2PbTl35adLvF97JVIk/U4lYdFnspj4tTqHPrAcieDiz9q +DtV0CTQ8CtaznQGYfmDbMl++3tV5NB5tolZS1IRjTmh06q0pm15SaaHFb/YzvfAZ +UXdYAkKdrHKmyNyYxkUKkxenDJMWuqhu8LcA+XpI4/4S/UwvGGEIgO4sW7jo/nko +oQqNQKxiKB3VWweOO+hqMZyqU+UyxUclz1UKGxAPPpqDMM8Vv8vJhusNNOawjnv+ +zWMBnJ6arZIRg8Ckm9x1n+5yXvjWMjTrvURk+WKn2JRCC3cweVze3nf8a0XZboYX +kWiByHtz7RzYGEy6kAcBfB8sAG+/lfwYPKOw2XAHp5r2N4IfD7SsSi1nMJch3d1o +XK0Y2bWMjbXqRctJu/Pq7PXAwZLoeuGTzvCGC9DnxEw2zUpIYMXDVYW/5Bl5M7vC +pyfxj96oXur1EGE6k8wkxfxLH9Re4QIDAQABAoICAQCjj/CAX/f/X7MsPYtQa8J1 +Sinbio42/pYmrJPNnBw/FhZxrC7/wucGFlyj9IgWZCEr8Go9SsztkeoNwn2RxJoA +q5xOV7PclX4CLIHUv/0VI8Kz5pi/NgBZMUwm7K8Xna041OI7ECqARCR2LsJ7GasN +uVMVttK6r7uXQmLnNUUydb3ffmI8xjEIQVnfWI74z60mc2+/GcOP5jXeC+/a+DSm +fudYpcAXaXbId24ls5SkTxYzEepYEtQNQFzPXXkah49yN8mpR+c74c805scxjmd9 +Kz9yhYiKwQTvaqKNpQVHmxte0gPC3lJrLPejjDtxIGOyLZw4oaqrBSpDzR9D0PTE +C+BR6VlXpVCTcAoiweuoDIxNTiJ5IbIJme3iMWxsAIJ4n10rSFFl9Cmmqbphp/6/ +XInB0X7Zyr1kBrwf+DH6DJhje5NXgGKVR9oe9jjW5v8V2tg1RrkzNU8iKBSxpvcI +x4mKhhRLYgoq/iNeYBVQrwJYktIbweVCQ5Spj7/20IrMkn3FAmMsZxGMZmLisJ9t +B0vvUkUgWxuJTsPJ2j+ytpGT0E2xIDYCpbG2EopDc8WvHcVNhagBvLC6xIjIKm7N +2zpBU2W3fPNXoToCAmaLDPYeRRpG6XaGFQAfvKUQRLBDGTfQ177qr34UBnmgvxDq +J2gA9rQm3XziLMuSlJexAQKCAQEA+yz49Ah7FFq0QffsoRb0qOJbfcmMGTRkaafb +ztto4EFSnjH2EwoSShu4DfqWw+ws1KxHlItNHHko5pVNpS4lj1OpnobW3QD7kEIV +mYKa3KowYUcCq1Gzq2RNDZqsC2BSXwx1MG0VVKYOahnu5bvzQq2Ft8W7CWBnbTbY +0Jxjs4KaOza+bH7Vfb5Yre0tlW7U5vI/YO8+YKxpxfOU9kVo8ZLQ/9r/YH8nnLa+ +Fd91+WjcUW8CTKU+Oz3lb/Vwcs6YOoAraq/wtOCqWURunBXkQtzOIn0bgBh3WEk1 +EQ+MVDHshlVVjv/rfnL571ZTT1amCJuEIwQRzLSvbso883srMQKCAQEAyLXaG3Pp +LYiRKu7Bqr5PPuqdT72UFabPpfgd5EtcFOL0xUpfRya6HyFdM25FWI8haXeg4e8N +0cIs3gMG+RRgm1xISJIZi92L0Cwj+kLFu2U5SkvAKMqZFh5q350FRi4Bp7ae4YrL +aguWLZCxhznh4D5xQGM6c8ObRfUUEMT+dnLPcj4zn9KHhoUudXjLKjPNw5v6nkbw +xtRdwANlHx/LX/d4+iwt2plDWmT+d2OLvqZcPyyghTMqV45L0p9XAXBsLnz4Zipx +7VJ8iH3jL5oaQ6YAFY+cXIrWBN0q3UYbXdkaA2ve6voioeF3KQNRmU10k7GKNRWl +pRNn62+rAR8isQKCAQAZnPVqFS9P3QwCqiCEMM4UJrkDs7jInTIcIBTnHDKuo5qk +LR4VxPImgnsbWdFj+0J7EXJfMHFVlPlZwiHf1TvZSMPEOaXRdZcxl7uSIuJd3DEA +ynf4NmWm9Zxx5bLjmhfsP1336TfCoQhZQ3m8DZV52C4Jlm1DQIRre6tSYpA8LvZB +UYzLjYeBwhZS7hu24E1vm4ZhASSQQSSsHfGzx1IzSDBt1swx7+V/MpdhrZ7fJxVI +bJSEcllNOzuZViL4Yh7d4FINGBHor/xPDA5ndkgHlXKjy7QxNM1+wEBcFATQVSL0 +c+E8qtY918Wq5VergH9/4zPvSivyfv5gwtjCT24RAoIBABP6HbJb0BqrHB/U0cvn +00Vk3rGAIgwhpUtUrcz6PzkI+enlJCSV0zKkBH3I/Pf6jw3LTWUPgSWemQ6j6H7E +K3VrMvqeKBLGw1K+Afq3yKyFP7WIYqDswV31Oxf0rgC1NY7220uBoAt3CcSRQUo/ +VZ8XN/h7p+a70mmdIhklMlqhxMoPLN48eybFfMFOe5JAw7szfDdiwjZYDti8vcTi +SkDMBeuImCvI025c3QMPEmqwbkAPdg6r8Av06tEU8PkAspPR9ntcwCgp7KE9Pm6P +fQu8qwd6WsrPOswTI2AQyUqHAFLU2sQyj13jbhPT87w5fF/y7NmpxOnwS4igfbnH +2pECggEBALO0FiJClb0GSqMXYJ+nbtRObD4AynYNVMEqYdZu5DBb6vb4T7uumTD5 +I1fKOa5SSELngUj23p2G6sVBsDyDHotGJYJwDGejHOFnEpY+J0Das0pGS40FsFC7 +qABIUaMoLKcIR9Ofcm9uu2n+koNULV2aaXj7A4OYhRCQi2PqiEx1wimdrLfGqTXn +O4rSf826ODch87vuPbfFPCaIFG28R3nByp/ZBH5QNiB3NBmc3A0tiHFnZW3cpOfW +Jm/Vu0PcNVVw32SroS2FCroR7qSWsvt61UzJtliLUiFHoUAxrXXiAxcZW1D2Hmpq +neUhT/t9hHdcMJgoxm2IITf6ip8nTnY= 
+-----END PRIVATE KEY----- diff --git a/tests/integration/test_tlsv1_3/configs/ssl_config.xml b/tests/integration/test_tlsv1_3/configs/ssl_config.xml new file mode 100644 index 00000000000..b18f04dd954 --- /dev/null +++ b/tests/integration/test_tlsv1_3/configs/ssl_config.xml @@ -0,0 +1,73 @@ + + + 8443 + + + + + + + + false + /etc/clickhouse-server/config.d/server-cert.pem + /etc/clickhouse-server/config.d/server-key.pem + /etc/clickhouse-server/config.d/dhparam4096.pem + /etc/clickhouse-server/config.d/ca-cert.pem + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + + true + false + false + false + true + relaxed + + + + false + sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 + true + true + false + false + false + true + relaxed + + + + + + \ No newline at end of file diff --git a/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml b/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml new file mode 100644 index 00000000000..c41776f9e78 --- /dev/null +++ b/tests/integration/test_tlsv1_3/configs/users_with_ssl_auth.xml @@ -0,0 +1,22 @@ + + + + + + client1 + + + + + client2 + client3 + + + + + + + qwe123 + + + diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py new file mode 100644 index 00000000000..d48b84925f5 --- /dev/null +++ b/tests/integration/test_tlsv1_3/test.py @@ -0,0 +1,236 @@ +import pytest +from helpers.cluster import ClickHouseCluster +import urllib.request, urllib.parse +import ssl +import os.path + +HTTPS_PORT = 8443 +NODE_IP = "10.5.172.77" # It's important for the node to work at this IP because 'server-cert.pem' requires that (see server-ext.cnf). +NODE_IP_WITH_HTTPS_PORT = NODE_IP + ":" + str(HTTPS_PORT) +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + ipv4_address=NODE_IP, + main_configs=[ + "configs/ssl_config.xml", + "certs/server-key.pem", + "certs/server-cert.pem", + "certs/ca-cert.pem", + "certs/dhparam4096.pem" + ], + user_configs=["configs/users_with_ssl_auth.xml"], +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + + finally: + cluster.shutdown() + + +def get_ssl_context(cert_name): + context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) + context.load_verify_locations(cafile=f"{SCRIPT_DIR}/certs/ca-cert.pem") + if cert_name: + context.load_cert_chain( + f"{SCRIPT_DIR}/certs/{cert_name}-cert.pem", + f"{SCRIPT_DIR}/certs/{cert_name}-key.pem", + ) + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + return context + + +def execute_query_https( + query, user, enable_ssl_auth=True, cert_name=None, password=None +): + url = f"https://{NODE_IP_WITH_HTTPS_PORT}/?query={urllib.parse.quote(query)}" + request = urllib.request.Request(url) + request.add_header("X-ClickHouse-User", user) + if enable_ssl_auth: + request.add_header("X-ClickHouse-SSL-Certificate-Auth", "on") + if password: + request.add_header("X-ClickHouse-Key", password) + response = urllib.request.urlopen( + request, context=get_ssl_context(cert_name) + ).read() + return response.decode("utf-8") + + +def test_https(): + assert ( + execute_query_https("SELECT currentUser()", user="john", cert_name="client1") + == "john\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="lucy", cert_name="client2") + == "lucy\n" + ) + assert ( + execute_query_https("SELECT currentUser()", user="lucy", cert_name="client3") + == "lucy\n" + ) + + +def test_https_wrong_cert(): + # Wrong 
certificate: different user's certificate + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="client2") + assert "HTTP Error 403" in str(err.value) + + # Wrong certificate: self-signed certificate. + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john", cert_name="wrong") + assert "unknown ca" in str(err.value) + + # No certificate. + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="john") + assert "HTTP Error 403" in str(err.value) + + # No header enabling SSL authentication. + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="john", + enable_ssl_auth=False, + cert_name="client1", + ) + + +def test_https_non_ssl_auth(): + # Users with non-SSL authentication are allowed, in this case we can skip sending a client certificate at all (because "verificationMode" is set to "relaxed"). + # assert execute_query_https("SELECT currentUser()", user="peter", enable_ssl_auth=False) == "peter\n" + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + ) + == "jane\n" + ) + + # But we still can send a certificate if we want. + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client1", + ) + == "peter\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client2", + ) + == "peter\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="client3", + ) + == "peter\n" + ) + + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client1", + ) + == "jane\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client2", + ) + == "jane\n" + ) + assert ( + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="client3", + ) + == "jane\n" + ) + + # However if we send a certificate it must not be wrong. 
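+    # ("Wrong" here means not signed by our CA -- see step 6 of certs/generate_certs.sh.
+    # The server rejects such a certificate during the TLS handshake itself, with an
+    # "unknown ca" alert, before any HTTP-level user lookup happens. A minimal sketch of
+    # the same failure outside the test helper -- hypothetical, kept as a comment so the
+    # test behaviour is unchanged:
+    #   import socket
+    #   with socket.create_connection((NODE_IP, HTTPS_PORT)) as sock:
+    #       get_ssl_context("wrong").wrap_socket(sock, server_hostname=NODE_IP)  # raises ssl.SSLError
+    # The two cases below assert exactly that alert text.)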
+ with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="peter", + enable_ssl_auth=False, + cert_name="wrong", + ) + assert "unknown ca" in str(err.value) + with pytest.raises(Exception) as err: + execute_query_https( + "SELECT currentUser()", + user="jane", + enable_ssl_auth=False, + password="qwe123", + cert_name="wrong", + ) + assert "unknown ca" in str(err.value) + + +def test_create_user(): + instance.query("CREATE USER emma IDENTIFIED WITH ssl_certificate CN 'client3'") + assert ( + execute_query_https("SELECT currentUser()", user="emma", cert_name="client3") + == "emma\n" + ) + assert ( + instance.query("SHOW CREATE USER emma") + == "CREATE USER emma IDENTIFIED WITH ssl_certificate CN \\'client3\\'\n" + ) + + instance.query("ALTER USER emma IDENTIFIED WITH ssl_certificate CN 'client2'") + assert ( + execute_query_https("SELECT currentUser()", user="emma", cert_name="client2") + == "emma\n" + ) + assert ( + instance.query("SHOW CREATE USER emma") + == "CREATE USER emma IDENTIFIED WITH ssl_certificate CN \\'client2\\'\n" + ) + + with pytest.raises(Exception) as err: + execute_query_https("SELECT currentUser()", user="emma", cert_name="client3") + assert "HTTP Error 403" in str(err.value) + + assert ( + instance.query("SHOW CREATE USER lucy") + == "CREATE USER lucy IDENTIFIED WITH ssl_certificate CN \\'client2\\', \\'client3\\'\n" + ) + + assert ( + instance.query( + "SELECT name, auth_type, auth_params FROM system.users WHERE name IN ['emma', 'lucy'] ORDER BY name" + ) + == 'emma\tssl_certificate\t{"common_names":["client2"]}\n' + 'lucy\tssl_certificate\t{"common_names":["client2","client3"]}\n' + ) From 191936a806c6cdea328de07e0b58726b54ec4abb Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 23 Jun 2022 11:50:06 -0700 Subject: [PATCH 049/121] clean up comments --- .../test_tlsv1_3/configs/ssl_config.xml | 28 ------------------- 1 file changed, 28 deletions(-) diff --git a/tests/integration/test_tlsv1_3/configs/ssl_config.xml b/tests/integration/test_tlsv1_3/configs/ssl_config.xml index b18f04dd954..e3d1831e08c 100644 --- a/tests/integration/test_tlsv1_3/configs/ssl_config.xml +++ b/tests/integration/test_tlsv1_3/configs/ssl_config.xml @@ -40,34 +40,6 @@ true relaxed - - \ No newline at end of file From 1239ee4b31311ef248d1a93f6829ed43ff8c0d9b Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Thu, 23 Jun 2022 12:11:18 -0700 Subject: [PATCH 050/121] update ssl config --- tests/integration/test_tlsv1_3/configs/ssl_config.xml | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_tlsv1_3/configs/ssl_config.xml b/tests/integration/test_tlsv1_3/configs/ssl_config.xml index e3d1831e08c..9e686b55567 100644 --- a/tests/integration/test_tlsv1_3/configs/ssl_config.xml +++ b/tests/integration/test_tlsv1_3/configs/ssl_config.xml @@ -28,7 +28,6 @@ true relaxed - false sslv2,sslv3,tlsv1,tlsv1_1,tlsv1_2 From 8dd6f09cf174a7ad15c4a8ab4732091d161b9599 Mon Sep 17 00:00:00 2001 From: Larry Luo Date: Sun, 26 Jun 2022 07:19:22 -0700 Subject: [PATCH 051/121] fix style check error --- tests/integration/test_tlsv1_3/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_tlsv1_3/test.py b/tests/integration/test_tlsv1_3/test.py index d48b84925f5..80c9c68eca7 100644 --- a/tests/integration/test_tlsv1_3/test.py +++ b/tests/integration/test_tlsv1_3/test.py @@ -18,7 +18,7 @@ instance = cluster.add_instance( "certs/server-key.pem", "certs/server-cert.pem", "certs/ca-cert.pem", - "certs/dhparam4096.pem" + 
"certs/dhparam4096.pem", ], user_configs=["configs/users_with_ssl_auth.xml"], ) From a3cb066eff264eb0661b89f46d688a3fd76f5eae Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 27 Jun 2022 09:38:12 +0800 Subject: [PATCH 052/121] update test case --- tests/integration/test_hive_query/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 46981fcee85..155aa4ced4a 100644 --- a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -438,7 +438,7 @@ def test_hive_struct_type(started_cluster): result = node.query( """ - SELECT day, f_struct.a FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 + SELECT day, f_struct.a, f_struct.d.x FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 """ ) expected_result = """2022-02-20 aaa 10""" From 39ea5ffdcbb96d471f2f8663aa25484ecbde2de7 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 27 Jun 2022 01:36:27 -0400 Subject: [PATCH 053/121] compress clickhouse executable, new target 'self-extracted' is added --- programs/CMakeLists.txt | 16 ++++++++++++++++ programs/self-extracting/CMakeLists.txt | 5 +++++ 2 files changed, 21 insertions(+) create mode 100644 programs/self-extracting/CMakeLists.txt diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt index ad59ec20e39..08f1a269ff6 100644 --- a/programs/CMakeLists.txt +++ b/programs/CMakeLists.txt @@ -18,6 +18,12 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)" ${ENABLE_CLICKHOUSE_ALL}) +if (CLICKHOUSE_SPLIT_BINARY) + option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF) +else () + option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON) +endif () + # https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/ option (ENABLE_CLICKHOUSE_LOCAL "Local files fast processing mode" ${ENABLE_CLICKHOUSE_ALL}) @@ -97,6 +103,12 @@ else() message(STATUS "Local mode: OFF") endif() +if (ENABLE_CLICKHOUSE_SELF_EXTRACTING) + message(STATUS "Self-extracting executable: ON") +else() + message(STATUS "Self-extracting executable: OFF") +endif() + if (ENABLE_CLICKHOUSE_BENCHMARK) message(STATUS "Benchmark mode: ON") else() @@ -254,6 +266,10 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE) add_subdirectory (library-bridge) endif () +#if (ENABLE_CLICKHOUSE_SELF_EXTRACTING) + add_subdirectory (self-extracting) +#endif () + if (CLICKHOUSE_ONE_SHARED) add_library(clickhouse-lib SHARED ${CLICKHOUSE_SERVER_SOURCES} diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt new file mode 100644 index 00000000000..2aec7938114 --- /dev/null +++ b/programs/self-extracting/CMakeLists.txt @@ -0,0 +1,5 @@ +add_custom_target (self-extracting ALL + ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse + DEPENDS clickhouse compressor +) + From 44b68dd651bef80b7a03de024e7e27d3f7dba2bf Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Mon, 27 Jun 2022 16:23:04 +0800 Subject: [PATCH 054/121] update test case --- tests/integration/test_hive_query/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/test.py b/tests/integration/test_hive_query/test.py index 155aa4ced4a..276fd42de9b 100644 --- 
a/tests/integration/test_hive_query/test.py +++ b/tests/integration/test_hive_query/test.py @@ -441,5 +441,5 @@ def test_hive_struct_type(started_cluster): SELECT day, f_struct.a, f_struct.d.x FROM default.test_hive_types WHERE day = '2022-02-20' SETTINGS input_format_parquet_import_nested=1 """ ) - expected_result = """2022-02-20 aaa 10""" + expected_result = """2022-02-20 aaa 10""" assert result.strip() == expected_result From 3ebe6a03b13672ed93d083087019e552ee3dcc8d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 27 Jun 2022 10:37:19 +0200 Subject: [PATCH 055/121] Revert "Revert "Fix optimization in PartialSortingTransform (SIGSEGV and possible incorrect result)"" --- src/Processors/Transforms/PartialSortingTransform.cpp | 2 +- ...02345_partial_sort_transform_optimization.reference | 10 ++++++++++ .../02345_partial_sort_transform_optimization.sql | 3 +++ 3 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference create mode 100644 tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql diff --git a/src/Processors/Transforms/PartialSortingTransform.cpp b/src/Processors/Transforms/PartialSortingTransform.cpp index 131bf4f8e7c..b0f866cb3fd 100644 --- a/src/Processors/Transforms/PartialSortingTransform.cpp +++ b/src/Processors/Transforms/PartialSortingTransform.cpp @@ -71,7 +71,7 @@ bool compareWithThreshold(const ColumnRawPtrs & raw_block_columns, size_t min_bl size_t raw_block_columns_size = raw_block_columns.size(); for (size_t i = 0; i < raw_block_columns_size; ++i) { - int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[0], sort_description[i].nulls_direction); + int res = sort_description[i].direction * raw_block_columns[i]->compareAt(min_block_index, 0, *threshold_columns[i], sort_description[i].nulls_direction); if (res < 0) return true; diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference new file mode 100644 index 00000000000..e6c99ff9291 --- /dev/null +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference @@ -0,0 +1,10 @@ +0 999999 999999 +0 999998 999998 +0 999997 999997 +0 999996 999996 +0 999995 999995 +0 999994 999994 +0 999993 999993 +0 999992 999992 +0 999991 999991 +0 999990 999990 diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql new file mode 100644 index 00000000000..e7855c47474 --- /dev/null +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -0,0 +1,3 @@ +-- Regression for PartialSortingTransform optimization +-- that requires at least 1500 rows. 
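+-- (Why the one-character fix above matters, as far as the diff shows: compareWithThreshold
+-- iterates over the sort columns but compared each of them against *threshold_columns[0].
+-- With the mixed key below -- a constant UInt8, a Nullable(UInt64) and a String -- the later
+-- columns were presumably compared against a threshold column of the wrong type, which can
+-- crash (the SIGSEGV from the commit title) or produce a wrong order; threshold_columns[i]
+-- pairs every column with its own threshold.)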
+select * from (select * from (select 0 a, toNullable(number) b, toString(number) c from numbers(1e6)) order by a desc, b desc, c limit 1500) limit 10;

From 8ce6b8226d2665d57c9533e6a00fb027a55763be Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy <99031427+yakov-olkhovskiy@users.noreply.github.com>
Date: Mon, 27 Jun 2022 08:25:21 -0400
Subject: [PATCH 056/121] Update CMakeLists.txt

---
 programs/CMakeLists.txt | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 08f1a269ff6..33b7eda2fce 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -266,9 +266,9 @@ if (ENABLE_CLICKHOUSE_LIBRARY_BRIDGE)
     add_subdirectory (library-bridge)
 endif ()

-#if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
+if (ENABLE_CLICKHOUSE_SELF_EXTRACTING)
     add_subdirectory (self-extracting)
-#endif ()
+endif ()

 if (CLICKHOUSE_ONE_SHARED)
     add_library(clickhouse-lib SHARED

From 5d36994c4dfc174f1f7aa7420b24f63fc15b68d0 Mon Sep 17 00:00:00 2001
From: Yakov Olkhovskiy
Date: Mon, 27 Jun 2022 11:41:23 -0400
Subject: [PATCH 057/121] self-extracting requires utils (uses utils/self-extracting-executable/compressor)

---
 programs/CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 9df2b4e316c..b56f7ab1cc2 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -18,7 +18,7 @@ option (ENABLE_CLICKHOUSE_SERVER "Server mode (main mode)" ${ENABLE_CLICKHOUSE_A

 option (ENABLE_CLICKHOUSE_CLIENT "Client mode (interactive tui/shell that connects to the server)" ${ENABLE_CLICKHOUSE_ALL})

-if (CLICKHOUSE_SPLIT_BINARY)
+if (CLICKHOUSE_SPLIT_BINARY OR NOT ENABLE_UTILS)
     option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" OFF)
 else ()
     option (ENABLE_CLICKHOUSE_SELF_EXTRACTING "Self-extracting executable" ON)

From 2e3923757cce253018e3e63dddf4b42181b64dc5 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Tue, 28 Jun 2022 17:20:33 +0800
Subject: [PATCH 058/121] fixed: Moved-from object 'column_array_of_element' is moved

---
 src/DataTypes/NestedUtils.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/DataTypes/NestedUtils.cpp b/src/DataTypes/NestedUtils.cpp
index dbdba39fa84..5dae2b7b413 100644
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@@ -106,7 +106,7 @@ Block flatten(const Block & block)
         res.insert(ColumnWithTypeAndName(
             is_const
                 ?
ColumnConst::create(std::move(column_array_of_element), block.rows()) - : std::move(column_array_of_element), + : column_array_of_element, std::make_shared(element_types[i]), nested_name)); } @@ -331,6 +331,5 @@ std::optional NestedColumnExtractHelper::extractColumn( Block sub_block(columns); nested_tables[new_column_name_prefix] = std::make_shared(Nested::flatten(sub_block)); return extractColumn(original_column_name, new_column_name_prefix, nested_names.second); - } } From 58c8facebb517e4a391bcaec325aa2f6f7f61e61 Mon Sep 17 00:00:00 2001 From: Anton Popov Date: Tue, 28 Jun 2022 14:21:21 +0000 Subject: [PATCH 059/121] minor fixes --- src/DataTypes/FieldToDataType.cpp | 2 +- src/DataTypes/getLeastSupertype.cpp | 8 ++++---- tests/performance/json_type.xml | 30 ++++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/src/DataTypes/FieldToDataType.cpp b/src/DataTypes/FieldToDataType.cpp index 00b4665af94..49ece27cc1f 100644 --- a/src/DataTypes/FieldToDataType.cpp +++ b/src/DataTypes/FieldToDataType.cpp @@ -161,7 +161,7 @@ DataTypePtr FieldToDataType::operator() (const Map & map) const return std::make_shared( getLeastSupertype(key_types), - getLeastSupertype(value_types)); + getLeastSupertype(value_types)); } template diff --git a/src/DataTypes/getLeastSupertype.cpp b/src/DataTypes/getLeastSupertype.cpp index 8c6dba5a339..ac240c4afce 100644 --- a/src/DataTypes/getLeastSupertype.cpp +++ b/src/DataTypes/getLeastSupertype.cpp @@ -173,7 +173,7 @@ DataTypePtr getNumericType(const TypeIndexSet & types) else if (min_bit_width_of_integer <= 256) return std::make_shared(); else - throwOrReturn(types, + return throwOrReturn(types, " because some of them are signed integers and some are unsigned integers," " but there is no signed integer type, that can exactly represent all required unsigned integer values", ErrorCodes::NO_COMMON_TYPE); } @@ -193,7 +193,7 @@ DataTypePtr getNumericType(const TypeIndexSet & types) else if (min_bit_width_of_integer <= 256) return std::make_shared(); else - throwOrReturn(types, + return throwOrReturn(types, " but as all data types are unsigned integers, we must have found maximum unsigned integer type", ErrorCodes::NO_COMMON_TYPE); } } @@ -588,7 +588,7 @@ DataTypePtr getLeastSupertype(const TypeIndexSet & types) #define DISPATCH(TYPE) \ if (which.idx == TypeIndex::TYPE) \ - return std::make_shared>(); + return std::make_shared>(); /// NOLINT FOR_NUMERIC_TYPES(DISPATCH) #undef DISPATCH @@ -596,7 +596,7 @@ DataTypePtr getLeastSupertype(const TypeIndexSet & types) if (which.isString()) return std::make_shared(); - throwOrReturn(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE); + return throwOrReturn(types, "because cannot get common type by type indexes with non-simple types", ErrorCodes::NO_COMMON_TYPE); } if (types.contains(TypeIndex::String)) diff --git a/tests/performance/json_type.xml b/tests/performance/json_type.xml index ef11856df0b..b6406f52579 100644 --- a/tests/performance/json_type.xml +++ b/tests/performance/json_type.xml @@ -3,13 +3,37 @@ 1 + + + + + json1 + + '{"k1":1, "k2": "some"}' + + + + json2 + + '{"col' || toString(number % 100) || '":' || toString(number) || '}' + + + + json3 + + '{"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}' + + + + CREATE TABLE t_json_1(data JSON) ENGINE = MergeTree ORDER BY tuple() CREATE TABLE t_json_2(data JSON) ENGINE = MergeTree ORDER BY tuple() CREATE TABLE t_json_3(data JSON) 
ENGINE = MergeTree ORDER BY tuple() - INSERT INTO t_json_1 SELECT materialize('{"k1":1, "k2": "some"}') FROM numbers(200000) - INSERT INTO t_json_2 SELECT '{"col' || toString(number % 100) || '":' || toString(number) || '}' FROM numbers(100000) - INSERT INTO t_json_3 SELECT materialize('{"k1":[{"k2":"aaa","k3":[{"k4":"bbb"},{"k4":"ccc"}]},{"k2":"ddd","k3":[{"k4":"eee"},{"k4":"fff"}]}]}') FROM numbers_mt(100000) + INSERT INTO t_json_1 SELECT materialize({json1}) FROM numbers(200000) + INSERT INTO t_json_2 SELECT {json2} FROM numbers(100000) + INSERT INTO t_json_3 SELECT materialize({json3}) FROM numbers_mt(100000) DROP TABLE IF EXISTS t_json_1 DROP TABLE IF EXISTS t_json_2 From 9fc7c9f575f02112ea800462d70d9449e43c9d02 Mon Sep 17 00:00:00 2001 From: GruffGemini <43479425+GruffGemini@users.noreply.github.com> Date: Tue, 28 Jun 2022 17:55:16 +0300 Subject: [PATCH 060/121] Update group-by.md docs (group-by.md): fixed broken links --- docs/ru/sql-reference/statements/select/group-by.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/ru/sql-reference/statements/select/group-by.md b/docs/ru/sql-reference/statements/select/group-by.md index 5a306e63955..01df1b969bf 100644 --- a/docs/ru/sql-reference/statements/select/group-by.md +++ b/docs/ru/sql-reference/statements/select/group-by.md @@ -264,10 +264,10 @@ GROUP BY вычисляет для каждого встретившегося ### Группировка во внешней памяти {#select-group-by-in-external-memory} Можно включить сброс временных данных на диск, чтобы ограничить потребление оперативной памяти при выполнении `GROUP BY`. -Настройка [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) определяет пороговое значение потребления RAM, по достижении которого временные данные `GROUP BY` сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено. +Настройка [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) определяет пороговое значение потребления RAM, по достижении которого временные данные `GROUP BY` сбрасываются в файловую систему. Если равно 0 (по умолчанию) - значит выключено. При использовании `max_bytes_before_external_group_by`, рекомендуем выставить `max_memory_usage` приблизительно в два раза больше. Это следует сделать, потому что агрегация выполняется в две стадии: чтение и формирование промежуточных данных (1) и слияние промежуточных данных (2). Сброс данных на файловую систему может производиться только на стадии 1. Если сброса временных данных не было, то на стадии 2 может потребляться до такого же объёма памяти, как на стадии 1. -Например, если [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) было выставлено в 10000000000, и вы хотите использовать внешнюю агрегацию, то имеет смысл выставить `max_bytes_before_external_group_by` в 10000000000, а `max_memory_usage` в 20000000000. При срабатывании внешней агрегации (если был хотя бы один сброс временных данных в файловую систему) максимальное потребление оперативки будет лишь чуть-чуть больше `max_bytes_before_external_group_by`. +Например, если [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) было выставлено в 10000000000, и вы хотите использовать внешнюю агрегацию, то имеет смысл выставить `max_bytes_before_external_group_by` в 10000000000, а `max_memory_usage` в 20000000000. 
При срабатывании внешней агрегации (если был хотя бы один сброс временных данных в файловую систему) максимальное потребление оперативки будет лишь чуть-чуть больше `max_bytes_before_external_group_by`. При распределённой обработке запроса внешняя агрегация производится на удалённых серверах. Для того чтобы на сервере-инициаторе запроса использовалось немного оперативки, нужно выставить настройку `distributed_aggregation_memory_efficient` в 1. From 2b11c0daa9c264ddc756ca6c6094347b3996ffe0 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 28 Jun 2022 19:26:15 +0200 Subject: [PATCH 061/121] Updated tests --- .../02345_partial_sort_transform_optimization.reference | 5 +++++ .../02345_partial_sort_transform_optimization.sql | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference index e6c99ff9291..7b50765be55 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.reference @@ -8,3 +8,8 @@ 0 999992 999992 0 999991 999991 0 999990 999990 +98974 +98973 +98972 +98971 +98970 diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index e7855c47474..a53a352f57e 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,3 +1,3 @@ --- Regression for PartialSortingTransform optimization --- that requires at least 1500 rows. -select * from (select * from (select 0 a, toNullable(number) b, toString(number) c from numbers(1e6)) order by a desc, b desc, c limit 1500) limit 10; +-- Regression for PartialSortingTransform optimization that requires at least 1500 rows. 
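+-- (Sanity check for the second of the two queries below, whose expected output is added
+-- to the .reference file above: numbers(100000) ordered by number DESC starts at 99999,
+-- so the LIMIT offset of 1025 lands on 99999 - 1025 = 98974, and the outer LIMIT 5
+-- yields 98974..98970.)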
+SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; +SELECT number FROM (SELECT number, 1 AS k FROM numbers(100000) ORDER BY k ASC, number DESC LIMIT 1025, 1023) LIMIT 5; From 7274169c33f10837ec5c0fb8b8edc62a01cac442 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 29 Jun 2022 10:03:10 +0800 Subject: [PATCH 062/121] update test scripts --- .../integration/test_hive_query/data/prepare_hive_data.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_hive_query/data/prepare_hive_data.sh b/tests/integration/test_hive_query/data/prepare_hive_data.sh index 39d435eb05a..d65ec9cc153 100755 --- a/tests/integration/test_hive_query/data/prepare_hive_data.sh +++ b/tests/integration/test_hive_query/data/prepare_hive_data.sh @@ -1,15 +1,15 @@ #!/bin/bash hive -e "create database test" -hive -e "create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " -hive -e "create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" -hive -e "create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " +hive -e "drop table if exists test.demo; create table test.demo(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'; create table test.demo_orc(id string, score int) PARTITIONED BY(day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'; " +hive -e "drop table if exists test.parquet_demo; create table test.parquet_demo(id string, score int) PARTITIONED BY(day string, hour string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'" +hive -e "drop table if exists test.demo_text; create table test.demo_text(id string, score int, day string)row format delimited fields terminated by ','; load data local inpath '/demo_data.txt' into table test.demo_text " hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo partition(day) select * from test.demo_text; insert into test.demo_orc partition(day) select * from test.demo_text" hive -e "set 
hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '00' as hour from test.demo;" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" -hive -e "CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" +hive -e "drop table if test.test_hive_types; exists CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" From ee30c4a3f7039ca6a4e029d3f973f8a943125e21 Mon Sep 17 00:00:00 2001 From: lgbo-ustc Date: Wed, 29 Jun 2022 10:23:00 +0800 Subject: [PATCH 063/121] update test scripts --- tests/integration/test_hive_query/data/prepare_hive_data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_hive_query/data/prepare_hive_data.sh b/tests/integration/test_hive_query/data/prepare_hive_data.sh index d65ec9cc153..495ea201870 100755 --- a/tests/integration/test_hive_query/data/prepare_hive_data.sh +++ b/tests/integration/test_hive_query/data/prepare_hive_data.sh @@ -9,7 +9,7 @@ hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.demo pa hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, 
hour) select id, score, day, '00' as hour from test.demo;" hive -e "set hive.exec.dynamic.partition.mode=nonstrict;insert into test.parquet_demo partition(day, hour) select id, score, day, '01' as hour from test.demo;" -hive -e "drop table if test.test_hive_types; exists CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" +hive -e "drop table if exists test.test_hive_types; CREATE TABLE test.test_hive_types( f_tinyint tinyint, f_smallint smallint, f_int int, f_integer int, f_bigint bigint, f_float float, f_double double, f_decimal decimal(10,0), f_timestamp timestamp, f_date date, f_string string, f_varchar varchar(100), f_char char(100), f_bool boolean, f_array_int array, f_array_string array, f_array_float array, f_map_int map, f_map_string map, f_map_float map, f_struct struct>) PARTITIONED BY( day string) ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat';" hive -e "insert into test.test_hive_types partition(day='2022-02-20') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-20 14:47:04', '2022-02-20', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 10, 'y', 'xyz')); insert into test.test_hive_types partition(day='2022-02-19') select 1, 2, 3, 4, 5, 6.11, 7.22, 8.333, '2022-02-19 14:47:04', '2022-02-19', 'hello world', 'hello world', 'hello world', true, array(1,2,3), array('hello world', 'hello world'), array(float(1.1),float(1.2)), map('a', 100, 'b', 200, 'c', 300), map('a', 'aa', 'b', 'bb', 'c', 'cc'), map('a', float(111.1), 'b', float(222.2), 'c', float(333.3)), named_struct('a', 'aaa', 'b', 200, 'c', float(333.3), 'd', named_struct('x', 11, 'y', 'abc'));" From dea3b5bfcecfa8ce778bd61fab86e448ad648fec Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 29 Jun 2022 08:56:15 +0300 Subject: [PATCH 064/121] Revert "Non Negative Derivative window function" --- src/Common/IntervalKind.cpp | 27 +---- src/Common/IntervalKind.h | 5 +- src/Processors/Transforms/WindowTransform.cpp | 112 ------------------ .../02232_non_negative_derivative.reference | 64 ---------- .../02232_non_negative_derivative.sql | 63 ---------- 5 files changed, 5 insertions(+), 266 deletions(-) delete mode 100644 tests/queries/0_stateless/02232_non_negative_derivative.reference delete mode 100644 tests/queries/0_stateless/02232_non_negative_derivative.sql diff --git a/src/Common/IntervalKind.cpp b/src/Common/IntervalKind.cpp index 75c2a83e9fb..d3cd4eeff8f 100644 --- a/src/Common/IntervalKind.cpp +++ 
b/src/Common/IntervalKind.cpp @@ -9,13 +9,13 @@ namespace ErrorCodes extern const int SYNTAX_ERROR; } -Float64 IntervalKind::toAvgSeconds() const +Int32 IntervalKind::toAvgSeconds() const { switch (kind) { - case IntervalKind::Nanosecond: return 0.000000001; - case IntervalKind::Microsecond: return 0.000001; - case IntervalKind::Millisecond: return 0.001; + case IntervalKind::Nanosecond: + case IntervalKind::Microsecond: + case IntervalKind::Millisecond: return 0; /// fractional parts of seconds have 0 seconds case IntervalKind::Second: return 1; case IntervalKind::Minute: return 60; case IntervalKind::Hour: return 3600; @@ -28,25 +28,6 @@ Float64 IntervalKind::toAvgSeconds() const __builtin_unreachable(); } -bool IntervalKind::isFixedLength() const -{ - switch (kind) - { - case IntervalKind::Nanosecond: - case IntervalKind::Microsecond: - case IntervalKind::Millisecond: - case IntervalKind::Second: - case IntervalKind::Minute: - case IntervalKind::Hour: - case IntervalKind::Day: - case IntervalKind::Week: return true; - case IntervalKind::Month: - case IntervalKind::Quarter: - case IntervalKind::Year: return false; - } - __builtin_unreachable(); -} - IntervalKind IntervalKind::fromAvgSeconds(Int64 num_seconds) { if (num_seconds) diff --git a/src/Common/IntervalKind.h b/src/Common/IntervalKind.h index 65c14515e34..d5f2b5672cd 100644 --- a/src/Common/IntervalKind.h +++ b/src/Common/IntervalKind.h @@ -31,15 +31,12 @@ struct IntervalKind /// Returns number of seconds in one interval. /// For `Month`, `Quarter` and `Year` the function returns an average number of seconds. - Float64 toAvgSeconds() const; + Int32 toAvgSeconds() const; /// Chooses an interval kind based on number of seconds. /// For example, `IntervalKind::fromAvgSeconds(3600)` returns `IntervalKind::Hour`. static IntervalKind fromAvgSeconds(Int64 num_seconds); - /// Returns whether IntervalKind has a fixed number of seconds (e.g. Day) or non-fixed(e.g. Month) - bool isFixedLength() const; - /// Returns an uppercased version of what `toString()` returns. const char * toKeyword() const; diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp index 09805696472..3eb0f62cb01 100644 --- a/src/Processors/Transforms/WindowTransform.cpp +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -13,7 +13,6 @@ #include #include #include -#include #include #include @@ -28,7 +27,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int NOT_IMPLEMENTED; extern const int ILLEGAL_COLUMN; - extern const int ILLEGAL_TYPE_OF_ARGUMENT; } // Interface for true window functions. 
It's not much of an interface, they just @@ -2202,109 +2200,6 @@ struct WindowFunctionNthValue final : public WindowFunction } }; -struct NonNegativeDerivativeState -{ - Float64 previous_metric = 0; - Float64 previous_timestamp = 0; -}; - -// nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL 1 SECOND]) -struct WindowFunctionNonNegativeDerivative final : public StatefulWindowFunction -{ - static constexpr size_t ARGUMENT_METRIC = 0; - static constexpr size_t ARGUMENT_TIMESTAMP = 1; - static constexpr size_t ARGUMENT_INTERVAL = 2; - - WindowFunctionNonNegativeDerivative(const std::string & name_, - const DataTypes & argument_types_, const Array & parameters_) - : StatefulWindowFunction(name_, argument_types_, parameters_) - { - if (!parameters.empty()) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} cannot be parameterized", name_); - } - - if (argument_types.size() != 2 && argument_types.size() != 3) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Function {} takes 2 or 3 arguments", name_); - } - - if (!isNumber(argument_types[ARGUMENT_METRIC])) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Argument {} must be a number, '{}' given", - ARGUMENT_METRIC, - argument_types[ARGUMENT_METRIC]->getName()); - } - - if (!isDateTime(argument_types[ARGUMENT_TIMESTAMP]) && !isDateTime64(argument_types[ARGUMENT_TIMESTAMP])) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Argument {} must be DateTime or DateTime64, '{}' given", - ARGUMENT_TIMESTAMP, - argument_types[ARGUMENT_TIMESTAMP]->getName()); - } - - if (argument_types.size() == 3) - { - const DataTypeInterval * interval_datatype = checkAndGetDataType(argument_types[ARGUMENT_INTERVAL].get()); - if (!interval_datatype) - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Argument {} must be an INTERVAL, '{}' given", - ARGUMENT_INTERVAL, - argument_types[ARGUMENT_INTERVAL]->getName()); - } - if (!interval_datatype->getKind().isFixedLength()) - { - throw Exception( - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, - "The INTERVAL must be a week or shorter, '{}' given", - argument_types[ARGUMENT_INTERVAL]->getName()); - } - interval_length = interval_datatype->getKind().toAvgSeconds(); - interval_specified = true; - } - } - - - DataTypePtr getReturnType() const override { return argument_types[0]; } - - bool allocatesMemoryInArena() const override { return false; } - - void windowInsertResultInto(const WindowTransform * transform, - size_t function_index) override - { - const auto & current_block = transform->blockAt(transform->current_row); - const auto & workspace = transform->workspaces[function_index]; - auto & state = getState(workspace); - - auto interval_duration = interval_specified ? interval_length * - (*current_block.input_columns[workspace.argument_column_indices[ARGUMENT_INTERVAL]]).getFloat64(0) : 1; - - Float64 last_metric = state.previous_metric; - Float64 last_timestamp = state.previous_timestamp; - - Float64 curr_metric = WindowFunctionHelpers::getValue(transform, function_index, ARGUMENT_METRIC, transform->current_row); - Float64 curr_timestamp = WindowFunctionHelpers::getValue(transform, function_index, ARGUMENT_TIMESTAMP, transform->current_row); - - Float64 time_elapsed = curr_timestamp - last_timestamp; - Float64 metric_diff = curr_metric - last_metric; - Float64 result = (time_elapsed != 0) ? 
(metric_diff / time_elapsed * interval_duration) : 0; - - state.previous_metric = curr_metric; - state.previous_timestamp = curr_timestamp; - - WindowFunctionHelpers::setValueToOutputColumn(transform, function_index, result >= 0 ? result : 0); - } -private: - Float64 interval_length = 1; - bool interval_specified = false; -}; - void registerWindowFunctions(AggregateFunctionFactory & factory) { @@ -2404,13 +2299,6 @@ void registerWindowFunctions(AggregateFunctionFactory & factory) return std::make_shared( name, argument_types, parameters); }, properties}); - - factory.registerFunction("nonNegativeDerivative", {[](const std::string & name, - const DataTypes & argument_types, const Array & parameters, const Settings *) - { - return std::make_shared( - name, argument_types, parameters); - }, properties}); } } diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.reference b/tests/queries/0_stateless/02232_non_negative_derivative.reference deleted file mode 100644 index 7559f527c7a..00000000000 --- a/tests/queries/0_stateless/02232_non_negative_derivative.reference +++ /dev/null @@ -1,64 +0,0 @@ -1 -1979-12-12 21:21:21.123 1.1 3.5045052519931732e-9 -1979-12-12 21:21:22.000 1.3345 0.26738883339230357 -1979-12-12 21:21:23.000 1.54 0.20550000000000002 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 453.33916989529325 -1979-12-12 21:21:21.123 1.1 1.0513515755979521e-17 -1979-12-12 21:21:22.000 1.3345 8.021665001769108e-10 -1979-12-12 21:21:23.000 1.54 6.165000000000001e-10 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 0.0000013600175096858798 -1979-12-12 21:21:21.123 1.1 1.4018021007972692e-14 -1979-12-12 21:21:22.000 1.3345 0.0000010695553335692141 -1979-12-12 21:21:23.000 1.54 8.22e-7 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 0.001813356679581173 -1979-12-12 21:21:21.123 1.1 1.7522526259965866e-11 -1979-12-12 21:21:22.000 1.3345 0.0013369441669615178 -1979-12-12 21:21:23.000 1.54 0.0010275000000000002 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 2.2666958494764664 -1979-12-12 21:21:21.123 1.1 2.102703151195904e-8 -1979-12-12 21:21:22.000 1.3345 1.6043330003538214 -1979-12-12 21:21:23.000 1.54 1.233 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 2720.0350193717595 -1979-12-12 21:21:21.123 1.1 0.0000014718922058371327 -1979-12-12 21:21:22.000 1.3345 112.3033100247675 -1979-12-12 21:21:23.000 1.54 86.31 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:21.124 2.34 0 -1979-12-12 21:21:21.127 3.7 190402.45135602317 -1979-12-12 21:21:21.123 1.1 0.0001009297512574034 -1979-12-12 21:21:21.124 2.34 35712459.78375156 -1979-12-12 21:21:21.127 3.7 13056168.092984445 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:22.000 1.3345 0 -1979-12-12 21:21:23.000 1.54 5918.400000000001 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.123 1.1 0.0027251032839498914 -1979-12-12 21:21:21.124 2.34 964236414.1612921 -1979-12-12 21:21:21.127 3.7 352516538.51058006 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:22.000 1.3345 0 -1979-12-12 21:21:23.000 1.54 159796.80000000002 -1979-12-12 21:21:23.000 1.54 0 -1979-12-12 21:21:21.123 1.1 0.021195247764054712 -1979-12-12 21:21:21.124 2.34 
7499616554.587828 -1979-12-12 21:21:21.127 3.7 2741795299.5267334 -1979-12-12 21:21:21.129 2.1 0 -1979-12-12 21:21:22.000 1.3345 0 -1979-12-12 21:21:23.000 1.54 1242864 -1979-12-12 21:21:23.000 1.54 0 diff --git a/tests/queries/0_stateless/02232_non_negative_derivative.sql b/tests/queries/0_stateless/02232_non_negative_derivative.sql deleted file mode 100644 index c4cbadb68a8..00000000000 --- a/tests/queries/0_stateless/02232_non_negative_derivative.sql +++ /dev/null @@ -1,63 +0,0 @@ -DROP TABLE IF EXISTS nnd; - -CREATE TABLE nnd -( - id Int8, ts DateTime64(3, 'UTC'), metric Float64 -) -ENGINE=MergeTree() -ORDER BY id; - -INSERT INTO nnd VALUES (1, toDateTime64('1979-12-12 21:21:21.123', 3, 'UTC'), 1.1), (2, toDateTime64('1979-12-12 21:21:21.124', 3, 'UTC'), 2.34), (3, toDateTime64('1979-12-12 21:21:21.127', 3, 'UTC'), 3.7); -INSERT INTO nnd VALUES (4, toDateTime64('1979-12-12 21:21:21.129', 3, 'UTC'), 2.1), (5, toDateTime('1979-12-12 21:21:22', 'UTC'), 1.3345), (6, toDateTime('1979-12-12 21:21:23', 'UTC'), 1.54), (7, toDateTime('1979-12-12 21:21:23', 'UTC'), 1.54); - --- shall work for precise intervals --- INTERVAL 1 SECOND shall be default -SELECT ( - SELECT - ts, - metric, - nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv - FROM nnd - LIMIT 5, 1 - ) = ( - SELECT - ts, - metric, - nonNegativeDerivative(metric, ts, toIntervalSecond(1)) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv - FROM nnd - LIMIT 5, 1 - ); -SELECT ts, metric, nonNegativeDerivative(metric, ts) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Nanosecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Microsecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 4 MICROSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Millisecond -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 5 MILLISECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Second -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 6 SECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Minute -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 7 MINUTE) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Hour -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 8 HOUR) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Day -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 9 DAY) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; --- Week -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 10 WEEK) OVER (ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; - --- shall not work for month, quarter, year (intervals with floating number of seconds) --- Month -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 11 MONTH) OVER (PARTITION BY metric ORDER BY 
ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- Quarter -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 12 QUARTER) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } --- Year -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 13 YEAR) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError ILLEGAL_TYPE_OF_ARGUMENT } - --- test against wrong arguments/types -SELECT ts, metric, nonNegativeDerivative(metric, 1, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative('string not datetime', ts, INTERVAL 3 NANOSECOND) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric, ts, INTERVAL 3 NANOSECOND, id) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } -SELECT ts, metric, nonNegativeDerivative(metric) OVER (PARTITION BY metric ORDER BY ts ASC Rows BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS deriv FROM nnd; -- { serverError BAD_ARGUMENTS } - --- cleanup -DROP TABLE IF EXISTS nnd; From 00372e4646f06008867da06ebcb938b0f118abf6 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 11:05:36 +0200 Subject: [PATCH 065/121] Fixed tests --- .../0_stateless/02345_partial_sort_transform_optimization.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index a53a352f57e..fe2ab096ab7 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,3 +1,5 @@ +-- Tags: no-backward-compatibility-check:22.6.1 + -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. 
SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; SELECT number FROM (SELECT number, 1 AS k FROM numbers(100000) ORDER BY k ASC, number DESC LIMIT 1025, 1023) LIMIT 5; From 78ea290789eba0f60ac42e2bf45c3439fd61abd6 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 29 Jun 2022 06:47:21 -0400 Subject: [PATCH 066/121] add remove command --- programs/self-extracting/CMakeLists.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/programs/self-extracting/CMakeLists.txt b/programs/self-extracting/CMakeLists.txt index 2aec7938114..89f6b56b826 100644 --- a/programs/self-extracting/CMakeLists.txt +++ b/programs/self-extracting/CMakeLists.txt @@ -1,5 +1,6 @@ add_custom_target (self-extracting ALL - ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse + ${CMAKE_COMMAND} -E remove clickhouse + COMMAND ${CMAKE_BINARY_DIR}/utils/self-extracting-executable/compressor clickhouse ../clickhouse DEPENDS clickhouse compressor ) From eeae73e0cf9470490a7dd7957306b3b18efe5f44 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 13:10:30 +0200 Subject: [PATCH 067/121] SQL create drop index update implementation --- .../InterpreterCreateIndexQuery.cpp | 26 +++++++++---------- .../InterpreterDropIndexQuery.cpp | 21 ++++++++------- src/Parsers/ASTAlterQuery.cpp | 2 -- src/Parsers/ASTCreateIndexQuery.cpp | 15 +++++++++++ src/Parsers/ASTCreateIndexQuery.h | 9 ++++--- src/Parsers/ASTDropIndexQuery.cpp | 4 ++- src/Parsers/ASTDropIndexQuery.h | 7 ++--- src/Parsers/ASTIndexDeclaration.cpp | 2 +- src/Parsers/ASTIndexDeclaration.h | 2 +- src/Parsers/ParserCreateIndexQuery.cpp | 11 +++++--- src/Parsers/ParserCreateIndexQuery.h | 4 +-- src/Parsers/ParserDropIndexQuery.cpp | 6 ++--- src/Parsers/ParserDropIndexQuery.h | 2 +- 13 files changed, 66 insertions(+), 45 deletions(-) diff --git a/src/Interpreters/InterpreterCreateIndexQuery.cpp b/src/Interpreters/InterpreterCreateIndexQuery.cpp index 29c151d1e4d..ef19eaa1c42 100644 --- a/src/Interpreters/InterpreterCreateIndexQuery.cpp +++ b/src/Interpreters/InterpreterCreateIndexQuery.cpp @@ -20,6 +20,7 @@ namespace ErrorCodes BlockIO InterpreterCreateIndexQuery::execute() { + auto current_context = getContext(); const auto & create_index = query_ptr->as(); AccessRightsElements required_access; @@ -29,23 +30,23 @@ BlockIO InterpreterCreateIndexQuery::execute() { DDLQueryOnClusterParams params; params.access_to_check = std::move(required_access); - return executeDDLQueryOnCluster(query_ptr, getContext(), params); + return executeDDLQueryOnCluster(query_ptr, current_context, params); } - getContext()->checkAccess(required_access); - auto table_id = getContext()->resolveStorageID(create_index, Context::ResolveOrdinary); + current_context->checkAccess(required_access); + auto table_id = current_context->resolveStorageID(create_index, Context::ResolveOrdinary); query_ptr->as().setDatabase(table_id.database_name); DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (typeid_cast(database.get()) - && !getContext()->getClientInfo().is_replicated_database_internal) + && !current_context->getClientInfo().is_replicated_database_internal) { auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); guard->releaseTableLock(); - return typeid_cast(database.get())->tryEnqueueReplicatedDDL(query_ptr, getContext()); + return 
assert_cast(database.get())->tryEnqueueReplicatedDDL(query_ptr, current_context); } - StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context); if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); @@ -53,23 +54,20 @@ BlockIO InterpreterCreateIndexQuery::execute() AlterCommands alter_commands; AlterCommand command; + command.ast = create_index.convertToASTAlterCommand(); command.index_decl = create_index.index_decl; command.type = AlterCommand::ADD_INDEX; command.index_name = create_index.index_name->as().name(); command.if_not_exists = create_index.if_not_exists; - /// Fill name in ASTIndexDeclaration - auto & ast_index_decl = command.index_decl->as(); - ast_index_decl.name = command.index_name; - alter_commands.emplace_back(std::move(command)); - auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); + auto alter_lock = table->lockForAlter(current_context->getSettingsRef().lock_acquire_timeout); StorageInMemoryMetadata metadata = table->getInMemoryMetadata(); - alter_commands.validate(table, getContext()); + alter_commands.validate(table, current_context); alter_commands.prepare(metadata); - table->checkAlterIsPossible(alter_commands, getContext()); - table->alter(alter_commands, getContext(), alter_lock); + table->checkAlterIsPossible(alter_commands, current_context); + table->alter(alter_commands, current_context, alter_lock); return {}; } diff --git a/src/Interpreters/InterpreterDropIndexQuery.cpp b/src/Interpreters/InterpreterDropIndexQuery.cpp index 6cc9334fad2..2339e0dc68e 100644 --- a/src/Interpreters/InterpreterDropIndexQuery.cpp +++ b/src/Interpreters/InterpreterDropIndexQuery.cpp @@ -18,6 +18,7 @@ namespace ErrorCodes BlockIO InterpreterDropIndexQuery::execute() { + auto current_context = getContext(); const auto & drop_index = query_ptr->as(); AccessRightsElements required_access; @@ -27,23 +28,23 @@ BlockIO InterpreterDropIndexQuery::execute() { DDLQueryOnClusterParams params; params.access_to_check = std::move(required_access); - return executeDDLQueryOnCluster(query_ptr, getContext(), params); + return executeDDLQueryOnCluster(query_ptr, current_context, params); } - getContext()->checkAccess(required_access); - auto table_id = getContext()->resolveStorageID(drop_index, Context::ResolveOrdinary); + current_context->checkAccess(required_access); + auto table_id = current_context->resolveStorageID(drop_index, Context::ResolveOrdinary); query_ptr->as().setDatabase(table_id.database_name); DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_id.database_name); if (typeid_cast(database.get()) - && !getContext()->getClientInfo().is_replicated_database_internal) + && !current_context->getClientInfo().is_replicated_database_internal) { auto guard = DatabaseCatalog::instance().getDDLGuard(table_id.database_name, table_id.table_name); guard->releaseTableLock(); - return typeid_cast(database.get())->tryEnqueueReplicatedDDL(query_ptr, getContext()); + return assert_cast(database.get())->tryEnqueueReplicatedDDL(query_ptr, current_context); } - StoragePtr table = DatabaseCatalog::instance().getTable(table_id, getContext()); + StoragePtr table = DatabaseCatalog::instance().getTable(table_id, current_context); if (table->isStaticStorage()) throw Exception(ErrorCodes::TABLE_IS_READ_ONLY, "Table is read-only"); @@ -58,12 +59,12 @@ BlockIO InterpreterDropIndexQuery::execute() 
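    /// As with CREATE INDEX above, DROP INDEX is lowered to an equivalent ALTER
    /// command and goes through the regular ALTER validation and execution path.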
alter_commands.emplace_back(std::move(command)); - auto alter_lock = table->lockForAlter(getContext()->getSettingsRef().lock_acquire_timeout); + auto alter_lock = table->lockForAlter(current_context->getSettingsRef().lock_acquire_timeout); StorageInMemoryMetadata metadata = table->getInMemoryMetadata(); - alter_commands.validate(table, getContext()); + alter_commands.validate(table, current_context); alter_commands.prepare(metadata); - table->checkAlterIsPossible(alter_commands, getContext()); - table->alter(alter_commands, getContext(), alter_lock); + table->checkAlterIsPossible(alter_commands, current_context); + table->alter(alter_commands, current_context, alter_lock); return {}; } diff --git a/src/Parsers/ASTAlterQuery.cpp b/src/Parsers/ASTAlterQuery.cpp index cfcc5decdf5..f53c39b192f 100644 --- a/src/Parsers/ASTAlterQuery.cpp +++ b/src/Parsers/ASTAlterQuery.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -557,7 +556,6 @@ void ASTAlterQuery::formatQueryImpl(const FormatSettings & settings, FormatState frame.need_parens = false; std::string indent_str = settings.one_line ? "" : std::string(4u * frame.indent, ' '); - settings.ostr << (settings.hilite ? hilite_keyword : "") << indent_str; switch (alter_object) diff --git a/src/Parsers/ASTCreateIndexQuery.cpp b/src/Parsers/ASTCreateIndexQuery.cpp index 7a5c80551d6..50470fbc1e4 100644 --- a/src/Parsers/ASTCreateIndexQuery.cpp +++ b/src/Parsers/ASTCreateIndexQuery.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB @@ -23,6 +24,9 @@ ASTPtr ASTCreateIndexQuery::clone() const res->index_decl = index_decl->clone(); res->children.push_back(res->index_decl); + + cloneTableOptions(*res); + return res; } @@ -58,4 +62,15 @@ void ASTCreateIndexQuery::formatQueryImpl(const FormatSettings & settings, Forma index_decl->formatImpl(settings, state, frame); } +ASTPtr ASTCreateIndexQuery::convertToASTAlterCommand() const +{ + auto command = std::make_shared(); + command->type = ASTAlterCommand::ADD_INDEX; + command->index = index_name->clone(); + command->index_decl = index_decl->clone(); + command->if_not_exists = if_not_exists; + + return command; +} + } diff --git a/src/Parsers/ASTCreateIndexQuery.h b/src/Parsers/ASTCreateIndexQuery.h index f3c6a7830a4..424a0e493d9 100644 --- a/src/Parsers/ASTCreateIndexQuery.h +++ b/src/Parsers/ASTCreateIndexQuery.h @@ -14,13 +14,13 @@ namespace DB class ASTCreateIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { public: - bool if_not_exists{false}; - ASTPtr index_name; /// Stores the IndexDeclaration here. 
ASTPtr index_decl; + bool if_not_exists{false}; + String getID(char delim) const override; ASTPtr clone() const override; @@ -30,7 +30,10 @@ public: return removeOnCluster(clone(), params.default_database); } - virtual QueryKind getQueryKind() const override { return QueryKind::Create; } + QueryKind getQueryKind() const override { return QueryKind::Create; } + + /// Convert ASTCreateIndexQuery to ASTAlterCommand + ASTPtr convertToASTAlterCommand() const; protected: void formatQueryImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTDropIndexQuery.cpp b/src/Parsers/ASTDropIndexQuery.cpp index 78152d213b8..a07336a2d26 100644 --- a/src/Parsers/ASTDropIndexQuery.cpp +++ b/src/Parsers/ASTDropIndexQuery.cpp @@ -20,6 +20,8 @@ ASTPtr ASTDropIndexQuery::clone() const res->index_name = index_name->clone(); res->children.push_back(res->index_name); + cloneTableOptions(*res); + return res; } @@ -53,9 +55,9 @@ void ASTDropIndexQuery::formatQueryImpl(const FormatSettings & settings, FormatS ASTPtr ASTDropIndexQuery::convertToASTAlterCommand() const { auto command = std::make_shared(); + command->type = ASTAlterCommand::DROP_INDEX; command->index = index_name->clone(); command->if_exists = if_exists; - command->type = ASTAlterCommand::DROP_INDEX; return command; } diff --git a/src/Parsers/ASTDropIndexQuery.h b/src/Parsers/ASTDropIndexQuery.h index d7e39f797b5..6c2aaeb5936 100644 --- a/src/Parsers/ASTDropIndexQuery.h +++ b/src/Parsers/ASTDropIndexQuery.h @@ -17,10 +17,11 @@ namespace DB class ASTDropIndexQuery : public ASTQueryWithTableAndOutput, public ASTQueryWithOnCluster { public: - bool if_exists{false}; ASTPtr index_name; + bool if_exists{false}; + String getID(char delim) const override; ASTPtr clone() const override; @@ -30,9 +31,9 @@ public: return removeOnCluster(clone(), params.default_database); } - virtual QueryKind getQueryKind() const override { return QueryKind::Drop; } + QueryKind getQueryKind() const override { return QueryKind::Drop; } - /// Convert ASTDropIndexQuery to ASTAlterCommand. + /// Convert ASTDropIndexQuery to ASTAlterCommand ASTPtr convertToASTAlterCommand() const; protected: diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index cc988d1d307..d223661451e 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -25,7 +25,7 @@ ASTPtr ASTIndexDeclaration::clone() const void ASTIndexDeclaration::formatImpl(const FormatSettings & s, FormatState & state, FormatStateStacked frame) const { - if (from_create_index) + if (part_of_create_index_query) { s.ostr << "("; expr->formatImpl(s, state, frame); diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 31d5ef0e7f8..e22c1da4489 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -16,7 +16,7 @@ public: IAST * expr; ASTFunction * type; UInt64 granularity; - bool from_create_index = false; + bool part_of_create_index_query = false; /** Get the text that identifies this element. 
*/ String getID(char) const override { return "Index"; } diff --git a/src/Parsers/ParserCreateIndexQuery.cpp b/src/Parsers/ParserCreateIndexQuery.cpp index 22411c71ee5..af0d9064626 100644 --- a/src/Parsers/ParserCreateIndexQuery.cpp +++ b/src/Parsers/ParserCreateIndexQuery.cpp @@ -43,7 +43,7 @@ bool ParserCreateIndexDeclaration::parseImpl(Pos & pos, ASTPtr & node, Expected return false; auto index = std::make_shared(); - index->from_create_index = true; + index->part_of_create_index_query = true; index->granularity = granularity->as().value.safeGet(); index->set(index->expr, expr); index->set(index->type, type); @@ -87,18 +87,21 @@ bool ParserCreateIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expect return false; if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) - return false; + return false; /// [ON cluster_name] if (s_on.ignore(pos, expected)) { - if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; } if (!parser_create_idx_decl.parse(pos, index_decl, expected)) return false; + auto & ast_index_decl = index_decl->as(); + ast_index_decl.name = index_name->as().name(); + query->index_name = index_name; query->children.push_back(index_name); diff --git a/src/Parsers/ParserCreateIndexQuery.h b/src/Parsers/ParserCreateIndexQuery.h index 3dfdccc301f..3cb91cd03c6 100644 --- a/src/Parsers/ParserCreateIndexQuery.h +++ b/src/Parsers/ParserCreateIndexQuery.h @@ -12,7 +12,7 @@ namespace DB class ParserCreateIndexQuery : public IParserBase { protected: - const char * getName() const override{ return "CREATE INDEX query"; } + const char * getName() const override { return "CREATE INDEX query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; @@ -21,7 +21,7 @@ protected: class ParserCreateIndexDeclaration : public IParserBase { public: - ParserCreateIndexDeclaration() {} + ParserCreateIndexDeclaration() = default; protected: const char * getName() const override { return "index declaration in create index"; } diff --git a/src/Parsers/ParserDropIndexQuery.cpp b/src/Parsers/ParserDropIndexQuery.cpp index 0844ea16ae0..89ed4f01838 100644 --- a/src/Parsers/ParserDropIndexQuery.cpp +++ b/src/Parsers/ParserDropIndexQuery.cpp @@ -39,13 +39,13 @@ bool ParserDropIndexQuery::parseImpl(IParser::Pos & pos, ASTPtr & node, Expected return false; if (!parseDatabaseAndTableAsAST(pos, expected, query->database, query->table)) - return false; + return false; /// [ON cluster_name] if (s_on.ignore(pos, expected)) { - if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) - return false; + if (!ASTQueryWithOnCluster::parse(pos, cluster_str, expected)) + return false; query->cluster = std::move(cluster_str); } diff --git a/src/Parsers/ParserDropIndexQuery.h b/src/Parsers/ParserDropIndexQuery.h index 1b6535c7efb..fc25ace469b 100644 --- a/src/Parsers/ParserDropIndexQuery.h +++ b/src/Parsers/ParserDropIndexQuery.h @@ -12,7 +12,7 @@ namespace DB class ParserDropIndexQuery : public IParserBase { protected: - const char * getName() const override{ return "DROP INDEX query"; } + const char * getName() const override { return "DROP INDEX query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; }; From 656d0c0f04b66b8423a7e5b8c1d628326597d8c2 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Wed, 29 Jun 2022 11:24:30 +0000 Subject: [PATCH 068/121] Update version_date.tsv and changelogs after v22.6.2.12-stable --- 
 docs/changelogs/v22.6.2.12-stable.md | 22 ++++++++++++++++++++++
 utils/list-versions/version_date.tsv |  1 +
 2 files changed, 23 insertions(+)
 create mode 100644 docs/changelogs/v22.6.2.12-stable.md

diff --git a/docs/changelogs/v22.6.2.12-stable.md b/docs/changelogs/v22.6.2.12-stable.md
new file mode 100644
index 00000000000..224367b994a
--- /dev/null
+++ b/docs/changelogs/v22.6.2.12-stable.md
@@ -0,0 +1,22 @@
+---
+sidebar_position: 1
+sidebar_label: 2022
+---
+
+# 2022 Changelog
+
+### ClickHouse release v22.6.2.12-stable FIXME as compared to v22.6.1.1985-stable
+
+#### Improvement
+* Backported in [#38484](https://github.com/ClickHouse/ClickHouse/issues/38484): Improve the stability for hive storage integration test. Move the data prepare step into test.py. [#38260](https://github.com/ClickHouse/ClickHouse/pull/38260) ([lgbo](https://github.com/lgbo-ustc)).
+
+#### Bug Fix (user-visible misbehavior in official stable or prestable release)
+
+* Backported in [#38404](https://github.com/ClickHouse/ClickHouse/issues/38404): Fix bug with nested short-circuit functions that led to execution of arguments even if condition is false. Closes [#38040](https://github.com/ClickHouse/ClickHouse/issues/38040). [#38173](https://github.com/ClickHouse/ClickHouse/pull/38173) ([Kruglov Pavel](https://github.com/Avogar)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Remove processor description from span attributes - it is not working [#38157](https://github.com/ClickHouse/ClickHouse/pull/38157) ([Ilya Yatsishin](https://github.com/qoega)).
+* Checkout full repositories for performance tests [#38327](https://github.com/ClickHouse/ClickHouse/pull/38327) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Try to fix 02305_schema_inference_with_globs [#38337](https://github.com/ClickHouse/ClickHouse/pull/38337) ([Kruglov Pavel](https://github.com/Avogar)).
+
diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv
index b33cbcebdb7..64e2050f683 100644
--- a/utils/list-versions/version_date.tsv
+++ b/utils/list-versions/version_date.tsv
@@ -1,3 +1,4 @@
+v22.6.2.12-stable	2022-06-29
 v22.6.1.1985-stable	2022-06-16
 v22.5.1.2079-stable	2022-05-19
 v22.4.5.9-stable	2022-05-06

From 3ff26939fc4aa4da32253acae7a2a56d32f4e0d7 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 29 Jun 2022 15:46:40 +0300
Subject: [PATCH 069/121] Update run.sh

---
 docker/test/stress/run.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh
index 119ea04080f..0b517fb4af8 100755
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@@ -42,6 +42,7 @@ function install_packages()
 function configure()
 {
     # install test configs
+    export USE_DATABASE_ORDINARY=1
    /usr/share/clickhouse-test/config/install.sh

    # we mount tests folder from repo to /usr/share

From 41460dcaca29b84d36fa52155b4dd4c9ef7675bd Mon Sep 17 00:00:00 2001
From: Nikita Taranov
Date: Wed, 29 Jun 2022 17:37:29 +0200
Subject: [PATCH 070/121] quick fix for 02112_with_fill_interval

Seems like the problem is that data now reaches FillingTransform in
multiple chunks more often than before. It is not yet clear why that
changes the results; investigation will continue.
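For the record, a minimal sketch of the workaround applied here
(with_fill_date is the table from the test; the exact WITH FILL query is
illustrative only, not taken from the test):

    SET max_threads = 1; -- single-threaded execution delivers the input as one chunk
    SELECT d FROM with_fill_date ORDER BY d WITH FILL;

Pinning the test to one thread sidesteps the chunk splitting until the root
cause inside FillingTransform is understood.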
--- tests/queries/0_stateless/02112_with_fill_interval.sql | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/queries/0_stateless/02112_with_fill_interval.sql b/tests/queries/0_stateless/02112_with_fill_interval.sql index d2416f9a84b..16773780515 100644 --- a/tests/queries/0_stateless/02112_with_fill_interval.sql +++ b/tests/queries/0_stateless/02112_with_fill_interval.sql @@ -1,3 +1,5 @@ +SET max_threads = 1; + DROP TABLE IF EXISTS with_fill_date; CREATE TABLE with_fill_date (d Date, d32 Date32) ENGINE = Memory; From 65110fdffc7be3a39b2fcc4c0a4cd019c1d1c190 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 17:50:25 +0200 Subject: [PATCH 071/121] ColumnVector refactor replicate SSE42 optimization --- src/Columns/ColumnVector.cpp | 220 ++++++++++++++++++----------------- src/Columns/ColumnVector.h | 4 +- 2 files changed, 114 insertions(+), 110 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 8a986c1ca86..7cfb90d4371 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -577,6 +577,113 @@ ColumnPtr ColumnVector::index(const IColumn & indexes, size_t limit) const return selectIndexImpl(*this, indexes, limit); } +#ifdef __SSE2__ + +namespace +{ + /** Optimization for ColumnVector replicate using SIMD instructions. + * For such optimization it is important that data is right padded with 15 bytes. + * + * Replicate span size is offsets[i] - offsets[i - 1]. + * + * Split spans into 3 categories. + * 1. Span with 0 size. Continue iteration. + * + * 2. Span with 1 size. Update pointer from which data must be copied into result. + * Then if we see span with size 1 or greater than 1 copy data directly into result data and reset pointer. + * Example: + * Data: 1 2 3 4 + * Offsets: 1 2 3 4 + * Result data: 1 2 3 4 + * + * 3. Span with size greater than 1. Save single data element into register and copy it into result data. + * Example: + * Data: 1 2 3 4 + * Offsets: 4 4 4 4 + * Result data: 1 1 1 1 + * + * Additional handling for tail is needed if pointer from which data must be copied from span with size 1 is not null. 
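+      *
+      * A mixed example combining the three categories (worked out by hand, for
+      * illustration only):
+      * Data: 1 2 3 4
+      * Offsets: 1 1 3 4
+      * Result data: 1 3 3 4
+      * (spans are 1, 0, 2, 1: element 0 is copied once, element 1 is skipped,
+      * element 2 is duplicated, element 3 is copied once).
+      *
+      * Note: on the first iteration the loop below reads offsets[-1]. This is the
+      * usual ClickHouse idiom: IColumn::Offsets is a PaddedPODArray, whose padding
+      * makes the -1th element readable as 0.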
+      */
+    template <typename IntType>
+    requires (std::is_same_v<IntType, Int32> || std::is_same_v<IntType, UInt32>)
+    void replicateSSE42Int32(const IntType * __restrict data, IntType * __restrict result_data, const IColumn::Offsets & offsets)
+    {
+        const IntType * data_copy_begin_ptr = nullptr;
+        size_t offsets_size = offsets.size();
+
+        for (size_t offset_index = 0; offset_index < offsets_size; ++offset_index)
+        {
+            size_t span = offsets[offset_index] - offsets[offset_index - 1];
+            if (span == 1)
+            {
+                if (!data_copy_begin_ptr)
+                    data_copy_begin_ptr = data + offset_index;
+
+                continue;
+            }
+
+            /// Copy data
+
+            if (data_copy_begin_ptr)
+            {
+                size_t copy_size = (data + offset_index) - data_copy_begin_ptr;
+                bool remainder = copy_size % 4;
+                size_t sse_copy_counter = (copy_size / 4) + remainder;
+                auto * result_data_copy = result_data;
+
+                while (sse_copy_counter)
+                {
+                    _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_copy), *(reinterpret_cast<const __m128i *>(data_copy_begin_ptr)));
+                    result_data_copy += 4;
+                    data_copy_begin_ptr += 4;
+                    --sse_copy_counter;
+                }
+
+                result_data += copy_size;
+                data_copy_begin_ptr = nullptr;
+            }
+
+            if (span == 0)
+                continue;
+
+            /// Copy single data element into result data
+
+            bool span_remainder = span % 4;
+            size_t copy_counter = (span / 4) + span_remainder;
+            auto * result_data_tmp = result_data;
+            __m128i copy_element_data = _mm_set1_epi32(data[offset_index]);
+
+            while (copy_counter)
+            {
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_tmp), copy_element_data);
+                result_data_tmp += 4;
+                --copy_counter;
+            }
+
+            result_data += span;
+        }
+
+        /// Copy tail if needed
+
+        if (data_copy_begin_ptr)
+        {
+            size_t copy_size = (data + offsets_size) - data_copy_begin_ptr;
+            bool remainder = copy_size % 4;
+            size_t sse_copy_counter = (copy_size / 4) + remainder;
+
+            while (sse_copy_counter)
+            {
+                _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data), *(reinterpret_cast<const __m128i *>(data_copy_begin_ptr)));
+                result_data += 4;
+                data_copy_begin_ptr += 4;
+                --sse_copy_counter;
+            }
+        }
+    }
+}
+
+#endif
+
 template <typename T>
 ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
 {
@@ -587,13 +694,16 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
     if (0 == size)
         return this->create();

+    auto res = this->create(offsets.back());
+
 #ifdef __SSE2__
     if constexpr (std::is_same_v<T, UInt32>)
-        return replicateSSE2(offsets);
+    {
+        replicateSSE42Int32(getData().data(), res->getData().data(), offsets);
+        return res;
+    }
 #endif

-    auto res = this->create(offsets.back());
-
     auto it = res->getData().begin(); // NOLINT
     for (size_t i = 0; i < size; ++i)
     {
@@ -605,110 +715,6 @@ ColumnPtr ColumnVector<T>::replicate(const IColumn::Offsets & offsets) const
     }

     return res;
 }
-
-#ifdef __SSE2__
-
-template <typename T>
-ColumnPtr ColumnVector<T>::replicateSSE2(const IColumn::Offsets & offsets) const
-{
-    auto res = this->create(offsets.back());
-
-    auto it = res->getData().begin(); // NOLINT
-
-    /// Column is using PaddedPODArray, so we don't have to worry about the 4 out of range elements.
- - IColumn::Offset prev_offset = 0; - std::optional copy_begin; - size_t size = offsets.size(); - for (size_t i = 0; i < size; ++i) - { - size_t span = offsets[i] - prev_offset; - prev_offset = offsets[i]; - if (span == 1) - { - if (!copy_begin) - copy_begin = i; - continue; - } - - /// data : 11 22 33 44 55 - /// offsets: 0 1 2 3 3 - /// res: 22 33 44 - if (copy_begin) - { - size_t copy_size = i - (*copy_begin); - bool remain = (copy_size & 3); - size_t sse_copy_counter = (copy_size >> 2); - sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); - auto it_tmp = it; // NOLINT - size_t data_start = *copy_begin; - copy_begin.reset(); - constexpr const int copy_mask = _MM_SHUFFLE(3, 2, 1, 0); - while (sse_copy_counter) - { - __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); - it_tmp += 4; - data_start += 4; - --sse_copy_counter; - } - - it += copy_size; - } - - if (span == 0) - continue; - - /// data : 11 22 33 - /// offsets: 0 0 4 - /// res: 33 33 33 33 - size_t shuffle_size = span; - bool shuffle_remain = (shuffle_size & 3); - size_t sse_shuffle_counter = (shuffle_size >> 2); - sse_shuffle_counter = shuffle_remain * (sse_shuffle_counter + 1) + (!shuffle_remain) * (sse_shuffle_counter); - auto it_tmp = it; // NOLINT - constexpr const int shuffle_mask = (_MM_SHUFFLE(0, 0, 0, 0)); - __m128i data_to_shuffle = _mm_loadu_si128(reinterpret_cast(&data[i])); - auto shuffle_result = _mm_shuffle_epi32(data_to_shuffle, shuffle_mask); - while (sse_shuffle_counter) - { - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), shuffle_result); - it_tmp += 4; - --sse_shuffle_counter; - } - it += shuffle_size; - } - - /// data : 11 22 33 44 55 - /// offsets: 1 2 3 4 5 - /// res: 11 22 33 44 55 - if (copy_begin) - { - size_t copy_size = (size - (*copy_begin)); - bool remain = (copy_size & 3); - size_t sse_copy_counter = (copy_size >> 2); - sse_copy_counter = remain * (sse_copy_counter + 1) + (!remain) * (sse_copy_counter); - auto it_tmp = it; // NOLINT - size_t data_start = *copy_begin; - constexpr const int copy_mask = (_MM_SHUFFLE(3, 2, 1, 0)); - while (sse_copy_counter) - { - __m128i data_to_copy = _mm_loadu_si128(reinterpret_cast(&data[data_start])); - auto copy_result = _mm_shuffle_epi32(data_to_copy, copy_mask); - _mm_storeu_si128(reinterpret_cast<__m128i *>(it_tmp), copy_result); - it_tmp += 4; - data_start += 4; - --sse_copy_counter; - } - it += copy_size; - } - - return res; -} -#endif - - template void ColumnVector::gather(ColumnGathererStream & gatherer) { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 6aae3a3e3fb..6ba9abaca32 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -136,9 +136,7 @@ private: /// Sugar constructor. 
ColumnVector(std::initializer_list il) : data{il} {} - #ifdef __SSE2__ - ColumnPtr replicateSSE2(const IColumn::Offsets & offsets) const; - #endif + public: bool isNumeric() const override { return is_arithmetic_v; } From 45c18145ecc7b87d94aec801f07b68ba68ff510d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 17:51:27 +0200 Subject: [PATCH 072/121] Fixed tests --- .../0_stateless/02345_partial_sort_transform_optimization.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql index fe2ab096ab7..eb395e5ec41 100644 --- a/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql +++ b/tests/queries/0_stateless/02345_partial_sort_transform_optimization.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check:22.6.1 +-- Tags: no-backward-compatibility-check -- Regression for PartialSortingTransform optimization that requires at least 1500 rows. SELECT * FROM (SELECT * FROM (SELECT 0 a, toNullable(number) b, toString(number) c FROM numbers(1e6)) ORDER BY a DESC, b DESC, c LIMIT 1500) limit 10; From 62e7a89f262909aa6d2c56ef9e88960d7d401500 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Jun 2022 17:53:08 +0200 Subject: [PATCH 073/121] Proper fix for ipv4/ipv6 conversion error --- src/Databases/TablesLoader.cpp | 13 +++++--- src/Databases/TablesLoader.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 5 +++ .../__init__.py | 1 + .../test.py | 31 +++++++++++++++++++ ...2316_cast_to_ip_address_default_column.sql | 17 +++++++++- 6 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 tests/integration/test_server_start_and_ip_conversions/__init__.py create mode 100644 tests/integration/test_server_start_and_ip_conversions/test.py diff --git a/src/Databases/TablesLoader.cpp b/src/Databases/TablesLoader.cpp index e973c9211be..7e9b83d423a 100644 --- a/src/Databases/TablesLoader.cpp +++ b/src/Databases/TablesLoader.cpp @@ -171,6 +171,11 @@ void TablesLoader::removeUnresolvableDependencies(bool remove_loaded) void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) { + /// Compatibility setting which should be enabled by default on attach + /// Otherwise server will be unable to start for some old-format of IPv6/IPv4 types of columns + ContextMutablePtr load_context = Context::createCopy(global_context); + load_context->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1); + /// Load independent tables in parallel. /// Then remove loaded tables from dependency graph, find tables/dictionaries that do not have unresolved dependencies anymore, /// move them to the list of independent tables and load. 
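    /// Note: the per-table load below receives load_context rather than global_context,
    /// so the compatibility setting applies only while loading tables, not globally.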
@@ -183,7 +188,7 @@ void TablesLoader::loadTablesInTopologicalOrder(ThreadPool & pool) assert(metadata.parsed_tables.size() == tables_processed + metadata.independent_database_objects.size() + getNumberOfTablesWithDependencies()); logDependencyGraph(); - startLoadingIndependentTables(pool, level); + startLoadingIndependentTables(pool, level, load_context); TableNames new_independent_database_objects; for (const auto & table_name : metadata.independent_database_objects) @@ -237,7 +242,7 @@ DependenciesInfosIter TablesLoader::removeResolvedDependency(const DependenciesI return metadata.dependencies_info.erase(info_it); } -void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level) +void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context) { size_t total_tables = metadata.parsed_tables.size(); @@ -245,10 +250,10 @@ void TablesLoader::startLoadingIndependentTables(ThreadPool & pool, size_t level for (const auto & table_name : metadata.independent_database_objects) { - pool.scheduleOrThrowOnError([this, total_tables, &table_name]() + pool.scheduleOrThrowOnError([this, load_context, total_tables, &table_name]() { const auto & path_and_query = metadata.parsed_tables[table_name]; - databases[table_name.database]->loadTableFromMetadata(global_context, path_and_query.path, table_name, path_and_query.ast, force_restore); + databases[table_name.database]->loadTableFromMetadata(load_context, path_and_query.path, table_name, path_and_query.ast, force_restore); logAboutProgress(log, ++tables_processed, total_tables, stopwatch); }); } diff --git a/src/Databases/TablesLoader.h b/src/Databases/TablesLoader.h index 189906df6ff..43e8bfdb92c 100644 --- a/src/Databases/TablesLoader.h +++ b/src/Databases/TablesLoader.h @@ -104,7 +104,7 @@ private: DependenciesInfosIter removeResolvedDependency(const DependenciesInfosIter & info_it, TableNames & independent_database_objects); - void startLoadingIndependentTables(ThreadPool & pool, size_t level); + void startLoadingIndependentTables(ThreadPool & pool, size_t level, ContextMutablePtr load_context); void checkCyclicDependencies() const; diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 20d88c91709..0fb048332b7 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -996,6 +996,11 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) throw Exception("Temporary tables cannot be inside a database. You should not specify a database for a temporary table.", ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE); + /// Compatibility setting which should be enabled by default on attach + /// Otherwise server will be unable to start for some old-format of IPv6/IPv4 types + if (create.attach) + getContext()->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1); + String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? 
create.getDatabase() : current_database; diff --git a/tests/integration/test_server_start_and_ip_conversions/__init__.py b/tests/integration/test_server_start_and_ip_conversions/__init__.py new file mode 100644 index 00000000000..e5a0d9b4834 --- /dev/null +++ b/tests/integration/test_server_start_and_ip_conversions/__init__.py @@ -0,0 +1 @@ +#!/usr/bin/env python3 diff --git a/tests/integration/test_server_start_and_ip_conversions/test.py b/tests/integration/test_server_start_and_ip_conversions/test.py new file mode 100644 index 00000000000..f91617f60b8 --- /dev/null +++ b/tests/integration/test_server_start_and_ip_conversions/test.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import logging +import pytest +from helpers.cluster import ClickHouseCluster + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance("node", stay_alive=True) + +@pytest.fixture(scope="module", autouse=True) +def start_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_restart_success(): + node.query(""" + CREATE TABLE ipv4_test + ( + id UInt64, + value String + ) ENGINE=MergeTree ORDER BY id""", + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + + node.query("ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT ''", settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + + node.restart_clickhouse() + + assert node.query("SELECT 1") == "1\n" diff --git a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql index 200cec8fed9..128acd7d132 100644 --- a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql +++ b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql @@ -11,6 +11,13 @@ CREATE TABLE ipv4_test ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT ''; +SET cast_ipv4_ipv6_default_on_conversion_error = 0; + +DETACH TABLE ipv4_test; +ATTACH TABLE ipv4_test; + +SET cast_ipv4_ipv6_default_on_conversion_error = 1; + DROP TABLE ipv4_test; DROP TABLE IF EXISTS ipv6_test; @@ -20,7 +27,15 @@ CREATE TABLE ipv6_test value String ) ENGINE=MergeTree ORDER BY id; -ALTER TABLE ipv6_test MODIFY COLUMN value IPv4 DEFAULT ''; +ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT ''; + +SET cast_ipv4_ipv6_default_on_conversion_error = 0; + +DETACH TABLE ipv6_test; +ATTACH TABLE ipv6_test; + +SET cast_ipv4_ipv6_default_on_conversion_error = 1; + SELECT * FROM ipv6_test; DROP TABLE ipv6_test; From 9b387a57ed79ff37a1530ac46592412799e8634b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Jun 2022 17:56:59 +0200 Subject: [PATCH 074/121] Test ipv6 as well --- .../test.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_server_start_and_ip_conversions/test.py b/tests/integration/test_server_start_and_ip_conversions/test.py index f91617f60b8..73604394d9d 100644 --- a/tests/integration/test_server_start_and_ip_conversions/test.py +++ b/tests/integration/test_server_start_and_ip_conversions/test.py @@ -15,7 +15,7 @@ def start_cluster(): cluster.shutdown() -def test_restart_success(): +def test_restart_success_ipv4(): node.query(""" CREATE TABLE ipv4_test ( @@ -29,3 +29,19 @@ def test_restart_success(): node.restart_clickhouse() assert node.query("SELECT 1") == "1\n" + + +def test_restart_success_ipv6(): + node.query(""" + CREATE TABLE ipv6_test + ( + id UInt64, + value String + ) ENGINE=MergeTree ORDER BY id""", + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + 
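+    # The ALTER below gives the column an IPv6 type whose String default ''
+    # only casts cleanly with the compatibility setting; the regression covered
+    # here is that a server restart (which re-attaches the table) used to fail
+    # on such a column.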
+ node.query("ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT ''", settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + + node.restart_clickhouse() + + assert node.query("SELECT 1") == "1\n" From eb3f49426c3064df729f168e2b42b718d5bd4f1c Mon Sep 17 00:00:00 2001 From: Suzy Wang Date: Wed, 29 Jun 2022 09:58:55 -0700 Subject: [PATCH 075/121] Remove zlib in mariadb-connector-c --- contrib/mariadb-connector-c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/contrib/mariadb-connector-c b/contrib/mariadb-connector-c index 5f4034a3a63..e39608998f5 160000 --- a/contrib/mariadb-connector-c +++ b/contrib/mariadb-connector-c @@ -1 +1 @@ -Subproject commit 5f4034a3a6376416504f17186c55fe401c6d8e5e +Subproject commit e39608998f5f6944ece9ec61f48e9172ec1de660 From 09be594c8135b4b40f4ddaefc1357cbdc75e50b4 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 19:15:47 +0200 Subject: [PATCH 076/121] Dictionaries added TSA annotations --- src/Dictionaries/CacheDictionary.h | 2 +- .../CassandraDictionarySource.cpp | 6 +---- src/Dictionaries/CassandraDictionarySource.h | 5 +++-- src/Dictionaries/IDictionary.h | 22 +++++++++---------- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/src/Dictionaries/CacheDictionary.h b/src/Dictionaries/CacheDictionary.h index 0dbaf2716ba..d4716999b47 100644 --- a/src/Dictionaries/CacheDictionary.h +++ b/src/Dictionaries/CacheDictionary.h @@ -190,7 +190,7 @@ private: /// Dictionary source should be used with mutex mutable std::mutex source_mutex; - mutable DictionarySourcePtr source_ptr; + mutable DictionarySourcePtr source_ptr TSA_GUARDED_BY(source_mutex); CacheDictionaryStoragePtr cache_storage_ptr; mutable CacheDictionaryUpdateQueue update_queue; diff --git a/src/Dictionaries/CassandraDictionarySource.cpp b/src/Dictionaries/CassandraDictionarySource.cpp index e51caf7112a..e0cf2483b3d 100644 --- a/src/Dictionaries/CassandraDictionarySource.cpp +++ b/src/Dictionaries/CassandraDictionarySource.cpp @@ -194,12 +194,8 @@ QueryPipeline CassandraDictionarySource::loadUpdatedAll() CassSessionShared CassandraDictionarySource::getSession() { /// Reuse connection if exists, create new one if not - auto session = maybe_session.lock(); - if (session) - return session; - std::lock_guard lock(connect_mutex); - session = maybe_session.lock(); + auto session = maybe_session.lock(); if (session) return session; diff --git a/src/Dictionaries/CassandraDictionarySource.h b/src/Dictionaries/CassandraDictionarySource.h index c2038a966ea..e73383aa75c 100644 --- a/src/Dictionaries/CassandraDictionarySource.h +++ b/src/Dictionaries/CassandraDictionarySource.h @@ -82,9 +82,10 @@ private: Block sample_block; ExternalQueryBuilder query_builder; - std::mutex connect_mutex; CassClusterPtr cluster; - CassSessionWeak maybe_session; + + std::mutex connect_mutex; + CassSessionWeak maybe_session TSA_GUARDED_BY(connect_mutex); }; } diff --git a/src/Dictionaries/IDictionary.h b/src/Dictionaries/IDictionary.h index 480befdcfb2..3f3c60206d6 100644 --- a/src/Dictionaries/IDictionary.h +++ b/src/Dictionaries/IDictionary.h @@ -62,26 +62,26 @@ public: std::string getFullName() const { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; return dictionary_id.getNameForLogs(); } StorageID getDictionaryID() const { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; return dictionary_id; } void updateDictionaryName(const StorageID & new_name) const { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; assert(new_name.uuid 
== dictionary_id.uuid && dictionary_id.uuid != UUIDHelpers::Nil); dictionary_id = new_name; } std::string getLoadableName() const final { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; return dictionary_id.getInternalDictionaryName(); } @@ -92,6 +92,8 @@ public: std::string getDatabaseOrNoDatabaseTag() const { + std::lock_guard lock{mutex}; + if (!dictionary_id.database_name.empty()) return dictionary_id.database_name; @@ -278,22 +280,20 @@ public: void setDictionaryComment(String new_comment) { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; dictionary_comment = std::move(new_comment); } String getDictionaryComment() const { - std::lock_guard lock{name_mutex}; + std::lock_guard lock{mutex}; return dictionary_comment; } private: - mutable std::mutex name_mutex; - mutable StorageID dictionary_id; - -protected: - String dictionary_comment; + mutable std::mutex mutex; + mutable StorageID dictionary_id TSA_GUARDED_BY(mutex); + String dictionary_comment TSA_GUARDED_BY(mutex); }; } From 8f5582f95efd18eac8c926a89574b20e617b939c Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Jun 2022 20:29:50 +0200 Subject: [PATCH 077/121] Review and style fixes --- src/Interpreters/InterpreterCreateQuery.cpp | 9 ++++---- .../test.py | 23 ++++++++++++++----- ...2316_cast_to_ip_address_default_column.sql | 1 + 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 0fb048332b7..7a00bbf524c 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -996,11 +996,6 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) throw Exception("Temporary tables cannot be inside a database. You should not specify a database for a temporary table.", ErrorCodes::BAD_DATABASE_FOR_TEMPORARY_TABLE); - /// Compatibility setting which should be enabled by default on attach - /// Otherwise server will be unable to start for some old-format of IPv6/IPv4 types - if (create.attach) - getContext()->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1); - String current_database = getContext()->getCurrentDatabase(); auto database_name = create.database ? 
create.getDatabase() : current_database; @@ -1043,6 +1038,10 @@ BlockIO InterpreterCreateQuery::createTable(ASTCreateQuery & create) create.attach = true; create.attach_short_syntax = true; create.if_not_exists = if_not_exists; + + /// Compatibility setting which should be enabled by default on attach + /// Otherwise server will be unable to start for some old-format of IPv6/IPv4 types + getContext()->setSetting("cast_ipv4_ipv6_default_on_conversion_error", 1); } /// TODO throw exception if !create.attach_short_syntax && !create.attach_from_path && !internal diff --git a/tests/integration/test_server_start_and_ip_conversions/test.py b/tests/integration/test_server_start_and_ip_conversions/test.py index 73604394d9d..abb6a546f64 100644 --- a/tests/integration/test_server_start_and_ip_conversions/test.py +++ b/tests/integration/test_server_start_and_ip_conversions/test.py @@ -6,6 +6,7 @@ from helpers.cluster import ClickHouseCluster cluster = ClickHouseCluster(__file__) node = cluster.add_instance("node", stay_alive=True) + @pytest.fixture(scope="module", autouse=True) def start_cluster(): try: @@ -16,15 +17,20 @@ def start_cluster(): def test_restart_success_ipv4(): - node.query(""" + node.query( + """ CREATE TABLE ipv4_test ( id UInt64, value String ) ENGINE=MergeTree ORDER BY id""", - settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}, + ) - node.query("ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT ''", settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + node.query( + "ALTER TABLE ipv4_test MODIFY COLUMN value IPv4 DEFAULT ''", + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}, + ) node.restart_clickhouse() @@ -32,15 +38,20 @@ def test_restart_success_ipv4(): def test_restart_success_ipv6(): - node.query(""" + node.query( + """ CREATE TABLE ipv6_test ( id UInt64, value String ) ENGINE=MergeTree ORDER BY id""", - settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}, + ) - node.query("ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT ''", settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}) + node.query( + "ALTER TABLE ipv6_test MODIFY COLUMN value IPv6 DEFAULT ''", + settings={"cast_ipv4_ipv6_default_on_conversion_error": 1}, + ) node.restart_clickhouse() diff --git a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql index 128acd7d132..eabe6ed1d65 100644 --- a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql +++ b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql @@ -1,4 +1,5 @@ -- Tags: no-backward-compatibility-check +-- TODO: remove after new 22.6 release SET cast_ipv4_ipv6_default_on_conversion_error = 1; From 615070425e8c759e282af1c3917c4cab6482b89b Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 29 Jun 2022 20:30:23 +0200 Subject: [PATCH 078/121] Fix comment --- .../0_stateless/02316_cast_to_ip_address_default_column.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql index eabe6ed1d65..35f210be43d 100644 --- a/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql +++ b/tests/queries/0_stateless/02316_cast_to_ip_address_default_column.sql @@ -1,5 +1,5 @@ -- Tags: no-backward-compatibility-check --- 
TODO: remove after new 22.6 release +-- TODO: remove no-backward-compatibility-check after new 22.6 release SET cast_ipv4_ipv6_default_on_conversion_error = 1; From 6bf9c7cd4d5de11b94b777c5d37158f9eeb9f084 Mon Sep 17 00:00:00 2001 From: HeenaBansal2009 Date: Wed, 29 Jun 2022 12:21:05 -0700 Subject: [PATCH 079/121] Updated index file for Geo functions --- docs/en/sql-reference/functions/geo/index.md | 96 ++++++++++---------- 1 file changed, 48 insertions(+), 48 deletions(-) diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md index ea43b3ef96c..c0162cb5b63 100644 --- a/docs/en/sql-reference/functions/geo/index.md +++ b/docs/en/sql-reference/functions/geo/index.md @@ -20,57 +20,57 @@ sidebar_position: 62 ## H3 Indexes Functions -- [h3IsValid](./h3#h3IsValid) -- [h3GetResolution](./h3#h3GetResolution) -- [h3EdgeAngle](./h3#h3EdgeAngle) -- [h3EdgeLengthM​](./h3#h3EdgeLengthM​) -- [h3EdgeLengthKm] (./h3#h3EdgeLengthKm) -- [geoToH3](./h3#geoToH3) -- [h3ToGeo](./h3#h3ToGeo) -- [h3ToGeoBoundary](./h3#h3ToGeoBoundary) -- [h3kRing](./h3#h3kRing) -- [h3GetBaseCell](./h3#h3GetBaseCell) -- [h3HexAreaM2](./h3#h3HexAreaM2) -- [h3HexAreaKm2](./h3#h3HexAreaKm2) -- [h3IndexesAreNeighbors](./h3#h3IndexesAreNeighbors) -- [h3ToChildren](./h3#h3ToChildren) -- [h3ToParent](./h3#h3ToParent) -- [h3ToString](./h3#h3ToString) -- [stringToH3](./h3#stringToH3) -- [h3GetResolution](./h3#h3GetResolution) -- [h3IsResClassIII](./h3#h3IsResClassIII) -- [h3IsPentagon](./h3#h3IsPentagon) -- [h3GetFaces](./h3#h3GetFaces) -- [h3CellAreaM2](./h3#h3CellAreaM2) -- [h3CellAreaRads2](./h3#h3CellAreaRads2) -- [h3ToCenterChild](./h3#h3ToCenterChild) -- [h3ExactEdgeLengthM](./h3#h3ExactEdgeLengthM) -- [h3ExactEdgeLengthKm](./h3#h3ExactEdgeLengthKm) -- [h3ExactEdgeLengthRads](./h3#h3ExactEdgeLengthRads) -- [h3NumHexagons](./h3#h3NumHexagons) -- [h3Line](./h3#h3Line) -- [h3Distance](./h3#h3Distance) -- [h3HexRing](./h3#h3HexRing) -- [h3GetUnidirectionalEdge](./h3#h3GetUnidirectionalEdge) -- [h3UnidirectionalEdgeIsValid](./h3#h3UnidirectionalEdgeIsValid) -- [h3GetOriginIndexFromUnidirectionalEdge](./h3#h3GetOriginIndexFromUnidirectionalEdge) -- [h3GetDestinationIndexFromUnidirectionalEdge](./h3#h3GetDestinationIndexFromUnidirectionalEdge) -- [h3GetIndexesFromUnidirectionalEdge](./h3#h3GetIndexesFromUnidirectionalEdge) -- [h3GetUnidirectionalEdgesFromHexagon](./h3#h3GetUnidirectionalEdgesFromHexagon) -- [h3GetUnidirectionalEdgeBoundary](./h3#h3GetUnidirectionalEdgeBoundary) +- [h3IsValid](./h3.md#h3IsValid) +- [h3GetResolution](./h3.md#h3GetResolution) +- [h3EdgeAngle](./h3.md#h3EdgeAngle) +- [h3EdgeLengthM​](./h3.md#h3EdgeLengthM​) +- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm) +- [geoToH3](./h3.md#geoToH3) +- [h3ToGeo](./h3.md#h3ToGeo) +- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary) +- [h3kRing](./h3.md#h3kRing) +- [h3GetBaseCell](./h3.md#h3GetBaseCell) +- [h3HexAreaM2](./h3.md#h3HexAreaM2) +- [h3HexAreaKm2](./h3.md#h3HexAreaKm2) +- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors) +- [h3ToChildren](./h3.md#h3ToChildren) +- [h3ToParent](./h3.md#h3ToParent) +- [h3ToString](./h3.md#h3ToString) +- [stringToH3](./h3.md#stringToH3) +- [h3GetResolution](./h3.md#h3GetResolution) +- [h3IsResClassIII](./h3.md#h3IsResClassIII) +- [h3IsPentagon](./h3.md#h3IsPentagon) +- [h3GetFaces](./h3.md#h3GetFaces) +- [h3CellAreaM2](./h3.md#h3CellAreaM2) +- [h3CellAreaRads2](./h3.md#h3CellAreaRads2) +- [h3ToCenterChild](./h3.md#h3ToCenterChild) +- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM) +- 
[h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm)
+- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads)
+- [h3NumHexagons](./h3.md#h3NumHexagons)
+- [h3Line](./h3.md#h3Line)
+- [h3Distance](./h3.md#h3Distance)
+- [h3HexRing](./h3.md#h3HexRing)
+- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge)
+- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid)
+- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge)
+- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge)
+- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge)
+- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon)
+- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary)

 ## S2 Index Functions

-- [geoToS2](./s2#geoToS2)
-- [s2ToGeo](./s2#s2ToGeo)
-- [s2GetNeighbors](./s2#s2GetNeighbors)
-- [s2CellsIntersect](./s2#s2CellsIntersect)
-- [s2CapContains](./s2#s2CapContains)
-- [s2CapUnion](./s2#s2CapUnion)
-- [s2RectAdd](./s2#s2RectAdd)
-- [s2RectContains](./s2#s2RectContains)
-- [s2RectUinion](./s2#s2RectUinion)
-- [s2RectIntersection](./s2#s2RectIntersection)
+- [geoToS2](./s2.md#geoToS2)
+- [s2ToGeo](./s2.md#s2ToGeo)
+- [s2GetNeighbors](./s2.md#s2GetNeighbors)
+- [s2CellsIntersect](./s2.md#s2CellsIntersect)
+- [s2CapContains](./s2.md#s2CapContains)
+- [s2CapUnion](./s2.md#s2CapUnion)
+- [s2RectAdd](./s2.md#s2RectAdd)
+- [s2RectContains](./s2.md#s2RectContains)
+- [s2RectUinion](./s2.md#s2RectUinion)
+- [s2RectIntersection](./s2.md#s2RectIntersection)

 [Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/)

From 81bb2242fdd2c98af7ad0dcd66d3b2275f0aadba Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 29 Jun 2022 15:08:16 +0000
Subject: [PATCH 080/121] Fix countSubstrings() & position() on patterns with
 0-bytes

SQL functions countSubstrings(), countSubstringsCaseInsensitive(),
countSubstringsUTF8(), position(), positionCaseInsensitive(),
positionUTF8() with non-const pattern argument use the fallback searchers
LibCASCIICaseSensitiveStringSearcher and
LibCASCIICaseInsensitiveStringSearcher which call ::strstr(), resp.
::strcasestr(). These functions assume that the haystack is 0-terminated
and they even document that. However, the callers did not check if the
haystack contains a 0-byte (perhaps because it's sort of expensive).
As a consequence, if the haystack contained a zero byte in its payload,
matches behind this zero byte were ignored.
create table t (id UInt32, pattern String) engine = MergeTree() order by id; insert into t values (1, 'x'); select countSubstrings('aaaxxxaa\0xxx', pattern) from t; We returned 3 before this commit, now we return 6 --- src/Common/StringSearcher.h | 69 +++++++------------ src/Functions/PositionImpl.h | 6 +- ...sition_countsubstrings_zero_byte.reference | 12 ++++ ...346_position_countsubstrings_zero_byte.sql | 24 +++++++ 4 files changed, 62 insertions(+), 49 deletions(-) create mode 100644 tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference create mode 100644 tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index a82115a9923..7d669ddd369 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -826,66 +826,43 @@ using UTF8CaseInsensitiveStringSearcher = StringSearcher; using ASCIICaseSensitiveTokenSearcher = TokenSearcher; using ASCIICaseInsensitiveTokenSearcher = TokenSearcher; - -/** Uses functions from libc. - * It makes sense to use only with short haystacks when cheap initialization is required. - * There is no option for case-insensitive search for UTF-8 strings. - * It is required that strings are zero-terminated. - */ - -struct LibCASCIICaseSensitiveStringSearcher : public StringSearcherBase +/// Use only with short haystacks where cheap initialization is required. +template +struct StdLibASCIIStringSearcher : public StringSearcherBase { - const char * const needle; + const char * const needle_start; + const char * const needle_end; template requires (sizeof(CharT) == 1) - LibCASCIICaseSensitiveStringSearcher(const CharT * const needle_, const size_t /* needle_size */) - : needle(reinterpret_cast(needle_)) {} + StdLibASCIIStringSearcher(const CharT * const needle_start_, const size_t needle_size_) + : needle_start{reinterpret_cast(needle_start_)} + , needle_end{reinterpret_cast(needle_start) + needle_size_} + {} template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const + const CharT * search(const CharT * haystack_start, const CharT * const haystack_end) const { - const auto * res = strstr(reinterpret_cast(haystack), reinterpret_cast(needle)); - if (!res) - return haystack_end; - return reinterpret_cast(res); + if constexpr (CaseInsensitive) + { + return std::search( + haystack_start, haystack_end, needle_start, needle_end, + [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);}); + } + else + { + return std::search( + haystack_start, haystack_end, needle_start, needle_end); + } } template requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const + const CharT * search(const CharT * haystack_start, const size_t haystack_length) const { - return search(haystack, haystack + haystack_size); + return search(haystack_start, haystack_start + haystack_length); } }; -struct LibCASCIICaseInsensitiveStringSearcher : public StringSearcherBase -{ - const char * const needle; - - template - requires (sizeof(CharT) == 1) - LibCASCIICaseInsensitiveStringSearcher(const CharT * const needle_, const size_t /* needle_size */) - : needle(reinterpret_cast(needle_)) {} - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const CharT * const haystack_end) const - { - const auto * res = strcasestr(reinterpret_cast(haystack), reinterpret_cast(needle)); - if (!res) - return haystack_end; - return 
reinterpret_cast(res); - } - - template - requires (sizeof(CharT) == 1) - const CharT * search(const CharT * haystack, const size_t haystack_size) const - { - return search(haystack, haystack + haystack_size); - } -}; - - } diff --git a/src/Functions/PositionImpl.h b/src/Functions/PositionImpl.h index 5380fcc36d9..76f10373a58 100644 --- a/src/Functions/PositionImpl.h +++ b/src/Functions/PositionImpl.h @@ -26,7 +26,7 @@ struct PositionCaseSensitiveASCII using MultiSearcherInBigHaystack = MultiVolnitsky; /// For searching single substring, that is different each time. This object is created for each row of data. It must have cheap initialization. - using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) { @@ -62,7 +62,7 @@ struct PositionCaseInsensitiveASCII /// `Volnitsky` is not used here, because one person has measured that this is better. It will be good if you question it. using SearcherInBigHaystack = ASCIICaseInsensitiveStringSearcher; using MultiSearcherInBigHaystack = MultiVolnitskyCaseInsensitive; - using SearcherInSmallHaystack = LibCASCIICaseInsensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t /*haystack_size_hint*/) { @@ -94,7 +94,7 @@ struct PositionCaseSensitiveUTF8 { using SearcherInBigHaystack = VolnitskyUTF8; using MultiSearcherInBigHaystack = MultiVolnitskyUTF8; - using SearcherInSmallHaystack = LibCASCIICaseSensitiveStringSearcher; + using SearcherInSmallHaystack = StdLibASCIIStringSearcher; static SearcherInBigHaystack createSearcherInBigHaystack(const char * needle_data, size_t needle_size, size_t haystack_size_hint) { diff --git a/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference new file mode 100644 index 00000000000..2b70bdc272e --- /dev/null +++ b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.reference @@ -0,0 +1,12 @@ +6 +6 +6 +6 +6 +6 +7 +7 +7 +7 +7 +7 diff --git a/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql new file mode 100644 index 00000000000..6208baf41c4 --- /dev/null +++ b/tests/queries/0_stateless/02346_position_countsubstrings_zero_byte.sql @@ -0,0 +1,24 @@ +drop table if exists tab; + +create table tab (id UInt32, haystack String, pattern String) engine = MergeTree() order by id; +insert into tab values (1, 'aaaxxxaa\0xxx', 'x'); + +select countSubstrings('aaaxxxaa\0xxx', pattern) from tab where id = 1; +select countSubstringsCaseInsensitive('aaaxxxaa\0xxx', pattern) from tab where id = 1; +select countSubstringsCaseInsensitiveUTF8('aaaxxxaa\0xxx', pattern) from tab where id = 1; + +select countSubstrings(haystack, pattern) from tab where id = 1; +select countSubstringsCaseInsensitive(haystack, pattern) from tab where id = 1; +select countSubstringsCaseInsensitiveUTF8(haystack, pattern) from tab where id = 1; + +insert into tab values (2, 'aaaaa\0x', 'x'); + +select position('aaaaa\0x', pattern) from tab where id = 2; +select positionCaseInsensitive('aaaaa\0x', pattern) from tab where id = 2; +select positionCaseInsensitiveUTF8('aaaaa\0x', pattern) from tab where id = 2; + +select 
position(haystack, pattern) from tab where id = 2; +select positionCaseInsensitive(haystack, pattern) from tab where id = 2; +select positionCaseInsensitiveUTF8(haystack, pattern) from tab where id = 2; + +drop table if exists tab; From 572e5402173cee7f45afe9a0b5c38e0ac825acee Mon Sep 17 00:00:00 2001 From: GruffGemini <43479425+GruffGemini@users.noreply.github.com> Date: Thu, 30 Jun 2022 09:29:18 +0300 Subject: [PATCH 081/121] docs (en, group-by.md): fixed broken links --- docs/en/sql-reference/statements/select/group-by.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/sql-reference/statements/select/group-by.md b/docs/en/sql-reference/statements/select/group-by.md index e02db6d4f6b..1d6edc5fa3d 100644 --- a/docs/en/sql-reference/statements/select/group-by.md +++ b/docs/en/sql-reference/statements/select/group-by.md @@ -312,11 +312,11 @@ The aggregation can be performed more effectively, if a table is sorted by some ### GROUP BY in External Memory You can enable dumping temporary data to the disk to restrict memory usage during `GROUP BY`. -The [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled. +The [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) setting determines the threshold RAM consumption for dumping `GROUP BY` temporary data to the file system. If set to 0 (the default), it is disabled. When using `max_bytes_before_external_group_by`, we recommend that you set `max_memory_usage` about twice as high. This is necessary because there are two stages to aggregation: reading the data and forming intermediate data (1) and merging the intermediate data (2). Dumping data to the file system can only occur during stage 1. If the temporary data wasn’t dumped, then stage 2 might require up to the same amount of memory as in stage 1. -For example, if [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000, and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), maximum consumption of RAM is only slightly more than `max_bytes_before_external_group_by`. +For example, if [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) was set to 10000000000 and you want to use external aggregation, it makes sense to set `max_bytes_before_external_group_by` to 10000000000, and `max_memory_usage` to 20000000000. When external aggregation is triggered (if there was at least one dump of temporary data), maximum consumption of RAM is only slightly more than `max_bytes_before_external_group_by`. With distributed query processing, external aggregation is performed on remote servers. In order for the requester server to use only a small amount of RAM, set `distributed_aggregation_memory_efficient` to 1. 
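As an illustration of the sizing advice above (a hypothetical session; `hits`
and `UserID` stand in for any large table and grouping key):

SET max_bytes_before_external_group_by = 10000000000;
SET max_memory_usage = 20000000000;

SELECT UserID, count() FROM hits GROUP BY UserID FORMAT Null;

With these settings, once the in-memory aggregation state grows past roughly
10 GB, intermediate GROUP BY data is dumped to the file system and merged
afterwards, instead of the query failing on the memory limit.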
From c64cfbcf4fec90c3acbb3e1f0ec27ee3df04130e Mon Sep 17 00:00:00 2001 From: GruffGemini <43479425+GruffGemini@users.noreply.github.com> Date: Thu, 30 Jun 2022 09:32:42 +0300 Subject: [PATCH 082/121] Update group-by.md --- docs/zh/sql-reference/statements/select/group-by.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh/sql-reference/statements/select/group-by.md b/docs/zh/sql-reference/statements/select/group-by.md index bebe4b164bf..33fa27b8dd9 100644 --- a/docs/zh/sql-reference/statements/select/group-by.md +++ b/docs/zh/sql-reference/statements/select/group-by.md @@ -116,11 +116,11 @@ GROUP BY domain ### 在外部存储器中分组 {#select-group-by-in-external-memory} 您可以启用将临时数据转储到磁盘以限制内存使用期间 `GROUP BY`. -该 [max_bytes_before_external_group_by](../../../operations/settings/settings.md#settings-max_bytes_before_external_group_by) 设置确定倾销的阈值RAM消耗 `GROUP BY` 临时数据到文件系统。 如果设置为0(默认值),它将被禁用。 +该 [max_bytes_before_external_group_by](../../../operations/settings/query-complexity.md#settings-max_bytes_before_external_group_by) 设置确定倾销的阈值RAM消耗 `GROUP BY` 临时数据到文件系统。 如果设置为0(默认值),它将被禁用。 使用时 `max_bytes_before_external_group_by`,我们建议您设置 `max_memory_usage` 大约两倍高。 这是必要的,因为聚合有两个阶段:读取数据和形成中间数据(1)和合并中间数据(2)。 将数据转储到文件系统只能在阶段1中发生。 如果未转储临时数据,则阶段2可能需要与阶段1相同的内存量。 -例如,如果 [max_memory_usage](../../../operations/settings/settings.md#settings_max_memory_usage) 设置为10000000000,你想使用外部聚合,这是有意义的设置 `max_bytes_before_external_group_by` 到10000000000,和 `max_memory_usage` 到20000000000。 当触发外部聚合(如果至少有一个临时数据转储)时,RAM的最大消耗仅略高于 `max_bytes_before_external_group_by`. +例如,如果 [max_memory_usage](../../../operations/settings/query-complexity.md#settings_max_memory_usage) 设置为10000000000,你想使用外部聚合,这是有意义的设置 `max_bytes_before_external_group_by` 到10000000000,和 `max_memory_usage` 到20000000000。 当触发外部聚合(如果至少有一个临时数据转储)时,RAM的最大消耗仅略高于 `max_bytes_before_external_group_by`. 通过分布式查询处理,在远程服务器上执行外部聚合。 为了使请求者服务器只使用少量的RAM,设置 `distributed_aggregation_memory_efficient` 到1。 From 47ac47350bbd59766fa1cb2c67ebae13dfb2140c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 22 Jun 2022 22:30:50 +0200 Subject: [PATCH 083/121] Store projections in backups. 
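With this change, the backup of a data part also includes its projection
parts, which are stored on disk as subdirectories of the parent part (for
example prjmax.proj). For illustration, with a table test.table that has a
projection prjmax (the setup used by the integration test added below), the
backup now contains entries such as:

data/test/table/1_5_5_0/data.bin
data/test/table/1_5_5_0/prjmax.proj/data.bin

On restore, the *.proj subdirectories are recreated before the files are
copied, so projection parts are loaded together with their parent part.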
--- .../MergeTree/DataPartStorageOnDisk.cpp | 36 +++++++++---- .../MergeTree/DataPartStorageOnDisk.h | 1 + src/Storages/MergeTree/IDataPartStorage.h | 1 + src/Storages/MergeTree/MergeTreeData.cpp | 47 ++++++++++++----- .../test_backup_restore_new/test.py | 52 +++++++++++++++++++ .../test_backup_restore_on_cluster/test.py | 43 +++++++++++++++ 6 files changed, 156 insertions(+), 24 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 083cbc90cb1..68a2a0cbd15 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -615,36 +615,50 @@ void DataPartStorageOnDisk::backup( TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const { - auto disk = volume->getDisk(); + fs::path part_path_on_disk = fs::path{root_path} / part_dir; + fs::path part_path_in_backup = fs::path{path_in_backup} / part_dir; + auto disk = volume->getDisk(); auto temp_dir_it = temp_dirs.find(disk); if (temp_dir_it == temp_dirs.end()) - temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/backup_")).first; + temp_dir_it = temp_dirs.emplace(disk, std::make_shared(disk, "tmp/backup/")).first; auto temp_dir_owner = temp_dir_it->second; fs::path temp_dir = temp_dir_owner->getPath(); - - fs::path temp_part_dir = temp_dir / part_dir; + fs::path temp_part_dir = temp_dir / part_path_in_backup.relative_path(); disk->createDirectories(temp_part_dir); + /// For example, + /// part_path_in_backup = /data/test/table/0_1_1_0 + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0 + /// tmp_part_dir = tmp/backup/1aaaaaa/data/test/table/0_1_1_0 + /// Or, for projections: + /// part_path_in_backup = /data/test/table/0_1_1_0/prjmax.proj + /// part_path_on_disk = store/f57/f5728353-44bb-4575-85e8-28deb893657a/0_1_1_0/prjmax.proj + /// tmp_part_dir = tmp/backup/1aaaaaa/data/test/table/0_1_1_0/prjmax.proj + for (const auto & [filepath, checksum] : checksums.files) { - String relative_filepath = fs::path(part_dir) / filepath; - String full_filepath = fs::path(root_path) / part_dir / filepath; + if (filepath.ends_with(".proj")) + continue; /// Skip *.proj files - they're actually directories and will be handled. 
+ String filepath_on_disk = part_path_on_disk / filepath; + String filepath_in_backup = part_path_in_backup / filepath; String hardlink_filepath = temp_part_dir / filepath; - disk->createHardLink(full_filepath, hardlink_filepath); + + disk->createHardLink(filepath_on_disk, hardlink_filepath); UInt128 file_hash{checksum.file_hash.first, checksum.file_hash.second}; backup_entries.emplace_back( - relative_filepath, + filepath_in_backup, std::make_unique(disk, hardlink_filepath, checksum.file_size, file_hash, temp_dir_owner)); } for (const auto & filepath : files_without_checksums) { - String relative_filepath = fs::path(part_dir) / filepath; - String full_filepath = fs::path(root_path) / part_dir / filepath; - backup_entries.emplace_back(relative_filepath, std::make_unique(disk, full_filepath)); + String filepath_on_disk = part_path_on_disk / filepath; + String filepath_in_backup = part_path_in_backup / filepath; + backup_entries.emplace_back(filepath_in_backup, std::make_unique(disk, filepath_on_disk)); } } diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index d6fcb2f1442..bb1a8879d63 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -88,6 +88,7 @@ public: TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const override; DataPartStoragePtr freeze( diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index 0e165e74ed0..72810680812 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -173,6 +173,7 @@ public: TemporaryFilesOnDisks & temp_dirs, const MergeTreeDataPartChecksums & checksums, const NameSet & files_without_checksums, + const String & path_in_backup, BackupEntries & backup_entries) const = 0; /// Creates hardlinks into 'to/dir_path' for every file in data part. diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 2c4dcfa05ee..02d4d9ebe8b 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3993,14 +3993,23 @@ BackupEntries MergeTreeData::backupParts(const ContextPtr & local_context, const BackupEntries backup_entries; std::map> temp_dirs; - fs::path data_path_in_backup_fs = data_path_in_backup; for (const auto & part : data_parts) - part->data_part_storage->backup(temp_dirs, part->checksums, part->getFileNamesWithoutChecksums(), backup_entries); + { + part->data_part_storage->backup( + temp_dirs, part->checksums, part->getFileNamesWithoutChecksums(), data_path_in_backup, backup_entries); - /// TODO: try to write better code later. 
- for (auto & entry : backup_entries) - entry.first = data_path_in_backup_fs / entry.first; + auto projection_parts = part->getProjectionParts(); + for (const auto & [projection_name, projection_part] : projection_parts) + { + projection_part->data_part_storage->backup( + temp_dirs, + projection_part->checksums, + projection_part->getFileNamesWithoutChecksums(), + fs::path{data_path_in_backup} / part->name, + backup_entries); + } + } return backup_entries; } @@ -4116,27 +4125,39 @@ void MergeTreeData::restorePartFromBackup(std::shared_ptr r auto disk = reservation->getDisk(); String part_name = part_info.getPartName(); - auto temp_part_dir_owner = std::make_shared(disk, relative_data_path + "restoring_" + part_name + "_"); - String temp_part_dir = temp_part_dir_owner->getPath(); + auto temp_part_dir_owner = std::make_shared(disk, fs::path{relative_data_path} / ("restoring_" + part_name + "_")); + fs::path temp_part_dir = temp_part_dir_owner->getPath(); disk->createDirectories(temp_part_dir); + std::unordered_set subdirs; - assert(temp_part_dir.starts_with(relative_data_path)); - String relative_temp_part_dir = temp_part_dir.substr(relative_data_path.size()); + /// temp_part_name = "restoring__", for example "restoring_0_1_1_0_1baaaaa" + String temp_part_name = temp_part_dir.filename(); for (const String & filename : filenames) { + /// Needs to create subdirectories before copying the files. Subdirectories are used to represent projections. + auto separator_pos = filename.rfind('/'); + if (separator_pos != String::npos) + { + String subdir = filename.substr(0, separator_pos); + if (subdirs.emplace(subdir).second) + disk->createDirectories(temp_part_dir / subdir); + } + + /// TODO Transactions: Decide what to do with version metadata (if any). Let's just skip it for now. + if (filename.ends_with(IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME)) + continue; + auto backup_entry = backup->readFile(part_path_in_backup_fs / filename); auto read_buffer = backup_entry->getReadBuffer(); - auto write_buffer = disk->writeFile(fs::path(temp_part_dir) / filename); + auto write_buffer = disk->writeFile(temp_part_dir / filename); copyData(*read_buffer, *write_buffer); reservation->update(reservation->getSize() - backup_entry->getSize()); } auto single_disk_volume = std::make_shared(disk->getName(), disk, 0); - auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, relative_temp_part_dir); + auto data_part_storage = std::make_shared(single_disk_volume, relative_data_path, temp_part_name); auto part = createPart(part_name, part_info, data_part_storage); - /// TODO Transactions: Decide what to do with version metadata (if any). Let's just remove it for now. 
- disk->removeFileIfExists(fs::path(temp_part_dir) / IMergeTreeDataPart::TXN_VERSION_METADATA_FILE_NAME); part->version.setCreationTID(Tx::PrehistoricTID, nullptr); part->loadColumnsChecksumsIndexes(false, true); diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 3996a31e7c9..35545e95537 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -711,3 +711,55 @@ def test_system_users_async(): instance.query("SHOW CREATE USER u1") == "CREATE USER u1 IDENTIFIED WITH sha256_password SETTINGS custom_c = 3\n" ) + + +def test_projection(): + create_and_fill_table(n=3) + + instance.query("ALTER TABLE test.table ADD PROJECTION prjmax (SELECT MAX(x))") + instance.query(f"INSERT INTO test.table VALUES (100, 'a'), (101, 'b')") + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "2\n" + ) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + assert os.path.exists( + os.path.join( + get_path_to_backup(backup_name), "data/test/table/1_5_5_0/data.bin" + ) + ) + + assert os.path.exists( + os.path.join( + get_path_to_backup(backup_name), + "data/test/table/1_5_5_0/prjmax.proj/data.bin", + ) + ) + + instance.query("DROP TABLE test.table") + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "0\n" + ) + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[0, "0"], [1, "1"], [2, "2"], [100, "a"], [101, "b"]] + ) + + assert ( + instance.query( + "SELECT count() FROM system.projection_parts WHERE database='test' AND table='table' AND name='prjmax'" + ) + == "2\n" + ) diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 6264959fbce..63198f40af9 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -514,3 +514,46 @@ def test_system_users(): node1.query("SHOW CREATE USER u1") == "CREATE USER u1 SETTINGS custom_a = 123\n" ) assert node1.query("SHOW GRANTS FOR u1") == "GRANT SELECT ON default.tbl TO u1\n" + + +def test_projection(): + node1.query( + "CREATE TABLE tbl ON CLUSTER 'cluster' (x UInt32, y String) ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}') " + "ORDER BY y PARTITION BY x%10" + ) + node1.query(f"INSERT INTO tbl SELECT number, toString(number) FROM numbers(3)") + + node1.query("ALTER TABLE tbl ADD PROJECTION prjmax (SELECT MAX(x))") + node1.query(f"INSERT INTO tbl VALUES (100, 'a'), (101, 'b')") + + assert ( + node1.query( + "SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "2\n" + ) + + backup_name = new_backup_name() + node1.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + node1.query(f"DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + + assert ( + node1.query( + "SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "0\n" + ) + + node1.query(f"RESTORE TABLE tbl FROM {backup_name}") + + assert node1.query("SELECT * FROM tbl ORDER BY x") == TSV( + [[0, "0"], [1, "1"], [2, "2"], [100, "a"], [101, "b"]] + ) + + assert ( + node1.query( + 
"SELECT count() FROM system.projection_parts WHERE database='default' AND table='tbl' AND name='prjmax'" + ) + == "2\n" + ) From 64b51a3772f3c03043f9530dd7d434075b045fb8 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Jun 2022 00:56:41 +0200 Subject: [PATCH 084/121] Improve gathering metadata for backup. --- src/Backups/BackupEntriesCollector.cpp | 537 ++++++++++-------- src/Backups/BackupEntriesCollector.h | 56 +- src/Backups/RestorerFromBackup.cpp | 7 +- src/Backups/RestorerFromBackup.h | 1 + src/Databases/DatabaseMemory.cpp | 48 ++ src/Databases/DatabaseMemory.h | 2 + src/Databases/DatabaseReplicated.cpp | 28 +- src/Databases/DatabaseReplicated.h | 2 +- src/Databases/DatabasesCommon.cpp | 45 +- src/Databases/DatabasesCommon.h | 5 +- src/Databases/IDatabase.cpp | 18 +- src/Databases/IDatabase.h | 14 +- src/Storages/IStorage.cpp | 29 +- src/Storages/IStorage.h | 6 +- src/Storages/StorageReplicatedMergeTree.cpp | 76 ++- src/Storages/StorageReplicatedMergeTree.h | 4 +- .../test_backup_restore_new/test.py | 28 +- 17 files changed, 528 insertions(+), 378 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 322bc00ee3c..9ee57cb4fd5 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -27,15 +27,16 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; } - -bool BackupEntriesCollector::TableKey::operator ==(const TableKey & right) const +namespace { - return (name == right.name) && (is_temporary == right.is_temporary); -} + String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_char_uppercase) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + return fmt::format("{}emporary table {}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(table_name)); + else + return fmt::format("{}able {}.{}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + } -bool BackupEntriesCollector::TableKey::operator <(const TableKey & right) const -{ - return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary)); } std::string_view BackupEntriesCollector::toString(Stage stage) @@ -86,7 +87,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries() /// Find databases and tables which we're going to put to the backup. setStage(Stage::kFindingTables); - collectDatabasesAndTablesInfo(); + gatherMetadataAndCheckConsistency(); /// Make backup entries for the definitions of the found databases. makeBackupEntriesForDatabasesDefs(); @@ -100,7 +101,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries() /// Run all the tasks added with addPostCollectingTask(). setStage(Stage::kRunningPostTasks); - runPostCollectingTasks(); + runPostTasks(); /// No more backup entries or tasks are allowed after this point. setStage(Stage::kWritingBackup); @@ -156,56 +157,30 @@ void BackupEntriesCollector::calculateRootPathInBackup() } /// Finds databases and tables which we will put to the backup. -void BackupEntriesCollector::collectDatabasesAndTablesInfo() +void BackupEntriesCollector::gatherMetadataAndCheckConsistency() { bool use_timeout = (timeout.count() >= 0); auto start_time = std::chrono::steady_clock::now(); - int pass = 0; - do + int pass = 1; + for (;;) { - database_infos.clear(); - table_infos.clear(); - consistent = true; + consistency = true; /// Collect information about databases and tables specified in the BACKUP query. 
- for (const auto & element : backup_query_elements) - { - switch (element.type) - { - case ASTBackupQuery::ElementType::TABLE: - { - collectTableInfo({element.database_name, element.table_name}, false, element.partitions, true); - break; - } - - case ASTBackupQuery::ElementType::TEMPORARY_TABLE: - { - collectTableInfo({"", element.table_name}, true, element.partitions, true); - break; - } - - case ASTBackupQuery::ElementType::DATABASE: - { - collectDatabaseInfo(element.database_name, element.except_tables, true); - break; - } - - case ASTBackupQuery::ElementType::ALL: - { - collectAllDatabasesInfo(element.except_databases, element.except_tables); - break; - } - } - } + gatherDatabasesMetadata(); + gatherTablesMetadata(); /// We have to check consistency of collected information to protect from the case when some table or database is /// renamed during this collecting making the collected information invalid. checkConsistency(); + if (consistency) + break; + /// Two passes is absolute minimum (see `previous_table_names` & `previous_database_names`). auto elapsed = std::chrono::steady_clock::now() - start_time; - if (!consistent && (pass >= 2) && use_timeout) + if ((pass >= 2) && use_timeout) { if (elapsed > timeout) throw Exception( @@ -218,224 +193,298 @@ void BackupEntriesCollector::collectDatabasesAndTablesInfo() if (pass >= 2) LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed)); ++pass; - } while (!consistent); + } LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size()); } -void BackupEntriesCollector::collectTableInfo( - const QualifiedTableName & table_name, bool is_temporary_table, const std::optional & partitions, bool throw_if_not_found) +void BackupEntriesCollector::gatherDatabasesMetadata() { - /// Gather information about the table. - DatabasePtr database; - StoragePtr storage; - TableLockHolder table_lock; - ASTPtr create_table_query; + database_infos.clear(); - TableKey table_key{table_name, is_temporary_table}; - - if (throw_if_not_found) + /// Collect information about databases and tables specified in the BACKUP query. + for (const auto & element : backup_query_elements) { - auto resolved_id = is_temporary_table - ? 
context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) - : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); - std::tie(database, storage) = DatabaseCatalog::instance().getDatabaseAndTable(resolved_id, context); - table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - create_table_query = storage->getCreateQueryForBackup(*this); + switch (element.type) + { + case ASTBackupQuery::ElementType::TABLE: + { + gatherDatabaseMetadata( + element.database_name, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ false, + element.table_name, + /* throw_if_table_not_found= */ true, + element.partitions, + /* all_tables= */ false, + /* except_table_names= */ {}); + break; + } + + case ASTBackupQuery::ElementType::TEMPORARY_TABLE: + { + gatherDatabaseMetadata( + DatabaseCatalog::TEMPORARY_DATABASE, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ false, + element.table_name, + /* throw_if_table_not_found= */ true, + element.partitions, + /* all_tables= */ false, + /* except_table_names= */ {}); + break; + } + + case ASTBackupQuery::ElementType::DATABASE: + { + gatherDatabaseMetadata( + element.database_name, + /* throw_if_database_not_found= */ true, + /* backup_create_database_query= */ true, + /* table_name= */ {}, + /* throw_if_table_not_found= */ false, + /* partitions= */ {}, + /* all_tables= */ true, + /* except_table_names= */ element.except_tables); + break; + } + + case ASTBackupQuery::ElementType::ALL: + { + for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases()) + { + if (!element.except_databases.contains(database_name)) + { + gatherDatabaseMetadata( + database_name, + /* throw_if_database_not_found= */ false, + /* backup_create_database_query= */ true, + /* table_name= */ {}, + /* throw_if_table_not_found= */ false, + /* partitions= */ {}, + /* all_tables= */ true, + /* except_table_names= */ element.except_tables); + if (!consistency) + return; + } + } + break; + } + } + + if (!consistency) + return; } - else - { - auto resolved_id = is_temporary_table - ? 
context->tryResolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) - : context->tryResolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); - if (!resolved_id.empty()) - std::tie(database, storage) = DatabaseCatalog::instance().tryGetDatabaseAndTable(resolved_id, context); +} +void BackupEntriesCollector::gatherDatabaseMetadata( + const String & database_name, + bool throw_if_database_not_found, + bool backup_create_database_query, + const std::optional & table_name, + bool throw_if_table_not_found, + const std::optional & partitions, + bool all_tables, + const std::set & except_table_names) +{ + auto it = database_infos.find(database_name); + if (it == database_infos.end()) + { + DatabasePtr database; + if (throw_if_database_not_found) + { + database = DatabaseCatalog::instance().getDatabase(database_name); + } + else + { + database = DatabaseCatalog::instance().tryGetDatabase(database_name); + if (!database) + return; + } + + DatabaseInfo new_database_info; + new_database_info.database = database; + it = database_infos.emplace(database_name, new_database_info).first; + } + + DatabaseInfo & database_info = it->second; + + if (backup_create_database_query && !database_info.create_database_query && !DatabaseCatalog::isPredefinedDatabaseName(database_name)) + { + ASTPtr create_database_query; + try + { + create_database_query = database_info.database->getCreateDatabaseQueryForBackup(); + } + catch (...) + { + /// The database has been dropped recently. + consistency = false; + return; + } + + database_info.create_database_query = create_database_query; + const auto & create = create_database_query->as(); + + if (create.getDatabase() != database_name) + { + /// The database has been renamed recently. + consistency = false; + return; + } + } + + if (table_name) + { + auto & table_params = database_info.tables[*table_name]; + if (throw_if_table_not_found) + table_params.throw_if_table_not_found = true; + if (partitions) + { + table_params.partitions.emplace(); + insertAtEnd(*table_params.partitions, *partitions); + } + database_info.except_table_names.emplace(*table_name); + } + + if (all_tables) + { + database_info.all_tables = all_tables; + for (const auto & except_table_name : except_table_names) + if (except_table_name.first == database_name) + database_info.except_table_names.emplace(except_table_name.second); + } +} + +void BackupEntriesCollector::gatherTablesMetadata() +{ + if (!consistency) + return; + + table_infos.clear(); + for (const auto & [database_name, database_info] : database_infos) + { + const auto & database = database_info.database; + bool is_temporary_database = (database_name == DatabaseCatalog::TEMPORARY_DATABASE); + + auto filter_by_table_name = [database_info = &database_info](const String & table_name) + { + /// We skip inner tables of materialized views. + if (table_name.starts_with(".inner_id.")) + return false; + + if (database_info->tables.contains(table_name)) + return true; + + if (database_info->all_tables) + return !database_info->except_table_names.contains(table_name); + + return false; + }; + + auto db_tables = database->getTablesForBackup(filter_by_table_name, context, consistency); + + if (!consistency) + return; + + /// Check that all tables were found. 
+ std::unordered_set found_table_names; + for (const auto & db_table : db_tables) + { + const auto & create_table_query = db_table.first; + const auto & create = create_table_query->as(); + found_table_names.emplace(create.getTable()); + + if ((is_temporary_database && !create.temporary) || (!is_temporary_database && (create.getDatabase() != database_name))) + { + consistency = false; + return; + } + } + + for (const auto & [table_name, table_info] : database_info.tables) + { + if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name)) + throw Exception(ErrorCodes::UNKNOWN_TABLE, "{} not found", tableNameWithTypeToString(database_name, table_name, true)); + } + + for (const auto & db_table : db_tables) + { + const auto & create_table_query = db_table.first; + const auto & create = create_table_query->as(); + String table_name = create.getTable(); + + fs::path data_path_in_backup; + if (is_temporary_database) + { + auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name); + data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup); + } + else + { + auto table_name_in_backup = renaming_map.getNewTableName({database_name, table_name}); + data_path_in_backup + = root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); + } + + /// Add information to `table_infos`. + auto & res_table_info = table_infos[QualifiedTableName{database_name, table_name}]; + res_table_info.database = database; + res_table_info.storage = db_table.second; + res_table_info.create_table_query = create_table_query; + res_table_info.data_path_in_backup = data_path_in_backup; + + auto partitions_it = database_info.tables.find(table_name); + if (partitions_it != database_info.tables.end()) + res_table_info.partitions = partitions_it->second.partitions; + } + } +} + +void BackupEntriesCollector::lockTablesForReading() +{ + if (!consistency) + return; + + for (auto & table_info : table_infos | boost::adaptors::map_values) + { + auto storage = table_info.storage; + TableLockHolder table_lock; if (storage) { try { table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - create_table_query = storage->getCreateQueryForBackup(*this); } catch (Exception & e) { if (e.code() != ErrorCodes::TABLE_IS_DROPPED) throw; + consistency = false; + return; } } - - if (!create_table_query) - { - consistent &= !table_infos.contains(table_key); - return; - } - } - - fs::path data_path_in_backup; - if (is_temporary_table) - { - auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name.table); - data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup); - } - else - { - auto table_name_in_backup = renaming_map.getNewTableName(table_name); - data_path_in_backup - = root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); - } - - /// Check that information is consistent. - const auto & create = create_table_query->as(); - if ((create.getTable() != table_name.table) || (is_temporary_table != create.temporary) || (create.getDatabase() != table_name.database)) - { - /// Table was renamed recently. 
- consistent = false; - return; - } - - if (auto it = table_infos.find(table_key); it != table_infos.end()) - { - const auto & table_info = it->second; - if ((table_info.database != database) || (table_info.storage != storage)) - { - /// Table was renamed recently. - consistent = false; - return; - } - } - - /// Add information to `table_infos`. - auto & res_table_info = table_infos[table_key]; - res_table_info.database = database; - res_table_info.storage = storage; - res_table_info.table_lock = table_lock; - res_table_info.create_table_query = create_table_query; - res_table_info.data_path_in_backup = data_path_in_backup; - - if (partitions) - { - if (!res_table_info.partitions) - res_table_info.partitions.emplace(); - insertAtEnd(*res_table_info.partitions, *partitions); - } -} - -void BackupEntriesCollector::collectDatabaseInfo(const String & database_name, const std::set & except_table_names, bool throw_if_not_found) -{ - /// Gather information about the database. - DatabasePtr database; - ASTPtr create_database_query; - - if (throw_if_not_found) - { - database = DatabaseCatalog::instance().getDatabase(database_name); - create_database_query = database->getCreateDatabaseQueryForBackup(); - } - else - { - database = DatabaseCatalog::instance().tryGetDatabase(database_name); - if (!database) - { - consistent &= !database_infos.contains(database_name); - return; - } - - try - { - create_database_query = database->getCreateDatabaseQueryForBackup(); - } - catch (...) - { - /// The database has been dropped recently. - consistent &= !database_infos.contains(database_name); - return; - } - } - - /// Check that information is consistent. - const auto & create = create_database_query->as(); - if (create.getDatabase() != database_name) - { - /// Database was renamed recently. - consistent = false; - return; - } - - if (auto it = database_infos.find(database_name); it != database_infos.end()) - { - const auto & database_info = it->second; - if (database_info.database != database) - { - /// Database was renamed recently. - consistent = false; - return; - } - } - - /// Add information to `database_infos`. - auto & res_database_info = database_infos[database_name]; - res_database_info.database = database; - res_database_info.create_database_query = create_database_query; - - /// Add information about tables too. - for (auto it = database->getTablesIteratorForBackup(*this); it->isValid(); it->next()) - { - if (except_table_names.contains({database_name, it->name()})) - continue; - - collectTableInfo({database_name, it->name()}, /* is_temporary_table= */ false, {}, /* throw_if_not_found= */ false); - if (!consistent) - return; - } -} - -void BackupEntriesCollector::collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names) -{ - for (const auto & [database_name, database] : DatabaseCatalog::instance().getDatabases()) - { - if (except_database_names.contains(database_name)) - continue; - collectDatabaseInfo(database_name, except_table_names, false); - if (!consistent) - return; } } /// Check for consistency of collected information about databases and tables. void BackupEntriesCollector::checkConsistency() { - if (!consistent) + if (!consistency) return; /// Already inconsistent, no more checks necessary - /// Databases found while we were scanning tables and while we were scanning databases - must be the same. 
- for (const auto & [key, table_info] : table_infos) - { - auto it = database_infos.find(key.name.database); - if (it != database_infos.end()) - { - const auto & database_info = it->second; - if (database_info.database != table_info.database) - { - consistent = false; - return; - } - } - } - /// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed /// while we were scanning. std::set database_names; - std::set table_names; + std::set table_names; boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end())); boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end())); - if (!previous_database_names || !previous_table_names || (*previous_database_names != database_names) - || (*previous_table_names != table_names)) + if ((previous_database_names != database_names) || (previous_table_names != table_names)) { previous_database_names = std::move(database_names); previous_table_names = std::move(table_names); - consistent = false; + consistency = false; } } @@ -444,6 +493,9 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() { for (const auto & [database_name, database_info] : database_infos) { + if (!database_info.create_database_query) + continue; /// We don't store CREATE queries for predefined databases (see DatabaseCatalog::isPredefinedDatabaseName()). + LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name)); ASTPtr new_create_query = database_info.create_database_query; @@ -459,22 +511,23 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() /// Calls IDatabase::backupTable() for all the tables found to make backup entries for tables. void BackupEntriesCollector::makeBackupEntriesForTablesDefs() { - for (const auto & [key, table_info] : table_infos) + for (const auto & [table_name, table_info] : table_infos) { - LOG_TRACE(log, "Adding definition of {}table {}", (key.is_temporary ? "temporary " : ""), key.name.getFullName()); + LOG_TRACE(log, "Adding definition of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); + bool is_temporary_database = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE); ASTPtr new_create_query = table_info.create_table_query; renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); fs::path metadata_path_in_backup; - if (key.is_temporary) + if (is_temporary_database) { - auto new_name = renaming_map.getNewTemporaryTableName(key.name.table); + auto new_name = renaming_map.getNewTemporaryTableName(table_name.table); metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(new_name) + ".sql"); } else { - auto new_name = renaming_map.getNewTableName(key.name); + auto new_name = renaming_map.getNewTableName({table_name.database, table_name.table}); metadata_path_in_backup = root_path_in_backup / "metadata" / escapeForFileName(new_name.database) / (escapeForFileName(new_name.table) + ".sql"); } @@ -488,10 +541,18 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() if (backup_settings.structure_only) return; - for (const auto & [key, table_info] : table_infos) + for (const auto & [table_name, table_info] : table_infos) { - LOG_TRACE(log, "Adding data of {}table {}", (key.is_temporary ? 
"temporary " : ""), key.name.getFullName()); const auto & storage = table_info.storage; + if (!storage) + { + /// This storage exists on other replica and has not been created on this replica yet. + /// We store metadata only for such tables. + /// TODO: Need special processing if it's a ReplicatedMergeTree. + continue; + } + + LOG_TRACE(log, "Adding data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); const auto & data_path_in_backup = table_info.data_path_in_backup; const auto & partitions = table_info.partitions; storage->backupData(*this, data_path_in_backup, partitions); @@ -519,21 +580,21 @@ void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_) insertAtEnd(backup_entries, std::move(backup_entries_)); } -void BackupEntriesCollector::addPostCollectingTask(std::function task) +void BackupEntriesCollector::addPostTask(std::function task) { if (current_stage == Stage::kWritingBackup) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed"); - post_collecting_tasks.push(std::move(task)); + post_tasks.push(std::move(task)); } /// Runs all the tasks added with addPostCollectingTask(). -void BackupEntriesCollector::runPostCollectingTasks() +void BackupEntriesCollector::runPostTasks() { /// Post collecting tasks can add other post collecting tasks, our code is fine with that. - while (!post_collecting_tasks.empty()) + while (!post_tasks.empty()) { - auto task = std::move(post_collecting_tasks.front()); - post_collecting_tasks.pop(); + auto task = std::move(post_tasks.front()); + post_tasks.pop(); std::move(task)(); } } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 1466815f3a7..c34c6204abb 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -49,7 +49,7 @@ public: /// Adds a function which must be called after all IStorage::backup() have finished their work on all hosts. /// This function is designed to help making a consistent in some complex cases like /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts. 
- void addPostCollectingTask(std::function task); + void addPostTask(std::function task); /// Writing a backup includes a few stages: enum class Stage @@ -79,16 +79,31 @@ public: private: void setStage(Stage new_stage, const String & error_message = {}); + void calculateRootPathInBackup(); - void collectDatabasesAndTablesInfo(); - void collectTableInfo(const QualifiedTableName & table_name, bool is_temporary_table, const std::optional & partitions, bool throw_if_not_found); - void collectDatabaseInfo(const String & database_name, const std::set & except_table_names, bool throw_if_not_found); - void collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names); + + void gatherMetadataAndCheckConsistency(); + + void gatherDatabasesMetadata(); + + void gatherDatabaseMetadata( + const String & database_name, + bool throw_if_database_not_found, + bool backup_create_database_query, + const std::optional & table_name, + bool throw_if_table_not_found, + const std::optional & partitions, + bool all_tables, + const std::set & except_table_names); + + void gatherTablesMetadata(); + void lockTablesForReading(); void checkConsistency(); + void makeBackupEntriesForDatabasesDefs(); void makeBackupEntriesForTablesDefs(); void makeBackupEntriesForTablesData(); - void runPostCollectingTasks(); + void runPostTasks(); const ASTBackupQuery::Elements backup_query_elements; const BackupSettings backup_settings; @@ -105,6 +120,17 @@ private: { DatabasePtr database; ASTPtr create_database_query; + + struct TableParams + { + bool throw_if_table_not_found = false; + std::optional partitions; + }; + + std::unordered_map tables; + + bool all_tables = false; + std::unordered_set except_table_names; }; struct TableInfo @@ -117,22 +143,14 @@ private: std::optional partitions; }; - struct TableKey - { - QualifiedTableName name; - bool is_temporary = false; - bool operator ==(const TableKey & right) const; - bool operator <(const TableKey & right) const; - }; - std::unordered_map database_infos; - std::map table_infos; - std::optional> previous_database_names; - std::optional> previous_table_names; - bool consistent = false; + std::map table_infos; + std::set previous_database_names; + std::set previous_table_names; + bool consistency = false; BackupEntries backup_entries; - std::queue> post_collecting_tasks; + std::queue> post_tasks; }; } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 14f5b7f48f0..16ffead3976 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -102,6 +102,7 @@ RestorerFromBackup::RestorerFromBackup( , backup(backup_) , context(context_) , timeout(timeout_) + , create_table_timeout_ms(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) { } @@ -674,7 +675,7 @@ void RestorerFromBackup::createTables() table_key.name.getFullName(), serializeAST(*create_table_query)); - database->createTableRestoredFromBackup(create_table_query, *this); + database->createTableRestoredFromBackup(create_table_query, context, restore_coordination, create_table_timeout_ms); } table_info.created = true; @@ -689,7 +690,9 @@ void RestorerFromBackup::createTables() if (!restore_settings.allow_different_table_def) { - ASTPtr create_table_query = storage->getCreateQueryForBackup(context, nullptr); + ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); + bool consistency = true; + 
storage->adjustCreateQueryForBackup(create_table_query, consistency); ASTPtr expected_create_query = table_info.create_table_query; if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) { diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 65139e0b946..86edf08b484 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -94,6 +94,7 @@ private: BackupPtr backup; ContextMutablePtr context; std::chrono::seconds timeout; + UInt64 create_table_timeout_ms; Poco::Logger * log; Stage current_stage = Stage::kPreparing; diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 5268252731f..62cee31bbad 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -145,4 +145,52 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); } +std::vector> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const +{ + /// We need a special processing for the temporary database. + if (getDatabaseName() != DatabaseCatalog::TEMPORARY_DATABASE) + return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context, consistency); + + std::vector> res; + + /// `this->tables` for the temporary database doesn't contain real names of tables. + /// That's why we need to call Context::getExternalTables() and then resolve those names using tryResolveStorageID() below. + auto external_tables = local_context->getExternalTables(); + + for (const auto & [table_name, storage] : external_tables) + { + if (!filter(table_name)) + continue; + + bool ok = false; + + if (auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal)) + { + /// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1" + /// it's not the real name of the table. 
+ if (auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context)) + { + const auto & create = create_table_query->as(); + if (create.getTable() == table_name) + { + storage->adjustCreateQueryForBackup(create_table_query, consistency); + if (consistency) + { + res.emplace_back(create_table_query, storage); + ok = true; + } + } + } + } + + if (!ok) + { + consistency = false; + return {}; + } + } + + return res; +} + } diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h index eef9f306343..8ec216b165d 100644 --- a/src/Databases/DatabaseMemory.h +++ b/src/Databases/DatabaseMemory.h @@ -50,6 +50,8 @@ public: void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override; + private: const String data_path; using NameToASTCreate = std::unordered_map; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 5a22eeaf570..6286723aaa3 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -923,7 +923,11 @@ String DatabaseReplicated::readMetadataFile(const String & table_name) const } -void DatabaseReplicated::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) +void DatabaseReplicated::createTableRestoredFromBackup( + const ASTPtr & create_table_query, + ContextMutablePtr local_context, + std::shared_ptr restore_coordination, + UInt64 timeout_ms) { /// Because of the replication multiple nodes can try to restore the same tables again and failed with "Table already exists" /// because of some table could be restored already on other node and then replicated to this node. @@ -931,29 +935,25 @@ void DatabaseReplicated::createTableRestoredFromBackup(const ASTPtr & create_tab /// IRestoreCoordination::acquireCreatingTableInReplicatedDatabase() and then for other nodes this function returns false which means /// this table is already being created by some other node. String table_name = create_table_query->as().getTable(); - if (restorer.getRestoreCoordination()->acquireCreatingTableInReplicatedDatabase(getZooKeeperPath(), table_name)) + if (restore_coordination->acquireCreatingTableInReplicatedDatabase(getZooKeeperPath(), table_name)) { - restorer.executeCreateQuery(create_table_query); + DatabaseAtomic::createTableRestoredFromBackup(create_table_query, local_context, restore_coordination, timeout_ms); } /// Wait until the table is actually created no matter if it's created by the current or another node and replicated to the /// current node afterwards. We have to wait because `RestorerFromBackup` is going to restore data of the table then. /// TODO: The following code doesn't look very reliable, probably we need to rewrite it somehow. 
- auto timeout = restorer.getTimeout(); - bool use_timeout = (timeout.count() >= 0); + auto timeout = std::chrono::milliseconds{timeout_ms}; auto start_time = std::chrono::steady_clock::now(); - while (!isTableExist(table_name, restorer.getContext())) + while (!isTableExist(table_name, local_context)) { waitForReplicaToProcessAllEntries(50); - if (use_timeout) - { - auto elapsed = std::chrono::steady_clock::now() - start_time; - if (elapsed > timeout) - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, - "Couldn't restore table {}.{} on other node or sync it (elapsed {})", - backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(table_name), to_string(elapsed)); - } + auto elapsed = std::chrono::steady_clock::now() - start_time; + if (elapsed > timeout) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, + "Couldn't restore table {}.{} on other node or sync it (elapsed {})", + backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(table_name), to_string(elapsed)); } } diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 3aa2aa378b7..958ee3f133f 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -72,7 +72,7 @@ public: void shutdown() override; - void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) override; + void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; friend struct DatabaseReplicatedTask; friend class DatabaseReplicatedDDLWorker; diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp index 5dd17789e60..4ab0ed4792e 100644 --- a/src/Databases/DatabasesCommon.cpp +++ b/src/Databases/DatabasesCommon.cpp @@ -322,22 +322,45 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name backQuote(database_name), backQuote(table_name)); } -DatabaseTablesIteratorPtr DatabaseWithOwnTablesBase::getTablesIteratorForBackup(const BackupEntriesCollector & backup_entries_collector) const +std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const { - /// Backup all the tables in this database. - /// Here we skip inner tables of materialized views. 
- auto skip_internal_tables = [](const String & table_name) { return !table_name.starts_with(".inner_id."); }; - return getTablesIterator(backup_entries_collector.getContext(), skip_internal_tables); + std::vector> res; + + for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next()) + { + bool ok = false; + + if (auto create_table_query = tryGetCreateTableQuery(it->name(), local_context)) + { + const auto & create = create_table_query->as(); + if (create.getTable() == it->name()) + { + auto storage = it->table(); + storage->adjustCreateQueryForBackup(create_table_query, consistency); + if (consistency) + { + res.emplace_back(create_table_query, storage); + ok = true; + } + } + } + + if (!ok) + { + consistency = false; + return {}; + } + } + + return res; } -void DatabaseWithOwnTablesBase::checkCreateTableQueryForBackup(const ASTPtr &, const BackupEntriesCollector &) const -{ -} - -void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) +void DatabaseWithOwnTablesBase::createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr, UInt64) { /// Creates a table by executing a "CREATE TABLE" query. - restorer.executeCreateQuery(create_table_query); + InterpreterCreateQuery interpreter{create_table_query, local_context}; + interpreter.setInternal(true); + interpreter.execute(); } } diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index c960d295529..2b320349e2d 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -36,9 +36,8 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - DatabaseTablesIteratorPtr getTablesIteratorForBackup(const BackupEntriesCollector & backup_entries_collector) const override; - void checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector & backup_entries_collector) const override; - void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer) override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override; + void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; void shutdown() override; diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index 3adba0d85c8..e09eb5186ec 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include @@ -26,29 +25,22 @@ ASTPtr IDatabase::getCreateDatabaseQueryForBackup() const { auto query = getCreateDatabaseQuery(); - /// We don't want to see any UUIDs in backup (after RESTORE the table will have another UUID anyway). + /// We don't want to see any UUIDs in backup (after RESTORE the database will have another UUID anyway). auto & create = query->as(); create.uuid = UUIDHelpers::Nil; return query; } -DatabaseTablesIteratorPtr IDatabase::getTablesIteratorForBackup(const BackupEntriesCollector &) const -{ - /// IDatabase doesn't own any tables. 
-    return std::make_unique(Tables{}, getDatabaseName());
-}
-
-void IDatabase::checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector &) const
+std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &, bool &) const
 {
     /// Cannot back up any table because IDatabase doesn't own any tables.
     throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE,
-                    "Database engine {} does not support backups, cannot backup table {}.{}",
-                    getEngineName(), backQuoteIfNeed(getDatabaseName()),
-                    backQuoteIfNeed(create_table_query->as().getTable()));
+                    "Database engine {} does not support backups, cannot backup tables in database {}",
+                    getEngineName(), backQuoteIfNeed(getDatabaseName()));
 }
 
-void IDatabase::createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup &)
+void IDatabase::createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr, std::shared_ptr, UInt64)
 {
     /// Cannot restore any table because IDatabase doesn't own any tables.
     throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE,
diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h
index 2223d657f7f..c8c9ff9d9a5 100644
--- a/src/Databases/IDatabase.h
+++ b/src/Databases/IDatabase.h
@@ -30,8 +30,7 @@ class SettingsChanges;
 using DictionariesWithID = std::vector>;
 struct ParsedTablesMetadata;
 struct QualifiedTableName;
-class BackupEntriesCollector;
-class RestorerFromBackup;
+class IRestoreCoordination;
 
 namespace ErrorCodes
 {
@@ -333,17 +332,14 @@ public:
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName());
     }
 
-    /// Returns a slightly changed version of the CREATE DATABASE query which must be written to a backup.
+    /// Returns a CREATE DATABASE query prepared for writing to a backup.
     virtual ASTPtr getCreateDatabaseQueryForBackup() const;
 
-    /// Returns an iterator that passes through all the tables when a user wants to back up the whole database.
-    virtual DatabaseTablesIteratorPtr getTablesIteratorForBackup(const BackupEntriesCollector & restorer) const;
-
-    /// Checks a CREATE TABLE query before it will be written to a backup. Called by IStorage::getCreateQueryForBackup().
-    virtual void checkCreateTableQueryForBackup(const ASTPtr & create_table_query, const BackupEntriesCollector & backup_entries_collector) const;
+    /// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup.
+    virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context, bool & consistency) const;
 
     /// Creates a table restored from backup.
-    virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, const RestorerFromBackup & restorer);
+    virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr context, std::shared_ptr restore_coordination, UInt64 timeout_ms);
 
     virtual ~IDatabase() = default;
 
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp
index 0fcf48b9afc..5f0fe303f27 100644
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@@ -248,40 +248,21 @@ bool IStorage::isStaticStorage() const
     return false;
 }
 
-ASTPtr IStorage::getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const
+void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query, bool &) const
 {
-    auto table_id = getStorageID();
-    auto db = DatabaseCatalog::instance().tryGetDatabase(table_id.getDatabaseName());
-    if (!db)
-        throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name);
-    ASTPtr query = db->tryGetCreateTableQuery(table_id.getTableName(), context);
-    if (!query)
-        throw Exception(ErrorCodes::TABLE_IS_DROPPED, "Table {}.{} is dropped", table_id.database_name, table_id.table_name);
+    create_query = create_query->clone();
 
     /// We don't want to see any UUIDs in backup (after RESTORE the table will have another UUID anyway).
-    auto & create = query->as();
+    auto & create = create_query->as();
     create.uuid = UUIDHelpers::Nil;
     create.to_inner_uuid = UUIDHelpers::Nil;
 
-    /// If this is a definition of a system table we'll remove columns and comment because they're excessive for backups.
-    if (create.storage && create.storage->engine && create.storage->engine->name.starts_with("System"))
+    /// If this is a definition of a system table we'll remove columns and comment because they're redundant for backups.
+    if (isSystemStorage())
     {
         create.reset(create.columns_list);
         create.reset(create.comment);
     }
-
-    if (database)
-        *database = db;
-
-    return query;
-}
-
-ASTPtr IStorage::getCreateQueryForBackup(const BackupEntriesCollector & backup_entries_collector) const
-{
-    DatabasePtr database;
-    auto query = getCreateQueryForBackup(backup_entries_collector.getContext(), &database);
-    database->checkCreateTableQueryForBackup(query, backup_entries_collector);
-    return query;
 }
 
 void IStorage::backupData(BackupEntriesCollector &, const String &, const std::optional &)
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 6dd329db02b..952f7bacbd3 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -223,10 +223,8 @@ public:
     /// Initially reserved virtual column name may be shadowed by real column.
     bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const;
 
-    /// Returns a slightly changed version of the CREATE TABLE query which must be written to a backup.
-    /// The function can throw `TABLE_IS_DROPPED` if this storage is not attached to a database.
-    virtual ASTPtr getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const;
-    virtual ASTPtr getCreateQueryForBackup(const BackupEntriesCollector & backup_entries_collector) const;
+    /// Modify a CREATE TABLE query to make a variant which must be written to a backup.
+    virtual void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const;
 
     /// Makes backup entries to backup the data of this storage.
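adjustCreateQueryForBackup() now clones the query before touching it, so the AST owned by the database is never mutated. The shape of the operation, sketched with a stand-in type (CreateQuery and its fields are hypothetical, not the real ASTCreateQuery):

#include <memory>
#include <string>

/// Hypothetical AST node, only to show the clone-and-scrub shape.
struct CreateQuery
{
    std::string uuid;    /// empty means "no UUID"
    std::string comment;
    std::shared_ptr<CreateQuery> clone() const { return std::make_shared<CreateQuery>(*this); }
};

/// Copy first, then clear what must not leak into a backup:
/// after RESTORE the table gets a fresh UUID anyway.
std::shared_ptr<CreateQuery> adjustForBackup(const CreateQuery & query)
{
    auto adjusted = query.clone();
    adjusted->uuid.clear();
    return adjusted;
}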
virtual void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fac11db2ab9..e2f82603702 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8253,44 +8253,58 @@ void StorageReplicatedMergeTree::createAndStoreFreezeMetadata(DiskPtr disk, Data } -ASTPtr StorageReplicatedMergeTree::getCreateQueryForBackup(const ContextPtr & local_context, DatabasePtr * database) const +void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const { - ASTPtr query = MergeTreeData::getCreateQueryForBackup(local_context, database); + MergeTreeData::adjustCreateQueryForBackup(create_query, consistency); /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. - auto & create = query->as(); - if (create.storage && create.storage->engine && (create.uuid != UUIDHelpers::Nil)) + auto & create = create_query->as(); + + if (!create.storage || !create.storage->engine) { - auto & engine = *(create.storage->engine); - if (auto * engine_args_ast = typeid_cast(engine.arguments.get())) - { - auto & engine_args = engine_args_ast->children; - if (engine_args.size() >= 2) - { - auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); - auto * replica_name_ast = typeid_cast(engine_args[1].get()); - if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && - replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) - { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); - String table_uuid_str = toString(create.uuid); - if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) - zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); - const auto & config = getContext()->getConfigRef(); - if ((zookeeper_path_arg == getDefaultZooKeeperPath(config)) && (replica_name_arg == getDefaultReplicaName(config)) - && ((engine_args.size() == 2) || !engine_args[2]->as())) - { - engine_args.erase(engine_args.begin(), engine_args.begin() + 2); - } - } - } - } + /// The CREATE query doesn't correspond to this storage. + consistency = false; + return; } - return query; + auto & engine = *(create.storage->engine); + if (!engine.name.starts_with("Replicated") || !engine.name.ends_with("MergeTree")) + { + /// The CREATE query doesn't correspond to this storage. 
+ consistency = false; + return; + } + + if (create.uuid == UUIDHelpers::Nil) + return; + + auto * engine_args_ast = typeid_cast(engine.arguments.get()); + if (!engine_args_ast) + return; + + auto & engine_args = engine_args_ast->children; + if (engine_args.size() < 2) + return; + + auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); + auto * replica_name_ast = typeid_cast(engine_args[1].get()); + if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && + replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) + { + String & zookeeper_path_arg = zookeeper_path_ast->value.get(); + String & replica_name_arg = replica_name_ast->value.get(); + String table_uuid_str = toString(create.uuid); + if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) + zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); + const auto & config = getContext()->getConfigRef(); + if ((zookeeper_path_arg == getDefaultZooKeeperPath(config)) && (replica_name_arg == getDefaultReplicaName(config)) + && ((engine_args.size() == 2) || !engine_args[2]->as())) + { + engine_args.erase(engine_args.begin(), engine_args.begin() + 2); + } + } } void StorageReplicatedMergeTree::backupData( @@ -8370,7 +8384,7 @@ void StorageReplicatedMergeTree::backupData( backup_entries_collector.addBackupEntry(data_path / relative_path, backup_entry); } }; - backup_entries_collector.addPostCollectingTask(post_collecting_task); + backup_entries_collector.addPostTask(post_collecting_task); } void StorageReplicatedMergeTree::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 73a08a2b921..f3bb4786cca 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -232,8 +232,8 @@ public: int getMetadataVersion() const { return metadata_version; } - /// Returns a slightly changed version of the CREATE TABLE query which must be written to a backup. - ASTPtr getCreateQueryForBackup(const ContextPtr & context, DatabasePtr * database) const override; + /// Modify a CREATE TABLE query to make a variant which must be written to a backup. + void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const override; /// Makes backup entries to backup the data of the storage. void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 35545e95537..0a39576ce10 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -138,7 +138,7 @@ def test_backup_table_under_another_name(): assert instance.query("SELECT count(), sum(x) FROM test.table2") == "100\t4950\n" -def test_materialized_view(): +def test_materialized_view_select_1(): backup_name = new_backup_name() instance.query( "CREATE MATERIALIZED VIEW mv_1(x UInt8) ENGINE=MergeTree ORDER BY tuple() POPULATE AS SELECT 1 AS x" @@ -456,18 +456,32 @@ def test_temporary_table(): ) == TSV([["e"], ["q"], ["w"]]) -# "BACKUP DATABASE _temporary_and_external_tables" is allowed but the backup must not contain these tables. 
-def test_temporary_tables_database(): +# The backup created by "BACKUP DATABASE _temporary_and_external_tables" must not contain tables from other sessions. +def test_temporary_database(): session_id = new_session_id() instance.http_query( "CREATE TEMPORARY TABLE temp_tbl(s String)", params={"session_id": session_id} ) - backup_name = new_backup_name() - instance.query(f"BACKUP DATABASE _temporary_and_external_tables TO {backup_name}") + other_session_id = new_session_id() + instance.http_query( + "CREATE TEMPORARY TABLE other_temp_tbl(s String)", + params={"session_id": other_session_id}, + ) - assert os.listdir(os.path.join(get_path_to_backup(backup_name), "metadata/")) == [ - "_temporary_and_external_tables.sql" # database metadata only + backup_name = new_backup_name() + instance.http_query( + f"BACKUP DATABASE _temporary_and_external_tables TO {backup_name}", + params={"session_id": session_id}, + ) + + assert os.listdir( + os.path.join(get_path_to_backup(backup_name), "temporary_tables/metadata") + ) == ["temp_tbl.sql"] + + assert sorted(os.listdir(get_path_to_backup(backup_name))) == [ + ".backup", + "temporary_tables", ] From 461a31f237dff13ede4f7cc4b764d1a99e37f593 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Jun 2022 12:17:54 +0200 Subject: [PATCH 085/121] Improve gathering metadata for backup - part 2. --- src/Backups/BackupEntriesCollector.cpp | 164 +++++++++++--------- src/Backups/BackupEntriesCollector.h | 3 +- src/Backups/RestorerFromBackup.cpp | 3 +- src/Common/ErrorCodes.cpp | 2 +- src/Databases/DatabaseMemory.cpp | 45 +++--- src/Databases/DatabaseMemory.h | 2 +- src/Databases/DatabasesCommon.cpp | 32 ++-- src/Databases/DatabasesCommon.h | 2 +- src/Databases/IDatabase.cpp | 2 +- src/Databases/IDatabase.h | 2 +- src/Storages/IStorage.cpp | 10 +- src/Storages/IStorage.h | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 17 +- src/Storages/StorageReplicatedMergeTree.h | 2 +- 14 files changed, 146 insertions(+), 142 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 9ee57cb4fd5..21b2741e237 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -21,7 +21,7 @@ namespace DB namespace ErrorCodes { - extern const int CANNOT_COLLECT_OBJECTS_FOR_BACKUP; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_BACKUP_TABLE; extern const int TABLE_IS_DROPPED; extern const int LOGICAL_ERROR; @@ -162,37 +162,37 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency() bool use_timeout = (timeout.count() >= 0); auto start_time = std::chrono::steady_clock::now(); - int pass = 1; - for (;;) + for (size_t pass = 1;; ++pass) { - consistency = true; - - /// Collect information about databases and tables specified in the BACKUP query. - gatherDatabasesMetadata(); - gatherTablesMetadata(); - - /// We have to check consistency of collected information to protect from the case when some table or database is - /// renamed during this collecting making the collected information invalid. - checkConsistency(); - - if (consistency) - break; - - /// Two passes is absolute minimum (see `previous_table_names` & `previous_database_names`). 
-        auto elapsed = std::chrono::steady_clock::now() - start_time;
-        if ((pass >= 2) && use_timeout)
+        try
         {
-            if (elapsed > timeout)
-                throw Exception(
-                    ErrorCodes::CANNOT_COLLECT_OBJECTS_FOR_BACKUP,
-                    "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})",
-                    pass,
-                    to_string(elapsed));
-        }
+            /// Collect information about databases and tables specified in the BACKUP query.
+            database_infos.clear();
+            table_infos.clear();
+            gatherDatabasesMetadata();
+            gatherTablesMetadata();
 
-        if (pass >= 2)
-            LOG_WARNING(log, "Couldn't collect tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
-        ++pass;
+            /// We have to check consistency of collected information to protect from the case when some table or database is
+            /// renamed during this collecting making the collected information invalid.
+            auto comparing_error = compareWithPrevious();
+            if (!comparing_error)
+                break; /// no error, everything's fine
+
+            if (pass >= 2) /// Two passes are the minimum (we need to compare table names with the previous ones to be sure we don't miss anything).
+                throw *comparing_error;
+        }
+        catch (Exception & e)
+        {
+            if (e.code() != ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP)
+                throw;
+
+            auto elapsed = std::chrono::steady_clock::now() - start_time;
+            e.addMessage("Couldn't gather tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
+            if (use_timeout && (elapsed > timeout))
+                throw;
+            else
+                LOG_WARNING(log, "{}", e.displayText());
+        }
     }
 
     LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size());
@@ -200,8 +200,6 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
 
 void BackupEntriesCollector::gatherDatabasesMetadata()
 {
-    database_infos.clear();
-
     /// Collect information about databases and tables specified in the BACKUP query.
     for (const auto & element : backup_query_elements)
     {
@@ -264,16 +262,11 @@ void BackupEntriesCollector::gatherDatabasesMetadata()
                         /* partitions= */ {},
                         /* all_tables= */ true,
                         /* except_table_names= */ element.except_tables);
-                    if (!consistency)
-                        return;
                 }
             }
             break;
         }
     }
-
-    if (!consistency)
-        return;
 }
 
     }
     catch (...)
     {
-        /// The database has been dropped recently.
-        consistency = false;
-        return;
+        throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name);
     }
 
     database_info.create_database_query = create_database_query;
 
     const auto & create = create_database_query->as();
     if (create.getDatabase() != database_name)
-    {
-        /// The database has been renamed recently.
-        consistency = false;
-        return;
-    }
+        throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for database {}", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name));
 }
 
 if (table_name)
@@ -358,9 +345,6 @@ void BackupEntriesCollector::gatherDatabaseMetadata(
 
 void BackupEntriesCollector::gatherTablesMetadata()
 {
-    if (!consistency)
-        return;
-
     table_infos.clear();
     for (const auto & [database_name, database_info] : database_infos)
     {
@@ -382,12 +366,8 @@ void BackupEntriesCollector::gatherTablesMetadata()
             return false;
         };
 
-        auto db_tables = database->getTablesForBackup(filter_by_table_name, context, consistency);
+        auto db_tables = database->getTablesForBackup(filter_by_table_name, context);
 
-        if (!consistency)
-            return;
-
-        /// Check that all tables were found.
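The loop above converts every inconsistency into an INCONSISTENT_METADATA_FOR_BACKUP exception and simply retries the whole gathering until two consecutive passes agree or the timeout expires. The control flow, reduced to a standalone sketch (InconsistentMetadata and gather() are placeholders for the real exception and the real gathering pass):

#include <chrono>
#include <stdexcept>

struct InconsistentMetadata : std::runtime_error { using std::runtime_error::runtime_error; };

/// Sketch of the retry loop: at least two passes are required because each
/// pass is validated by comparing its results with the previous pass.
template <typename GatherFn>
void gatherWithRetries(GatherFn gather, std::chrono::milliseconds timeout)
{
    auto start_time = std::chrono::steady_clock::now();
    for (size_t pass = 1;; ++pass)
    {
        try
        {
            gather(pass); /// throws InconsistentMetadata if something changed mid-scan
            break;
        }
        catch (const InconsistentMetadata &)
        {
            auto elapsed = std::chrono::steady_clock::now() - start_time;
            if (elapsed > timeout)
                throw; /// the metadata never stabilized, give up
        }
    }
}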
std::unordered_set found_table_names; for (const auto & db_table : db_tables) { @@ -395,13 +375,14 @@ void BackupEntriesCollector::gatherTablesMetadata() const auto & create = create_table_query->as(); found_table_names.emplace(create.getTable()); - if ((is_temporary_database && !create.temporary) || (!is_temporary_database && (create.getDatabase() != database_name))) - { - consistency = false; - return; - } + if (is_temporary_database && !create.temporary) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false)); + + if (!is_temporary_database && (create.getDatabase() != database_name)) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false)); } + /// Check that all tables were found. for (const auto & [table_name, table_info] : database_info.tables) { if (table_info.throw_if_table_not_found && !found_table_names.contains(table_name)) @@ -443,10 +424,7 @@ void BackupEntriesCollector::gatherTablesMetadata() void BackupEntriesCollector::lockTablesForReading() { - if (!consistency) - return; - - for (auto & table_info : table_infos | boost::adaptors::map_values) + for (auto & [table_name, table_info] : table_infos) { auto storage = table_info.storage; TableLockHolder table_lock; @@ -460,19 +438,15 @@ void BackupEntriesCollector::lockTablesForReading() { if (e.code() != ErrorCodes::TABLE_IS_DROPPED) throw; - consistency = false; - return; + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} is dropped", tableNameWithTypeToString(table_name.database, table_name.table, true)); } } } } -/// Check for consistency of collected information about databases and tables. -void BackupEntriesCollector::checkConsistency() +/// Check consistency of collected information about databases and tables. +std::optional BackupEntriesCollector::compareWithPrevious() { - if (!consistency) - return; /// Already inconsistent, no more checks necessary - /// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed /// while we were scanning. 
    std::set database_names;
    std::set table_names;
    boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end()));
    boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end()));
 
-    if ((previous_database_names != database_names) || (previous_table_names != table_names))
+    if (previous_database_names != database_names)
     {
+        std::optional comparing_error;
+        for (const auto & database_name : database_names)
+        {
+            if (!previous_database_names.contains(database_name))
+            {
+                comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} was added during scanning", backQuoteIfNeed(database_name)};
+                break;
+            }
+        }
+        if (!comparing_error)
+        {
+            for (const auto & database_name : previous_database_names)
+            {
+                if (!database_names.contains(database_name))
+                {
+                    comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} was removed during scanning", backQuoteIfNeed(database_name)};
+                    break;
+                }
+            }
+        }
+        assert(comparing_error);
         previous_database_names = std::move(database_names);
         previous_table_names = std::move(table_names);
-        consistency = false;
+        return comparing_error;
     }
+
+    if (previous_table_names != table_names)
+    {
+        std::optional comparing_error;
+        for (const auto & table_name : table_names)
+        {
+            if (!previous_table_names.contains(table_name))
+            {
+                comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} was added during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)};
+                break;
+            }
+        }
+        if (!comparing_error)
+        {
+            for (const auto & table_name : previous_table_names)
+            {
+                if (!table_names.contains(table_name))
+                {
+                    comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} was removed during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)};
+                    break;
+                }
+            }
+        }
+        assert(comparing_error);
+        previous_table_names = std::move(table_names);
+        return comparing_error;
+    }
+
+    return {};
 }
 
 /// Make backup entries for all the definitions of all the databases found.
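compareWithPrevious() reports the first database or table name that exists in only one of the two snapshots. With sorted sets the same question can be asked via std::set_difference; a sketch (findFirstDifference is an illustrative helper, not part of the patch):

#include <algorithm>
#include <iterator>
#include <optional>
#include <set>
#include <string>
#include <vector>

/// Returns a description of one element that differs between two snapshots,
/// or nothing if the snapshots match.
std::optional<std::string> findFirstDifference(
    const std::set<std::string> & previous, const std::set<std::string> & current)
{
    std::vector<std::string> added, removed;
    std::set_difference(current.begin(), current.end(),
                        previous.begin(), previous.end(), std::back_inserter(added));
    std::set_difference(previous.begin(), previous.end(),
                        current.begin(), current.end(), std::back_inserter(removed));
    if (!added.empty())
        return added.front() + " was added during scanning";
    if (!removed.empty())
        return removed.front() + " was removed during scanning";
    return {};
}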
diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index c34c6204abb..03b7a968650 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -98,7 +98,7 @@ private: void gatherTablesMetadata(); void lockTablesForReading(); - void checkConsistency(); + std::optional compareWithPrevious(); void makeBackupEntriesForDatabasesDefs(); void makeBackupEntriesForTablesDefs(); @@ -147,7 +147,6 @@ private: std::map table_infos; std::set previous_database_names; std::set previous_table_names; - bool consistency = false; BackupEntries backup_entries; std::queue> post_tasks; diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 16ffead3976..84cc1dec1fb 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -691,8 +691,7 @@ void RestorerFromBackup::createTables() if (!restore_settings.allow_different_table_def) { ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); - bool consistency = true; - storage->adjustCreateQueryForBackup(create_table_query, consistency); + storage->adjustCreateQueryForBackup(create_table_query); ASTPtr expected_create_query = table_info.create_table_query; if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) { diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 6f2ac41cc08..8e7eaf4c6e6 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -631,7 +631,7 @@ M(660, HDFS_ERROR) \ M(661, CANNOT_SEND_SIGNAL) \ M(662, FS_METADATA_ERROR) \ - M(663, CANNOT_COLLECT_OBJECTS_FOR_BACKUP) \ + M(663, INCONSISTENT_METADATA_FOR_BACKUP) \ M(664, ACCESS_STORAGE_DOESNT_ALLOW_BACKUP) \ \ M(999, KEEPER_EXCEPTION) \ diff --git a/src/Databases/DatabaseMemory.cpp b/src/Databases/DatabaseMemory.cpp index 62cee31bbad..8540c785419 100644 --- a/src/Databases/DatabaseMemory.cpp +++ b/src/Databases/DatabaseMemory.cpp @@ -19,6 +19,7 @@ namespace ErrorCodes { extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; } DatabaseMemory::DatabaseMemory(const String & name_, ContextPtr context_) @@ -145,11 +146,11 @@ void DatabaseMemory::alterTable(ContextPtr local_context, const StorageID & tabl DatabaseCatalog::instance().updateLoadingDependencies(table_id, std::move(new_dependencies)); } -std::vector> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const +std::vector> DatabaseMemory::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const { /// We need a special processing for the temporary database. if (getDatabaseName() != DatabaseCatalog::TEMPORARY_DATABASE) - return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context, consistency); + return DatabaseWithOwnTablesBase::getTablesForBackup(filter, local_context); std::vector> res; @@ -162,32 +163,22 @@ std::vector> DatabaseMemory::getTablesForBackup(co if (!filter(table_name)) continue; - bool ok = false; - - if (auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal)) - { - /// Here `storage_id.table_name` looks like looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1" - /// it's not the real name of the table. 
-            if (auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context))
-            {
-                const auto & create = create_table_query->as();
-                if (create.getTable() == table_name)
-                {
-                    storage->adjustCreateQueryForBackup(create_table_query, consistency);
-                    if (consistency)
-                    {
-                        res.emplace_back(create_table_query, storage);
-                        ok = true;
-                    }
-                }
-            }
-        }
+        auto storage_id = local_context->tryResolveStorageID(StorageID{"", table_name}, Context::ResolveExternal);
+        if (!storage_id)
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't resolve the name of temporary table {}", backQuoteIfNeed(table_name));
 
-        if (!ok)
-        {
-            consistency = false;
-            return {};
-        }
+        /// Here `storage_id.table_name` looks like "_tmp_ab9b15a3-fb43-4670-abec-14a0e9eb70f1",
+        /// it's not the real name of the table.
+        auto create_table_query = tryGetCreateTableQuery(storage_id.table_name, local_context);
+        if (!create_table_query)
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for temporary table {}", backQuoteIfNeed(table_name));
+
+        const auto & create = create_table_query->as();
+        if (create.getTable() != table_name)
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for temporary table {}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(table_name));
+
+        storage->adjustCreateQueryForBackup(create_table_query);
+        res.emplace_back(create_table_query, storage);
     }
 
     return res;
diff --git a/src/Databases/DatabaseMemory.h b/src/Databases/DatabaseMemory.h
index 8ec216b165d..6262543b0c1 100644
--- a/src/Databases/DatabaseMemory.h
+++ b/src/Databases/DatabaseMemory.h
@@ -50,7 +50,7 @@ public:
 
     void alterTable(ContextPtr local_context, const StorageID & table_id, const StorageInMemoryMetadata & metadata) override;
 
-    std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override;
+    std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override;
 
 private:
     const String data_path;
diff --git a/src/Databases/DatabasesCommon.cpp b/src/Databases/DatabasesCommon.cpp
index 4ab0ed4792e..93a9523d115 100644
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@@ -25,6 +25,7 @@ namespace ErrorCodes
     extern const int NOT_IMPLEMENTED;
     extern const int LOGICAL_ERROR;
     extern const int CANNOT_GET_CREATE_TABLE_QUERY;
+    extern const int INCONSISTENT_METADATA_FOR_BACKUP;
 }
 
 void applyMetadataChangesToCreateQuery(const ASTPtr & query, const StorageInMemoryMetadata & metadata)
@@ -322,34 +323,23 @@ StoragePtr DatabaseWithOwnTablesBase::getTableUnlocked(const String & table_name
                     backQuote(database_name), backQuote(table_name));
 }
 
-std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const
+std::vector> DatabaseWithOwnTablesBase::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const
 {
     std::vector> res;
 
     for (auto it = getTablesIterator(local_context, filter); it->isValid(); it->next())
     {
-        bool ok = false;
+        auto create_table_query = tryGetCreateTableQuery(it->name(), local_context);
+        if (!create_table_query)
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for table {}.{}", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name()));
 
-        if (auto create_table_query =
tryGetCreateTableQuery(it->name(), local_context)) - { - const auto & create = create_table_query->as(); - if (create.getTable() == it->name()) - { - auto storage = it->table(); - storage->adjustCreateQueryForBackup(create_table_query, consistency); - if (consistency) - { - res.emplace_back(create_table_query, storage); - ok = true; - } - } - } + const auto & create = create_table_query->as(); + if (create.getTable() != it->name()) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for table {}.{}", backQuoteIfNeed(create.getTable()), backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(it->name())); - if (!ok) - { - consistency = false; - return {}; - } + auto storage = it->table(); + storage->adjustCreateQueryForBackup(create_table_query); + res.emplace_back(create_table_query, storage); } return res; diff --git a/src/Databases/DatabasesCommon.h b/src/Databases/DatabasesCommon.h index 2b320349e2d..c5842d7dac3 100644 --- a/src/Databases/DatabasesCommon.h +++ b/src/Databases/DatabasesCommon.h @@ -36,7 +36,7 @@ public: DatabaseTablesIteratorPtr getTablesIterator(ContextPtr context, const FilterByNameFunction & filter_by_table_name) const override; - std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context, bool & consistency) const override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; void shutdown() override; diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index e09eb5186ec..a75f213a6bb 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -32,7 +32,7 @@ ASTPtr IDatabase::getCreateDatabaseQueryForBackup() const return query; } -std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &, bool &) const +std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const { /// Cannot restore any table because IDatabase doesn't own any tables. throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index c8c9ff9d9a5..cdea03aa1cb 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -336,7 +336,7 @@ public: virtual ASTPtr getCreateDatabaseQueryForBackup() const; /// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup. - virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context, bool & consistency) const; + virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const; /// Creates a table restored from backup. 
    virtual void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr context, std::shared_ptr restore_coordination, UInt64 timeout_ms);
 
     virtual ~IDatabase() = default;
 
diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp
index 5f0fe303f27..a3f35ccc0f8 100644
--- a/src/Storages/IStorage.cpp
+++ b/src/Storages/IStorage.cpp
@@ -24,6 +24,7 @@ namespace ErrorCodes
     extern const int TABLE_IS_DROPPED;
     extern const int NOT_IMPLEMENTED;
     extern const int DEADLOCK_AVOIDED;
+    extern const int INCONSISTENT_METADATA_FOR_BACKUP;
 }
 
 bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const
@@ -248,7 +249,7 @@ bool IStorage::isStaticStorage() const
     return false;
 }
 
-void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query, bool &) const
+void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query) const
 {
     create_query = create_query->clone();
 
@@ -260,6 +261,13 @@ void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query, bool &) const
     /// If this is a definition of a system table we'll remove columns and comment because they're redundant for backups.
     if (isSystemStorage())
     {
+        if (!create.storage || !create.storage->engine)
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a system table {}", getStorageID().getFullTableName());
+
+        auto & engine = *(create.storage->engine);
+        if (!engine.name.starts_with("System"))
+            throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a system table {}", engine.name, getStorageID().getFullTableName());
+
         create.reset(create.columns_list);
         create.reset(create.comment);
     }
diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h
index 952f7bacbd3..34170785896 100644
--- a/src/Storages/IStorage.h
+++ b/src/Storages/IStorage.h
@@ -224,7 +224,7 @@ public:
     bool isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const;
 
     /// Modify a CREATE TABLE query to make a variant which must be written to a backup.
-    virtual void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const;
+    virtual void adjustCreateQueryForBackup(ASTPtr & create_query) const;
 
     /// Makes backup entries to backup the data of this storage.
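Both the base implementation above and the StorageReplicatedMergeTree override in the next hunks validate the engine name before rewriting anything: a system table must use a System* engine, a replicated one a Replicated*MergeTree engine. The checks are plain string predicates; a sketch (checkEngineName is an illustrative helper):

#include <stdexcept>
#include <string>

/// Sketch of the engine sanity checks: a mismatch means the CREATE query
/// doesn't correspond to the storage that produced it.
void checkEngineName(const std::string & engine_name, bool is_system, bool is_replicated)
{
    if (is_system && !engine_name.starts_with("System"))
        throw std::runtime_error("Unexpected engine " + engine_name + " for a system table");
    if (is_replicated && !(engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree")))
        throw std::runtime_error("Unexpected engine " + engine_name + " for a replicated table");
}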
virtual void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index e2f82603702..66fb2a64a50 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -158,6 +158,7 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; extern const int CHECKSUM_DOESNT_MATCH; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; } namespace ActionLocks @@ -8253,9 +8254,9 @@ void StorageReplicatedMergeTree::createAndStoreFreezeMetadata(DiskPtr disk, Data } -void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const +void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query) const { - MergeTreeData::adjustCreateQueryForBackup(create_query, consistency); + MergeTreeData::adjustCreateQueryForBackup(create_query); /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. @@ -8263,19 +8264,11 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer auto & create = create_query->as(); if (!create.storage || !create.storage->engine) - { - /// The CREATE query doesn't correspond to this storage. - consistency = false; - return; - } + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a replicated table {}", getStorageID().getFullTableName()); auto & engine = *(create.storage->engine); if (!engine.name.starts_with("Replicated") || !engine.name.ends_with("MergeTree")) - { - /// The CREATE query doesn't correspond to this storage. - consistency = false; - return; - } + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a replicated table {}", engine.name, getStorageID().getFullTableName()); if (create.uuid == UUIDHelpers::Nil) return; diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index f3bb4786cca..86120b354bd 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -233,7 +233,7 @@ public: int getMetadataVersion() const { return metadata_version; } /// Modify a CREATE TABLE query to make a variant which must be written to a backup. - void adjustCreateQueryForBackup(ASTPtr & create_query, bool & consistency) const override; + void adjustCreateQueryForBackup(ASTPtr & create_query) const override; /// Makes backup entries to backup the data of the storage. void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; From 44db346fea82a35880eebba05b950954be3a8c07 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Jun 2022 14:24:45 +0200 Subject: [PATCH 086/121] Improve gathering metadata for backup - part 3. 
--- src/Access/AccessControl.cpp | 2 +- src/Backups/BackupEntriesCollector.cpp | 40 +++--- src/Backups/BackupEntriesCollector.h | 2 + src/Backups/BackupUtils.cpp | 2 +- src/Backups/RestorerFromBackup.cpp | 162 +++++++++++-------------- src/Backups/RestorerFromBackup.h | 30 ++--- src/Databases/DDLRenamingVisitor.cpp | 68 +++++------ src/Databases/DDLRenamingVisitor.h | 3 - 8 files changed, 135 insertions(+), 174 deletions(-) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index aa58044a6b0..b5b22caa400 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -467,7 +467,7 @@ void AccessControl::backup(BackupEntriesCollector & backup_entries_collector, Ac void AccessControl::restore(RestorerFromBackup & restorer, const String & data_path_in_backup) { /// The restorer must already know about `data_path_in_backup`, but let's check. - restorer.checkPathInBackupToRestoreAccess(data_path_in_backup); + restorer.checkPathInBackupIsRegisteredToRestoreAccess(data_path_in_backup); } void AccessControl::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 21b2741e237..feed9be4e92 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -36,7 +36,6 @@ namespace else return fmt::format("{}able {}.{}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); } - } std::string_view BackupEntriesCollector::toString(Stage stage) @@ -319,6 +318,9 @@ void BackupEntriesCollector::gatherDatabaseMetadata( if (create.getDatabase() != database_name) throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected name {} for database {}", backQuoteIfNeed(create.getDatabase()), backQuoteIfNeed(database_name)); + + String new_database_name = renaming_map.getNewDatabaseName(database_name); + database_info.metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql"); } if (table_name) @@ -395,17 +397,19 @@ void BackupEntriesCollector::gatherTablesMetadata() const auto & create = create_table_query->as(); String table_name = create.getTable(); - fs::path data_path_in_backup; - if (is_temporary_database) + fs::path metadata_path_in_backup, data_path_in_backup; + auto table_name_in_backup = renaming_map.getNewTableName({database_name, table_name}); + if (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE) { - auto table_name_in_backup = renaming_map.getNewTemporaryTableName(table_name); - data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup); + metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(table_name_in_backup.table) + ".sql"); + data_path_in_backup = root_path_in_backup / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table); } else { - auto table_name_in_backup = renaming_map.getNewTableName({database_name, table_name}); - data_path_in_backup - = root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); + metadata_path_in_backup + = root_path_in_backup / "metadata" / escapeForFileName(table_name_in_backup.database) / (escapeForFileName(table_name_in_backup.table) + ".sql"); + data_path_in_backup = 
root_path_in_backup / "data" / escapeForFileName(table_name_in_backup.database) + / escapeForFileName(table_name_in_backup.table); } /// Add information to `table_infos`. @@ -413,6 +417,7 @@ void BackupEntriesCollector::gatherTablesMetadata() res_table_info.database = database; res_table_info.storage = db_table.second; res_table_info.create_table_query = create_table_query; + res_table_info.metadata_path_in_backup = metadata_path_in_backup; res_table_info.data_path_in_backup = data_path_in_backup; auto partitions_it = database_info.tables.find(table_name); @@ -525,9 +530,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() ASTPtr new_create_query = database_info.create_database_query; renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); - String new_database_name = renaming_map.getNewDatabaseName(database_name); - auto metadata_path_in_backup = root_path_in_backup / "metadata" / (escapeForFileName(new_database_name) + ".sql"); - + const String & metadata_path_in_backup = database_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); } } @@ -538,24 +541,11 @@ void BackupEntriesCollector::makeBackupEntriesForTablesDefs() for (const auto & [table_name, table_info] : table_infos) { LOG_TRACE(log, "Adding definition of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); - bool is_temporary_database = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE); ASTPtr new_create_query = table_info.create_table_query; renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); - fs::path metadata_path_in_backup; - if (is_temporary_database) - { - auto new_name = renaming_map.getNewTemporaryTableName(table_name.table); - metadata_path_in_backup = root_path_in_backup / "temporary_tables" / "metadata" / (escapeForFileName(new_name) + ".sql"); - } - else - { - auto new_name = renaming_map.getNewTableName({table_name.database, table_name.table}); - metadata_path_in_backup - = root_path_in_backup / "metadata" / escapeForFileName(new_name.database) / (escapeForFileName(new_name.table) + ".sql"); - } - + const String & metadata_path_in_backup = table_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); } } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 03b7a968650..5e37e268fc4 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -120,6 +120,7 @@ private: { DatabasePtr database; ASTPtr create_database_query; + String metadata_path_in_backup; struct TableParams { @@ -139,6 +140,7 @@ private: StoragePtr storage; TableLockHolder table_lock; ASTPtr create_table_query; + String metadata_path_in_backup; std::filesystem::path data_path_in_backup; std::optional partitions; }; diff --git a/src/Backups/BackupUtils.cpp b/src/Backups/BackupUtils.cpp index c5de4bd7e67..9ff91050177 100644 --- a/src/Backups/BackupUtils.cpp +++ b/src/Backups/BackupUtils.cpp @@ -39,7 +39,7 @@ DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & e const String & new_table_name = element.new_table_name; assert(!table_name.empty()); assert(!new_table_name.empty()); - map.setNewTemporaryTableName(table_name, new_table_name); + map.setNewTableName({DatabaseCatalog::TEMPORARY_DATABASE, table_name}, {DatabaseCatalog::TEMPORARY_DATABASE, 
new_table_name}); break; } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 84cc1dec1fb..74d4de631e3 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -39,7 +39,13 @@ namespace ErrorCodes namespace { - constexpr const std::string_view sql_ext = ".sql"; + String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_char_uppercase) + { + if (database_name == DatabaseCatalog::TEMPORARY_DATABASE) + return fmt::format("{}emporary table {}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(table_name)); + else + return fmt::format("{}able {}.{}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + } String tryGetTableEngine(const IAST & ast) { @@ -64,16 +70,6 @@ namespace } } -bool RestorerFromBackup::TableKey::operator ==(const TableKey & right) const -{ - return (name == right.name) && (is_temporary == right.is_temporary); -} - -bool RestorerFromBackup::TableKey::operator <(const TableKey & right) const -{ - return (name < right.name) || ((name == right.name) && (is_temporary < right.is_temporary)); -} - std::string_view RestorerFromBackup::toString(Stage stage) { switch (stage) @@ -135,10 +131,11 @@ void RestorerFromBackup::run(bool only_check_access) /// Find all the databases and tables which we will read from the backup. setStage(Stage::kFindingTablesInBackup); - collectDatabaseAndTableInfos(); + findDatabasesAndTablesInBackup(); /// Check access rights. - checkAccessForCollectedInfos(); + checkAccessForObjectsFoundInBackup(); + if (only_check_access) return; @@ -303,7 +300,7 @@ void RestorerFromBackup::findRootPathsInBackup() ", ")); } -void RestorerFromBackup::collectDatabaseAndTableInfos() +void RestorerFromBackup::findDatabasesAndTablesInBackup() { database_infos.clear(); table_infos.clear(); @@ -313,22 +310,22 @@ void RestorerFromBackup::collectDatabaseAndTableInfos() { case ASTBackupQuery::ElementType::TABLE: { - collectTableInfo({element.database_name, element.table_name}, false, element.partitions); + findTableInBackup({element.database_name, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::TEMPORARY_TABLE: { - collectTableInfo({element.database_name, element.table_name}, true, element.partitions); + findTableInBackup({DatabaseCatalog::TEMPORARY_DATABASE, element.table_name}, element.partitions); break; } case ASTBackupQuery::ElementType::DATABASE: { - collectDatabaseInfo(element.database_name, element.except_tables, /* throw_if_no_database_metadata_in_backup= */ true); + findDatabaseInBackup(element.database_name, element.except_tables); break; } case ASTBackupQuery::ElementType::ALL: { - collectAllDatabasesInfo(element.except_databases, element.except_tables); + findEverythingInBackup(element.except_databases, element.except_tables); break; } } @@ -337,9 +334,9 @@ void RestorerFromBackup::collectDatabaseAndTableInfos() LOG_INFO(log, "Will restore {} databases and {} tables", database_infos.size(), table_infos.size()); } -void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional & partitions) +void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions) { - String database_name_in_backup = is_temporary_table ? 
DatabaseCatalog::TEMPORARY_DATABASE : table_name_in_backup.database; + bool is_temporary_table = (table_name_in_backup.database == DatabaseCatalog::TEMPORARY_DATABASE); std::optional metadata_path; std::optional root_path_in_use; @@ -366,21 +363,20 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ } if (!metadata_path) - throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Table {} not found in backup", table_name_in_backup.getFullName()); + throw Exception( + ErrorCodes::BACKUP_ENTRY_NOT_FOUND, + "{} not found in backup", + tableNameWithTypeToString(table_name_in_backup.database, table_name_in_backup.table, true)); - TableKey table_key; fs::path data_path_in_backup; if (is_temporary_table) { data_path_in_backup = *root_path_in_use / "temporary_tables" / "data" / escapeForFileName(table_name_in_backup.table); - table_key.name.table = renaming_map.getNewTemporaryTableName(table_name_in_backup.table); - table_key.is_temporary = true; } else { data_path_in_backup = *root_path_in_use / "data" / escapeForFileName(table_name_in_backup.database) / escapeForFileName(table_name_in_backup.table); - table_key.name = renaming_map.getNewTableName(table_name_in_backup); } auto read_buffer = backup->readFile(*metadata_path)->getReadBuffer(); @@ -391,25 +387,26 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_table_query); - if (auto it = table_infos.find(table_key); it != table_infos.end()) + QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); + + if (auto it = table_infos.find(table_name); it != table_infos.end()) { const TableInfo & table_info = it->second; if (table_info.create_table_query && (serializeAST(*table_info.create_table_query) != serializeAST(*create_table_query))) { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, - "Extracted two different create queries for the same {}table {}: {} and {}", - (is_temporary_table ? 
"temporary " : ""), - table_key.name.getFullName(), + "Extracted two different create queries for the same {}: {} and {}", + tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*table_info.create_table_query), serializeAST(*create_table_query)); } } - TableInfo & res_table_info = table_infos[table_key]; + TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; res_table_info.data_path_in_backup = data_path_in_backup; - res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_key.name, create_table_query); + res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); if (partitions) { @@ -426,27 +423,37 @@ void RestorerFromBackup::collectTableInfo(const QualifiedTableName & table_name_ } } -void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_backup, const std::set & except_table_names, bool throw_if_no_database_metadata_in_backup) +void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names) { std::optional metadata_path; std::unordered_set table_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { - fs::path try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql"); - if (!metadata_path && backup->fileExists(try_metadata_path)) + fs::path try_metadata_path, try_tables_metadata_path; + if (database_name_in_backup == DatabaseCatalog::TEMPORARY_DATABASE) + { + try_tables_metadata_path = root_path_in_backup / "temporary_tables" / "metadata"; + } + else + { + try_metadata_path = root_path_in_backup / "metadata" / (escapeForFileName(database_name_in_backup) + ".sql"); + try_tables_metadata_path = root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup); + } + + if (!metadata_path && !try_metadata_path.empty() && backup->fileExists(try_metadata_path)) metadata_path = try_metadata_path; - Strings file_names = backup->listFiles(root_path_in_backup / "metadata" / escapeForFileName(database_name_in_backup)); + Strings file_names = backup->listFiles(try_tables_metadata_path); for (const String & file_name : file_names) { - if (!file_name.ends_with(sql_ext)) + if (!file_name.ends_with(".sql")) continue; - String file_name_without_ext = file_name.substr(0, file_name.length() - sql_ext.length()); + String file_name_without_ext = file_name.substr(0, file_name.length() - strlen(".sql")); table_names_in_backup.insert(unescapeForFileName(file_name_without_ext)); } } - if (!metadata_path && throw_if_no_database_metadata_in_backup) + if (!metadata_path && table_names_in_backup.empty()) throw Exception(ErrorCodes::BACKUP_ENTRY_NOT_FOUND, "Database {} not found in backup", backQuoteIfNeed(database_name_in_backup)); if (metadata_path) @@ -480,33 +487,26 @@ void RestorerFromBackup::collectDatabaseInfo(const String & database_name_in_bac if (except_table_names.contains({database_name_in_backup, table_name_in_backup})) continue; - collectTableInfo({database_name_in_backup, table_name_in_backup}, /* is_temporary_table= */ false, /* partitions= */ {}); + findTableInBackup({database_name_in_backup, table_name_in_backup}, /* partitions= */ {}); } } -void RestorerFromBackup::collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names) +void RestorerFromBackup::findEverythingInBackup(const std::set & 
except_database_names, const std::set & except_table_names) { std::unordered_set database_names_in_backup; - std::unordered_set temporary_table_names_in_backup; for (const auto & root_path_in_backup : root_paths_in_backup) { Strings file_names = backup->listFiles(root_path_in_backup / "metadata"); for (String & file_name : file_names) { - if (file_name.ends_with(sql_ext)) - file_name.resize(file_name.length() - sql_ext.length()); + if (file_name.ends_with(".sql")) + file_name.resize(file_name.length() - strlen(".sql")); database_names_in_backup.emplace(unescapeForFileName(file_name)); } - file_names = backup->listFiles(root_path_in_backup / "temporary_tables" / "metadata"); - for (String & file_name : file_names) - { - if (!file_name.ends_with(sql_ext)) - continue; - file_name.resize(file_name.length() - sql_ext.length()); - temporary_table_names_in_backup.emplace(unescapeForFileName(file_name)); - } + if (backup->hasFiles(root_path_in_backup / "temporary_tables" / "metadata")) + database_names_in_backup.emplace(DatabaseCatalog::TEMPORARY_DATABASE); } for (const String & database_name_in_backup : database_names_in_backup) @@ -514,14 +514,11 @@ void RestorerFromBackup::collectAllDatabasesInfo(const std::set & except if (except_database_names.contains(database_name_in_backup)) continue; - collectDatabaseInfo(database_name_in_backup, except_table_names, /* throw_if_no_database_metadata_in_backup= */ false); + findDatabaseInBackup(database_name_in_backup, except_table_names); } - - for (const String & temporary_table_name_in_backup : temporary_table_names_in_backup) - collectTableInfo({"", temporary_table_name_in_backup}, /* is_temporary_table= */ true, /* partitions= */ {}); } -void RestorerFromBackup::checkAccessForCollectedInfos() const +void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const { AccessRightsElements required_access; for (const auto & database_name : database_infos | boost::adaptors::map_keys) @@ -545,7 +542,7 @@ void RestorerFromBackup::checkAccessForCollectedInfos() const if (hasSystemTableEngine(*table_info.create_table_query)) continue; - if (table_name.is_temporary) + if (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) { if (restore_settings.create_table != RestoreTableCreationMode::kMustExist) required_access.emplace_back(AccessType::CREATE_TEMPORARY_TABLE); @@ -579,7 +576,7 @@ void RestorerFromBackup::checkAccessForCollectedInfos() const flags = AccessType::SHOW_TABLES; } - required_access.emplace_back(flags, table_name.name.database, table_name.name.table); + required_access.emplace_back(flags, table_name.database, table_name.table); } if (access_restore_task) @@ -611,7 +608,9 @@ void RestorerFromBackup::createDatabases() create_database_query->as().if_not_exists = true; } LOG_TRACE(log, "Creating database {}: {}", backQuoteIfNeed(database_name), serializeAST(*create_database_query)); - executeCreateQuery(create_database_query); + InterpreterCreateQuery interpreter{create_database_query, context}; + interpreter.setInternal(true); + interpreter.execute(); } DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name); @@ -644,15 +643,11 @@ void RestorerFromBackup::createTables() if (tables_to_create.empty()) break; /// We've already created all the tables. 
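        /// (Note: this loop runs in passes: each pass creates only the tables
        /// returned by findTablesWithoutDependencies(), i.e. those whose
        /// dependencies have already been created, and repeats until that list
        /// comes back empty and the `break` above is taken.)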
- for (const auto & table_key : tables_to_create) + for (const auto & table_name : tables_to_create) { - auto & table_info = table_infos.at(table_key); + auto & table_info = table_infos.at(table_name); - DatabasePtr database; - if (table_key.is_temporary) - database = DatabaseCatalog::instance().getDatabaseForTemporaryTables(); - else - database = DatabaseCatalog::instance().getDatabase(table_key.name.database); + DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database); bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist); if (need_create_table && hasSystemTableEngine(*table_info.create_table_query)) @@ -670,9 +665,8 @@ void RestorerFromBackup::createTables() } LOG_TRACE( log, - "Creating {}table {}: {}", - (table_key.is_temporary ? "temporary " : ""), - table_key.name.getFullName(), + "Creating {}: {}", + tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query)); database->createTableRestoredFromBackup(create_table_query, context, restore_coordination, create_table_timeout_ms); @@ -680,9 +674,9 @@ void RestorerFromBackup::createTables() table_info.created = true; - auto resolved_id = table_key.is_temporary - ? context->resolveStorageID(StorageID{"", table_key.name.table}, Context::ResolveExternal) - : context->resolveStorageID(StorageID{table_key.name.database, table_key.name.table}, Context::ResolveGlobal); + auto resolved_id = (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) + ? context->resolveStorageID(StorageID{"", table_name.table}, Context::ResolveExternal) + : context->resolveStorageID(StorageID{table_name.database, table_name.table}, Context::ResolveGlobal); auto storage = database->getTable(resolved_id.table_name, context); table_info.storage = storage; @@ -697,10 +691,9 @@ void RestorerFromBackup::createTables() { throw Exception( ErrorCodes::CANNOT_RESTORE_TABLE, - "The {}table {} has a different definition: {} " + "{} has a different definition: {} " "comparing to its definition in the backup: {}", - (table_key.is_temporary ? "temporary " : ""), - table_key.name.getFullName(), + tableNameWithTypeToString(table_name.database, table_name.table, true), serializeAST(*create_table_query), serializeAST(*expected_create_query)); } @@ -717,9 +710,9 @@ void RestorerFromBackup::createTables() } /// Returns the list of tables without dependencies or those which dependencies have been created before. -std::vector RestorerFromBackup::findTablesWithoutDependencies() const +std::vector RestorerFromBackup::findTablesWithoutDependencies() const { - std::vector tables_without_dependencies; + std::vector tables_without_dependencies; bool all_tables_created = true; for (const auto & [key, table_info] : table_infos) @@ -734,7 +727,7 @@ std::vector RestorerFromBackup::findTablesWithoutD bool all_dependencies_met = true; for (const auto & dependency : table_info.dependencies) { - auto it = table_infos.find(TableKey{dependency, false}); + auto it = table_infos.find(dependency); if ((it != table_infos.end()) && !it->second.created) { all_dependencies_met = false; @@ -753,7 +746,7 @@ std::vector RestorerFromBackup::findTablesWithoutD return {}; /// Cyclic dependency? We'll try to create those tables anyway but probably it's going to fail. 
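    /// (A hypothetical example of such a cycle: two materialized views whose create
    /// queries each select from the other; neither would ever satisfy the
    /// dependency-free pass above, so both would end up in this fallback list.)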
-    std::vector<TableKey> tables_with_cyclic_dependencies;
+    std::vector<QualifiedTableName> tables_with_cyclic_dependencies;
     for (const auto & [key, table_info] : table_infos)
     {
         if (!table_info.created)
@@ -766,7 +759,7 @@ std::vector RestorerFromBackup::findTablesWithoutD
             "Some tables have cyclic dependency from each other: {}",
             boost::algorithm::join(
                 tables_with_cyclic_dependencies
-                    | boost::adaptors::transformed([](const TableKey & key) -> String { return key.name.getFullName(); }),
+                    | boost::adaptors::transformed([](const QualifiedTableName & table_name) -> String { return table_name.getFullName(); }),
                 ", "));

     return tables_with_cyclic_dependencies;
@@ -786,19 +779,12 @@ void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
     insertAtEnd(data_restore_tasks, std::move(new_tasks));
 }

-void RestorerFromBackup::checkPathInBackupToRestoreAccess(const String & path)
+void RestorerFromBackup::checkPathInBackupIsRegisteredToRestoreAccess(const String & path)
 {
     if (!access_restore_task || !access_restore_task->hasDataPath(path))
         throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added");
 }

-void RestorerFromBackup::executeCreateQuery(const ASTPtr & create_query) const
-{
-    InterpreterCreateQuery interpreter{create_query, context};
-    interpreter.setInternal(true);
-    interpreter.execute();
-}
-
 void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine)
 {
     throw Exception(
diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h
index 86edf08b484..3bdbafe844c 100644
--- a/src/Backups/RestorerFromBackup.h
+++ b/src/Backups/RestorerFromBackup.h
@@ -48,15 +48,14 @@ public:
     std::shared_ptr<IRestoreCoordination> getRestoreCoordination() const { return restore_coordination; }
     std::chrono::seconds getTimeout() const { return timeout; }
     ContextMutablePtr getContext() const { return context; }
-    void executeCreateQuery(const ASTPtr & create_query) const;

     /// Adds a data restore task which will be later returned by getDataRestoreTasks().
     /// This function can be called by implementations of IStorage::restoreFromBackup() in inherited storage classes.
     void addDataRestoreTask(DataRestoreTask && new_task);
     void addDataRestoreTasks(DataRestoreTasks && new_tasks);

-    /// Adds a new data path to restore access control.
-    void checkPathInBackupToRestoreAccess(const String & path);
+    /// Checks that a specified path is already registered to be used for restoring access control.
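+    /// (Behaviour, as can be seen from the definition in the .cpp above: it throws
+    /// LOGICAL_ERROR when there is no access restore task or the path was never added to it.)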
+ void checkPathInBackupIsRegisteredToRestoreAccess(const String & path); /// Reading a backup includes a few stages: enum class Stage @@ -104,11 +103,14 @@ private: void run(bool only_check_access); void setStage(Stage new_stage, const String & error_message = {}); void findRootPathsInBackup(); - void collectDatabaseAndTableInfos(); - void collectTableInfo(const QualifiedTableName & table_name_in_backup, bool is_temporary_table, const std::optional & partitions); - void collectDatabaseInfo(const String & database_name_in_backup, const std::set & except_table_names, bool throw_if_no_database_metadata_in_backup); - void collectAllDatabasesInfo(const std::set & except_database_names, const std::set & except_table_names); - void checkAccessForCollectedInfos() const; + + void findDatabasesAndTablesInBackup(); + void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); + void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); + void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); + + void checkAccessForObjectsFoundInBackup() const; + void createDatabases(); void createTables(); @@ -128,18 +130,10 @@ private: TableLockHolder table_lock; }; - struct TableKey - { - QualifiedTableName name; - bool is_temporary = false; - bool operator ==(const TableKey & right) const; - bool operator <(const TableKey & right) const; - }; - - std::vector findTablesWithoutDependencies() const; + std::vector findTablesWithoutDependencies() const; std::unordered_map database_infos; - std::map table_infos; + std::map table_infos; std::vector data_restore_tasks; std::shared_ptr access_restore_task; }; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index caedfc55f3d..fc14d7abbd9 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -19,7 +19,6 @@ namespace DB namespace ErrorCodes { extern const int WRONG_DDL_RENAMING_SETTINGS; - extern const int LOGICAL_ERROR; } namespace @@ -31,24 +30,41 @@ namespace { /// CREATE TEMPORARY TABLE String table_name = create.getTable(); - const auto & new_table_name = data.renaming_map.getNewTemporaryTableName(table_name); - if (new_table_name != table_name) - create.setTable(new_table_name); + QualifiedTableName full_table_name{DatabaseCatalog::TEMPORARY_DATABASE, table_name}; + const auto & new_table_name = data.renaming_map.getNewTableName(full_table_name); + if (new_table_name != full_table_name) + { + create.setTable(new_table_name.table); + if (new_table_name.database != DatabaseCatalog::TEMPORARY_DATABASE) + { + create.temporary = false; + create.setDatabase(new_table_name.database); + } + } + } else if (create.table) { /// CREATE TABLE or CREATE DICTIONARY or CREATE VIEW - QualifiedTableName qualified_name; - qualified_name.table = create.getTable(); - qualified_name.database = create.getDatabase(); + QualifiedTableName full_name; + full_name.table = create.getTable(); + full_name.database = create.getDatabase(); - if (!qualified_name.database.empty() && !qualified_name.table.empty()) + if (!full_name.database.empty() && !full_name.table.empty()) { - auto new_qualified_name = data.renaming_map.getNewTableName(qualified_name); - if (new_qualified_name != qualified_name) + auto new_table_name = data.renaming_map.getNewTableName(full_name); + if (new_table_name != full_name) { - create.setTable(new_qualified_name.table); - 
create.setDatabase(new_qualified_name.database); + create.setTable(new_table_name.table); + if (new_table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) + { + create.temporary = true; + create.setDatabase(""); + } + else + { + create.setDatabase(new_table_name.database); + } } } } @@ -301,7 +317,7 @@ void renameDatabaseAndTableNameInCreateQuery(const ContextPtr & global_context, void DDLRenamingMap::setNewTableName(const QualifiedTableName & old_table_name, const QualifiedTableName & new_table_name) { if (old_table_name.table.empty() || old_table_name.database.empty() || new_table_name.table.empty() || new_table_name.database.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); + throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Empty names are not allowed"); auto it = old_to_new_table_names.find(old_table_name); if ((it != old_to_new_table_names.end())) @@ -321,7 +337,7 @@ void DDLRenamingMap::setNewTableName(const QualifiedTableName & old_table_name, void DDLRenamingMap::setNewDatabaseName(const String & old_database_name, const String & new_database_name) { if (old_database_name.empty() || new_database_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); + throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Empty names are not allowed"); auto it = old_to_new_database_names.find(old_database_name); if ((it != old_to_new_database_names.end())) @@ -351,28 +367,4 @@ QualifiedTableName DDLRenamingMap::getNewTableName(const QualifiedTableName & ol return {getNewDatabaseName(old_table_name.database), old_table_name.table}; } -void DDLRenamingMap::setNewTemporaryTableName(const String & old_table_name, const String & new_table_name) -{ - if (old_table_name.empty() || new_table_name.empty()) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty names are not allowed"); - - auto it = old_to_new_temporary_table_names.find(old_table_name); - if ((it != old_to_new_temporary_table_names.end())) - { - if (it->second == new_table_name) - return; - throw Exception(ErrorCodes::WRONG_DDL_RENAMING_SETTINGS, "Wrong renaming: it's specified that temporary table {} should be renamed to {} and to {} at the same time", - backQuoteIfNeed(old_table_name), backQuoteIfNeed(it->second), backQuoteIfNeed(new_table_name)); - } - old_to_new_temporary_table_names[old_table_name] = new_table_name; -} - -const String & DDLRenamingMap::getNewTemporaryTableName(const String & old_table_name) const -{ - auto it = old_to_new_temporary_table_names.find(old_table_name); - if (it != old_to_new_temporary_table_names.end()) - return it->second; - return old_table_name; -} - } diff --git a/src/Databases/DDLRenamingVisitor.h b/src/Databases/DDLRenamingVisitor.h index 9d0f770d105..72b578b9fcb 100644 --- a/src/Databases/DDLRenamingVisitor.h +++ b/src/Databases/DDLRenamingVisitor.h @@ -25,16 +25,13 @@ class DDLRenamingMap public: void setNewTableName(const QualifiedTableName & old_table_name, const QualifiedTableName & new_table_name); void setNewDatabaseName(const String & old_database_name, const String & new_database_name); - void setNewTemporaryTableName(const String & old_table_name, const String & new_table_name); QualifiedTableName getNewTableName(const QualifiedTableName & old_table_name) const; const String & getNewDatabaseName(const String & old_database_name) const; - const String & getNewTemporaryTableName(const String & old_table_name) const; private: std::unordered_map old_to_new_table_names; std::unordered_map 
old_to_new_database_names; - std::unordered_map old_to_new_temporary_table_names; }; /// Visits ASTCreateQuery and changes names of databases or tables. From aaf7f665498393f0cf051e78e825122bb105bf3a Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Jun 2022 19:45:36 +0200 Subject: [PATCH 087/121] Improve gathering metadata for backup - part 4. --- src/Backups/BackupCoordinationDistributed.cpp | 15 ++- src/Backups/BackupCoordinationDistributed.h | 7 +- src/Backups/BackupCoordinationHelpers.cpp | 95 +++++++++++-------- src/Backups/BackupCoordinationHelpers.h | 10 +- src/Backups/BackupCoordinationLocal.cpp | 8 +- src/Backups/BackupCoordinationLocal.h | 5 +- src/Backups/BackupEntriesCollector.cpp | 93 ++++++++++-------- src/Backups/BackupEntriesCollector.h | 29 +----- src/Backups/IBackupCoordination.h | 9 +- src/Backups/IRestoreCoordination.h | 9 +- .../RestoreCoordinationDistributed.cpp | 15 ++- src/Backups/RestoreCoordinationDistributed.h | 11 +-- src/Backups/RestoreCoordinationLocal.cpp | 8 +- src/Backups/RestoreCoordinationLocal.h | 9 +- src/Backups/RestorerFromBackup.cpp | 94 ++++++++++-------- src/Backups/RestorerFromBackup.h | 29 +----- 16 files changed, 232 insertions(+), 214 deletions(-) diff --git a/src/Backups/BackupCoordinationDistributed.cpp b/src/Backups/BackupCoordinationDistributed.cpp index 945239482fc..77377194012 100644 --- a/src/Backups/BackupCoordinationDistributed.cpp +++ b/src/Backups/BackupCoordinationDistributed.cpp @@ -131,7 +131,7 @@ namespace BackupCoordinationDistributed::BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) - , stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("BackupCoordination")) + , status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("BackupCoordination")) { createRootNodes(); } @@ -157,14 +157,19 @@ void BackupCoordinationDistributed::removeAllNodes() } -void BackupCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void BackupCoordinationDistributed::setStatus(const String & current_host, const String & new_status) { - stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout); + status_sync.set(current_host, new_status); } -void BackupCoordinationDistributed::syncStageError(const String & current_host, const String & error_message) +void BackupCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) { - stage_sync.syncStageError(current_host, error_message); + status_sync.setAndWait(current_host, new_status, other_hosts); +} + +void BackupCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +{ + status_sync.setAndWaitFor(current_host, new_status, other_hosts, timeout_ms); } diff --git a/src/Backups/BackupCoordinationDistributed.h b/src/Backups/BackupCoordinationDistributed.h index 2872e1f3ae4..03da567bf07 100644 --- a/src/Backups/BackupCoordinationDistributed.h +++ b/src/Backups/BackupCoordinationDistributed.h @@ -14,8 +14,9 @@ public: BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_); ~BackupCoordinationDistributed() override; - void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - void 
syncStageError(const String & current_host, const String & error_message) override; + void setStatus(const String & current_host, const String & new_status) override; + void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; + void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames( const String & table_zk_path, @@ -51,7 +52,7 @@ private: const String zookeeper_path; const zkutil::GetZooKeeper get_zookeeper; - BackupCoordinationStageSync stage_sync; + BackupCoordinationStatusSync status_sync; mutable std::mutex mutex; mutable std::optional replicated_part_names; diff --git a/src/Backups/BackupCoordinationHelpers.cpp b/src/Backups/BackupCoordinationHelpers.cpp index 9528f888770..7c77e488119 100644 --- a/src/Backups/BackupCoordinationHelpers.cpp +++ b/src/Backups/BackupCoordinationHelpers.cpp @@ -243,7 +243,7 @@ void BackupCoordinationReplicatedPartNames::preparePartNames() const /// Helps to wait until all hosts come to a specified stage. -BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_) +BackupCoordinationStatusSync::BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) , log(log_) @@ -251,20 +251,46 @@ BackupCoordinationStageSync::BackupCoordinationStageSync(const String & zookeepe createRootNodes(); } -void BackupCoordinationStageSync::createRootNodes() +void BackupCoordinationStatusSync::createRootNodes() { auto zookeeper = get_zookeeper(); zookeeper->createAncestors(zookeeper_path); zookeeper->createIfNotExists(zookeeper_path, ""); } -void BackupCoordinationStageSync::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status) { - /// Put new stage to ZooKeeper. - auto zookeeper = get_zookeeper(); - zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + std::to_string(new_stage), ""); + setImpl(current_host, new_status, {}, {}); +} - if (wait_hosts.empty() || ((wait_hosts.size() == 1) && (wait_hosts.front() == current_host))) +void BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) +{ + setImpl(current_host, new_status, other_hosts, {}); +} + +void BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +{ + setImpl(current_host, new_status, other_hosts, timeout_ms); +} + +void BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const Strings & other_hosts, const std::optional & timeout_ms) +{ + /// Put new status to ZooKeeper. 
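+    /// (Node naming scheme used below: "<host>|<status>". For example, a hypothetical
+    /// host "host1" reporting "error: oops" is stored as the node
+    /// "<zookeeper_path>/host1|error" whose value holds the message "oops".)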
+ auto zookeeper = get_zookeeper(); + + String result_status = new_status; + String message; + std::string_view error_prefix = "error: "; + bool is_error_status = new_status.starts_with(error_prefix); + if (is_error_status) + { + message = new_status.substr(error_prefix.length()); + result_status = "error"; + } + + zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + result_status, message); + + if (other_hosts.empty() || ((other_hosts.size() == 1) && (other_hosts.front() == current_host)) || is_error_status) return; /// Wait for other hosts. @@ -273,41 +299,35 @@ void BackupCoordinationStageSync::syncStage(const String & current_host, int new std::optional host_with_error; std::optional error_message; - std::map> unready_hosts; - for (const String & host : wait_hosts) - unready_hosts.emplace(host, std::optional{}); + std::map unready_hosts; + for (const String & host : other_hosts) + unready_hosts.emplace(host, ""); /// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`. auto process_zk_nodes = [&](const Strings & zk_nodes) { for (const String & zk_node : zk_nodes) { - if (zk_node == "error") + if (zk_node.starts_with("remove_watch-")) + continue; + + size_t separator_pos = zk_node.find('|'); + if (separator_pos == String::npos) + throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node); + String host = zk_node.substr(0, separator_pos); + String status = zk_node.substr(separator_pos + 1); + if (status == "error") { - String str = zookeeper->get(zookeeper_path + "/" + zk_node); - size_t separator_pos = str.find('|'); - if (separator_pos == String::npos) - throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected value of zk node {}: {}", zookeeper_path + "/" + zk_node, str); - host_with_error = str.substr(0, separator_pos); - error_message = str.substr(separator_pos + 1); - return; + host_with_error = host; + error_message = zookeeper->get(zookeeper_path + "/" + zk_node); + return; } - else if (!zk_node.starts_with("remove_watch-")) + auto it = unready_hosts.find(host); + if (it != unready_hosts.end()) { - size_t separator_pos = zk_node.find('|'); - if (separator_pos == String::npos) - throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node); - String host = zk_node.substr(0, separator_pos); - int found_stage = parseFromString(zk_node.substr(separator_pos + 1)); - auto it = unready_hosts.find(host); - if (it != unready_hosts.end()) - { - auto & stage = it->second; - if (!stage || (stage < found_stage)) - stage = found_stage; - if (stage >= new_stage) - unready_hosts.erase(it); - } + it->second = status; + if (status == result_status) + unready_hosts.erase(it); } } }; @@ -324,7 +344,8 @@ void BackupCoordinationStageSync::syncStage(const String & current_host, int new auto watch_triggered = [&] { return !watch_set; }; - bool use_timeout = (timeout.count() >= 0); + bool use_timeout = timeout_ms.has_value(); + std::chrono::milliseconds timeout{timeout_ms.value_or(0)}; std::chrono::steady_clock::time_point start_time = std::chrono::steady_clock::now(); std::chrono::steady_clock::duration elapsed; std::mutex dummy_mutex; @@ -371,10 +392,4 @@ void BackupCoordinationStageSync::syncStage(const String & current_host, int new } } -void BackupCoordinationStageSync::syncStageError(const String & current_host, const String & error_message) -{ - auto zookeeper = get_zookeeper(); - 
zookeeper->createIfNotExists(zookeeper_path + "/error", current_host + "|" + error_message); -} - } diff --git a/src/Backups/BackupCoordinationHelpers.h b/src/Backups/BackupCoordinationHelpers.h index b0cd0440b98..ea07543ecb8 100644 --- a/src/Backups/BackupCoordinationHelpers.h +++ b/src/Backups/BackupCoordinationHelpers.h @@ -58,16 +58,18 @@ private: /// Helps to wait until all hosts come to a specified stage. -class BackupCoordinationStageSync +class BackupCoordinationStatusSync { public: - BackupCoordinationStageSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_); + BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_); - void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout); - void syncStageError(const String & current_host, const String & error_message); + void set(const String & current_host, const String & new_status); + void setAndWait(const String & current_host, const String & new_status, const Strings & other_hosts); + void setAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms); private: void createRootNodes(); + void setImpl(const String & current_host, const String & new_status, const Strings & other_hosts, const std::optional & timeout_ms); String zookeeper_path; zkutil::GetZooKeeper get_zookeeper; diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 55a3c671a6e..bace1d800e0 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -13,11 +13,15 @@ using FileInfo = IBackupCoordination::FileInfo; BackupCoordinationLocal::BackupCoordinationLocal() = default; BackupCoordinationLocal::~BackupCoordinationLocal() = default; -void BackupCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds) +void BackupCoordinationLocal::setStatus(const String &, const String &) { } -void BackupCoordinationLocal::syncStageError(const String &, const String &) +void BackupCoordinationLocal::setStatusAndWait(const String &, const String &, const Strings &) +{ +} + +void BackupCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const Strings &, UInt64) { } diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 6529184c61a..090c5653f04 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -19,8 +19,9 @@ public: BackupCoordinationLocal(); ~BackupCoordinationLocal() override; - void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - void syncStageError(const String & current_host, const String & error_message) override; + void setStatus(const String & current_host, const String & new_status) override; + void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; + void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index feed9be4e92..63f6d75170c 100644 --- 
a/src/Backups/BackupEntriesCollector.cpp
+++ b/src/Backups/BackupEntriesCollector.cpp
@@ -29,27 +29,44 @@ namespace ErrorCodes

 namespace
 {
-    String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_char_uppercase)
-    {
-        if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
-            return fmt::format("{}emporary table {}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(table_name));
-        else
-            return fmt::format("{}able {}.{}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(database_name), backQuoteIfNeed(table_name));
-    }
-}
+    /// Initial status.
+    constexpr const char kPreparingStatus[] = "preparing";

-std::string_view BackupEntriesCollector::toString(Stage stage)
-{
-    switch (stage)
+    /// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
+    constexpr const char kGatheringMetadataStatus[] = "gathering metadata";
+
+    /// Making temporary hard links and preparing backup entries.
+    constexpr const char kExtractingDataFromTablesStatus[] = "extracting data from tables";
+
+    /// Running special tasks for replicated tables which can also prepare some backup entries.
+    constexpr const char kRunningPostTasksStatus[] = "running post-tasks";
+
+    /// Writing backup entries to the backup and removing temporary hard links.
+    constexpr const char kWritingBackupStatus[] = "writing backup";
+
+    /// Prefix for error statuses.
+    constexpr const char kErrorStatus[] = "error: ";
+
+    /// Uppercases the first character of a passed string.
+    String toUpperFirst(const String & str)
     {
-        case Stage::kPreparing: return "Preparing";
-        case Stage::kFindingTables: return "Finding tables";
-        case Stage::kExtractingDataFromTables: return "Extracting data from tables";
-        case Stage::kRunningPostTasks: return "Running post tasks";
-        case Stage::kWritingBackup: return "Writing backup";
-        case Stage::kError: return "Error";
+        String res = str;
+        res[0] = std::toupper(res[0]);
+        return res;
+    }
+
+    /// Outputs "table <name>" or "temporary table <name>".
+    String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper)
+    {
+        String str;
+        if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
+            str = fmt::format("temporary table {}", backQuoteIfNeed(table_name));
+        else
+            str = fmt::format("table {}.{}", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name));
+        if (first_upper)
+            str[0] = std::toupper(str[0]);
+        return str;
     }
-    throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown backup stage: {}", static_cast<int>(stage));
 }


@@ -65,6 +82,7 @@ BackupEntriesCollector::BackupEntriesCollector(
     , context(context_)
     , timeout(timeout_)
     , log(&Poco::Logger::get("BackupEntriesCollector"))
+    , current_status(kPreparingStatus)
 {
 }

@@ -75,7 +93,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries()
     try
     {
         /// getBackupEntries() must not be called multiple times.
-        if (current_stage != Stage::kPreparing)
+        if (current_status != kPreparingStatus)
             throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");

         /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
@@ -85,7 +103,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries()
         renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements);

         /// Find databases and tables which we're going to put to the backup.
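        /// (All hosts of a distributed backup synchronize on the "gathering metadata"
        /// status here, so no host starts collecting metadata before the others are ready.)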
- setStage(Stage::kFindingTables); + setStatus(kGatheringMetadataStatus); gatherMetadataAndCheckConsistency(); /// Make backup entries for the definitions of the found databases. @@ -95,15 +113,15 @@ BackupEntries BackupEntriesCollector::getBackupEntries() makeBackupEntriesForTablesDefs(); /// Make backup entries for the data of the found tables. - setStage(Stage::kExtractingDataFromTables); + setStatus(kExtractingDataFromTablesStatus); makeBackupEntriesForTablesData(); /// Run all the tasks added with addPostCollectingTask(). - setStage(Stage::kRunningPostTasks); + setStatus(kRunningPostTasksStatus); runPostTasks(); /// No more backup entries or tasks are allowed after this point. - setStage(Stage::kWritingBackup); + setStatus(kWritingBackupStatus); return std::move(backup_entries); } @@ -111,7 +129,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries() { try { - setStage(Stage::kError, getCurrentExceptionMessage(false)); + setStatus(kErrorStatus + getCurrentExceptionMessage(false)); } catch (...) { @@ -120,24 +138,21 @@ BackupEntries BackupEntriesCollector::getBackupEntries() } } -void BackupEntriesCollector::setStage(Stage new_stage, const String & error_message) +void BackupEntriesCollector::setStatus(const String & new_status) { - if (new_stage == Stage::kError) - LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message); - else - LOG_TRACE(log, "{}", toString(new_stage)); - - current_stage = new_stage; - - if (new_stage == Stage::kError) + bool is_error_status = new_status.starts_with(kErrorStatus); + if (is_error_status) { - backup_coordination->syncStageError(backup_settings.host_id, error_message); + LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), new_status); + backup_coordination->setStatus(backup_settings.host_id, new_status); } else { + LOG_TRACE(log, "{}", toUpperFirst(new_status)); + current_status = new_status; auto all_hosts = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num); - backup_coordination->syncStage(backup_settings.host_id, static_cast(new_stage), all_hosts, timeout); + backup_coordination->setStatusAndWait(backup_settings.host_id, new_status, all_hosts); } } @@ -575,28 +590,28 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntryPtr backup_entry) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); backup_entries.emplace_back(file_name, backup_entry); } void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); insertAtEnd(backup_entries, backup_entries_); } void BackupEntriesCollector::addBackupEntries(BackupEntries && backup_entries_) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding backup entries is not allowed"); insertAtEnd(backup_entries, std::move(backup_entries_)); } void BackupEntriesCollector::addPostTask(std::function task) { - if (current_stage == Stage::kWritingBackup) + if (current_status == kWritingBackupStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding post tasks is not allowed"); 
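    /// Post-tasks queued here still run under the "running post-tasks" status,
    /// i.e. before setStatus(kWritingBackupStatus) forbids adding further entries.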
post_tasks.push(std::move(task)); } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 5e37e268fc4..4afca3f4cf9 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -51,35 +51,10 @@ public: /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts. void addPostTask(std::function task); - /// Writing a backup includes a few stages: - enum class Stage - { - /// Initial stage. - kPreparing, - - /// Finding all tables and databases which we're going to put to the backup. - kFindingTables, - - /// Making temporary hard links and prepare backup entries. - kExtractingDataFromTables, - - /// Running special tasks for replicated databases or tables which can also prepare some backup entries. - kRunningPostTasks, - - /// Writing backup entries to the backup and removing temporary hard links. - kWritingBackup, - - /// An error happens during any of the stages above, the backup won't be written. - kError, - }; - static std::string_view toString(Stage stage); - /// Throws an exception that a specified table engine doesn't support partitions. [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); private: - void setStage(Stage new_stage, const String & error_message = {}); - void calculateRootPathInBackup(); void gatherMetadataAndCheckConsistency(); @@ -105,6 +80,8 @@ private: void makeBackupEntriesForTablesData(); void runPostTasks(); + void setStatus(const String & new_status); + const ASTBackupQuery::Elements backup_query_elements; const BackupSettings backup_settings; std::shared_ptr backup_coordination; @@ -112,7 +89,7 @@ private: std::chrono::seconds timeout; Poco::Logger * log; - Stage current_stage = Stage::kPreparing; + String current_status; std::filesystem::path root_path_in_backup; DDLRenamingMap renaming_map; diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 92b7139ed5f..58ed45e810d 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -13,11 +13,10 @@ class IBackupCoordination public: virtual ~IBackupCoordination() = default; - /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - virtual void syncStageError(const String & current_host, const String & error_message) = 0; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. 
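+    /// A hypothetical call sequence on each host (identifiers are illustrative only):
+    ///     coordination->setStatusAndWait(my_host_id, "gathering metadata", all_host_ids);
+    ///     ... do the work for this status ...
+    ///     coordination->setStatus(my_host_id, "error: " + error_text);  /// on failure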
+ virtual void setStatus(const String & current_host, const String & new_status) = 0; + virtual void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) = 0; + virtual void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) = 0; struct PartNameAndChecksum { diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index fd9a67e1b96..b2ef32c2588 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -13,11 +13,10 @@ class IRestoreCoordination public: virtual ~IRestoreCoordination() = default; - /// Sets the current stage and waits for other hosts to come to this stage too. - virtual void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) = 0; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - virtual void syncStageError(const String & current_host, const String & error_message) = 0; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. + virtual void setStatus(const String & current_host, const String & new_status) = 0; + virtual void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) = 0; + virtual void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) = 0; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0; diff --git a/src/Backups/RestoreCoordinationDistributed.cpp b/src/Backups/RestoreCoordinationDistributed.cpp index e131ce7fe24..0b21f7367d8 100644 --- a/src/Backups/RestoreCoordinationDistributed.cpp +++ b/src/Backups/RestoreCoordinationDistributed.cpp @@ -9,7 +9,7 @@ namespace DB RestoreCoordinationDistributed::RestoreCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_) : zookeeper_path(zookeeper_path_) , get_zookeeper(get_zookeeper_) - , stage_sync(zookeeper_path_ + "/stage", get_zookeeper_, &Poco::Logger::get("RestoreCoordination")) + , status_sync(zookeeper_path_ + "/status", get_zookeeper_, &Poco::Logger::get("RestoreCoordination")) { createRootNodes(); } @@ -26,14 +26,19 @@ void RestoreCoordinationDistributed::createRootNodes() zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", ""); } -void RestoreCoordinationDistributed::syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) +void RestoreCoordinationDistributed::setStatus(const String & current_host, const String & new_status) { - stage_sync.syncStage(current_host, new_stage, wait_hosts, timeout); + status_sync.set(current_host, new_status); } -void RestoreCoordinationDistributed::syncStageError(const String & current_host, const String & error_message) +void RestoreCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) { - stage_sync.syncStageError(current_host, error_message); + status_sync.setAndWait(current_host, new_status, other_hosts); +} + +void RestoreCoordinationDistributed::setStatusAndWaitFor(const 
String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +{ + status_sync.setAndWaitFor(current_host, new_status, other_hosts, timeout_ms); } bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) diff --git a/src/Backups/RestoreCoordinationDistributed.h b/src/Backups/RestoreCoordinationDistributed.h index 0ea5db3f062..ecc8910bb9e 100644 --- a/src/Backups/RestoreCoordinationDistributed.h +++ b/src/Backups/RestoreCoordinationDistributed.h @@ -14,11 +14,10 @@ public: RestoreCoordinationDistributed(const String & zookeeper_path, zkutil::GetZooKeeper get_zookeeper); ~RestoreCoordinationDistributed() override; - /// Sets the current stage and waits for other hosts to come to this stage too. - void syncStage(const String & current_host, int new_stage, const Strings & wait_hosts, std::chrono::seconds timeout) override; - - /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage(). - void syncStageError(const String & current_host, const String & error_message) override; + /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. + void setStatus(const String & current_host, const String & new_status) override; + void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; + void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override; @@ -42,7 +41,7 @@ private: const String zookeeper_path; const zkutil::GetZooKeeper get_zookeeper; - BackupCoordinationStageSync stage_sync; + BackupCoordinationStatusSync status_sync; }; } diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index 9cecc3f90c9..e654ace454d 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -7,11 +7,15 @@ namespace DB RestoreCoordinationLocal::RestoreCoordinationLocal() = default; RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; -void RestoreCoordinationLocal::syncStage(const String &, int, const Strings &, std::chrono::seconds) +void RestoreCoordinationLocal::setStatus(const String &, const String &) { } -void RestoreCoordinationLocal::syncStageError(const String &, const String &) +void RestoreCoordinationLocal::setStatusAndWait(const String &, const String &, const Strings &) +{ +} + +void RestoreCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const Strings &, UInt64) { } diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index b73f345df47..f38e1f94b9c 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -17,11 +17,10 @@ public: RestoreCoordinationLocal(); ~RestoreCoordinationLocal() override; - /// Sets the current stage and waits for other hosts to come to this stage too. 
-    void syncStage(const String & current_host, int stage, const Strings & wait_hosts, std::chrono::seconds timeout) override;
-
-    /// Sets that the current host encountered an error, so other hosts should know that and stop waiting in syncStage().
-    void syncStageError(const String & current_host, const String & error_message) override;
+    /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts.
+    void setStatus(const String & current_host, const String & new_status) override;
+    void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override;
+    void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override;

     /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
     bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index 74d4de631e3..bbe11525c58 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -39,12 +39,43 @@ namespace ErrorCodes

 namespace
 {
-    String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_char_uppercase)
+    /// Initial status.
+    constexpr const char kPreparingStatus[] = "preparing";
+
+    /// Finding databases and tables in the backup which we're going to restore.
+    constexpr const char kFindingTablesInBackupStatus[] = "finding tables in backup";
+
+    /// Creating databases or finding them and checking their definitions.
+    constexpr const char kCreatingDatabasesStatus[] = "creating databases";
+
+    /// Creating tables or finding them and checking their definitions.
+    constexpr const char kCreatingTablesStatus[] = "creating tables";
+
+    /// Inserting restored data to tables.
+    constexpr const char kInsertingDataToTablesStatus[] = "inserting data to tables";
+
+    /// Prefix for error statuses.
+    constexpr const char kErrorStatus[] = "error: ";
+
+    /// Uppercases the first character of a passed string.
+    String toUpperFirst(const String & str)
     {
+        String res = str;
+        res[0] = std::toupper(res[0]);
+        return res;
+    }
+
+    /// Outputs "table <name>" or "temporary table <name>".
+    String tableNameWithTypeToString(const String & database_name, const String & table_name, bool first_upper)
+    {
+        String str;
         if (database_name == DatabaseCatalog::TEMPORARY_DATABASE)
-            return fmt::format("{}emporary table {}", first_char_uppercase ? 'T' : 't', backQuoteIfNeed(table_name));
+            str = fmt::format("temporary table {}", backQuoteIfNeed(table_name));
         else
-            return fmt::format("{}able {}.{}", first_char_uppercase ?
'T' : 't', backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + str = fmt::format("table {}.{}", backQuoteIfNeed(database_name), backQuoteIfNeed(table_name)); + if (first_upper) + str[0] = std::toupper(str[0]); + return str; } String tryGetTableEngine(const IAST & ast) @@ -70,20 +101,6 @@ namespace } } -std::string_view RestorerFromBackup::toString(Stage stage) -{ - switch (stage) - { - case Stage::kPreparing: return "Preparing"; - case Stage::kFindingTablesInBackup: return "Finding tables in backup"; - case Stage::kCreatingDatabases: return "Creating databases"; - case Stage::kCreatingTables: return "Creating tables"; - case Stage::kInsertingDataToTables: return "Inserting data to tables"; - case Stage::kError: return "Error"; - } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown restore stage: {}", static_cast(stage)); -} - RestorerFromBackup::RestorerFromBackup( const ASTBackupQuery::Elements & restore_query_elements_, @@ -100,6 +117,7 @@ RestorerFromBackup::RestorerFromBackup( , timeout(timeout_) , create_table_timeout_ms(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000)) , log(&Poco::Logger::get("RestorerFromBackup")) + , current_status(kPreparingStatus) { } @@ -120,7 +138,7 @@ void RestorerFromBackup::run(bool only_check_access) try { /// restoreMetadata() must not be called multiple times. - if (current_stage != Stage::kPreparing) + if (current_status != kPreparingStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring"); /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". @@ -130,7 +148,7 @@ void RestorerFromBackup::run(bool only_check_access) renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); /// Find all the databases and tables which we will read from the backup. - setStage(Stage::kFindingTablesInBackup); + setStatus(kFindingTablesInBackupStatus); findDatabasesAndTablesInBackup(); /// Check access rights. @@ -140,23 +158,23 @@ void RestorerFromBackup::run(bool only_check_access) return; /// Create databases using the create queries read from the backup. - setStage(Stage::kCreatingDatabases); + setStatus(kCreatingDatabasesStatus); createDatabases(); /// Create tables using the create queries read from the backup. - setStage(Stage::kCreatingTables); + setStatus(kCreatingTablesStatus); createTables(); /// All what's left is to insert data to tables. /// No more data restoring tasks are allowed after this point. - setStage(Stage::kInsertingDataToTables); + setStatus(kInsertingDataToTablesStatus); } catch (...) { try { /// Other hosts should know that we've encountered an error. - setStage(Stage::kError, getCurrentExceptionMessage(false)); + setStatus(kErrorStatus + getCurrentExceptionMessage(false)); } catch (...) 
{ @@ -168,7 +186,7 @@ void RestorerFromBackup::run(bool only_check_access) RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() { - if (current_stage != Stage::kInsertingDataToTables) + if (current_status != kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Metadata wasn't restored"); if (data_restore_tasks.empty() && !access_restore_task) @@ -197,27 +215,23 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() return res_tasks; } -void RestorerFromBackup::setStage(Stage new_stage, const String & error_message) +void RestorerFromBackup::setStatus(const String & new_status) { - if (new_stage == Stage::kError) - LOG_ERROR(log, "{} failed with error: {}", toString(current_stage), error_message); - else - LOG_TRACE(log, "{}", toString(new_stage)); - - current_stage = new_stage; - - if (!restore_coordination) - return; - - if (new_stage == Stage::kError) + bool is_error_status = new_status.starts_with(kErrorStatus); + if (is_error_status) { - restore_coordination->syncStageError(restore_settings.host_id, error_message); + LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), new_status); + if (restore_coordination) + restore_coordination->setStatus(restore_settings.host_id, new_status); } else { + LOG_TRACE(log, "{}", toUpperFirst(new_status)); + current_status = new_status; auto all_hosts = BackupSettings::Util::filterHostIDs(restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); - restore_coordination->syncStage(restore_settings.host_id, static_cast(new_stage), all_hosts, timeout); + if (restore_coordination) + restore_coordination->setStatusAndWait(restore_settings.host_id, new_status, all_hosts); } } @@ -767,14 +781,14 @@ std::vector RestorerFromBackup::findTablesWithoutDependencie void RestorerFromBackup::addDataRestoreTask(DataRestoreTask && new_task) { - if (current_stage == Stage::kInsertingDataToTables) + if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); data_restore_tasks.push_back(std::move(new_task)); } void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks) { - if (current_stage == Stage::kInsertingDataToTables) + if (current_status == kInsertingDataToTablesStatus) throw Exception(ErrorCodes::LOGICAL_ERROR, "Adding data-restoring tasks is not allowed"); insertAtEnd(data_restore_tasks, std::move(new_tasks)); } diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 3bdbafe844c..07258837aab 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -57,29 +57,6 @@ public: /// Checks that a specified path is already registered to be used for restoring access control. void checkPathInBackupIsRegisteredToRestoreAccess(const String & path); - /// Reading a backup includes a few stages: - enum class Stage - { - /// Initial stage. - kPreparing, - - /// Finding databases and tables in the backup which we're going to restore. - kFindingTablesInBackup, - - /// Creating databases or finding them and checking their definitions. - kCreatingDatabases, - - /// Creating tables or finding them and checking their definition. - kCreatingTables, - - /// Inserting restored data to tables. - kInsertingDataToTables, - - /// An error happens during any of the stages above, the backup is not restored properly. 
- kError = -1, - }; - static std::string_view toString(Stage stage); - /// Throws an exception that a specified table engine doesn't support partitions. [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); @@ -96,12 +73,12 @@ private: UInt64 create_table_timeout_ms; Poco::Logger * log; - Stage current_stage = Stage::kPreparing; + String current_status; std::vector root_paths_in_backup; DDLRenamingMap renaming_map; void run(bool only_check_access); - void setStage(Stage new_stage, const String & error_message = {}); + void findRootPathsInBackup(); void findDatabasesAndTablesInBackup(); @@ -114,6 +91,8 @@ private: void createDatabases(); void createTables(); + void setStatus(const String & new_status); + struct DatabaseInfo { ASTPtr create_database_query; From 6ca400fd89ef5e5bbaf067a2b013a58b0fcf7039 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Thu, 23 Jun 2022 20:49:44 +0200 Subject: [PATCH 088/121] Improve gathering metadata for backup - part 5. --- src/Backups/BackupCoordinationDistributed.cpp | 12 +- src/Backups/BackupCoordinationDistributed.h | 6 +- src/Backups/BackupCoordinationHelpers.cpp | 61 +++-- src/Backups/BackupCoordinationHelpers.h | 10 +- src/Backups/BackupCoordinationLocal.cpp | 8 +- src/Backups/BackupCoordinationLocal.h | 6 +- src/Backups/BackupEntriesCollector.cpp | 210 ++++++++++++------ src/Backups/BackupEntriesCollector.h | 29 +-- src/Backups/BackupsWorker.cpp | 15 +- src/Backups/IBackupCoordination.h | 6 +- src/Backups/IRestoreCoordination.h | 6 +- .../RestoreCoordinationDistributed.cpp | 12 +- src/Backups/RestoreCoordinationDistributed.h | 6 +- src/Backups/RestoreCoordinationLocal.cpp | 8 +- src/Backups/RestoreCoordinationLocal.h | 6 +- src/Backups/RestorerFromBackup.cpp | 134 +++++------ src/Backups/RestorerFromBackup.h | 34 +-- 17 files changed, 317 insertions(+), 252 deletions(-) diff --git a/src/Backups/BackupCoordinationDistributed.cpp b/src/Backups/BackupCoordinationDistributed.cpp index 77377194012..9612b62dcdb 100644 --- a/src/Backups/BackupCoordinationDistributed.cpp +++ b/src/Backups/BackupCoordinationDistributed.cpp @@ -157,19 +157,19 @@ void BackupCoordinationDistributed::removeAllNodes() } -void BackupCoordinationDistributed::setStatus(const String & current_host, const String & new_status) +void BackupCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message) { - status_sync.set(current_host, new_status); + status_sync.set(current_host, new_status, message); } -void BackupCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) +Strings BackupCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) { - status_sync.setAndWait(current_host, new_status, other_hosts); + return status_sync.setAndWait(current_host, new_status, message, all_hosts); } -void BackupCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +Strings BackupCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) { - status_sync.setAndWaitFor(current_host, new_status, other_hosts, timeout_ms); + return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms); } diff --git 
a/src/Backups/BackupCoordinationDistributed.h b/src/Backups/BackupCoordinationDistributed.h index 03da567bf07..84cd4b3dddb 100644 --- a/src/Backups/BackupCoordinationDistributed.h +++ b/src/Backups/BackupCoordinationDistributed.h @@ -14,9 +14,9 @@ public: BackupCoordinationDistributed(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_); ~BackupCoordinationDistributed() override; - void setStatus(const String & current_host, const String & new_status) override; - void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; - void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames( const String & table_zk_path, diff --git a/src/Backups/BackupCoordinationHelpers.cpp b/src/Backups/BackupCoordinationHelpers.cpp index 7c77e488119..d86e8cff52c 100644 --- a/src/Backups/BackupCoordinationHelpers.cpp +++ b/src/Backups/BackupCoordinationHelpers.cpp @@ -258,51 +258,45 @@ void BackupCoordinationStatusSync::createRootNodes() zookeeper->createIfNotExists(zookeeper_path, ""); } -void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status) +void BackupCoordinationStatusSync::set(const String & current_host, const String & new_status, const String & message) { - setImpl(current_host, new_status, {}, {}); + setImpl(current_host, new_status, message, {}, {}); } -void BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) +Strings BackupCoordinationStatusSync::setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) { - setImpl(current_host, new_status, other_hosts, {}); + return setImpl(current_host, new_status, message, all_hosts, {}); } -void BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +Strings BackupCoordinationStatusSync::setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) { - setImpl(current_host, new_status, other_hosts, timeout_ms); + return setImpl(current_host, new_status, message, all_hosts, timeout_ms); } -void BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const Strings & other_hosts, const std::optional & timeout_ms) +Strings BackupCoordinationStatusSync::setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional & timeout_ms) { - /// Put new status to ZooKeeper. + /// Put new status to ZooKeeper. 
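+    /// Note (descriptive, matching the code below): the status is published as a znode named
+    /// "<host>|<status>" under `zookeeper_path`, with `message` stored as the znode's value; hosts
+    /// waiting in setAndWait()/setAndWaitFor() scan these children and read back the values of the
+    /// nodes whose status part matches the status they are waiting for.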
auto zookeeper = get_zookeeper();
+ zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + new_status, message);
- String result_status = new_status;
- String message;
- std::string_view error_prefix = "error: ";
- bool is_error_status = new_status.starts_with(error_prefix);
- if (is_error_status)
- {
- message = new_status.substr(error_prefix.length());
- result_status = "error";
- }
-
- zookeeper->createIfNotExists(zookeeper_path + "/" + current_host + "|" + result_status, message);
-
- if (other_hosts.empty() || ((other_hosts.size() == 1) && (other_hosts.front() == current_host)) || is_error_status)
- return;
+ if (all_hosts.empty() || (new_status == kErrorStatus))
+ return {};
+ if ((all_hosts.size() == 1) && (all_hosts.front() == current_host))
+ return {message};
+
/// Wait for other hosts.
- /// Current stages of all hosts.
+ Strings ready_hosts_results;
+ ready_hosts_results.resize(all_hosts.size());
+
+ std::map<String, std::vector<size_t> /* index in `ready_hosts_results` */> unready_hosts;
+ for (size_t i = 0; i != all_hosts.size(); ++i)
+ unready_hosts[all_hosts[i]].push_back(i);
+
std::optional<String> host_with_error;
std::optional<String> error_message;
- std::map<String, String> unready_hosts;
- for (const String & host : other_hosts)
- unready_hosts.emplace(host, "");
-
/// Process ZooKeeper's nodes and set `all_hosts_ready` or `unready_host` or `error_message`.
auto process_zk_nodes = [&](const Strings & zk_nodes)
{
@@ -316,18 +310,19 @@ void BackupCoordinationStatusSync::setImpl(const String & current_host, const St
throw Exception(ErrorCodes::FAILED_TO_SYNC_BACKUP_OR_RESTORE, "Unexpected zk node {}", zookeeper_path + "/" + zk_node);
String host = zk_node.substr(0, separator_pos);
String status = zk_node.substr(separator_pos + 1);
- if (status == "error")
+ if (status == kErrorStatus)
{
host_with_error = host;
error_message = zookeeper->get(zookeeper_path + "/" + zk_node);
return;
}
auto it = unready_hosts.find(host);
- if (it != unready_hosts.end())
+ if ((it != unready_hosts.end()) && (status == new_status))
{
- it->second = status;
- if (status == result_status)
- unready_hosts.erase(it);
+ String result = zookeeper->get(zookeeper_path + "/" + zk_node);
+ for (size_t i : it->second)
+ ready_hosts_results[i] = result;
+ unready_hosts.erase(it);
}
}
};
@@ -390,6 +385,8 @@ void BackupCoordinationStatusSync::setImpl(const String & current_host, const St
unready_hosts.begin()->first,
to_string(elapsed));
}
+
+ return ready_hosts_results;
}
}
diff --git a/src/Backups/BackupCoordinationHelpers.h b/src/Backups/BackupCoordinationHelpers.h
index ea07543ecb8..7d343edd3d0 100644
--- a/src/Backups/BackupCoordinationHelpers.h
+++ b/src/Backups/BackupCoordinationHelpers.h
@@ -63,13 +63,15 @@ class BackupCoordinationStatusSync
public:
BackupCoordinationStatusSync(const String & zookeeper_path_, zkutil::GetZooKeeper get_zookeeper_, Poco::Logger * log_);
- void set(const String & current_host, const String & new_status);
- void setAndWait(const String & current_host, const String & new_status, const Strings & other_hosts);
- void setAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms);
+ void set(const String & current_host, const String & new_status, const String & message);
+ Strings setAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts);
+ Strings setAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms);
+
+ static
constexpr const char * kErrorStatus = "error"; private: void createRootNodes(); - void setImpl(const String & current_host, const String & new_status, const Strings & other_hosts, const std::optional & timeout_ms); + Strings setImpl(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, const std::optional & timeout_ms); String zookeeper_path; zkutil::GetZooKeeper get_zookeeper; diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index bace1d800e0..6ff9800797e 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -13,16 +13,18 @@ using FileInfo = IBackupCoordination::FileInfo; BackupCoordinationLocal::BackupCoordinationLocal() = default; BackupCoordinationLocal::~BackupCoordinationLocal() = default; -void BackupCoordinationLocal::setStatus(const String &, const String &) +void BackupCoordinationLocal::setStatus(const String &, const String &, const String &) { } -void BackupCoordinationLocal::setStatusAndWait(const String &, const String &, const Strings &) +Strings BackupCoordinationLocal::setStatusAndWait(const String &, const String &, const String &, const Strings &) { + return {}; } -void BackupCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const Strings &, UInt64) +Strings BackupCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const String &, const Strings &, UInt64) { + return {}; } void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 090c5653f04..7fdd88d37cb 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -19,9 +19,9 @@ public: BackupCoordinationLocal(); ~BackupCoordinationLocal() override; - void setStatus(const String & current_host, const String & new_status) override; - void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; - void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 63f6d75170c..1ba94552589 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -9,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -29,23 +31,20 @@ namespace ErrorCodes namespace { - /// Initial status. 
- constexpr const char kPreparingStatus[] = "preparing";
-
/// Finding all tables and databases which we're going to put to the backup and collecting their metadata.
- constexpr const char kGatheringMetadataStatus[] = "gathering metadata";
+ constexpr const char * kGatheringMetadataStatus = "gathering metadata";
/// Making temporary hard links and prepare backup entries.
- constexpr const char kExtractingDataFromTablesStatus[] = "extracting data from tables";
+ constexpr const char * kExtractingDataFromTablesStatus = "extracting data from tables";
/// Running special tasks for replicated tables which can also prepare some backup entries.
- constexpr const char kRunningPostTasksStatus[] = "running post-tasks";
+ constexpr const char * kRunningPostTasksStatus = "running post-tasks";
/// Writing backup entries to the backup and removing temporary hard links.
- constexpr const char kWritingBackupStatus[] = "writing backup";
+ constexpr const char * kWritingBackupStatus = "writing backup";
- /// Prefix for error statuses.
- constexpr const char kErrorStatus[] = "error: ";
+ /// Error status.
+ constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus;
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
@@ -67,6 +66,19 @@ namespace
str[0] = std::toupper(str[0]);
return str;
}
+
+ /// How long we should sleep after finding an inconsistency error.
+ std::chrono::milliseconds getSleepTimeAfterInconsistencyError(size_t pass)
+ {
+ size_t ms;
+ if (pass == 1) /* pass is 1-based */
+ ms = 0;
+ else if ((pass % 10) != 1)
+ ms = 0;
+ else
+ ms = 1000;
+ return std::chrono::milliseconds{ms};
+ }
}
@@ -74,36 +86,37 @@ BackupEntriesCollector::BackupEntriesCollector(
const ASTBackupQuery::Elements & backup_query_elements_,
const BackupSettings & backup_settings_,
std::shared_ptr<IBackupCoordination> backup_coordination_,
- const ContextPtr & context_,
- std::chrono::seconds timeout_)
+ const ContextPtr & context_)
: backup_query_elements(backup_query_elements_)
, backup_settings(backup_settings_)
, backup_coordination(backup_coordination_)
, context(context_)
- , timeout(timeout_)
+ , consistent_metadata_snapshot_timeout(context->getConfigRef().getUInt64("backups.consistent_metadata_snapshot_timeout", 300000))
, log(&Poco::Logger::get("BackupEntriesCollector"))
- , current_status(kPreparingStatus)
{
}
BackupEntriesCollector::~BackupEntriesCollector() = default;
-BackupEntries BackupEntriesCollector::getBackupEntries()
+BackupEntries BackupEntriesCollector::run()
{
try
{
- /// getBackupEntries() must not be called multiple times.
- if (current_status != kPreparingStatus)
+ /// run() can be called only once.
+ if (!current_status.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already making backup entries");
- /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
- calculateRootPathInBackup();
+ /// Find other hosts working along with us to execute this ON CLUSTER query.
+ all_hosts
+ = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(backup_query_elements);
+ /// Calculate the root path for collecting backup entries, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
+ calculateRootPathInBackup();
+
/// Find databases and tables which we're going to put to the backup.
- setStatus(kGatheringMetadataStatus);
gatherMetadataAndCheckConsistency();
/// Make backup entries for the definitions of the found databases.
@@ -129,7 +142,7 @@ BackupEntries BackupEntriesCollector::getBackupEntries()
{
try
{
- setStatus(kErrorStatus + getCurrentExceptionMessage(false));
+ setStatus(kErrorStatus, getCurrentExceptionMessage(false));
}
catch (...)
{
@@ -138,21 +151,34 @@ BackupEntries BackupEntriesCollector::getBackupEntries()
}
}
-void BackupEntriesCollector::setStatus(const String & new_status)
+Strings BackupEntriesCollector::setStatus(const String & new_status, const String & message)
{
- bool is_error_status = new_status.starts_with(kErrorStatus);
- if (is_error_status)
+ if (new_status == kErrorStatus)
{
- LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), new_status);
- backup_coordination->setStatus(backup_settings.host_id, new_status);
+ LOG_ERROR(log, "{} failed with error: {}", toUpperFirst(current_status), message);
+ backup_coordination->setStatus(backup_settings.host_id, new_status, message);
+ return {};
}
else
{
LOG_TRACE(log, "{}", toUpperFirst(new_status));
current_status = new_status;
- auto all_hosts
- = BackupSettings::Util::filterHostIDs(backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
- backup_coordination->setStatusAndWait(backup_settings.host_id, new_status, all_hosts);
+ if (new_status.starts_with(kGatheringMetadataStatus))
+ {
+ auto now = std::chrono::steady_clock::now();
+ auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout);
+
+ return backup_coordination->setStatusAndWaitFor(
+ backup_settings.host_id,
+ new_status,
+ message,
+ all_hosts,
+ std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - now).count());
+ }
+ else
+ {
+ return backup_coordination->setStatusAndWait(backup_settings.host_id, new_status, message, all_hosts);
+ }
}
}
@@ -173,45 +199,87 @@ void BackupEntriesCollector::calculateRootPathInBackup()
/// Finds databases and tables which we will put to the backup.
void BackupEntriesCollector::gatherMetadataAndCheckConsistency()
{
- bool use_timeout = (timeout.count() >= 0);
- auto start_time = std::chrono::steady_clock::now();
+ consistent_metadata_snapshot_start_time = std::chrono::steady_clock::now();
+ auto end_of_timeout = consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout;
+ setStatus(fmt::format("{} ({})", kGatheringMetadataStatus, 1));
for (size_t pass = 1;; ++pass)
{
- try
+ String new_status = fmt::format("{} ({})", kGatheringMetadataStatus, pass + 1);
+ std::optional<Exception> inconsistency_error;
+ if (tryGatherMetadataAndCompareWithPrevious(inconsistency_error))
{
- /// Collect information about databases and tables specified in the BACKUP query.
- database_infos.clear();
- table_infos.clear();
- gatherDatabasesMetadata();
- gatherTablesMetadata();
-
- /// We have to check consistency of collected information to protect from the case when some table or database is
- /// renamed during this collecting making the collected information invalid.
- auto comparing_error = compareWithPrevious();
- if (!comparing_error)
- break; /// no error, everything's fine
+ /// Gathered metadata and checked consistency; now make sure the other hosts have managed to do the same.
+ auto all_hosts_results = setStatus(new_status, "consistent");
- if (pass >= 2) /// Two passes is minimum (we need to compare with table names with previous ones to be sure we don't miss anything).
- throw *comparing_error;
- }
- catch (Exception & e)
- {
- if (e.code() != ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP)
- throw;
+ std::optional<String> host_with_inconsistency;
+ std::optional<String> inconsistency_error_on_other_host;
+ for (size_t i = 0; i != all_hosts.size(); ++i)
+ {
+ if ((i < all_hosts_results.size()) && (all_hosts_results[i] != "consistent"))
+ {
+ host_with_inconsistency = all_hosts[i];
+ inconsistency_error_on_other_host = all_hosts_results[i];
+ break;
+ }
+ }
- auto elapsed = std::chrono::steady_clock::now() - start_time;
- e.addMessage("Couldn't gather tables and databases to make a backup (pass #{}, elapsed {})", pass, to_string(elapsed));
- if (use_timeout && (elapsed > timeout))
- throw;
- else
- LOG_WARNING(log, "{}", e.displayText());
+ if (!host_with_inconsistency)
+ break; /// All hosts managed to gather metadata and everything is consistent, so we can proceed to writing the backup.
+
+ inconsistency_error = Exception{
+ ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP,
+ "Found inconsistency on host {}: {}",
+ *host_with_inconsistency,
+ *inconsistency_error_on_other_host};
}
+ else
+ {
+ /// Failed to gather metadata or something wasn't consistent. We'll let other hosts know that and try again.
+ setStatus(new_status, inconsistency_error->displayText());
+ }
+
+ /// Two passes are the minimum (we need to compare table names with the previous ones to be sure we don't miss anything).
+ if (pass >= 2)
+ {
+ if (std::chrono::steady_clock::now() > end_of_timeout)
+ inconsistency_error->rethrow();
+ else
+ LOG_WARNING(log, "{}", inconsistency_error->displayText());
+ }
+
+ auto sleep_time = getSleepTimeAfterInconsistencyError(pass);
+ if (sleep_time.count() > 0)
+ sleepForNanoseconds(std::chrono::duration_cast<std::chrono::nanoseconds>(sleep_time).count());
}
LOG_INFO(log, "Will backup {} databases and {} tables", database_infos.size(), table_infos.size());
}
+bool BackupEntriesCollector::tryGatherMetadataAndCompareWithPrevious(std::optional<Exception> & inconsistency_error)
+{
+ try
+ {
+ /// Collect information about databases and tables specified in the BACKUP query.
+ database_infos.clear();
+ table_infos.clear();
+ gatherDatabasesMetadata();
+ gatherTablesMetadata();
+ }
+ catch (Exception & e)
+ {
+ if (e.code() != ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP)
+ throw;
+
+ inconsistency_error = e;
+ return false;
+ }
+
+ /// We have to check consistency of collected information to protect from the case when some table or database is
+ /// renamed during this collecting making the collected information invalid.
+ return compareWithPrevious(inconsistency_error);
+}
+
void BackupEntriesCollector::gatherDatabasesMetadata()
{
/// Collect information about databases and tables specified in the BACKUP query.
@@ -465,7 +533,7 @@ void BackupEntriesCollector::lockTablesForReading()
}
/// Check consistency of collected information about databases and tables.
-std::optional<Exception> BackupEntriesCollector::compareWithPrevious()
+bool BackupEntriesCollector::compareWithPrevious(std::optional<Exception> & inconsistency_error)
{
/// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed
/// while we were scanning.
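For review purposes, a rough standalone sketch of the retry loop introduced above (the names are simplifications, not the real interfaces; `take_snapshot` stands in for gatherDatabasesMetadata()/gatherTablesMetadata(), and the backoff mirrors getSleepTimeAfterInconsistencyError()):

#include <chrono>
#include <functional>
#include <stdexcept>
#include <string>
#include <thread>

/// Rough model of the gathering loop: take metadata snapshots until two
/// consecutive snapshots agree, or give up when the timeout expires.
std::string gatherWithRetries(const std::function<std::string()> & take_snapshot, std::chrono::milliseconds timeout)
{
    const auto start = std::chrono::steady_clock::now();
    std::string previous;

    for (size_t pass = 1;; ++pass)
    {
        std::string snapshot = take_snapshot();
        /// Two passes are the minimum: the first pass has nothing to compare with.
        const bool consistent = (pass >= 2) && (snapshot == previous);
        previous = std::move(snapshot);

        if (consistent)
            return previous;

        if ((pass >= 2) && (std::chrono::steady_clock::now() - start > timeout))
            throw std::runtime_error("Inconsistent metadata for backup");

        /// Like getSleepTimeAfterInconsistencyError(): retry immediately, except
        /// sleep one second after every tenth unsuccessful pass (11, 21, ...).
        if ((pass != 1) && ((pass % 10) == 1))
            std::this_thread::sleep_for(std::chrono::seconds(1));
    }
}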
@@ -476,60 +544,64 @@ std::optional BackupEntriesCollector::compareWithPrevious() if (previous_database_names != database_names) { - std::optional comparing_error; + bool error_message_ready = false; for (const auto & database_name : database_names) { if (!previous_database_names.contains(database_name)) { - comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were added during scanning", backQuoteIfNeed(database_name)}; + inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were added during scanning", backQuoteIfNeed(database_name)}; + error_message_ready = true; break; } } - if (!comparing_error) + if (!error_message_ready) { for (const auto & database_name : previous_database_names) { if (!database_names.contains(database_name)) { - comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were removed during scanning", backQuoteIfNeed(database_name)}; + inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were removed during scanning", backQuoteIfNeed(database_name)}; + error_message_ready = true; break; } } } - assert(comparing_error); + assert(error_message_ready); previous_database_names = std::move(database_names); previous_table_names = std::move(table_names); - return comparing_error; + return false; } if (previous_table_names != table_names) { - std::optional comparing_error; + bool error_message_ready = false; for (const auto & table_name : table_names) { if (!previous_table_names.contains(table_name)) { - comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were added during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; + inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were added during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; + error_message_ready = true; break; } } - if (!comparing_error) + if (!error_message_ready) { for (const auto & table_name : previous_table_names) { if (!table_names.contains(table_name)) { - comparing_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were removed during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; + inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were removed during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; + error_message_ready = true; break; } } } - assert(comparing_error); + assert(error_message_ready); previous_table_names = std::move(table_names); - return comparing_error; + return false; } - return {}; + return true; } /// Make backup entries for all the definitions of all the databases found. diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 4afca3f4cf9..49901295003 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -27,26 +27,25 @@ public: BackupEntriesCollector(const ASTBackupQuery::Elements & backup_query_elements_, const BackupSettings & backup_settings_, std::shared_ptr backup_coordination_, - const ContextPtr & context_, - std::chrono::seconds timeout_ = std::chrono::seconds(-1) /* no timeout */); + const ContextPtr & context_); ~BackupEntriesCollector(); /// Collects backup entries and returns the result. - /// This function first generates a list of databases and then call IDatabase::backup() for each database from this list. 
- /// At this moment IDatabase::backup() calls IStorage::backup() and they both call addBackupEntry() to build a list of backup entries.
- BackupEntries getBackupEntries();
+ /// This function first generates a list of databases and then calls IDatabase::getTablesForBackup() for each database from this list.
+ /// Then it calls IStorage::backupData() to build a list of backup entries.
+ BackupEntries run();
const BackupSettings & getBackupSettings() const { return backup_settings; }
std::shared_ptr<IBackupCoordination> getBackupCoordination() const { return backup_coordination; }
ContextPtr getContext() const { return context; }
- /// Adds a backup entry which will be later returned by getBackupEntries().
- /// These function can be called by implementations of IStorage::backup() in inherited storage classes.
+ /// Adds a backup entry which will be later returned by run().
+ /// These functions can be called by implementations of IStorage::backupData() in inherited storage classes.
void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry);
void addBackupEntries(const BackupEntries & backup_entries_);
void addBackupEntries(BackupEntries && backup_entries_);
- /// Adds a function which must be called after all IStorage::backup() have finished their work on all hosts.
+ /// Adds a function which must be called after all IStorage::backupData() have finished their work on all hosts.
/// This function is designed to help making a backup consistent in some complex cases like
/// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts.
void addPostTask(std::function<void()> task);
@@ -59,6 +58,8 @@ private:
void gatherMetadataAndCheckConsistency();
+ bool tryGatherMetadataAndCompareWithPrevious(std::optional<Exception> & inconsistency_error);
+
void gatherDatabasesMetadata();
void gatherDatabaseMetadata(
@@ -73,25 +74,25 @@ private:
void gatherTablesMetadata();
void lockTablesForReading();
- std::optional<Exception> compareWithPrevious();
+ bool compareWithPrevious(std::optional<Exception> & inconsistency_error);
void makeBackupEntriesForDatabasesDefs();
void makeBackupEntriesForTablesDefs();
void makeBackupEntriesForTablesData();
void runPostTasks();
- void setStatus(const String & new_status);
+ Strings setStatus(const String & new_status, const String & message = "");
const ASTBackupQuery::Elements backup_query_elements;
const BackupSettings backup_settings;
std::shared_ptr<IBackupCoordination> backup_coordination;
ContextPtr context;
- std::chrono::seconds timeout;
+ std::chrono::milliseconds consistent_metadata_snapshot_timeout;
Poco::Logger * log;
- String current_status;
- std::filesystem::path root_path_in_backup;
+ Strings all_hosts;
DDLRenamingMap renaming_map;
+ std::filesystem::path root_path_in_backup;
struct DatabaseInfo
{
@@ -122,6 +123,8 @@ private:
std::optional<ASTs> partitions;
};
+ String current_status;
+ std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time;
std::unordered_map<String, DatabaseInfo> database_infos;
std::map<QualifiedTableName, TableInfo> table_infos;
std::set<String> previous_database_names;
diff --git a/src/Backups/BackupsWorker.cpp b/src/Backups/BackupsWorker.cpp
index 84dc63b4f9f..635b2810941 100644
--- a/src/Backups/BackupsWorker.cpp
+++ b/src/Backups/BackupsWorker.cpp
@@ -166,9 +166,8 @@ UUID BackupsWorker::startMakingBackup(const ASTPtr & query, const ContextPtr & c
BackupEntries backup_entries;
{
- auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.backup_prepare_timeout", -1)};
- BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination,
context_in_use, timeout}; - backup_entries = backup_entries_collector.getBackupEntries(); + BackupEntriesCollector backup_entries_collector{backup_query->elements, backup_settings, backup_coordination, context_in_use}; + backup_entries = backup_entries_collector.run(); } writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool); @@ -272,8 +271,8 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte String addr_database = address->default_database.empty() ? current_database : address->default_database; for (auto & element : restore_elements) element.setCurrentDatabase(addr_database); - RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use, {}}; - dummy_restorer.checkAccessOnly(); + RestorerFromBackup dummy_restorer{restore_elements, restore_settings, nullptr, backup, context_in_use}; + dummy_restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY); } } @@ -325,11 +324,9 @@ UUID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr conte DataRestoreTasks data_restore_tasks; { - auto timeout = std::chrono::seconds{context_in_use->getConfigRef().getInt("backups.restore_metadata_timeout", -1)}; RestorerFromBackup restorer{restore_query->elements, restore_settings, restore_coordination, - backup, context_in_use, timeout}; - restorer.restoreMetadata(); - data_restore_tasks = restorer.getDataRestoreTasks(); + backup, context_in_use}; + data_restore_tasks = restorer.run(RestorerFromBackup::RESTORE); } restoreTablesData(std::move(data_restore_tasks), restores_thread_pool); diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 58ed45e810d..9df5b9efdc4 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -14,9 +14,9 @@ public: virtual ~IBackupCoordination() = default; /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. - virtual void setStatus(const String & current_host, const String & new_status) = 0; - virtual void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) = 0; - virtual void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) = 0; + virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0; + virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0; + virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0; struct PartNameAndChecksum { diff --git a/src/Backups/IRestoreCoordination.h b/src/Backups/IRestoreCoordination.h index b2ef32c2588..ba76a6e0c99 100644 --- a/src/Backups/IRestoreCoordination.h +++ b/src/Backups/IRestoreCoordination.h @@ -14,9 +14,9 @@ public: virtual ~IRestoreCoordination() = default; /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. 
- virtual void setStatus(const String & current_host, const String & new_status) = 0; - virtual void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) = 0; - virtual void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) = 0; + virtual void setStatus(const String & current_host, const String & new_status, const String & message) = 0; + virtual Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts) = 0; + virtual Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & other_hosts, UInt64 timeout_ms) = 0; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. virtual bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) = 0; diff --git a/src/Backups/RestoreCoordinationDistributed.cpp b/src/Backups/RestoreCoordinationDistributed.cpp index 0b21f7367d8..8cbaa01810d 100644 --- a/src/Backups/RestoreCoordinationDistributed.cpp +++ b/src/Backups/RestoreCoordinationDistributed.cpp @@ -26,19 +26,19 @@ void RestoreCoordinationDistributed::createRootNodes() zookeeper->createIfNotExists(zookeeper_path + "/repl_access_storages_acquired", ""); } -void RestoreCoordinationDistributed::setStatus(const String & current_host, const String & new_status) +void RestoreCoordinationDistributed::setStatus(const String & current_host, const String & new_status, const String & message) { - status_sync.set(current_host, new_status); + status_sync.set(current_host, new_status, message); } -void RestoreCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) +Strings RestoreCoordinationDistributed::setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) { - status_sync.setAndWait(current_host, new_status, other_hosts); + return status_sync.setAndWait(current_host, new_status, message, all_hosts); } -void RestoreCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) +Strings RestoreCoordinationDistributed::setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) { - status_sync.setAndWaitFor(current_host, new_status, other_hosts, timeout_ms); + return status_sync.setAndWaitFor(current_host, new_status, message, all_hosts, timeout_ms); } bool RestoreCoordinationDistributed::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) diff --git a/src/Backups/RestoreCoordinationDistributed.h b/src/Backups/RestoreCoordinationDistributed.h index ecc8910bb9e..52b961cf0ef 100644 --- a/src/Backups/RestoreCoordinationDistributed.h +++ b/src/Backups/RestoreCoordinationDistributed.h @@ -15,9 +15,9 @@ public: ~RestoreCoordinationDistributed() override; /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. 
- void setStatus(const String & current_host, const String & new_status) override; - void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; - void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override; diff --git a/src/Backups/RestoreCoordinationLocal.cpp b/src/Backups/RestoreCoordinationLocal.cpp index e654ace454d..a999cc13195 100644 --- a/src/Backups/RestoreCoordinationLocal.cpp +++ b/src/Backups/RestoreCoordinationLocal.cpp @@ -7,16 +7,18 @@ namespace DB RestoreCoordinationLocal::RestoreCoordinationLocal() = default; RestoreCoordinationLocal::~RestoreCoordinationLocal() = default; -void RestoreCoordinationLocal::setStatus(const String &, const String &) +void RestoreCoordinationLocal::setStatus(const String &, const String &, const String &) { } -void RestoreCoordinationLocal::setStatusAndWait(const String &, const String &, const Strings &) +Strings RestoreCoordinationLocal::setStatusAndWait(const String &, const String &, const String &, const Strings &) { + return {}; } -void RestoreCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const Strings &, UInt64) +Strings RestoreCoordinationLocal::setStatusAndWaitFor(const String &, const String &, const String &, const Strings &, UInt64) { + return {}; } bool RestoreCoordinationLocal::acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) diff --git a/src/Backups/RestoreCoordinationLocal.h b/src/Backups/RestoreCoordinationLocal.h index f38e1f94b9c..68624481a7a 100644 --- a/src/Backups/RestoreCoordinationLocal.h +++ b/src/Backups/RestoreCoordinationLocal.h @@ -18,9 +18,9 @@ public: ~RestoreCoordinationLocal() override; /// Sets the current status and waits for other hosts to come to this status too. If status starts with "error:" it'll stop waiting on all the hosts. - void setStatus(const String & current_host, const String & new_status) override; - void setStatusAndWait(const String & current_host, const String & new_status, const Strings & other_hosts) override; - void setStatusAndWaitFor(const String & current_host, const String & new_status, const Strings & other_hosts, UInt64 timeout_ms) override; + void setStatus(const String & current_host, const String & new_status, const String & message) override; + Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; + Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; /// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table. 
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index bbe11525c58..12151eff08f 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -1,5 +1,6 @@
#include
#include
+#include
#include
#include
#include
@@ -39,23 +40,20 @@ namespace ErrorCodes
namespace
{
- /// Initial status.
- constexpr const char kPreparingStatus[] = "preparing";
-
/// Finding databases and tables in the backup which we're going to restore.
- constexpr const char kFindingTablesInBackupStatus[] = "finding tables in backup";
+ constexpr const char * kFindingTablesInBackupStatus = "finding tables in backup";
/// Creating databases or finding them and checking their definitions.
- constexpr const char kCreatingDatabasesStatus[] = "creating databases";
+ constexpr const char * kCreatingDatabasesStatus = "creating databases";
/// Creating tables or finding them and checking their definition.
- constexpr const char kCreatingTablesStatus[] = "creating tables";
+ constexpr const char * kCreatingTablesStatus = "creating tables";
/// Inserting restored data to tables.
- constexpr const char kInsertingDataToTablesStatus[] = "inserting data to tables";
+ constexpr const char * kInsertingDataToTablesStatus = "inserting data to tables";
- /// Prefix for error statuses.
- constexpr const char kErrorStatus[] = "error: ";
+ /// Error status.
+ constexpr const char * kErrorStatus = BackupCoordinationStatusSync::kErrorStatus;
/// Uppercases the first character of a passed string.
String toUpperFirst(const String & str)
@@ -107,46 +105,37 @@ RestorerFromBackup::RestorerFromBackup(
const RestoreSettings & restore_settings_,
std::shared_ptr<IRestoreCoordination> restore_coordination_,
const BackupPtr & backup_,
- const ContextMutablePtr & context_,
- std::chrono::seconds timeout_)
+ const ContextMutablePtr & context_)
: restore_query_elements(restore_query_elements_)
, restore_settings(restore_settings_)
, restore_coordination(restore_coordination_)
, backup(backup_)
, context(context_)
- , timeout(timeout_)
- , create_table_timeout_ms(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
+ , create_table_timeout(context->getConfigRef().getUInt64("backups.create_table_timeout", 300000))
, log(&Poco::Logger::get("RestorerFromBackup"))
- , current_status(kPreparingStatus)
{
}
RestorerFromBackup::~RestorerFromBackup() = default;
-void RestorerFromBackup::restoreMetadata()
-{
- run(/* only_check_access= */ false);
-}
-
-void RestorerFromBackup::checkAccessOnly()
-{
- run(/* only_check_access= */ true);
-}
-
-void RestorerFromBackup::run(bool only_check_access)
+RestorerFromBackup::DataRestoreTasks RestorerFromBackup::run(Mode mode)
{
try
{
- /// restoreMetadata() must not be called multiple times.
- if (current_status != kPreparingStatus)
+ /// run() can be called only once.
+ if (!current_status.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Already restoring");
- /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards/<shard_num>/replicas/<replica_num>/".
- findRootPathsInBackup();
+ /// Find other hosts working along with us to execute this ON CLUSTER query.
+ all_hosts = BackupSettings::Util::filterHostIDs(
+ restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
/// Do renaming in the create queries according to the renaming config.
renaming_map = makeRenamingMapFromBackupQuery(restore_query_elements); + /// Calculate the root path in the backup for restoring, it's either empty or has the format "shards//replicas//". + findRootPathsInBackup(); + /// Find all the databases and tables which we will read from the backup. setStatus(kFindingTablesInBackupStatus); findDatabasesAndTablesInBackup(); @@ -154,8 +143,8 @@ void RestorerFromBackup::run(bool only_check_access) /// Check access rights. checkAccessForObjectsFoundInBackup(); - if (only_check_access) - return; + if (mode == Mode::CHECK_ACCESS_ONLY) + return {}; /// Create databases using the create queries read from the backup. setStatus(kCreatingDatabasesStatus); @@ -168,13 +157,14 @@ void RestorerFromBackup::run(bool only_check_access) /// All what's left is to insert data to tables. /// No more data restoring tasks are allowed after this point. setStatus(kInsertingDataToTablesStatus); + return getDataRestoreTasks(); } catch (...) { try { /// Other hosts should know that we've encountered an error. - setStatus(kErrorStatus + getCurrentExceptionMessage(false)); + setStatus(kErrorStatus, getCurrentExceptionMessage(false)); } catch (...) { @@ -183,55 +173,20 @@ void RestorerFromBackup::run(bool only_check_access) } } - -RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() +void RestorerFromBackup::setStatus(const String & new_status, const String & message) { - if (current_status != kInsertingDataToTablesStatus) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Metadata wasn't restored"); - - if (data_restore_tasks.empty() && !access_restore_task) - return {}; - - LOG_TRACE(log, "Will insert data to tables"); - - /// Storages and table locks must exist while we're executing data restoring tasks. - auto storages = std::make_shared>(); - auto table_locks = std::make_shared>(); - storages->reserve(table_infos.size()); - table_locks->reserve(table_infos.size()); - for (const auto & table_info : table_infos | boost::adaptors::map_values) + if (new_status == kErrorStatus) { - storages->push_back(table_info.storage); - table_locks->push_back(table_info.table_lock); - } - - DataRestoreTasks res_tasks; - for (const auto & task : data_restore_tasks) - res_tasks.push_back([task, storages, table_locks] { task(); }); - - if (access_restore_task) - res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); }); - - return res_tasks; -} - -void RestorerFromBackup::setStatus(const String & new_status) -{ - bool is_error_status = new_status.starts_with(kErrorStatus); - if (is_error_status) - { - LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), new_status); + LOG_ERROR(log, "{} failed with {}", toUpperFirst(current_status), message); if (restore_coordination) - restore_coordination->setStatus(restore_settings.host_id, new_status); + restore_coordination->setStatus(restore_settings.host_id, new_status, message); } else { LOG_TRACE(log, "{}", toUpperFirst(new_status)); current_status = new_status; - auto all_hosts - = BackupSettings::Util::filterHostIDs(restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num); if (restore_coordination) - restore_coordination->setStatusAndWait(restore_settings.host_id, new_status, all_hosts); + restore_coordination->setStatusAndWait(restore_settings.host_id, new_status, message, all_hosts); } } @@ -677,13 +632,18 @@ void RestorerFromBackup::createTables() create_table_query = create_table_query->clone(); 
create_table_query->as<ASTCreateQuery &>().if_not_exists = true;
}
+
LOG_TRACE(
log,
"Creating {}: {}",
tableNameWithTypeToString(table_name.database, table_name.table, false),
serializeAST(*create_table_query));
- database->createTableRestoredFromBackup(create_table_query, context, restore_coordination, create_table_timeout_ms);
+ database->createTableRestoredFromBackup(
+ create_table_query,
+ context,
+ restore_coordination,
+ std::chrono::duration_cast<std::chrono::milliseconds>(create_table_timeout).count());
}
table_info.created = true;
@@ -799,6 +759,34 @@ void RestorerFromBackup::checkPathInBackupIsRegisteredToRestoreAccess(const Stri
throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added");
}
+RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks()
+{
+ if (data_restore_tasks.empty() && !access_restore_task)
+ return {};
+
+ LOG_TRACE(log, "Will insert data to tables");
+
+ /// Storages and table locks must exist while we're executing data restoring tasks.
+ auto storages = std::make_shared<std::vector<StoragePtr>>();
+ auto table_locks = std::make_shared<std::vector<TableLockHolder>>();
+ storages->reserve(table_infos.size());
+ table_locks->reserve(table_infos.size());
+ for (const auto & table_info : table_infos | boost::adaptors::map_values)
+ {
+ storages->push_back(table_info.storage);
+ table_locks->push_back(table_info.table_lock);
+ }
+
+ DataRestoreTasks res_tasks;
+ for (const auto & task : data_restore_tasks)
+ res_tasks.push_back([task, storages, table_locks] { task(); });
+
+ if (access_restore_task)
+ res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); });
+
+ return res_tasks;
+}
+
void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine)
{
throw Exception(
diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h
index 07258837aab..f4d19a10cf6 100644
--- a/src/Backups/RestorerFromBackup.h
+++ b/src/Backups/RestorerFromBackup.h
@@ -26,27 +26,29 @@ public:
const RestoreSettings & restore_settings_,
std::shared_ptr<IRestoreCoordination> restore_coordination_,
const BackupPtr & backup_,
- const ContextMutablePtr & context_,
- std::chrono::seconds timeout_);
+ const ContextMutablePtr & context_);
~RestorerFromBackup();
+ enum Mode
+ {
+ /// Restores databases and tables.
+ RESTORE,
- /// Restores the definition of databases and tables and prepares tasks to restore the data of the tables.
- /// restoreMetadata() checks access rights internally so checkAccessRightsOnly() shouldn't be called first.
- void restoreMetadata();
+ /// Only checks access rights without restoring anything.
+ CHECK_ACCESS_ONLY
+ };
- /// Only checks access rights without restoring anything.
- void checkAccessOnly();
using DataRestoreTask = std::function<void()>;
using DataRestoreTasks = std::vector<DataRestoreTask>;
+
+ /// Restores the metadata of databases and tables and returns tasks to restore the data of tables.
+ DataRestoreTasks run(Mode mode);
BackupPtr getBackup() const { return backup; }
const RestoreSettings & getRestoreSettings() const { return restore_settings; }
bool isNonEmptyTableAllowed() const { return getRestoreSettings().allow_non_empty_tables; }
std::shared_ptr<IRestoreCoordination> getRestoreCoordination() const { return restore_coordination; }
- std::chrono::seconds getTimeout() const { return timeout; }
ContextMutablePtr getContext() const { return context; }
/// Adds a data restore task which will be later returned by getDataRestoreTasks().
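The intended call pattern of the reworked single-entry-point interface, as a sketch for reviewers (it uses only the declarations above and is not actual ClickHouse code; error handling and threading as in BackupsWorker are omitted):

/// Sketch only: each RestorerFromBackup instance may be run() only once, so
/// access checking and actual restoring use separate instances.
void checkAccessOnly(RestorerFromBackup & restorer)
{
    restorer.run(RestorerFromBackup::CHECK_ACCESS_ONLY);  /// throws if some grant is missing
}

void restoreEverything(RestorerFromBackup & restorer)
{
    /// run(RESTORE) restores metadata synchronously and returns the tasks which
    /// insert data into the restored tables; the caller executes them afterwards
    /// (BackupsWorker hands them to a thread pool via restoreTablesData()).
    RestorerFromBackup::DataRestoreTasks tasks = restorer.run(RestorerFromBackup::RESTORE);
    for (const auto & task : tasks)
        task();
}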
@@ -69,15 +71,12 @@ private: std::shared_ptr restore_coordination; BackupPtr backup; ContextMutablePtr context; - std::chrono::seconds timeout; - UInt64 create_table_timeout_ms; + std::chrono::milliseconds create_table_timeout; Poco::Logger * log; - String current_status; - std::vector root_paths_in_backup; + Strings all_hosts; DDLRenamingMap renaming_map; - - void run(bool only_check_access); + std::vector root_paths_in_backup; void findRootPathsInBackup(); @@ -91,7 +90,9 @@ private: void createDatabases(); void createTables(); - void setStatus(const String & new_status); + DataRestoreTasks getDataRestoreTasks(); + + void setStatus(const String & new_status, const String & message = ""); struct DatabaseInfo { @@ -111,6 +112,7 @@ private: std::vector findTablesWithoutDependencies() const; + String current_status; std::unordered_map database_infos; std::map table_infos; std::vector data_restore_tasks; From 7689e0c36f1cb9e89faeb12694560d5fc350cdf9 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Fri, 24 Jun 2022 21:29:38 +0200 Subject: [PATCH 089/121] Improve gathering metadata for backup - part 6. --- src/Backups/BackupCoordinationDistributed.cpp | 22 +-- src/Backups/BackupCoordinationDistributed.h | 8 +- src/Backups/BackupCoordinationHelpers.cpp | 8 +- src/Backups/BackupCoordinationHelpers.h | 6 +- src/Backups/BackupCoordinationLocal.cpp | 16 +- src/Backups/BackupCoordinationLocal.h | 8 +- src/Backups/BackupEntriesCollector.cpp | 152 ++++++++++-------- src/Backups/BackupEntriesCollector.h | 7 +- src/Backups/DDLAdjustingForBackupVisitor.cpp | 114 +++++++++++++ src/Backups/DDLAdjustingForBackupVisitor.h | 36 +++++ src/Backups/IBackupCoordination.h | 8 +- src/Backups/RestorerFromBackup.cpp | 10 +- src/Databases/DDLRenamingVisitor.cpp | 15 +- src/Databases/DDLRenamingVisitor.h | 6 +- src/Databases/DatabaseReplicated.cpp | 40 +++++ src/Databases/DatabaseReplicated.h | 1 + src/Databases/IDatabase.cpp | 13 +- src/Databases/IDatabase.h | 3 - src/Storages/IStorage.cpp | 22 +-- .../extractZkPathFromCreateQuery.cpp | 60 +++++++ .../MergeTree/extractZkPathFromCreateQuery.h | 19 +++ src/Storages/StorageReplicatedMergeTree.cpp | 73 ++++----- src/Storages/StorageReplicatedMergeTree.h | 3 + .../test_backup_restore_on_cluster/test.py | 41 +++++ 24 files changed, 490 insertions(+), 201 deletions(-) create mode 100644 src/Backups/DDLAdjustingForBackupVisitor.cpp create mode 100644 src/Backups/DDLAdjustingForBackupVisitor.h create mode 100644 src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp create mode 100644 src/Storages/MergeTree/extractZkPathFromCreateQuery.h diff --git a/src/Backups/BackupCoordinationDistributed.cpp b/src/Backups/BackupCoordinationDistributed.cpp index 9612b62dcdb..9df17bf434e 100644 --- a/src/Backups/BackupCoordinationDistributed.cpp +++ b/src/Backups/BackupCoordinationDistributed.cpp @@ -174,7 +174,7 @@ Strings BackupCoordinationDistributed::setStatusAndWaitFor(const String & curren void BackupCoordinationDistributed::addReplicatedPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) @@ -186,39 +186,39 @@ void BackupCoordinationDistributed::addReplicatedPartNames( } auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_zk_path); + String path = zookeeper_path + "/repl_part_names/" + escapeForFileName(table_shared_id); zookeeper->createIfNotExists(path, ""); path += "/" + 
escapeForFileName(replica_name); zookeeper->create(path, ReplicatedPartNames::serialize(part_names_and_checksums, table_name_for_logs), zkutil::CreateMode::Persistent); } -Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationDistributed::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const { std::lock_guard lock{mutex}; prepareReplicatedPartNames(); - return replicated_part_names->getPartNames(table_zk_path, replica_name); + return replicated_part_names->getPartNames(table_shared_id, replica_name); } void BackupCoordinationDistributed::addReplicatedDataPath( - const String & table_zk_path, const String & data_path) + const String & table_shared_id, const String & data_path) { auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path); + String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id); + zookeeper->createIfNotExists(path, ""); + path += "/" + escapeForFileName(data_path); zookeeper->createIfNotExists(path, ""); - path += "/"; - zookeeper->create(path, data_path, zkutil::CreateMode::PersistentSequential); } -Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_zk_path) const +Strings BackupCoordinationDistributed::getReplicatedDataPaths(const String & table_shared_id) const { auto zookeeper = get_zookeeper(); - String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_zk_path); + String path = zookeeper_path + "/repl_data_paths/" + escapeForFileName(table_shared_id); Strings children = zookeeper->getChildren(path); Strings data_paths; data_paths.reserve(children.size()); for (const String & child : children) - data_paths.push_back(zookeeper->get(path + "/" + child)); + data_paths.push_back(unescapeForFileName(child)); return data_paths; } diff --git a/src/Backups/BackupCoordinationDistributed.h b/src/Backups/BackupCoordinationDistributed.h index 84cd4b3dddb..172c69edb20 100644 --- a/src/Backups/BackupCoordinationDistributed.h +++ b/src/Backups/BackupCoordinationDistributed.h @@ -19,15 +19,15 @@ public: Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; void addReplicatedPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_zk_path) const override; + void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; + Strings getReplicatedDataPaths(const String & table_shared_id) const override; void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; diff --git a/src/Backups/BackupCoordinationHelpers.cpp b/src/Backups/BackupCoordinationHelpers.cpp index d86e8cff52c..cca66f03aac 100644 --- a/src/Backups/BackupCoordinationHelpers.cpp +++ b/src/Backups/BackupCoordinationHelpers.cpp @@ 
-157,7 +157,7 @@ BackupCoordinationReplicatedPartNames::BackupCoordinationReplicatedPartNames() = BackupCoordinationReplicatedPartNames::~BackupCoordinationReplicatedPartNames() = default; void BackupCoordinationReplicatedPartNames::addPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) @@ -165,7 +165,7 @@ void BackupCoordinationReplicatedPartNames::addPartNames( if (part_names_prepared) throw Exception(ErrorCodes::LOGICAL_ERROR, "addPartNames() must not be called after getPartNames()"); - auto & table_info = table_infos[table_zk_path]; + auto & table_info = table_infos[table_shared_id]; if (!table_info.covered_parts_finder) table_info.covered_parts_finder = std::make_unique(table_name_for_logs); @@ -207,10 +207,10 @@ void BackupCoordinationReplicatedPartNames::addPartNames( } } -Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationReplicatedPartNames::getPartNames(const String & table_shared_id, const String & replica_name) const { preparePartNames(); - auto it = table_infos.find(table_zk_path); + auto it = table_infos.find(table_shared_id); if (it == table_infos.end()) return {}; const auto & replicas_parts = it->second.replicas_parts; diff --git a/src/Backups/BackupCoordinationHelpers.h b/src/Backups/BackupCoordinationHelpers.h index 7d343edd3d0..2e9e4b3cbde 100644 --- a/src/Backups/BackupCoordinationHelpers.h +++ b/src/Backups/BackupCoordinationHelpers.h @@ -24,7 +24,7 @@ public: /// getPartNames(). /// Checksums are used only to control that parts under the same names on different replicas are the same. void addPartNames( - const String & table_zk_path, + const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums); @@ -32,7 +32,7 @@ public: /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addPartNames() but without duplications and without /// parts covered by another parts. - Strings getPartNames(const String & table_zk_path, const String & replica_name) const; + Strings getPartNames(const String & table_shared_id, const String & replica_name) const; private: void preparePartNames() const; @@ -52,7 +52,7 @@ private: std::unique_ptr covered_parts_finder; }; - std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. + std::map table_infos; /// Should be ordered because we need this map to be in the same order on every replica. 
mutable bool part_names_prepared = false; }; diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 6ff9800797e..7fd6fec6c33 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -27,29 +27,29 @@ Strings BackupCoordinationLocal::setStatusAndWaitFor(const String &, const Strin return {}; } -void BackupCoordinationLocal::addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) +void BackupCoordinationLocal::addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) { std::lock_guard lock{mutex}; - replicated_part_names.addPartNames(table_zk_path, table_name_for_logs, replica_name, part_names_and_checksums); + replicated_part_names.addPartNames(table_shared_id, table_name_for_logs, replica_name, part_names_and_checksums); } -Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const +Strings BackupCoordinationLocal::getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const { std::lock_guard lock{mutex}; - return replicated_part_names.getPartNames(table_zk_path, replica_name); + return replicated_part_names.getPartNames(table_shared_id, replica_name); } -void BackupCoordinationLocal::addReplicatedDataPath(const String & table_zk_path, const String & data_path) +void BackupCoordinationLocal::addReplicatedDataPath(const String & table_shared_id, const String & data_path) { std::lock_guard lock{mutex}; - replicated_data_paths[table_zk_path].push_back(data_path); + replicated_data_paths[table_shared_id].push_back(data_path); } -Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_zk_path) const +Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_shared_id) const { std::lock_guard lock{mutex}; - auto it = replicated_data_paths.find(table_zk_path); + auto it = replicated_data_paths.find(table_shared_id); if (it == replicated_data_paths.end()) return {}; return it->second; diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 7fdd88d37cb..519c721c208 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -23,12 +23,12 @@ public: Strings setStatusAndWait(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts) override; Strings setStatusAndWaitFor(const String & current_host, const String & new_status, const String & message, const Strings & all_hosts, UInt64 timeout_ms) override; - void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, + void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) override; - Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const override; + Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const override; - void addReplicatedDataPath(const String & table_zk_path, const String & data_path) override; - Strings getReplicatedDataPaths(const String & table_zk_path) const override; + void addReplicatedDataPath(const String & 
table_shared_id, const String & data_path) override; + Strings getReplicatedDataPaths(const String & table_shared_id) const override; void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 1ba94552589..8104e363a68 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -389,7 +390,7 @@ void BackupEntriesCollector::gatherDatabaseMetadata( ASTPtr create_database_query; try { - create_database_query = database_info.database->getCreateDatabaseQueryForBackup(); + create_database_query = database_info.database->getCreateDatabaseQuery(); } catch (...) { @@ -537,68 +538,86 @@ bool BackupEntriesCollector::compareWithPrevious(std::optional & inco { /// We need to scan tables at least twice to be sure that we haven't missed any table which could be renamed /// while we were scanning. - std::set database_names; - std::set table_names; - boost::range::copy(database_infos | boost::adaptors::map_keys, std::inserter(database_names, database_names.end())); - boost::range::copy(table_infos | boost::adaptors::map_keys, std::inserter(table_names, table_names.end())); + std::vector> databases_metadata; + std::vector> tables_metadata; + databases_metadata.reserve(database_infos.size()); + tables_metadata.reserve(table_infos.size()); + for (const auto & [database_name, database_info] : database_infos) + databases_metadata.emplace_back(database_name, database_info.create_database_query ? serializeAST(*database_info.create_database_query) : ""); + for (const auto & [table_name, table_info] : table_infos) + tables_metadata.emplace_back(table_name, serializeAST(*table_info.create_table_query)); - if (previous_database_names != database_names) + /// We need to sort the lists to make the comparison below correct. + ::sort(databases_metadata.begin(), databases_metadata.end()); + ::sort(tables_metadata.begin(), tables_metadata.end()); + + SCOPE_EXIT({ + previous_databases_metadata = std::move(databases_metadata); + previous_tables_metadata = std::move(tables_metadata); + }); + + /// Databases must be the same as during the previous scan. 
+ if (databases_metadata != previous_databases_metadata) { - bool error_message_ready = false; - for (const auto & database_name : database_names) + std::vector> difference; + difference.reserve(databases_metadata.size()); + std::set_difference(databases_metadata.begin(), databases_metadata.end(), previous_databases_metadata.begin(), + previous_databases_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) { - if (!previous_database_names.contains(database_name)) - { - inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were added during scanning", backQuoteIfNeed(database_name)}; - error_message_ready = true; - break; - } + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "Database {} was created or has changed its definition during scanning", + backQuoteIfNeed(difference[0].first)}; + return false; } - if (!error_message_ready) + + difference.clear(); + difference.reserve(previous_databases_metadata.size()); + std::set_difference(previous_databases_metadata.begin(), previous_databases_metadata.end(), databases_metadata.begin(), + databases_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) { - for (const auto & database_name : previous_database_names) - { - if (!database_names.contains(database_name)) - { - inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Database {} were removed during scanning", backQuoteIfNeed(database_name)}; - error_message_ready = true; - break; - } - } + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "Database {} was removed or has changed its definition during scanning", + backQuoteIfNeed(difference[0].first)}; + return false; } - assert(error_message_ready); - previous_database_names = std::move(database_names); - previous_table_names = std::move(table_names); - return false; } - if (previous_table_names != table_names) + /// Tables must be the same as during the previous scan.
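The same two-sided std::set_difference pattern is applied to the tables right below. A self-contained sketch of how it classifies changes between two sorted (name, serialized definition) snapshots; the data here is hypothetical:

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <string>
    #include <utility>
    #include <vector>

    /// (name, serialized definition); pairs compare lexicographically, so a changed
    /// definition makes the pair differ even when the name is unchanged.
    using Snapshot = std::vector<std::pair<std::string, std::string>>;

    int main()
    {
        /// Hypothetical snapshots; both must be sorted for std::set_difference.
        Snapshot previous = {{"db1", "CREATE DATABASE db1"}, {"db2", "CREATE DATABASE db2"}};
        Snapshot current = {{"db1", "CREATE DATABASE db1 COMMENT 'x'"}, {"db3", "CREATE DATABASE db3"}};

        Snapshot added_or_changed;
        std::set_difference(current.begin(), current.end(), previous.begin(), previous.end(),
                            std::back_inserter(added_or_changed));

        Snapshot removed_or_changed;
        std::set_difference(previous.begin(), previous.end(), current.begin(), current.end(),
                            std::back_inserter(removed_or_changed));

        /// db1 shows up in both lists (definition changed), db3 only in the first
        /// (created), db2 only in the second (removed).
        for (const auto & entry : added_or_changed)
            std::cout << "created or changed: " << entry.first << '\n';
        for (const auto & entry : removed_or_changed)
            std::cout << "removed or changed: " << entry.first << '\n';
    }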
+ if (tables_metadata != previous_tables_metadata) { - bool error_message_ready = false; - for (const auto & table_name : table_names) + std::vector> difference; + difference.reserve(tables_metadata.size()); + std::set_difference(tables_metadata.begin(), tables_metadata.end(), previous_tables_metadata.begin(), + previous_tables_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) { - if (!previous_table_names.contains(table_name)) - { - inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were added during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; - error_message_ready = true; - break; - } + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "{} was created or has changed its definition during scanning", + tableNameWithTypeToString(difference[0].first.database, difference[0].first.table, true)}; + return false; } - if (!error_message_ready) + + difference.clear(); + difference.reserve(previous_tables_metadata.size()); + std::set_difference(previous_tables_metadata.begin(), previous_tables_metadata.end(), tables_metadata.begin(), + tables_metadata.end(), std::back_inserter(difference)); + + if (!difference.empty()) { - for (const auto & table_name : previous_table_names) - { - if (!table_names.contains(table_name)) - { - inconsistency_error = Exception{ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "{} were removed during scanning", tableNameWithTypeToString(table_name.database, table_name.table, true)}; - error_message_ready = true; - break; - } - } + inconsistency_error = Exception{ + ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, + "{} was removed or has changed its definition during scanning", + tableNameWithTypeToString(difference[0].first.database, difference[0].first.table, true)}; + return false; } - assert(error_message_ready); - previous_table_names = std::move(table_names); - return false; } return true; @@ -615,7 +634,8 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name)); ASTPtr new_create_query = database_info.create_database_query; - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), nullptr); + renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); const String & metadata_path_in_backup = database_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); @@ -625,12 +645,13 @@ /// Calls IDatabase::backupTable() for all the tables found to make backup entries for tables.
void BackupEntriesCollector::makeBackupEntriesForTablesDefs() { - for (const auto & [table_name, table_info] : table_infos) + for (auto & [table_name, table_info] : table_infos) { LOG_TRACE(log, "Adding definition of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); ASTPtr new_create_query = table_info.create_table_query; - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, new_create_query); + adjustCreateQueryForBackup(new_create_query, context->getGlobalContext(), &table_info.replicated_table_shared_id); + renameDatabaseAndTableNameInCreateQuery(new_create_query, renaming_map, context->getGlobalContext()); const String & metadata_path_in_backup = table_info.metadata_path_in_backup; backup_entries.emplace_back(metadata_path_in_backup, std::make_shared(serializeAST(*new_create_query))); @@ -645,18 +666,21 @@ void BackupEntriesCollector::makeBackupEntriesForTablesData() for (const auto & [table_name, table_info] : table_infos) { const auto & storage = table_info.storage; - if (!storage) - { - /// This storage exists on other replica and has not been created on this replica yet. - /// We store metadata only for such tables. - /// TODO: Need special processing if it's a ReplicatedMergeTree. - continue; - } - - LOG_TRACE(log, "Adding data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); const auto & data_path_in_backup = table_info.data_path_in_backup; - const auto & partitions = table_info.partitions; - storage->backupData(*this, data_path_in_backup, partitions); + if (storage) + { + LOG_TRACE(log, "Adding data of {}", tableNameWithTypeToString(table_name.database, table_name.table, false)); + storage->backupData(*this, data_path_in_backup, table_info.partitions); + } + else + { + /// Storage == null means this storage exists on other replicas but it has not been created on this replica yet. + /// In this case, if the table is replicated, we call IBackupCoordination::addReplicatedDataPath() which will cause + /// other replicas to fill the storage's data in the backup. + /// If the table is not replicated, we do nothing, leaving the storage's data empty in the backup.
+ if (table_info.replicated_table_shared_id) + backup_coordination->addReplicatedDataPath(*table_info.replicated_table_shared_id, data_path_in_backup); + } } } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 49901295003..46a2bd1863a 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -120,15 +120,16 @@ private: ASTPtr create_table_query; String metadata_path_in_backup; std::filesystem::path data_path_in_backup; + std::optional replicated_table_shared_id; std::optional partitions; }; String current_status; std::chrono::steady_clock::time_point consistent_metadata_snapshot_start_time; std::unordered_map database_infos; - std::map table_infos; - std::set previous_database_names; - std::set previous_table_names; + std::unordered_map table_infos; + std::vector> previous_databases_metadata; + std::vector> previous_tables_metadata; BackupEntries backup_entries; std::queue> post_tasks; diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp new file mode 100644 index 00000000000..e3fc3ac5552 --- /dev/null +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -0,0 +1,114 @@ +#include +#include +#include +#include +#include +#include + +#include + + +namespace DB +{ + +namespace +{ + void visitStorageSystemTableEngine(ASTStorage &, const DDLAdjustingForBackupVisitor::Data & data) + { + /// Precondition: storage.engine && storage.engine->name.starts_with("System") + + /// If this is a definition of a system table we'll remove columns and comment because they're redundant for backups. + auto & create = data.create_query->as(); + create.reset(create.columns_list); + create.reset(create.comment); + } + + void visitStorageReplicatedTableEngine(ASTStorage & storage, const DDLAdjustingForBackupVisitor::Data & data) + { + /// Precondition: engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree") + + if (data.replicated_table_shared_id) + *data.replicated_table_shared_id = StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(*data.create_query, data.global_context); + + /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there + /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. + /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table.
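The reversal described in the comment above is, at its core, a substring replacement on the first engine argument. A minimal sketch with made-up values (the real code operates on AST literals, not raw strings):

    #include <iostream>
    #include <string>

    int main()
    {
        /// Hypothetical values: the path as expanded at CREATE time and the table's UUID.
        std::string zookeeper_path_arg = "/clickhouse/tables/123e4567-e89b-12d3-a456-426614174000/s1";
        const std::string table_uuid_str = "123e4567-e89b-12d3-a456-426614174000";

        /// Turn the concrete UUID back into the "{uuid}" macro before the definition
        /// goes into the backup, mirroring the replace() call in the visitor above.
        if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != std::string::npos)
            zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}");

        std::cout << zookeeper_path_arg << '\n'; /// prints /clickhouse/tables/{uuid}/s1
    }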
+ auto & create = data.create_query->as(); + if (create.uuid == UUIDHelpers::Nil) + return; + + auto & engine = *storage.engine; + + auto * engine_args_ast = typeid_cast(engine.arguments.get()); + if (!engine_args_ast) + return; + + auto & engine_args = engine_args_ast->children; + if (engine_args.size() < 2) + return; + + auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); + auto * replica_name_ast = typeid_cast(engine_args[1].get()); + if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && + replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) + { + String & zookeeper_path_arg = zookeeper_path_ast->value.get(); + String & replica_name_arg = replica_name_ast->value.get(); + String table_uuid_str = toString(create.uuid); + if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) + zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); + const auto & config = data.global_context->getConfigRef(); + if ((zookeeper_path_arg == StorageReplicatedMergeTree::getDefaultZooKeeperPath(config)) + && (replica_name_arg == StorageReplicatedMergeTree::getDefaultReplicaName(config)) + && ((engine_args.size() == 2) || !engine_args[2]->as())) + { + engine_args.erase(engine_args.begin(), engine_args.begin() + 2); + } + } + } + + void visitStorage(ASTStorage & storage, const DDLAdjustingForBackupVisitor::Data & data) + { + if (!storage.engine) + return; + + const String & engine_name = storage.engine->name; + if (engine_name.starts_with("System")) + visitStorageSystemTableEngine(storage, data); + else if (engine_name.starts_with("Replicated") && engine_name.ends_with("MergeTree")) + visitStorageReplicatedTableEngine(storage, data); + } + + void visitCreateQuery(ASTCreateQuery & create, const DDLAdjustingForBackupVisitor::Data & data) + { + create.uuid = UUIDHelpers::Nil; + create.to_inner_uuid = UUIDHelpers::Nil; + + if (create.storage) + visitStorage(*create.storage, data); + } +} + + +bool DDLAdjustingForBackupVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) +{ + return false; +} + +void DDLAdjustingForBackupVisitor::visit(ASTPtr ast, const Data & data) +{ + if (auto * create = ast->as()) + visitCreateQuery(*create, data); +} + +void adjustCreateQueryForBackup(ASTPtr & ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id) +{ + ast = ast->clone(); + if (replicated_table_shared_id) + *replicated_table_shared_id = {}; + + DDLAdjustingForBackupVisitor::Data data{ast, global_context, replicated_table_shared_id}; + DDLAdjustingForBackupVisitor::Visitor{data}.visit(ast); +} + +} diff --git a/src/Backups/DDLAdjustingForBackupVisitor.h b/src/Backups/DDLAdjustingForBackupVisitor.h new file mode 100644 index 00000000000..87498471cc4 --- /dev/null +++ b/src/Backups/DDLAdjustingForBackupVisitor.h @@ -0,0 +1,36 @@ +#pragma once + +#include +#include + + +namespace DB +{ +class IAST; +using ASTPtr = std::shared_ptr; +class Context; +using ContextPtr = std::shared_ptr; + +/// Changes a create query to a form which is appropriate or suitable for saving in a backup. +/// Also extracts a replicated table's shared ID from the create query if this is a create query for a replicated table. +/// `replicated_table_shared_id` can be null if you don't need that. 
+void adjustCreateQueryForBackup(ASTPtr & ast, const ContextPtr & global_context, std::optional * replicated_table_shared_id); + +/// Visits ASTCreateQuery and changes it to a form which is appropriate or suitable for saving in a backup. +class DDLAdjustingForBackupVisitor +{ +public: + struct Data + { + ASTPtr create_query; + ContextPtr global_context; + std::optional * replicated_table_shared_id = nullptr; + }; + + using Visitor = InDepthNodeVisitor; + + static bool needChildVisit(const ASTPtr & ast, const ASTPtr & child); + static void visit(ASTPtr ast, const Data & data); +}; + +} diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 9df5b9efdc4..0ae150c2b47 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -28,21 +28,21 @@ public: /// Multiple replicas of the replicated table call this function and then the added part names can be returned by call of the function /// getReplicatedPartNames(). /// Checksums are used only to control that parts under the same names on different replicas are the same. - virtual void addReplicatedPartNames(const String & table_zk_path, const String & table_name_for_logs, const String & replica_name, + virtual void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name, const std::vector & part_names_and_checksums) = 0; /// Returns the names of the parts which a specified replica of a replicated table should put to the backup. /// This is the same list as it was added by call of the function addReplicatedPartNames() but without duplications and without /// parts covered by another parts. - virtual Strings getReplicatedPartNames(const String & table_zk_path, const String & replica_name) const = 0; + virtual Strings getReplicatedPartNames(const String & table_shared_id, const String & replica_name) const = 0; /// Adds a data path in backup for a replicated table. /// Multiple replicas of the replicated table call this function and then all the added paths can be returned by call of the function /// getReplicatedDataPaths(). - virtual void addReplicatedDataPath(const String & table_zk_path, const String & data_path) = 0; + virtual void addReplicatedDataPath(const String & table_shared_id, const String & data_path) = 0; /// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()). 
- virtual Strings getReplicatedDataPaths(const String & table_zk_path) const = 0; + virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0; struct FileInfo { diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 12151eff08f..90c17ef0427 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -354,7 +355,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_table_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_table_query); + renameDatabaseAndTableNameInCreateQuery(create_table_query, renaming_map, context->getGlobalContext()); QualifiedTableName table_name = renaming_map.getNewTableName(table_name_in_backup); @@ -433,7 +434,7 @@ void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_ba read_buffer.reset(); ParserCreateQuery create_parser; ASTPtr create_database_query = parseQuery(create_parser, create_query_str, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - renameDatabaseAndTableNameInCreateQuery(context->getGlobalContext(), renaming_map, create_database_query); + renameDatabaseAndTableNameInCreateQuery(create_database_query, renaming_map, context->getGlobalContext()); String database_name = renaming_map.getNewDatabaseName(database_name_in_backup); DatabaseInfo & database_info = database_infos[database_name]; @@ -587,7 +588,8 @@ void RestorerFromBackup::createDatabases() if (!restore_settings.allow_different_database_def) { /// Check that the database's definition is the same as expected. 
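That definition check reduces to comparing the serialized form of the locally adjusted create query against the one stored in the backup. A rough, runnable sketch of the comparison shape; the function names and the definitions they return are stand-ins, not the real interfaces:

    #include <iostream>
    #include <stdexcept>
    #include <string>

    /// Purely illustrative stand-ins: in the real code both strings come from
    /// serializeAST() over the adjusted local query and the backup's query.
    std::string actualDefinition()   { return "CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY x"; }
    std::string expectedDefinition() { return "CREATE TABLE t (x UInt8) ENGINE = MergeTree ORDER BY x"; }

    void checkDefinitionMatches(bool allow_different_table_def)
    {
        if (allow_different_table_def)
            return; /// mismatches are explicitly permitted by the restore setting
        if (actualDefinition() != expectedDefinition())
            throw std::runtime_error("The table has a different definition than in the backup");
    }

    int main()
    {
        checkDefinitionMatches(false);
        std::cout << "definitions match\n";
    }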
- ASTPtr create_database_query = database->getCreateDatabaseQueryForBackup(); + ASTPtr create_database_query = database->getCreateDatabaseQuery(); + adjustCreateQueryForBackup(create_database_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = database_info.create_database_query; if (serializeAST(*create_database_query) != serializeAST(*expected_create_query)) { @@ -659,7 +661,7 @@ void RestorerFromBackup::createTables() if (!restore_settings.allow_different_table_def) { ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); - storage->adjustCreateQueryForBackup(create_table_query); + adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr); ASTPtr expected_create_query = table_info.create_table_query; if (serializeAST(*create_table_query) != serializeAST(*expected_create_query)) { diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index fc14d7abbd9..c8958fa06d4 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -307,13 +307,6 @@ void DDLRenamingVisitor::visit(ASTPtr ast, const Data & data) bool DDLRenamingVisitor::needChildVisit(const ASTPtr &, const ASTPtr &) { return true; } -void renameDatabaseAndTableNameInCreateQuery(const ContextPtr & global_context, const DDLRenamingMap & renaming_map, ASTPtr & ast) -{ - DDLRenamingVisitor::Data data{global_context, renaming_map, ast}; - DDLRenamingVisitor::Visitor{data}.visit(ast); -} - - void DDLRenamingMap::setNewTableName(const QualifiedTableName & old_table_name, const QualifiedTableName & new_table_name) { if (old_table_name.table.empty() || old_table_name.database.empty() || new_table_name.table.empty() || new_table_name.database.empty()) @@ -367,4 +360,12 @@ QualifiedTableName DDLRenamingMap::getNewTableName(const QualifiedTableName & ol return {getNewDatabaseName(old_table_name.database), old_table_name.table}; } + +void renameDatabaseAndTableNameInCreateQuery(ASTPtr & ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context) +{ + ast = ast->clone(); + DDLRenamingVisitor::Data data{ast, renaming_map, global_context}; + DDLRenamingVisitor::Visitor{data}.visit(ast); +} + } diff --git a/src/Databases/DDLRenamingVisitor.h b/src/Databases/DDLRenamingVisitor.h index 72b578b9fcb..11e8c4676e0 100644 --- a/src/Databases/DDLRenamingVisitor.h +++ b/src/Databases/DDLRenamingVisitor.h @@ -17,7 +17,7 @@ class DDLRenamingMap; /// Changes names of databases or tables in a create query according to a specified renaming map. /// Does not validate AST, works a best-effort way. -void renameDatabaseAndTableNameInCreateQuery(const ContextPtr & global_context, const DDLRenamingMap & renaming_map, ASTPtr & ast); +void renameDatabaseAndTableNameInCreateQuery(ASTPtr & ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context); /// Renaming map keeps information about new names of databases or tables. 
class DDLRenamingMap @@ -40,9 +40,9 @@ class DDLRenamingVisitor public: struct Data { - ContextPtr global_context; - const DDLRenamingMap & renaming_map; ASTPtr create_query; + const DDLRenamingMap & renaming_map; + ContextPtr global_context; }; using Visitor = InDepthNodeVisitor; diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 6286723aaa3..5a0eec10abb 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -44,6 +44,7 @@ namespace ErrorCodes extern const int INCORRECT_QUERY; extern const int ALL_CONNECTION_TRIES_FAILED; extern const int NO_ACTIVE_REPLICAS; + extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_RESTORE_TABLE; } @@ -923,6 +924,45 @@ String DatabaseReplicated::readMetadataFile(const String & table_name) const } +std::vector> +DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr &) const +{ + /// Here we read metadata from ZooKeeper. We could do that by a simple call of DatabaseAtomic::getTablesForBackup(); however, + /// reading from ZooKeeper is better because then we won't be dependent on how fast the replication queue of this database is. + std::vector> res; + auto zookeeper = getContext()->getZooKeeper(); + auto escaped_table_names = zookeeper->getChildren(zookeeper_path + "/metadata"); + for (const auto & escaped_table_name : escaped_table_names) + { + String table_name = unescapeForFileName(escaped_table_name); + if (!filter(table_name)) + continue; + String zk_metadata; + if (!zookeeper->tryGet(zookeeper_path + "/metadata/" + escaped_table_name, zk_metadata)) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Metadata for table {} was not found in ZooKeeper", table_name); + + ParserCreateQuery parser; + auto create_table_query = parseQuery(parser, zk_metadata, 0, getContext()->getSettingsRef().max_parser_depth); + + auto & create = create_table_query->as(); + create.attach = false; + create.setTable(table_name); + create.setDatabase(getDatabaseName()); + + StoragePtr storage; + if (create.uuid != UUIDHelpers::Nil) + { + storage = DatabaseCatalog::instance().tryGetByUUID(create.uuid).second; + if (storage) + storage->adjustCreateQueryForBackup(create_table_query); + } + res.emplace_back(create_table_query, storage); + } + + return res; +} + + void DatabaseReplicated::createTableRestoredFromBackup( const ASTPtr & create_table_query, ContextMutablePtr local_context, diff --git a/src/Databases/DatabaseReplicated.h b/src/Databases/DatabaseReplicated.h index 958ee3f133f..07014702067 100644 --- a/src/Databases/DatabaseReplicated.h +++ b/src/Databases/DatabaseReplicated.h @@ -72,6 +72,7 @@ public: void shutdown() override; + std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & local_context) const override; void createTableRestoredFromBackup(const ASTPtr & create_table_query, ContextMutablePtr local_context, std::shared_ptr restore_coordination, UInt64 timeout_ms) override; friend struct DatabaseReplicatedTask; diff --git a/src/Databases/IDatabase.cpp b/src/Databases/IDatabase.cpp index a75f213a6bb..9e33548b0dd 100644 --- a/src/Databases/IDatabase.cpp +++ b/src/Databases/IDatabase.cpp @@ -21,20 +21,9 @@ StoragePtr IDatabase::getTable(const String & name, ContextPtr context) const throw Exception(ErrorCodes::UNKNOWN_TABLE, "Table {}.{} doesn't exist", backQuoteIfNeed(getDatabaseName()), backQuoteIfNeed(name)); } -ASTPtr IDatabase::getCreateDatabaseQueryForBackup() const -{ - auto query
= getCreateDatabaseQuery(); - - /// We don't want to see any UUIDs in backup (after RESTORE the database will have another UUID anyway). - auto & create = query->as(); - create.uuid = UUIDHelpers::Nil; - - return query; -} - std::vector> IDatabase::getTablesForBackup(const FilterByNameFunction &, const ContextPtr &) const { - /// Cannot restore any table because IDatabase doesn't own any tables. + /// Cannot backup any table because IDatabase doesn't own any tables. throw Exception(ErrorCodes::CANNOT_BACKUP_TABLE, "Database engine {} does not support backups, cannot backup tables in database {}", getEngineName(), backQuoteIfNeed(getDatabaseName())); diff --git a/src/Databases/IDatabase.h b/src/Databases/IDatabase.h index cdea03aa1cb..72155bc818c 100644 --- a/src/Databases/IDatabase.h +++ b/src/Databases/IDatabase.h @@ -332,9 +332,6 @@ public: throw Exception(ErrorCodes::LOGICAL_ERROR, "Database engine {} does not run a replication thread!", getEngineName()); } - /// Returns a CREATE DATABASE query prepared for writing to a backup. - virtual ASTPtr getCreateDatabaseQueryForBackup() const; - /// Returns CREATE TABLE queries and corresponding tables prepared for writing to a backup. virtual std::vector> getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr & context) const; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index a3f35ccc0f8..0d0a242e4fb 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -249,28 +249,8 @@ bool IStorage::isStaticStorage() const return false; } -void IStorage::adjustCreateQueryForBackup(ASTPtr & create_query) const +void IStorage::adjustCreateQueryForBackup(ASTPtr &) const { - create_query = create_query->clone(); - - /// We don't want to see any UUIDs in backup (after RESTORE the table will have another UUID anyway). - auto & create = create_query->as(); - create.uuid = UUIDHelpers::Nil; - create.to_inner_uuid = UUIDHelpers::Nil; - - /// If this is a definition of a system table we'll remove columns and comment because they're reduntant for backups. - if (isSystemStorage()) - { - if (!create.storage || !create.storage->engine) - throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a system table {}", getStorageID().getFullTableName()); - - auto & engine = *(create.storage->engine); - if (!engine.name.starts_with("System")) - throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a system table {}", engine.name, getStorageID().getFullTableName()); - - create.reset(create.columns_list); - create.reset(create.comment); - } } void IStorage::backupData(BackupEntriesCollector &, const String &, const std::optional &) diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp new file mode 100644 index 00000000000..45d667047af --- /dev/null +++ b/src/Storages/MergeTree/extractZkPathFromCreateQuery.cpp @@ -0,0 +1,60 @@ +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) +{ + const auto * create = create_query.as(); + if (!create || !create->storage || !create->storage->engine) + return {}; + + /// Check if the table engine is one of the ReplicatedMergeTree family. 
+ const auto & ast_engine = *create->storage->engine; + if (!ast_engine.name.starts_with("Replicated") || !ast_engine.name.ends_with("MergeTree")) + return {}; + + /// Get the first argument. + const auto * ast_arguments = typeid_cast(ast_engine.arguments.get()); + if (!ast_arguments || ast_arguments->children.empty()) + return {}; + + auto * ast_zk_path = typeid_cast(ast_arguments->children[0].get()); + if (!ast_zk_path || (ast_zk_path->value.getType() != Field::Types::String)) + return {}; + + String zk_path = ast_zk_path->value.safeGet(); + + /// Expand macros. + Macros::MacroExpansionInfo info; + info.table_id.table_name = create->getTable(); + info.table_id.database_name = create->getDatabase(); + info.table_id.uuid = create->uuid; + auto database = DatabaseCatalog::instance().tryGetDatabase(info.table_id.database_name); + if (database && database->getEngineName() == "Replicated") + { + info.shard = getReplicatedDatabaseShardName(database); + info.replica = getReplicatedDatabaseReplicaName(database); + } + + try + { + zk_path = global_context->getMacros()->expand(zk_path, info); + } + catch (...) + { + return {}; /// Couldn't expand macros. + } + + return zk_path; +} + +} diff --git a/src/Storages/MergeTree/extractZkPathFromCreateQuery.h b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h new file mode 100644 index 00000000000..e22f76d2cd5 --- /dev/null +++ b/src/Storages/MergeTree/extractZkPathFromCreateQuery.h @@ -0,0 +1,19 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ +class IAST; +class Context; +using ContextPtr = std::shared_ptr; + +/// Extracts a zookeeper path from a specified CREATE TABLE query. Returns std::nullopt if it fails. +/// The function takes the first argument of the ReplicatedMergeTree table engine and expands macros in it. +/// It works like a part of what the create() function in registerStorageMergeTree.cpp does, but in a simpler manner. +std::optional tryExtractZkPathFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); + +} diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 66fb2a64a50..0a544ebad02 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -7504,6 +7505,24 @@ void StorageReplicatedMergeTree::createTableSharedID() } +std::optional StorageReplicatedMergeTree::tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context) +{ + auto zk_path = tryExtractZkPathFromCreateQuery(create_query, global_context); + if (!zk_path) + return {}; + + String zk_name = zkutil::extractZooKeeperName(*zk_path); + zk_path = zkutil::extractZooKeeperPath(*zk_path, false, nullptr); + zkutil::ZooKeeperPtr zookeeper = (zk_name == getDefaultZooKeeperName()) ?
global_context->getZooKeeper() : global_context->getAuxiliaryZooKeeper(zk_name); + + String id; + if (!zookeeper->tryGet(fs::path(*zk_path) / "table_shared_id", id)) + return {}; + + return id; +} + + void StorageReplicatedMergeTree::lockSharedDataTemporary(const String & part_name, const String & part_id, const DiskPtr & disk) const { auto settings = getSettings(); @@ -8258,46 +8277,8 @@ void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_quer { MergeTreeData::adjustCreateQueryForBackup(create_query); - /// Before storing the metadata in a backup we have to find a zookeeper path in its definition and turn the table's UUID in there - back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. - /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. - auto & create = create_query->as(); - - if (!create.storage || !create.storage->engine) - throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query without table engine for a replicated table {}", getStorageID().getFullTableName()); - - auto & engine = *(create.storage->engine); - if (!engine.name.starts_with("Replicated") || !engine.name.ends_with("MergeTree")) - throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with an unexpected table engine {} for a replicated table {}", engine.name, getStorageID().getFullTableName()); - - if (create.uuid == UUIDHelpers::Nil) - return; - - auto * engine_args_ast = typeid_cast(engine.arguments.get()); - if (!engine_args_ast) - return; - - auto & engine_args = engine_args_ast->children; - if (engine_args.size() < 2) - return; - - auto * zookeeper_path_ast = typeid_cast(engine_args[0].get()); - auto * replica_name_ast = typeid_cast(engine_args[1].get()); - if (zookeeper_path_ast && (zookeeper_path_ast->value.getType() == Field::Types::String) && - replica_name_ast && (replica_name_ast->value.getType() == Field::Types::String)) - { - String & zookeeper_path_arg = zookeeper_path_ast->value.get(); - String & replica_name_arg = replica_name_ast->value.get(); - String table_uuid_str = toString(create.uuid); - if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) - zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); - const auto & config = getContext()->getConfigRef(); - if ((zookeeper_path_arg == getDefaultZooKeeperPath(config)) && (replica_name_arg == getDefaultReplicaName(config)) - && ((engine_args.size() == 2) || !engine_args[2]->as())) - { - engine_args.erase(engine_args.begin(), engine_args.begin() + 2); - } - } + if (getTableSharedID() != tryGetTableSharedIDFromCreateQuery(*create_query, getContext())) + throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Table {} has a shared ID different from the one in its create query", getStorageID().getFullTableName()); } void StorageReplicatedMergeTree::backupData( @@ -8309,8 +8290,8 @@ void StorageReplicatedMergeTree::backupData( auto backup_entries = backupParts(backup_entries_collector.getContext(), "", partitions); auto coordination = backup_entries_collector.getBackupCoordination(); - String full_zk_path = getZooKeeperName() + getZooKeeperPath(); - coordination->addReplicatedDataPath(full_zk_path, data_path_in_backup); + String shared_id = getTableSharedID(); + coordination->addReplicatedDataPath(shared_id, data_path_in_backup); std::unordered_map part_names_with_hashes_calculating; for (auto & [relative_path,
backup_entry] : backup_entries) @@ -8348,23 +8329,23 @@ void StorageReplicatedMergeTree::backupData( } /// Send our list of part names to the coordination (to compare with other replicas). - coordination->addReplicatedPartNames(full_zk_path, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); + coordination->addReplicatedPartNames(shared_id, getStorageID().getFullTableName(), getReplicaName(), part_names_with_hashes); /// This task will be executed after all replicas have collected their parts and the coordination is ready to /// give us the final list of parts to add to the BackupEntriesCollector. - auto post_collecting_task = [full_zk_path, + auto post_collecting_task = [shared_id, replica_name = getReplicaName(), coordination, backup_entries = std::move(backup_entries), &backup_entries_collector]() { - Strings data_paths = coordination->getReplicatedDataPaths(full_zk_path); + Strings data_paths = coordination->getReplicatedDataPaths(shared_id); std::vector data_paths_fs; data_paths_fs.reserve(data_paths.size()); for (const auto & data_path : data_paths) data_paths_fs.push_back(data_path); - Strings part_names = coordination->getReplicatedPartNames(full_zk_path, replica_name); + Strings part_names = coordination->getReplicatedPartNames(shared_id, replica_name); std::unordered_set part_names_set{part_names.begin(), part_names.end()}; for (const auto & [relative_path, backup_entry] : backup_entries) diff --git a/src/Storages/StorageReplicatedMergeTree.h b/src/Storages/StorageReplicatedMergeTree.h index 86120b354bd..18b9ef54777 100644 --- a/src/Storages/StorageReplicatedMergeTree.h +++ b/src/Storages/StorageReplicatedMergeTree.h @@ -312,6 +312,9 @@ public: // Return table id, common for different replicas String getTableSharedID() const override; + /// Returns the same as getTableSharedID(), but extracts it from a create query. + static std::optional tryGetTableSharedIDFromCreateQuery(const IAST & create_query, const ContextPtr & global_context); + static String getDefaultZooKeeperName() { return default_zookeeper_name; } /// Check if there are new broken disks and enqueue part recovery tasks. 
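Taken together, the helpers added above resolve a shared ID in two steps: extract the ZooKeeper path from the table's definition, then read the table_shared_id node under it. A condensed toy model of that flow, assuming a simple key/value stand-in for ZooKeeper (all names and values below are hypothetical):

    #include <iostream>
    #include <map>
    #include <optional>
    #include <string>

    /// A toy stand-in for ZooKeeper: path -> value.
    using FakeZooKeeper = std::map<std::string, std::string>;

    /// Step 1 (stands in for tryExtractZkPathFromCreateQuery): pretend we already
    /// pulled the first ReplicatedMergeTree engine argument out of the CREATE query.
    std::optional<std::string> extractZkPath(const std::string & create_query)
    {
        if (create_query.find("ReplicatedMergeTree") == std::string::npos)
            return std::nullopt;
        return "/clickhouse/tables/tbl"; /// hypothetical, macros already expanded
    }

    /// Step 2 (stands in for tryGetTableSharedIDFromCreateQuery): read the shared ID node.
    std::optional<std::string> tryGetSharedID(const std::string & create_query, const FakeZooKeeper & zk)
    {
        auto zk_path = extractZkPath(create_query);
        if (!zk_path)
            return std::nullopt;
        auto it = zk.find(*zk_path + "/table_shared_id");
        if (it == zk.end())
            return std::nullopt;
        return it->second;
    }

    int main()
    {
        FakeZooKeeper zk{{"/clickhouse/tables/tbl/table_shared_id", "123e4567-e89b-12d3-a456-426614174000"}};
        auto id = tryGetSharedID("CREATE TABLE tbl ... ENGINE = ReplicatedMergeTree(...)", zk);
        std::cout << (id ? *id : "<none>") << '\n';
    }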
diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 63198f40af9..885a0d851c2 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -557,3 +557,44 @@ def test_projection(): ) == "2\n" ) + + +def test_replicated_database_with_not_synced_tables(): + node1.query( + "CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')" + ) + + node1.query("CREATE TABLE mydb.tbl(x UInt8, y String) ENGINE=ReplicatedMergeTree ORDER BY x") + + backup_name = new_backup_name() + node2.query(f"BACKUP DATABASE mydb TO {backup_name}") + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + + node1.query(f"RESTORE DATABASE mydb FROM {backup_name}") + assert node1.query("EXISTS mydb.tbl") == "1\n" + + +def test_replicated_table_with_not_synced_def(): + node1.query( + "CREATE TABLE tbl (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" + "ORDER BY tuple()" + ) + + node2.query( + "CREATE TABLE tbl (" + "x UInt8, y String" + ") ENGINE=ReplicatedMergeTree('/clickhouse/tables/tbl/', '{replica}')" + "ORDER BY tuple()" + ) + + node2.query("SYSTEM STOP REPLICATION QUEUES tbl") + node1.query("ALTER TABLE tbl MODIFY COLUMN x String") + + backup_name = new_backup_name() + node2.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") + + #node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + #node1.query(f"RESTORE TABLE tbl FROM {backup_name}") From 01921ce9a3322bd719d13a244c94c37594fe36c0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 26 Jun 2022 17:17:43 +0200 Subject: [PATCH 090/121] Move most code from setTableStructure() to a separate function. 
--- .../ReplicatedMergeTreeTableMetadata.cpp | 120 ++++++++++++++++++ .../ReplicatedMergeTreeTableMetadata.h | 2 + src/Storages/StorageReplicatedMergeTree.cpp | 117 +---------------- 3 files changed, 123 insertions(+), 116 deletions(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp index 7dee7b8d0f8..ea90179caa3 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include @@ -353,4 +354,123 @@ ReplicatedMergeTreeTableMetadata::checkAndFindDiff(const ReplicatedMergeTreeTabl return diff; } +StorageInMemoryMetadata ReplicatedMergeTreeTableMetadata::Diff::getNewMetadata(const ColumnsDescription & new_columns, ContextPtr context, const StorageInMemoryMetadata & old_metadata) const +{ + StorageInMemoryMetadata new_metadata = old_metadata; + new_metadata.columns = new_columns; + + if (!empty()) + { + auto parse_key_expr = [] (const String & key_expr) + { + ParserNotEmptyExpressionList parser(false); + auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + + ASTPtr order_by_ast; + if (new_sorting_key_expr_list->children.size() == 1) + order_by_ast = new_sorting_key_expr_list->children[0]; + else + { + auto tuple = makeASTFunction("tuple"); + tuple->arguments->children = new_sorting_key_expr_list->children; + order_by_ast = tuple; + } + return order_by_ast; + }; + + if (sorting_key_changed) + { + auto order_by_ast = parse_key_expr(new_sorting_key); + + new_metadata.sorting_key.recalculateWithNewAST(order_by_ast, new_metadata.columns, context); + + if (new_metadata.primary_key.definition_ast == nullptr) + { + /// Primary and sorting key become independent after this ALTER so we have to + /// save the old ORDER BY expression as the new primary key. 
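The parse_key_expr lambda above normalizes a serialized key expression: a single expression is kept as-is, while several comma-separated expressions are wrapped into tuple(...). Sketched here with plain strings rather than the real parser (ParserNotEmptyExpressionList and makeASTFunction build ASTs in the actual code), so the comma check is deliberately naive:

    #include <iostream>
    #include <string>

    /// Toy illustration of the normalization rule only; it ignores commas nested
    /// inside function arguments, which the real AST-based code handles correctly.
    std::string normalizeKeyExpr(const std::string & key_expr)
    {
        if (key_expr.find(',') == std::string::npos)
            return key_expr;                  /// "x"    -> "x"
        return "tuple(" + key_expr + ")";     /// "x, y" -> "tuple(x, y)"
    }

    int main()
    {
        std::cout << normalizeKeyExpr("x") << '\n';
        std::cout << normalizeKeyExpr("x, y") << '\n';
    }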
+ auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast; + new_metadata.primary_key = KeyDescription::getKeyFromAST( + old_sorting_key_ast, new_metadata.columns, context); + } + } + + if (sampling_expression_changed) + { + if (!new_sampling_expression.empty()) + { + auto sample_by_ast = parse_key_expr(new_sampling_expression); + new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, context); + } + else /// SAMPLE BY was removed + { + new_metadata.sampling_key = {}; + } + } + + if (skip_indices_changed) + new_metadata.secondary_indices = IndicesDescription::parse(new_skip_indices, new_columns, context); + + if (constraints_changed) + new_metadata.constraints = ConstraintsDescription::parse(new_constraints); + + if (projections_changed) + new_metadata.projections = ProjectionsDescription::parse(new_projections, new_columns, context); + + if (ttl_table_changed) + { + if (!new_ttl_table.empty()) + { + ParserTTLExpressionList parser; + auto ttl_for_table_ast = parseQuery(parser, new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + ttl_for_table_ast, new_metadata.columns, context, new_metadata.primary_key); + } + else /// TTL was removed + { + new_metadata.table_ttl = TTLTableDescription{}; + } + } + } + + /// Changes in columns may affect following metadata fields + new_metadata.column_ttls_by_name.clear(); + for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) + { + auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, context, new_metadata.primary_key); + new_metadata.column_ttls_by_name[name] = new_ttl_entry; + } + + if (new_metadata.partition_key.definition_ast != nullptr) + new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, context); + + if (!sorting_key_changed) /// otherwise already updated + new_metadata.sorting_key.recalculateWithNewColumns(new_metadata.columns, context); + + /// Primary key is special, it exists even if not defined + if (new_metadata.primary_key.definition_ast != nullptr) + { + new_metadata.primary_key.recalculateWithNewColumns(new_metadata.columns, context); + } + else + { + new_metadata.primary_key = KeyDescription::getKeyFromAST(new_metadata.sorting_key.definition_ast, new_metadata.columns, context); + new_metadata.primary_key.definition_ast = nullptr; + } + + if (!sampling_expression_changed && new_metadata.sampling_key.definition_ast != nullptr) + new_metadata.sampling_key.recalculateWithNewColumns(new_metadata.columns, context); + + if (!skip_indices_changed) /// otherwise already updated + { + for (auto & index : new_metadata.secondary_indices) + index.recalculateWithNewColumns(new_metadata.columns, context); + } + + if (!ttl_table_changed && new_metadata.table_ttl.definition_ast != nullptr) + new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( + new_metadata.table_ttl.definition_ast, new_metadata.columns, context, new_metadata.primary_key); + + return new_metadata; +} + } diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h index 6d510d20304..eb2d087e988 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeTableMetadata.h @@ -66,6 +66,8 @@ struct ReplicatedMergeTreeTableMetadata return !sorting_key_changed && !sampling_expression_changed && !skip_indices_changed && !projections_changed && !ttl_table_changed && 
!constraints_changed; } + + StorageInMemoryMetadata getNewMetadata(const ColumnsDescription & new_columns, ContextPtr context, const StorageInMemoryMetadata & old_metadata) const; }; void checkEquals(const ReplicatedMergeTreeTableMetadata & from_zk, const ColumnsDescription & columns, ContextPtr context) const; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 0a544ebad02..8a93c813bae 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1094,123 +1094,8 @@ void StorageReplicatedMergeTree::checkTableStructure(const String & zookeeper_pr void StorageReplicatedMergeTree::setTableStructure( ColumnsDescription new_columns, const ReplicatedMergeTreeTableMetadata::Diff & metadata_diff) { - StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); StorageInMemoryMetadata old_metadata = getInMemoryMetadata(); - - new_metadata.columns = new_columns; - - if (!metadata_diff.empty()) - { - auto parse_key_expr = [] (const String & key_expr) - { - ParserNotEmptyExpressionList parser(false); - auto new_sorting_key_expr_list = parseQuery(parser, key_expr, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - - ASTPtr order_by_ast; - if (new_sorting_key_expr_list->children.size() == 1) - order_by_ast = new_sorting_key_expr_list->children[0]; - else - { - auto tuple = makeASTFunction("tuple"); - tuple->arguments->children = new_sorting_key_expr_list->children; - order_by_ast = tuple; - } - return order_by_ast; - }; - - if (metadata_diff.sorting_key_changed) - { - auto order_by_ast = parse_key_expr(metadata_diff.new_sorting_key); - auto & sorting_key = new_metadata.sorting_key; - auto & primary_key = new_metadata.primary_key; - - sorting_key.recalculateWithNewAST(order_by_ast, new_metadata.columns, getContext()); - - if (primary_key.definition_ast == nullptr) - { - /// Primary and sorting key become independent after this ALTER so we have to - /// save the old ORDER BY expression as the new primary key. 
- auto old_sorting_key_ast = old_metadata.getSortingKey().definition_ast; - primary_key = KeyDescription::getKeyFromAST( - old_sorting_key_ast, new_metadata.columns, getContext()); - } - } - - if (metadata_diff.sampling_expression_changed) - { - if (!metadata_diff.new_sampling_expression.empty()) - { - auto sample_by_ast = parse_key_expr(metadata_diff.new_sampling_expression); - new_metadata.sampling_key.recalculateWithNewAST(sample_by_ast, new_metadata.columns, getContext()); - } - else /// SAMPLE BY was removed - { - new_metadata.sampling_key = {}; - } - } - - if (metadata_diff.skip_indices_changed) - new_metadata.secondary_indices = IndicesDescription::parse(metadata_diff.new_skip_indices, new_columns, getContext()); - - if (metadata_diff.constraints_changed) - new_metadata.constraints = ConstraintsDescription::parse(metadata_diff.new_constraints); - - if (metadata_diff.projections_changed) - new_metadata.projections = ProjectionsDescription::parse(metadata_diff.new_projections, new_columns, getContext()); - - if (metadata_diff.ttl_table_changed) - { - if (!metadata_diff.new_ttl_table.empty()) - { - ParserTTLExpressionList parser; - auto ttl_for_table_ast = parseQuery(parser, metadata_diff.new_ttl_table, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); - new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( - ttl_for_table_ast, new_metadata.columns, getContext(), new_metadata.primary_key); - } - else /// TTL was removed - { - new_metadata.table_ttl = TTLTableDescription{}; - } - } - } - - /// Changes in columns may affect following metadata fields - new_metadata.column_ttls_by_name.clear(); - for (const auto & [name, ast] : new_metadata.columns.getColumnTTLs()) - { - auto new_ttl_entry = TTLDescription::getTTLFromAST(ast, new_metadata.columns, getContext(), new_metadata.primary_key); - new_metadata.column_ttls_by_name[name] = new_ttl_entry; - } - - if (new_metadata.partition_key.definition_ast != nullptr) - new_metadata.partition_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - if (!metadata_diff.sorting_key_changed) /// otherwise already updated - new_metadata.sorting_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - /// Primary key is special, it exists even if not defined - if (new_metadata.primary_key.definition_ast != nullptr) - { - new_metadata.primary_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - } - else - { - new_metadata.primary_key = KeyDescription::getKeyFromAST(new_metadata.sorting_key.definition_ast, new_metadata.columns, getContext()); - new_metadata.primary_key.definition_ast = nullptr; - } - - if (!metadata_diff.sampling_expression_changed && new_metadata.sampling_key.definition_ast != nullptr) - new_metadata.sampling_key.recalculateWithNewColumns(new_metadata.columns, getContext()); - - if (!metadata_diff.skip_indices_changed) /// otherwise already updated - { - for (auto & index : new_metadata.secondary_indices) - index.recalculateWithNewColumns(new_metadata.columns, getContext()); - } - - if (!metadata_diff.ttl_table_changed && new_metadata.table_ttl.definition_ast != nullptr) - new_metadata.table_ttl = TTLTableDescription::getTTLForTableFromAST( - new_metadata.table_ttl.definition_ast, new_metadata.columns, getContext(), new_metadata.primary_key); + StorageInMemoryMetadata new_metadata = metadata_diff.getNewMetadata(new_columns, getContext(), old_metadata); /// Even if the primary/sorting/partition keys didn't change we must reinitialize it /// because primary/partition key column types might have 
changed. From efbee5e7235af09192b5d387202cb6039fba2e6c Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Sun, 26 Jun 2022 17:53:20 +0200 Subject: [PATCH 091/121] Improve gathering metadata for backup - part 7. --- src/Storages/StorageReplicatedMergeTree.cpp | 15 +++- .../test_backup_restore_on_cluster/test.py | 90 +++++++++++++++---- 2 files changed, 84 insertions(+), 21 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8a93c813bae..c856786ffb3 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -8160,10 +8160,19 @@ void StorageReplicatedMergeTree::createAndStoreFreezeMetadata(DiskPtr disk, Data void StorageReplicatedMergeTree::adjustCreateQueryForBackup(ASTPtr & create_query) const { - MergeTreeData::adjustCreateQueryForBackup(create_query); + /// Adjust the create query using values from ZooKeeper. + auto zookeeper = getZooKeeper(); + auto columns_from_entry = ColumnsDescription::parse(zookeeper->get(fs::path(zookeeper_path) / "columns")); + auto metadata_from_entry = ReplicatedMergeTreeTableMetadata::parse(zookeeper->get(fs::path(zookeeper_path) / "metadata")); - if (getTableSharedID() != tryGetTableSharedIDFromCreateQuery(*create_query, getContext())) - throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Table {} has its shared ID to be different from one from the create query"); + auto current_metadata = getInMemoryMetadataPtr(); + auto metadata_diff = ReplicatedMergeTreeTableMetadata(*this, current_metadata).checkAndFindDiff(metadata_from_entry, current_metadata->getColumns(), getContext()); + auto adjusted_metadata = metadata_diff.getNewMetadata(columns_from_entry, getContext(), *current_metadata); + applyMetadataChangesToCreateQuery(create_query, adjusted_metadata); + + /// Check that tryGetTableSharedIDFromCreateQuery() works for this storage. 
+ if (tryGetTableSharedIDFromCreateQuery(*create_query, getContext()) != getTableSharedID()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table {} has a shared ID different from the one in its create query", getStorageID().getFullTableName()); } void StorageReplicatedMergeTree::backupData( diff --git a/tests/integration/test_backup_restore_on_cluster/test.py b/tests/integration/test_backup_restore_on_cluster/test.py index 885a0d851c2..02f855cf766 100644 --- a/tests/integration/test_backup_restore_on_cluster/test.py +++ b/tests/integration/test_backup_restore_on_cluster/test.py @@ -559,22 +559,6 @@ def test_projection(): ) -def test_replicated_database_with_not_synced_tables(): - node1.query( - "CREATE DATABASE mydb ON CLUSTER 'cluster' ENGINE=Replicated('/clickhouse/path/','{shard}','{replica}')" - ) - - node1.query("CREATE TABLE mydb.tbl(x UInt8, y String) ENGINE=ReplicatedMergeTree ORDER BY x") - - backup_name = new_backup_name() - node2.query(f"BACKUP DATABASE mydb TO {backup_name}") - - node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") - - node1.query(f"RESTORE DATABASE mydb FROM {backup_name}") - assert node1.query("EXISTS mydb.tbl") == "1\n" - - def test_replicated_table_with_not_synced_def(): node1.query( "CREATE TABLE tbl (" @@ -593,8 +577,78 @@ def test_replicated_table_with_not_synced_def(): node2.query("SYSTEM STOP REPLICATION QUEUES tbl") node1.query("ALTER TABLE tbl MODIFY COLUMN x String") + # Not synced because the replication queue is stopped + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='default' AND table='tbl'" + ) == TSV([["x", "UInt8"], ["y", "String"]]) + backup_name = new_backup_name() node2.query(f"BACKUP TABLE tbl ON CLUSTER 'cluster' TO {backup_name}") - #node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") - #node1.query(f"RESTORE TABLE tbl FROM {backup_name}") + node1.query("DROP TABLE tbl ON CLUSTER 'cluster' NO DELAY") + + #
But synced after RESTORE anyway + node1.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=1" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + + node1.query("DROP DATABASE mydb ON CLUSTER 'cluster' NO DELAY") + + node2.query( + f"RESTORE DATABASE mydb ON CLUSTER 'cluster' FROM {backup_name} SETTINGS replica_num_in_backup=2" + ) + assert node1.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) + assert node2.query( + "SELECT name, type FROM system.columns WHERE database='mydb' AND table='tbl'" + ) == TSV([["x", "String"], ["y", "String"]]) From aa97bf512521486353012780116d8cdb9b2b6de4 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Tue, 28 Jun 2022 09:59:02 +0200 Subject: [PATCH 092/121] Improve handling predefined databases and tables. --- src/Backups/BackupEntriesCollector.cpp | 4 +- src/Backups/DDLAdjustingForBackupVisitor.cpp | 3 +- src/Backups/DDLAdjustingForBackupVisitor.h | 2 +- src/Backups/RestorerFromBackup.cpp | 49 ++++++++------------ src/Backups/RestorerFromBackup.h | 2 + src/Databases/DDLRenamingVisitor.cpp | 3 +- src/Databases/DDLRenamingVisitor.h | 2 +- src/Interpreters/DatabaseCatalog.cpp | 45 +++++++++++++++++- src/Interpreters/DatabaseCatalog.h | 9 +++- 9 files changed, 79 insertions(+), 40 deletions(-) diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 8104e363a68..691e72a3f21 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -385,7 +385,7 @@ void BackupEntriesCollector::gatherDatabaseMetadata( DatabaseInfo & database_info = it->second; - if (backup_create_database_query && !database_info.create_database_query && !DatabaseCatalog::isPredefinedDatabaseName(database_name)) + if (backup_create_database_query && !database_info.create_database_query && (database_name != DatabaseCatalog::TEMPORARY_DATABASE)) { ASTPtr create_database_query; try @@ -629,7 +629,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() for (const auto & [database_name, database_info] : database_infos) { if (!database_info.create_database_query) - continue; /// We don't store CREATE queries for predefined databases (see DatabaseCatalog::isPredefinedDatabaseName()). + continue; /// We store CREATE DATABASE queries only if there was BACKUP DATABASE specified. 
    LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name));

diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp
index e3fc3ac5552..7dd58629a49 100644
--- a/src/Backups/DDLAdjustingForBackupVisitor.cpp
+++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp
@@ -101,9 +101,8 @@ void DDLAdjustingForBackupVisitor::visit(ASTPtr ast, const Data & data)
     visitCreateQuery(*create, data);
 }
 
-void adjustCreateQueryForBackup(ASTPtr & ast, const ContextPtr & global_context, std::optional<String> * replicated_table_shared_id)
+void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional<String> * replicated_table_shared_id)
 {
-    ast = ast->clone();
     if (replicated_table_shared_id)
         *replicated_table_shared_id = {};
 
diff --git a/src/Backups/DDLAdjustingForBackupVisitor.h b/src/Backups/DDLAdjustingForBackupVisitor.h
index 87498471cc4..63353dcc000 100644
--- a/src/Backups/DDLAdjustingForBackupVisitor.h
+++ b/src/Backups/DDLAdjustingForBackupVisitor.h
@@ -14,7 +14,7 @@ using ContextPtr = std::shared_ptr<const Context>;
 /// Changes a create query to a form which is appropriate or suitable for saving in a backup.
 /// Also extracts a replicated table's shared ID from the create query if this is a create query for a replicated table.
 /// `replicated_table_shared_id` can be null if you don't need that.
-void adjustCreateQueryForBackup(ASTPtr & ast, const ContextPtr & global_context, std::optional<String> * replicated_table_shared_id);
+void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, std::optional<String> * replicated_table_shared_id);
 
 /// Visits ASTCreateQuery and changes it to a form which is appropriate or suitable for saving in a backup.
 class DDLAdjustingForBackupVisitor
diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index 90c17ef0427..b5f81fe3b9f 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -77,26 +77,14 @@ namespace
         return str;
     }
 
-    String tryGetTableEngine(const IAST & ast)
+    /// Whether a specified name corresponds to one of the tables used for backing up ACL entities.
+ bool isSystemAccessTableName(const QualifiedTableName & table_name) { - const ASTCreateQuery * create = ast.as(); - if (!create) - return {}; - if (!create->storage || !create->storage->engine) - return {}; - return create->storage->engine->name; - } + if (table_name.database != DatabaseCatalog::SYSTEM_DATABASE) + return false; - bool hasSystemTableEngine(const IAST & ast) - { - return tryGetTableEngine(ast).starts_with("System"); - } - - bool hasSystemAccessTableEngine(const IAST & ast) - { - String engine_name = tryGetTableEngine(ast); - return (engine_name == "SystemUsers") || (engine_name == "SystemRoles") || (engine_name == "SystemSettingsProfiles") - || (engine_name == "SystemRowPolicies") || (engine_name == "SystemQuotas"); + return (table_name.table == "users") || (table_name.table == "roles") || (table_name.table == "settings_profiles") + || (table_name.table == "row_policies") || (table_name.table == "quotas"); } } @@ -375,6 +363,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; + res_table_info.is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); res_table_info.data_path_in_backup = data_path_in_backup; res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); @@ -385,7 +374,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name insertAtEnd(*res_table_info.partitions, *partitions); } - if (hasSystemAccessTableEngine(*create_table_query)) + if (isSystemAccessTableName(table_name)) { if (!access_restore_task) access_restore_task = std::make_shared(backup, restore_settings, restore_coordination); @@ -450,6 +439,7 @@ void RestorerFromBackup::findDatabaseInBackup(const String & database_name_in_ba } database_info.create_database_query = create_database_query; + database_info.is_predefined_database = DatabaseCatalog::isPredefinedDatabase(database_name); } for (const String & table_name_in_backup : table_names_in_backup) @@ -491,9 +481,9 @@ void RestorerFromBackup::findEverythingInBackup(const std::set & except_ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const { AccessRightsElements required_access; - for (const auto & database_name : database_infos | boost::adaptors::map_keys) + for (const auto & [database_name, database_info] : database_infos) { - if (DatabaseCatalog::isPredefinedDatabaseName(database_name)) + if (database_info.is_predefined_database) continue; AccessFlags flags; @@ -509,7 +499,8 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const for (const auto & [table_name, table_info] : table_infos) { - if (hasSystemTableEngine(*table_info.create_table_query)) + /// Access required to restore ACL system tables is checked separately. + if (table_info.is_predefined_table) continue; if (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) @@ -565,7 +556,7 @@ void RestorerFromBackup::createDatabases() for (const auto & [database_name, database_info] : database_infos) { bool need_create_database = (restore_settings.create_database != RestoreDatabaseCreationMode::kMustExist); - if (need_create_database && DatabaseCatalog::isPredefinedDatabaseName(database_name)) + if (database_info.is_predefined_database) need_create_database = false; /// Predefined databases always exist. 
if (need_create_database) @@ -585,7 +576,7 @@ void RestorerFromBackup::createDatabases() DatabasePtr database = DatabaseCatalog::instance().getDatabase(database_name); - if (!restore_settings.allow_different_database_def) + if (!restore_settings.allow_different_database_def && !database_info.is_predefined_database) { /// Check that the database's definition is the same as expected. ASTPtr create_database_query = database->getCreateDatabaseQuery(); @@ -621,13 +612,11 @@ void RestorerFromBackup::createTables() DatabasePtr database = DatabaseCatalog::instance().getDatabase(table_name.database); bool need_create_table = (restore_settings.create_table != RestoreTableCreationMode::kMustExist); - if (need_create_table && hasSystemTableEngine(*table_info.create_table_query)) - need_create_table = false; /// Tables with System* table engine already exist or can't be created by SQL anyway. + if (table_info.is_predefined_table) + need_create_table = false; /// Predefined tables always exist. if (need_create_table) { - /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some - /// database-specific things). auto create_table_query = table_info.create_table_query; if (restore_settings.create_table == RestoreTableCreationMode::kCreateIfNotExists) { @@ -641,6 +630,8 @@ void RestorerFromBackup::createTables() tableNameWithTypeToString(table_name.database, table_name.table, false), serializeAST(*create_table_query)); + /// Execute CREATE TABLE query (we call IDatabase::createTableRestoredFromBackup() to allow the database to do some + /// database-specific things). database->createTableRestoredFromBackup( create_table_query, context, @@ -658,7 +649,7 @@ void RestorerFromBackup::createTables() table_info.storage = storage; table_info.table_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); - if (!restore_settings.allow_different_table_def) + if (!restore_settings.allow_different_table_def && !table_info.is_predefined_table) { ASTPtr create_table_query = database->getCreateTableQuery(resolved_id.table_name, context); adjustCreateQueryForBackup(create_table_query, context->getGlobalContext(), nullptr); diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index f4d19a10cf6..d6f4eabd0dd 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -97,11 +97,13 @@ private: struct DatabaseInfo { ASTPtr create_database_query; + bool is_predefined_database = false; }; struct TableInfo { ASTPtr create_table_query; + bool is_predefined_table = false; std::optional partitions; std::filesystem::path data_path_in_backup; std::unordered_set dependencies; diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index c8958fa06d4..8dbcc2a24bb 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -361,9 +361,8 @@ QualifiedTableName DDLRenamingMap::getNewTableName(const QualifiedTableName & ol } -void renameDatabaseAndTableNameInCreateQuery(ASTPtr & ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context) +void renameDatabaseAndTableNameInCreateQuery(ASTPtr ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context) { - ast = ast->clone(); DDLRenamingVisitor::Data data{ast, renaming_map, global_context}; DDLRenamingVisitor::Visitor{data}.visit(ast); } diff --git a/src/Databases/DDLRenamingVisitor.h b/src/Databases/DDLRenamingVisitor.h index 
11e8c4676e0..44146a8ee6b 100644 --- a/src/Databases/DDLRenamingVisitor.h +++ b/src/Databases/DDLRenamingVisitor.h @@ -17,7 +17,7 @@ class DDLRenamingMap; /// Changes names of databases or tables in a create query according to a specified renaming map. /// Does not validate AST, works a best-effort way. -void renameDatabaseAndTableNameInCreateQuery(ASTPtr & ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context); +void renameDatabaseAndTableNameInCreateQuery(ASTPtr ast, const DDLRenamingMap & renaming_map, const ContextPtr & global_context); /// Renaming map keeps information about new names of databases or tables. class DDLRenamingMap diff --git a/src/Interpreters/DatabaseCatalog.cpp b/src/Interpreters/DatabaseCatalog.cpp index a0579b813db..bd64b14624c 100644 --- a/src/Interpreters/DatabaseCatalog.cpp +++ b/src/Interpreters/DatabaseCatalog.cpp @@ -234,12 +234,13 @@ void DatabaseCatalog::shutdownImpl() view_dependencies.clear(); } -bool DatabaseCatalog::isPredefinedDatabaseName(const std::string_view & database_name) +bool DatabaseCatalog::isPredefinedDatabase(const std::string_view & database_name) { return database_name == TEMPORARY_DATABASE || database_name == SYSTEM_DATABASE || database_name == INFORMATION_SCHEMA || database_name == INFORMATION_SCHEMA_UPPERCASE; } + DatabaseAndTable DatabaseCatalog::tryGetByUUID(const UUID & uuid) const { assert(uuid != UUIDHelpers::Nil && getFirstLevelIdx(uuid) < uuid_map.size()); @@ -328,6 +329,48 @@ DatabaseAndTable DatabaseCatalog::getTableImpl( return {database, table}; } +bool DatabaseCatalog::isPredefinedTable(const StorageID & table_id) const +{ + static const char * INFORMATION_SCHEMA_VIEWS[] = {"schemata", "tables", "views", "columns"}; + static const char * INFORMATION_SCHEMA_UPPERCASE_VIEWS[] = {"SCHEMATA", "TABLES", "VIEWS", "COLUMNS"}; + + auto checkDatabaseAndTableName = [&](const String & database_name, const String & table_name) + { + if (database_name == SYSTEM_DATABASE) + { + auto storage = getSystemDatabase()->tryGetTable(table_name, getContext()); + return storage && storage->isSystemStorage(); + } + if (database_name == INFORMATION_SCHEMA) + { + return std::find(std::begin(INFORMATION_SCHEMA_VIEWS), std::end(INFORMATION_SCHEMA_VIEWS), table_name) + != std::end(INFORMATION_SCHEMA_VIEWS); + } + if (database_name == INFORMATION_SCHEMA_UPPERCASE) + { + return std::find(std::begin(INFORMATION_SCHEMA_UPPERCASE_VIEWS), std::end(INFORMATION_SCHEMA_UPPERCASE_VIEWS), table_name) + != std::end(INFORMATION_SCHEMA_UPPERCASE_VIEWS); + } + return false; + }; + + if (table_id.hasUUID()) + { + if (auto storage = tryGetByUUID(table_id.uuid).second) + { + if (storage->isSystemStorage()) + return true; + auto res_id = storage->getStorageID(); + String database_name = res_id.getDatabaseName(); + if (database_name != SYSTEM_DATABASE) + return checkDatabaseAndTableName(database_name, res_id.getTableName()); + } + return false; + } + + return checkDatabaseAndTableName(table_id.getDatabaseName(), table_id.getTableName()); +} + void DatabaseCatalog::assertDatabaseExists(const String & database_name) const { std::lock_guard lock{databases_mutex}; diff --git a/src/Interpreters/DatabaseCatalog.h b/src/Interpreters/DatabaseCatalog.h index 4468cc3a5d8..133cf0c5126 100644 --- a/src/Interpreters/DatabaseCatalog.h +++ b/src/Interpreters/DatabaseCatalog.h @@ -130,8 +130,8 @@ public: static constexpr const char * INFORMATION_SCHEMA = "information_schema"; static constexpr const char * INFORMATION_SCHEMA_UPPERCASE = "INFORMATION_SCHEMA"; - /// 
Returns true if a passed string is one of the predefined databases' names - static bool isPredefinedDatabaseName(const std::string_view & database_name); + /// Returns true if a passed name is one of the predefined databases' names. + static bool isPredefinedDatabase(const std::string_view & database_name); static DatabaseCatalog & init(ContextMutablePtr global_context_); static DatabaseCatalog & instance(); @@ -181,6 +181,11 @@ public: ContextPtr context, std::optional * exception = nullptr) const; + /// Returns true if a passed table_id refers to one of the predefined tables' names. + /// All tables in the "system" database with System* table engine are predefined. + /// Four views (tables, views, columns, schemata) in the "information_schema" database are predefined too. + bool isPredefinedTable(const StorageID & table_id) const; + void addDependency(const StorageID & from, const StorageID & where); void removeDependency(const StorageID & from, const StorageID & where); Dependencies getDependencies(const StorageID & from) const; From 11b51d2878d2f944fc86044ea5571b71ea9319b0 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Mon, 27 Jun 2022 15:39:12 +0200 Subject: [PATCH 093/121] Implement storing UDF in backups. --- src/Access/AccessBackup.cpp | 2 +- src/Backups/RestoreSettings.cpp | 7 +- src/Backups/RestoreSettings.h | 5 ++ src/Backups/RestorerFromBackup.cpp | 21 +++++- .../System/StorageSystemFunctions.cpp | 74 +++++++++++++++++++ src/Storages/System/StorageSystemFunctions.h | 3 + .../test_backup_restore_new/test.py | 22 ++++++ 7 files changed, 128 insertions(+), 6 deletions(-) diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index 0322ad7457b..dd8cbbf269b 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -139,7 +139,7 @@ namespace } catch (Exception & e) { - e.addMessage("While parsing " + file_path); + e.addMessage("While parsing " + file_path + " from backup"); throw; } } diff --git a/src/Backups/RestoreSettings.cpp b/src/Backups/RestoreSettings.cpp index 590d39f24f8..efa1fe2cfb8 100644 --- a/src/Backups/RestoreSettings.cpp +++ b/src/Backups/RestoreSettings.cpp @@ -74,7 +74,7 @@ namespace { case RestoreTableCreationMode::kCreate: return Field{true}; case RestoreTableCreationMode::kMustExist: return Field{false}; - case RestoreTableCreationMode::kCreateIfNotExists: return Field{"if not exists"}; + case RestoreTableCreationMode::kCreateIfNotExists: return Field{"if-not-exists"}; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreTableCreationMode: {}", static_cast(value)); } @@ -131,12 +131,14 @@ namespace switch (value) { case RestoreAccessCreationMode::kCreate: return Field{true}; - case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if not exists"}; + case RestoreAccessCreationMode::kCreateIfNotExists: return Field{"if-not-exists"}; case RestoreAccessCreationMode::kReplace: return Field{"replace"}; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected value of enum RestoreAccessCreationMode: {}", static_cast(value)); } }; + + using SettingFieldRestoreUDFCreationMode = SettingFieldRestoreAccessCreationMode; } /// List of restore settings except base_backup_name and cluster_host_ids. 
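Before the settings-list hunk below, it may help to see how the new RestoreUDFCreationMode alias and the create_function setting it backs are intended to be used. A hypothetical sketch in the style of the integration tests (not part of this patch; the SETTINGS spelling 'replace' is an assumption based on the field values defined above):

    # Hypothetical sketch: intended behavior of the new 'create_function' restore setting.
    instance.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b")
    backup_name = new_backup_name()
    instance.query(f"BACKUP TABLE system.functions TO {backup_name}")

    # Redefine the function so that it already exists with a different body at restore time.
    instance.query("CREATE OR REPLACE FUNCTION linear_equation AS (x, k, b) -> k*x - b")

    # The default mode is kCreateIfNotExists: the existing definition is kept.
    instance.query(f"RESTORE TABLE system.functions FROM {backup_name}")

    # 'replace' (kReplace) overwrites the existing definition with the one from the backup.
    instance.query(
        f"RESTORE TABLE system.functions FROM {backup_name} SETTINGS create_function='replace'"
    )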
@@ -155,6 +157,7 @@
     M(Bool, allow_non_empty_tables) \
     M(RestoreAccessCreationMode, create_access) \
     M(Bool, allow_unresolved_access_dependencies) \
+    M(RestoreUDFCreationMode, create_function) \
     M(Bool, internal) \
     M(String, host_id) \
     M(String, coordination_zk_path)
diff --git a/src/Backups/RestoreSettings.h b/src/Backups/RestoreSettings.h
index 5e941b79508..1bc5d867a37 100644
--- a/src/Backups/RestoreSettings.h
+++ b/src/Backups/RestoreSettings.h
@@ -36,6 +36,8 @@ enum class RestoreAccessCreationMode
     kReplace,
 };
 
+using RestoreUDFCreationMode = RestoreAccessCreationMode;
+
 /// Settings specified in the "SETTINGS" clause of a RESTORE query.
 struct RestoreSettings
 {
@@ -99,6 +101,9 @@ struct RestoreSettings
     /// For example, if a user has a profile assigned and that profile is not in the backup and doesn't exist locally.
     bool allow_unresolved_access_dependencies = false;
 
+    /// How the RESTORE command should handle the case when a user-defined function it's going to restore already exists.
+    RestoreUDFCreationMode create_function = RestoreUDFCreationMode::kCreateIfNotExists;
+
     /// Internal, should not be specified by user.
     bool internal = false;
 
diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp
index b5f81fe3b9f..ecd34b12742 100644
--- a/src/Backups/RestorerFromBackup.cpp
+++ b/src/Backups/RestorerFromBackup.cpp
@@ -86,7 +86,13 @@ namespace
         return (table_name.table == "users") || (table_name.table == "roles") || (table_name.table == "settings_profiles")
             || (table_name.table == "row_policies") || (table_name.table == "quotas");
     }
-}
+
+    /// Whether a specified name corresponds to the "system.functions" table.
+    bool isSystemFunctionsTableName(const QualifiedTableName & table_name)
+    {
+        return (table_name.database == DatabaseCatalog::SYSTEM_DATABASE) && (table_name.table == "functions");
+    }
+}
 
 
 RestorerFromBackup::RestorerFromBackup(
@@ -374,7 +380,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name
             insertAtEnd(*res_table_info.partitions, *partitions);
     }
 
-    if (isSystemAccessTableName(table_name))
+    if (!restore_settings.structure_only && isSystemAccessTableName(table_name))
     {
         if (!access_restore_task)
             access_restore_task = std::make_shared<AccessRestoreTask>(backup, restore_settings, restore_coordination);
@@ -499,9 +505,18 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const
 
     for (const auto & [table_name, table_info] : table_infos)
     {
-        /// Access required to restore ACL system tables is checked separately.
         if (table_info.is_predefined_table)
+        {
+            if (isSystemFunctionsTableName(table_name))
+            {
+                /// CREATE_FUNCTION privilege is required to restore the "system.functions" table.
+                if (!restore_settings.structure_only && backup->hasFiles(table_info.data_path_in_backup))
+                    required_access.emplace_back(AccessType::CREATE_FUNCTION);
+            }
+            /// Privileges required to restore ACL system tables are checked separately
+            /// (see access_restore_task->getRequiredAccess() below).
continue; + } if (table_name.database == DatabaseCatalog::TEMPORARY_DATABASE) { diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index b3f1231bd1a..e346d8b427b 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -9,6 +9,16 @@ #include #include #include +#include +#include +#include +#include +#include +#include +#include + +namespace fs = std::filesystem; + namespace DB { @@ -20,6 +30,11 @@ enum class FunctionOrigin : Int8 EXECUTABLE_USER_DEFINED = 2 }; +namespace ErrorCodes +{ + extern const int CANNOT_RESTORE_TABLE; +} + namespace { template @@ -99,4 +114,63 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c fillRow(res_columns, function_name, UInt64(0), "", FunctionOrigin::EXECUTABLE_USER_DEFINED, user_defined_executable_functions_factory); } } + +void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional &) +{ + const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); + const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames(); + fs::path data_path_in_backup_fs{data_path_in_backup}; + for (const auto & function_name : user_defined_sql_functions_names) + { + auto ast = user_defined_sql_functions_factory.tryGet(function_name); + if (!ast) + continue; + backup_entries_collector.addBackupEntry( + data_path_in_backup_fs / (escapeForFileName(function_name) + ".sql"), + std::make_shared(queryToString(ast))); + } +} + +void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional &) +{ + auto backup = restorer.getBackup(); + + Strings filenames = backup->listFiles(data_path_in_backup); + for (const auto & filename : filenames) + { + if (!filename.ends_with(".sql")) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore user-defined functions, expected *.sql files, got {}", filename); + } + + fs::path data_path_in_backup_fs{data_path_in_backup}; + auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); + const auto & restore_settings = restorer.getRestoreSettings(); + auto context = restorer.getContext(); + + for (const auto & filename : filenames) + { + String escaped_function_name = filename.substr(0, filename.length() - strlen(".sql")); + String function_name = unescapeForFileName(escaped_function_name); + + String filepath = data_path_in_backup_fs / filename; + auto function_def_entry = backup->readFile(filepath); + auto function_def_in = function_def_entry->getReadBuffer(); + String function_def; + readStringUntilEOF(function_def, *function_def_in); + + ParserCreateFunctionQuery parser; + ASTPtr ast = parseQuery( + parser, + function_def.data(), + function_def.data() + function_def.size(), + "in file " + filepath + " from backup " + backup->getName(), + 0, + context->getSettingsRef().max_parser_depth); + + bool replace = (restore_settings.create_function == RestoreUDFCreationMode::kReplace); + bool if_not_exists = (restore_settings.create_function == RestoreUDFCreationMode::kCreateIfNotExists); + user_defined_sql_functions_factory.registerFunction(context, function_name, ast, replace, if_not_exists, true); + } +} + } diff --git a/src/Storages/System/StorageSystemFunctions.h b/src/Storages/System/StorageSystemFunctions.h index 
fdbe79e29a2..606694a4c0b 100644 --- a/src/Storages/System/StorageSystemFunctions.h +++ b/src/Storages/System/StorageSystemFunctions.h @@ -19,6 +19,9 @@ public: static NamesAndTypesList getNamesAndTypes(); + void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; + void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + protected: using IStorageSystemOneBlock::IStorageSystemOneBlock; diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 0a39576ce10..a94964bc8a3 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -777,3 +777,25 @@ def test_projection(): ) == "2\n" ) + + +def test_system_functions(): + instance.query("CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;") + + instance.query("CREATE FUNCTION parity_str AS (n) -> if(n % 2, 'odd', 'even');") + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE system.functions TO {backup_name}") + + instance.query("DROP FUNCTION linear_equation") + instance.query("DROP FUNCTION parity_str") + + instance.query(f"RESTORE TABLE system.functions FROM {backup_name}") + + assert instance.query( + "SELECT number, linear_equation(number, 2, 1) FROM numbers(3)" + ) == TSV([[0, 1], [1, 3], [2, 5]]) + + assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV( + [[0, "even"], [1, "odd"], [2, "even"]] + ) From 031ca28fdc13ad5a398a722d5dd2d6a48b291ffe Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 29 Jun 2022 14:42:23 +0200 Subject: [PATCH 094/121] Add test for partition clause. More checks for data compatibility on restore. 
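
The partition clause covered by the new tests looks as follows (a minimal usage sketch in the style of the integration tests; it assumes the table created by create_and_fill_table() is partitioned so that partition IDs such as '1' and '4' exist):

    # Minimal sketch: back up only two partitions, then restore the whole backup.
    backup_name = new_backup_name()
    instance.query(f"BACKUP TABLE test.table PARTITIONS '1', '4' TO {backup_name}")
    instance.query("DROP TABLE test.table")
    instance.query(f"RESTORE TABLE test.table FROM {backup_name}")
    # Only rows from partitions '1' and '4' are expected to come back.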
--- src/Access/AccessBackup.cpp | 12 ++++++- src/Access/AccessBackup.h | 3 +- src/Backups/BackupEntriesCollector.cpp | 32 +++++++++++-------- src/Backups/BackupEntriesCollector.h | 3 -- src/Backups/DDLAdjustingForBackupVisitor.cpp | 12 +++---- src/Backups/RestorerFromBackup.cpp | 28 ++++++++-------- src/Backups/RestorerFromBackup.h | 8 ++--- src/Storages/IStorage.cpp | 12 +++++-- src/Storages/IStorage.h | 3 ++ src/Storages/MergeTree/MergeTreeData.cpp | 6 +++- src/Storages/MergeTree/MergeTreeData.h | 3 ++ src/Storages/StorageLog.cpp | 26 +++++++++------ src/Storages/StorageMaterializedView.cpp | 7 ++++ src/Storages/StorageMaterializedView.h | 1 + src/Storages/StorageMemory.cpp | 26 +++++++++------ src/Storages/StorageStripeLog.cpp | 26 +++++++++------ .../System/StorageSystemFunctions.cpp | 11 ++++--- src/Storages/System/StorageSystemQuotas.cpp | 10 ++---- src/Storages/System/StorageSystemRoles.cpp | 10 ++---- .../System/StorageSystemRowPolicies.cpp | 10 ++---- .../System/StorageSystemSettingsProfiles.cpp | 10 ++---- src/Storages/System/StorageSystemUsers.cpp | 10 ++---- .../test_backup_restore_new/test.py | 26 +++++++++++++++ 23 files changed, 177 insertions(+), 118 deletions(-) diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index dd8cbbf269b..180f17e3448 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -25,6 +25,7 @@ namespace DB namespace ErrorCodes { + extern const int CANNOT_RESTORE_TABLE; extern const int LOGICAL_ERROR; } @@ -317,12 +318,21 @@ AccessRestoreTask::AccessRestoreTask( AccessRestoreTask::~AccessRestoreTask() = default; -void AccessRestoreTask::addDataPath(const String & data_path) +void AccessRestoreTask::addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs) { if (!data_paths.emplace(data_path).second) return; + if (!backup->hasFiles(data_path)) + return; + String file_path = fs::path{data_path} / "access.txt"; + if (!backup->fileExists(file_path)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + table_name_for_logs.getFullName(), file_path); + } + auto backup_entry = backup->readFile(file_path); auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, file_path); diff --git a/src/Access/AccessBackup.h b/src/Access/AccessBackup.h index 5c70e268eae..3eab9fa1494 100644 --- a/src/Access/AccessBackup.h +++ b/src/Access/AccessBackup.h @@ -17,6 +17,7 @@ class IRestoreCoordination; struct IAccessEntity; using AccessEntityPtr = std::shared_ptr; class AccessRightsElements; +struct QualifiedTableName; /// Makes a backup of access entities of a specified type. @@ -35,7 +36,7 @@ public: ~AccessRestoreTask(); /// Adds a data path to loads access entities from. - void addDataPath(const String & data_path); + void addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs); bool hasDataPath(const String & data_path) const; /// Checks that the current user can do restoring. 
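
One user-visible effect of the stricter checks added in the hunks below is that a table engine without partition support now fails fast at backup time. A hypothetical sketch (the Memory table and the error matching are assumptions; query_and_get_error is the usual integration-test helper):

    # Hypothetical sketch: a PARTITIONS clause on an engine without partition support.
    instance.query("CREATE TABLE test.mem (x UInt8) ENGINE=Memory")
    backup_name = new_backup_name()
    error = instance.query_and_get_error(
        f"BACKUP TABLE test.mem PARTITIONS '1' TO {backup_name}"
    )
    assert "doesn't support partitions" in error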
diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 691e72a3f21..30f1ecd53cd 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -478,6 +478,7 @@ void BackupEntriesCollector::gatherTablesMetadata() for (const auto & db_table : db_tables) { const auto & create_table_query = db_table.first; + const auto & storage = db_table.second; const auto & create = create_table_query->as(); String table_name = create.getTable(); @@ -499,14 +500,28 @@ void BackupEntriesCollector::gatherTablesMetadata() /// Add information to `table_infos`. auto & res_table_info = table_infos[QualifiedTableName{database_name, table_name}]; res_table_info.database = database; - res_table_info.storage = db_table.second; + res_table_info.storage = storage; res_table_info.create_table_query = create_table_query; res_table_info.metadata_path_in_backup = metadata_path_in_backup; res_table_info.data_path_in_backup = data_path_in_backup; - auto partitions_it = database_info.tables.find(table_name); - if (partitions_it != database_info.tables.end()) - res_table_info.partitions = partitions_it->second.partitions; + if (!backup_settings.structure_only) + { + auto it = database_info.tables.find(table_name); + if (it != database_info.tables.end()) + { + const auto & partitions = it->second.partitions; + if (partitions && !storage->supportsBackupPartition()) + { + throw Exception( + ErrorCodes::CANNOT_BACKUP_TABLE, + "Table engine {} doesn't support partitions, cannot backup {}", + storage->getName(), + tableNameWithTypeToString(database_name, table_name, false)); + } + res_table_info.partitions = partitions; + } + } } } } @@ -724,13 +739,4 @@ void BackupEntriesCollector::runPostTasks() } } -void BackupEntriesCollector::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine) -{ - throw Exception( - ErrorCodes::CANNOT_BACKUP_TABLE, - "Table engine {} doesn't support partitions, cannot backup table {}", - table_engine, - storage_id.getFullTableName()); -} - } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 46a2bd1863a..3b1260f6c99 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -50,9 +50,6 @@ public: /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts. void addPostTask(std::function task); - /// Throws an exception that a specified table engine doesn't support partitions. - [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); - private: void calculateRootPathInBackup(); diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 7dd58629a49..2dedc677df8 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -34,9 +34,6 @@ namespace /// back into "{uuid}", and also we probably can remove the zookeeper path and replica name if they're default. /// So we're kind of reverting what we had done to the table's definition in registerStorageMergeTree.cpp before we created this table. 
auto & create = data.create_query->as(); - if (create.uuid == UUIDHelpers::Nil) - return; - auto & engine = *storage.engine; auto * engine_args_ast = typeid_cast(engine.arguments.get()); @@ -54,9 +51,12 @@ namespace { String & zookeeper_path_arg = zookeeper_path_ast->value.get(); String & replica_name_arg = replica_name_ast->value.get(); - String table_uuid_str = toString(create.uuid); - if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) - zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); + if (create.uuid != UUIDHelpers::Nil) + { + String table_uuid_str = toString(create.uuid); + if (size_t uuid_pos = zookeeper_path_arg.find(table_uuid_str); uuid_pos != String::npos) + zookeeper_path_arg.replace(uuid_pos, table_uuid_str.size(), "{uuid}"); + } const auto & config = data.global_context->getConfigRef(); if ((zookeeper_path_arg == StorageReplicatedMergeTree::getDefaultZooKeeperPath(config)) && (replica_name_arg == StorageReplicatedMergeTree::getDefaultReplicaName(config)) diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index ecd34b12742..247660bbce4 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -370,8 +370,9 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name TableInfo & res_table_info = table_infos[table_name]; res_table_info.create_table_query = create_table_query; res_table_info.is_predefined_table = DatabaseCatalog::instance().isPredefinedTable(StorageID{table_name.database, table_name.table}); - res_table_info.data_path_in_backup = data_path_in_backup; res_table_info.dependencies = getDependenciesSetFromCreateQuery(context->getGlobalContext(), table_name, create_table_query); + res_table_info.has_data = backup->hasFiles(data_path_in_backup); + res_table_info.data_path_in_backup = data_path_in_backup; if (partitions) { @@ -384,7 +385,7 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name { if (!access_restore_task) access_restore_task = std::make_shared(backup, restore_settings, restore_coordination); - access_restore_task->addDataPath(data_path_in_backup); + access_restore_task->addDataPath(data_path_in_backup, table_name); } } @@ -510,7 +511,7 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const if (isSystemFunctionsTableName(table_name)) { /// CREATE_FUNCTION privilege is required to restore the "system.functions" table. 
- if (!restore_settings.structure_only && backup->hasFiles(table_info.data_path_in_backup)) + if (!restore_settings.structure_only && table_info.has_data) required_access.emplace_back(AccessType::CREATE_FUNCTION); } /// Privileges required to restore ACL system tables are checked separately @@ -538,8 +539,7 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const flags |= AccessType::CREATE_TABLE; } - if (!restore_settings.structure_only && !create.is_dictionary && !create.is_ordinary_view - && backup->hasFiles(table_info.data_path_in_backup)) + if (!restore_settings.structure_only && table_info.has_data) { flags |= AccessType::INSERT; } @@ -685,6 +685,15 @@ void RestorerFromBackup::createTables() { const auto & data_path_in_backup = table_info.data_path_in_backup; const auto & partitions = table_info.partitions; + if (partitions && !storage->supportsBackupPartition()) + { + throw Exception( + ErrorCodes::CANNOT_RESTORE_TABLE, + "Table engine {} doesn't support partitions, cannot restore {}", + storage->getName(), + tableNameWithTypeToString(table_name.database, table_name.table, false)); + } + storage->restoreDataFromBackup(*this, data_path_in_backup, partitions); } } @@ -795,15 +804,6 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks() return res_tasks; } -void RestorerFromBackup::throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine) -{ - throw Exception( - ErrorCodes::CANNOT_RESTORE_TABLE, - "Table engine {} doesn't support partitions, cannot table {}", - table_engine, - storage_id.getFullTableName()); -} - void RestorerFromBackup::throwTableIsNotEmpty(const StorageID & storage_id) { throw Exception( diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index d6f4eabd0dd..e47aca0e69f 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -59,9 +59,6 @@ public: /// Checks that a specified path is already registered to be used for restoring access control. void checkPathInBackupIsRegisteredToRestoreAccess(const String & path); - /// Throws an exception that a specified table engine doesn't support partitions. - [[noreturn]] static void throwPartitionsNotSupported(const StorageID & storage_id, const String & table_engine); - /// Throws an exception that a specified table is already non-empty. 
[[noreturn]] static void throwTableIsNotEmpty(const StorageID & storage_id); @@ -104,9 +101,10 @@ private: { ASTPtr create_table_query; bool is_predefined_table = false; - std::optional partitions; - std::filesystem::path data_path_in_backup; std::unordered_set dependencies; + bool has_data = false; + std::filesystem::path data_path_in_backup; + std::optional partitions; bool created = false; StoragePtr storage; TableLockHolder table_lock; diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 0d0a242e4fb..fc29769790d 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -14,7 +14,8 @@ #include #include #include -#include +#include +#include namespace DB @@ -24,7 +25,7 @@ namespace ErrorCodes extern const int TABLE_IS_DROPPED; extern const int NOT_IMPLEMENTED; extern const int DEADLOCK_AVOIDED; - extern const int INCONSISTENT_METADATA_FOR_BACKUP; + extern const int CANNOT_RESTORE_TABLE; } bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadataPtr & metadata_snapshot) const @@ -257,8 +258,13 @@ void IStorage::backupData(BackupEntriesCollector &, const String &, const std::o { } -void IStorage::restoreDataFromBackup(RestorerFromBackup &, const String &, const std::optional &) +void IStorage::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional &) { + /// If an inherited class doesn't override restoreDataFromBackup() that means it doesn't backup any data. + auto filenames = restorer.getBackup()->listFiles(data_path_in_backup); + if (!filenames.empty()) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: Folder {} in backup must be empty", + getStorageID().getFullTableName(), data_path_in_backup); } std::string PrewhereInfo::dump() const diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 34170785896..e265c94eb11 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -232,6 +232,9 @@ public: /// Extracts data from the backup and put it to the storage. virtual void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions); + /// Returns true if the storage supports backup/restore for specific partitions. 
+ virtual bool supportsBackupPartition() const { return false; } + private: StorageID storage_id; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 02d4d9ebe8b..d4b2c8d177e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -150,6 +150,7 @@ namespace ErrorCodes extern const int SUPPORT_IS_DISABLED; extern const int TOO_MANY_SIMULTANEOUS_QUERIES; extern const int INCORRECT_QUERY; + extern const int CANNOT_RESTORE_TABLE; } static void checkSampleExpression(const StorageInMemoryMetadata & metadata, bool allow_sampling_expression_not_in_primary_key, bool check_sample_column_is_correct) @@ -4092,7 +4093,10 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const { const auto part_info = MergeTreePartInfo::tryParsePartName(part_name, format_version); if (!part_info) - continue; + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't look like the name of a part", + getStorageID().getFullTableName(), String{data_path_in_backup_fs / part_name}); + } if (partition_ids && !partition_ids->contains(part_info->partition_id)) continue; diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 4fd7dd7d3cf..9aa14367f80 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -716,6 +716,9 @@ public: /// Extract data from the backup and put it to the storage. void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + /// Returns true if the storage supports backup/restore for specific partitions. + bool supportsBackupPartition() const override { return true; } + /// Moves partition to specified Disk void movePartitionToDisk(const ASTPtr & partition, const String & name, bool moving_part, ContextPtr context); diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp index 1324ebf5b28..ac6ead54016 100644 --- a/src/Storages/StorageLog.cpp +++ b/src/Storages/StorageLog.cpp @@ -51,6 +51,7 @@ namespace ErrorCodes extern const int SIZES_OF_MARKS_FILES_ARE_INCONSISTENT; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; + extern const int CANNOT_RESTORE_TABLE; } /// NOTE: The lock `StorageLog::rwlock` is NOT kept locked while reading, @@ -921,11 +922,8 @@ std::optional StorageLog::totalBytes(const Settings &) const return total_bytes; } -void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto lock_timeout = getLockTimeout(backup_entries_collector.getContext()); loadMarks(lock_timeout); @@ -986,16 +984,16 @@ void StorageLog::backupData(BackupEntriesCollector & backup_entries_collector, c } } -void StorageLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); + auto 
backup = restorer.getBackup(); + if (!backup->hasFiles(data_path_in_backup)) + return; if (!num_data_files) return; - auto backup = restorer.getBackup(); - if (!restorer.isNonEmptyTableAllowed() && total_bytes && backup->hasFiles(data_path_in_backup)) + if (!restorer.isNonEmptyTableAllowed() && total_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); auto lock_timeout = getLockTimeout(restorer.getContext()); @@ -1024,6 +1022,11 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p for (const auto & data_file : data_files) { String file_path_in_backup = data_path_in_backup_fs / fileName(data_file.path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } auto backup_entry = backup->readFile(file_path_in_backup); auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file.path, max_compress_block_size, WriteMode::Append); @@ -1035,6 +1038,11 @@ void StorageLog::restoreDataImpl(const BackupPtr & backup, const String & data_p /// Append marks. size_t num_extra_marks = 0; String file_path_in_backup = data_path_in_backup_fs / fileName(marks_file_path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } size_t file_size = backup->getFileSize(file_path_in_backup); if (file_size % (num_data_files * sizeof(Mark)) != 0) throw Exception("Size of marks file is inconsistent", ErrorCodes::SIZES_OF_MARKS_FILES_ARE_INCONSISTENT); diff --git a/src/Storages/StorageMaterializedView.cpp b/src/Storages/StorageMaterializedView.cpp index 2ece0af3359..b01415f9590 100644 --- a/src/Storages/StorageMaterializedView.cpp +++ b/src/Storages/StorageMaterializedView.cpp @@ -421,6 +421,13 @@ void StorageMaterializedView::restoreDataFromBackup(RestorerFromBackup & restore return getTargetTable()->restoreDataFromBackup(restorer, data_path_in_backup, partitions); } +bool StorageMaterializedView::supportsBackupPartition() const +{ + if (hasInnerTable()) + return getTargetTable()->supportsBackupPartition(); + return false; +} + std::optional StorageMaterializedView::totalRows(const Settings & settings) const { if (hasInnerTable()) diff --git a/src/Storages/StorageMaterializedView.h b/src/Storages/StorageMaterializedView.h index 0adf394876c..1d8808b302e 100644 --- a/src/Storages/StorageMaterializedView.h +++ b/src/Storages/StorageMaterializedView.h @@ -97,6 +97,7 @@ public: void backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) override; void restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) override; + bool supportsBackupPartition() const override; std::optional totalRows(const Settings & settings) const override; std::optional totalBytes(const Settings & settings) const override; diff --git a/src/Storages/StorageMemory.cpp b/src/Storages/StorageMemory.cpp index 5de8c3bda43..7baecaa594f 100644 --- a/src/Storages/StorageMemory.cpp +++ b/src/Storages/StorageMemory.cpp @@ -38,6 +38,7 @@ namespace DB namespace ErrorCodes { extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; + extern const int CANNOT_RESTORE_TABLE; } @@ -479,24 +480,21 @@ namespace }; } -void 
StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void StorageMemory::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto max_compress_block_size = backup_entries_collector.getContext()->getSettingsRef().max_compress_block_size; backup_entries_collector.addBackupEntries( std::make_shared(getInMemoryMetadataPtr(), data.get(), data_path_in_backup, max_compress_block_size) ->getBackupEntries()); } -void StorageMemory::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageMemory::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto backup = restorer.getBackup(); - if (!restorer.isNonEmptyTableAllowed() && total_size_bytes && backup->hasFiles(data_path_in_backup)) + if (!backup->hasFiles(data_path_in_backup)) + return; + + if (!restorer.isNonEmptyTableAllowed() && total_size_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); restorer.addDataRestoreTask( @@ -514,6 +512,11 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat IndexForNativeFormat index; { String index_file_path = data_path_in_backup_fs / "index.mrk"; + if (!backup->fileExists(index_file_path)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), index_file_path); + } auto backup_entry = backup->readFile(index_file_path); auto in = backup_entry->getReadBuffer(); CompressedReadBuffer compressed_in{*in}; @@ -526,6 +529,11 @@ void StorageMemory::restoreDataImpl(const BackupPtr & backup, const String & dat size_t new_rows = 0; { String data_file_path = data_path_in_backup_fs / "data.bin"; + if (!backup->fileExists(data_file_path)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), data_file_path); + } auto backup_entry = backup->readFile(data_file_path); std::unique_ptr in = backup_entry->getReadBuffer(); std::optional temp_data_copy; diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp index d569a81c4a7..2033d33a33d 100644 --- a/src/Storages/StorageStripeLog.cpp +++ b/src/Storages/StorageStripeLog.cpp @@ -55,6 +55,7 @@ namespace ErrorCodes extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; extern const int INCORRECT_FILE_NAME; extern const int TIMEOUT_EXCEEDED; + extern const int CANNOT_RESTORE_TABLE; } @@ -527,11 +528,8 @@ std::optional StorageStripeLog::totalBytes(const Settings &) const } -void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) +void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - auto lock_timeout = getLockTimeout(backup_entries_collector.getContext()); loadIndices(lock_timeout); @@ -589,13 
+587,13 @@ void StorageStripeLog::backupData(BackupEntriesCollector & backup_entries_collec data_path_in_backup_fs / "count.txt", std::make_unique(toString(num_rows))); } -void StorageStripeLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) +void StorageStripeLog::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto backup = restorer.getBackup(); - if (!restorer.isNonEmptyTableAllowed() && total_bytes && backup->hasFiles(data_path_in_backup)) + if (!backup->hasFiles(data_path_in_backup)) + return; + + if (!restorer.isNonEmptyTableAllowed() && total_bytes) RestorerFromBackup::throwTableIsNotEmpty(getStorageID()); auto lock_timeout = getLockTimeout(restorer.getContext()); @@ -624,6 +622,11 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & auto old_data_size = file_checker.getFileSize(data_file_path); { String file_path_in_backup = data_path_in_backup_fs / fileName(data_file_path); + if (!backup->fileExists(file_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), file_path_in_backup); + } auto backup_entry = backup->readFile(file_path_in_backup); auto in = backup_entry->getReadBuffer(); auto out = disk->writeFile(data_file_path, max_compress_block_size, WriteMode::Append); @@ -634,6 +637,11 @@ void StorageStripeLog::restoreDataImpl(const BackupPtr & backup, const String & { String index_path_in_backup = data_path_in_backup_fs / fileName(index_file_path); IndexForNativeFormat extra_indices; + if (!backup->fileExists(index_path_in_backup)) + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", + getStorageID().getFullTableName(), index_path_in_backup); + } auto backup_entry = backup->readFile(index_path_in_backup); auto index_in = backup_entry->getReadBuffer(); CompressedReadBuffer index_compressed_in{*index_in}; diff --git a/src/Storages/System/StorageSystemFunctions.cpp b/src/Storages/System/StorageSystemFunctions.cpp index e346d8b427b..e2bc699d3f1 100644 --- a/src/Storages/System/StorageSystemFunctions.cpp +++ b/src/Storages/System/StorageSystemFunctions.cpp @@ -115,7 +115,7 @@ void StorageSystemFunctions::fillData(MutableColumns & res_columns, ContextPtr c } } -void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional &) +void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); const auto & user_defined_sql_functions_names = user_defined_sql_functions_factory.getAllRegisteredNames(); @@ -131,18 +131,21 @@ void StorageSystemFunctions::backupData(BackupEntriesCollector & backup_entries_ } } -void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional &) +void StorageSystemFunctions::restoreDataFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { auto backup = restorer.getBackup(); + fs::path 
data_path_in_backup_fs{data_path_in_backup}; Strings filenames = backup->listFiles(data_path_in_backup); for (const auto & filename : filenames) { if (!filename.ends_with(".sql")) - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore user-defined functions, expected *.sql files, got {}", filename); + { + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't have the extension .sql", + getStorageID().getFullTableName(), String{data_path_in_backup_fs / filename}); + } } - fs::path data_path_in_backup_fs{data_path_in_backup}; auto & user_defined_sql_functions_factory = UserDefinedSQLFunctionFactory::instance(); const auto & restore_settings = restorer.getRestoreSettings(); auto context = restorer.getContext(); diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index efe6b93fe57..27cf64cbcb4 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -122,21 +122,15 @@ void StorageSystemQuotas::fillData(MutableColumns & res_columns, ContextPtr cont } void StorageSystemQuotas::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); access_control.backup(backup_entries_collector, AccessEntityType::QUOTA, data_path_in_backup); } void StorageSystemQuotas::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); access_control.restore(restorer, data_path_in_backup); } diff --git a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index ff3490ce8ba..22597530835 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -60,21 +60,15 @@ void StorageSystemRoles::fillData(MutableColumns & res_columns, ContextPtr conte } void StorageSystemRoles::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); access_control.backup(backup_entries_collector, AccessEntityType::ROLE, data_path_in_backup); } void StorageSystemRoles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); 
access_control.restore(restorer, data_path_in_backup); } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 680f90adff7..1e13654d188 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -140,21 +140,15 @@ void StorageSystemRowPolicies::fillData(MutableColumns & res_columns, ContextPtr } void StorageSystemRowPolicies::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); access_control.backup(backup_entries_collector, AccessEntityType::ROW_POLICY, data_path_in_backup); } void StorageSystemRowPolicies::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); access_control.restore(restorer, data_path_in_backup); } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index 7c3ccfe863a..aaf5bedadd0 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -87,21 +87,15 @@ void StorageSystemSettingsProfiles::fillData(MutableColumns & res_columns, Conte } void StorageSystemSettingsProfiles::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); access_control.backup(backup_entries_collector, AccessEntityType::SETTINGS_PROFILE, data_path_in_backup); } void StorageSystemSettingsProfiles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); access_control.restore(restorer, data_path_in_backup); } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index f2cae638d45..d8dc1722a91 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -215,21 +215,15 @@ void StorageSystemUsers::fillData(MutableColumns & res_columns, ContextPtr conte } void StorageSystemUsers::backupData( - BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & partitions) + BackupEntriesCollector & backup_entries_collector, const String & 
data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - BackupEntriesCollector::throwPartitionsNotSupported(getStorageID(), getName()); - const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); access_control.backup(backup_entries_collector, AccessEntityType::USER, data_path_in_backup); } void StorageSystemUsers::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & partitions) + RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) { - if (partitions) - RestorerFromBackup::throwPartitionsNotSupported(getStorageID(), getName()); - auto & access_control = restorer.getContext()->getAccessControl(); access_control.restore(restorer, data_path_in_backup); } diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index a94964bc8a3..7894daf5bad 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -799,3 +799,29 @@ def test_system_functions(): assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV( [[0, "even"], [1, "odd"], [2, "even"]] ) + + +def test_backup_partition(): + create_and_fill_table(n=30) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table PARTITIONS '1', '4' TO {backup_name}") + + instance.query("DROP TABLE test.table") + + instance.query(f"RESTORE TABLE test.table FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV([[1, '1'], [4, '4'], [11, '11'], [14, '14'], [21, '21'], [24, '24']]) + + +def test_restore_partition(): + create_and_fill_table(n=30) + + backup_name = new_backup_name() + instance.query(f"BACKUP TABLE test.table TO {backup_name}") + + instance.query("DROP TABLE test.table") + + instance.query(f"RESTORE TABLE test.table PARTITIONS '2', '3' FROM {backup_name}") + + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV([[2, '2'], [3, '3'], [12, '12'], [13, '13'], [22, '22'], [23, '23']]) From 5456bde4a2e6f40754d52de07b1c6115ef261079 Mon Sep 17 00:00:00 2001 From: Vitaly Baranov Date: Wed, 29 Jun 2022 22:44:05 +0200 Subject: [PATCH 095/121] Improve gathering metadata for storing ACL in backups. 
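An illustrative sketch (not part of the patch): the core of this change is that replicated access storages sharing one ZooKeeper path register the backup file paths they would write, a single writing host is recorded, and that host emits the same backup entry under every registered path. Assuming the IBackupCoordination methods added below, a driver could look like:

    /// zk_path, host_id and write_entry are hypothetical placeholders.
    coordination->addReplicatedAccessPath(zk_path, "shard1/replica1/access01.txt");
    coordination->setReplicatedAccessHost(zk_path, host_id);   /// the last registered host wins
    if (coordination->getReplicatedAccessHost(zk_path) == host_id)
        for (const auto & path : coordination->getReplicatedAccessPaths(zk_path))
            write_entry(path);   /// one host writes one entry per registered path
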
--- src/Access/AccessBackup.cpp | 75 ++++++++++--------- src/Access/AccessBackup.h | 36 +++++---- src/Access/AccessControl.cpp | 15 +--- src/Access/AccessControl.h | 7 +- src/Access/DiskAccessStorage.cpp | 22 ++++-- src/Access/DiskAccessStorage.h | 2 +- src/Access/IAccessStorage.cpp | 30 +++++--- src/Access/IAccessStorage.h | 11 ++- src/Access/MemoryAccessStorage.cpp | 22 ++++-- src/Access/MemoryAccessStorage.h | 2 +- src/Access/MultipleAccessStorage.cpp | 22 +++--- src/Access/MultipleAccessStorage.h | 4 +- src/Access/ReplicatedAccessStorage.cpp | 62 +++++++++++++-- src/Access/ReplicatedAccessStorage.h | 3 +- src/Backups/BackupCoordinationDistributed.cpp | 43 +++++++++++ src/Backups/BackupCoordinationDistributed.h | 6 ++ src/Backups/BackupCoordinationLocal.cpp | 31 ++++++++ src/Backups/BackupCoordinationLocal.h | 8 ++ src/Backups/BackupEntriesCollector.cpp | 12 +++ src/Backups/BackupEntriesCollector.h | 6 ++ src/Backups/IBackupCoordination.h | 8 ++ src/Backups/RestorerFromBackup.cpp | 32 ++++---- src/Backups/RestorerFromBackup.h | 11 ++- src/Storages/System/StorageSystemQuotas.cpp | 6 +- src/Storages/System/StorageSystemRoles.cpp | 6 +- .../System/StorageSystemRowPolicies.cpp | 6 +- .../System/StorageSystemSettingsProfiles.cpp | 6 +- src/Storages/System/StorageSystemUsers.cpp | 6 +- 28 files changed, 337 insertions(+), 163 deletions(-) diff --git a/src/Access/AccessBackup.cpp b/src/Access/AccessBackup.cpp index 180f17e3448..bd1344a6f14 100644 --- a/src/Access/AccessBackup.cpp +++ b/src/Access/AccessBackup.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -226,7 +227,7 @@ namespace } } - AccessRightsElements getRequiredAccessToRestore(const std::unordered_map & entities) + AccessRightsElements getRequiredAccessToRestore(const std::vector> & entities) { AccessRightsElements res; for (const auto & entity : entities | boost::adaptors::map_values) @@ -295,74 +296,78 @@ namespace } } -void backupAccessEntities( - BackupEntriesCollector & backup_entries_collector, + +std::pair makeBackupEntryForAccess( + const std::vector> access_entities, const String & data_path_in_backup, - const AccessControl & access_control, - AccessEntityType type) + size_t counter, + const AccessControl & access_control) { - auto entities = access_control.readAllForBackup(type, backup_entries_collector.getBackupSettings()); - auto dependencies = readDependenciesNamesAndTypes(findDependencies(entities), access_control); + auto dependencies = readDependenciesNamesAndTypes(findDependencies(access_entities), access_control); AccessEntitiesInBackup ab; - boost::range::copy(entities, std::inserter(ab.entities, ab.entities.end())); + boost::range::copy(access_entities, std::inserter(ab.entities, ab.entities.end())); ab.dependencies = std::move(dependencies); - backup_entries_collector.addBackupEntry(fs::path{data_path_in_backup} / "access.txt", ab.toBackupEntry()); + String filename = fmt::format("access{:02}.txt", counter + 1); /// access01.txt, access02.txt, ... 
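    /// Illustrative only (not part of the patch): with plain fmt semantics the counter
    /// zero-pads to two digits, e.g. fmt::format("access{:02}.txt", 1) == "access01.txt"
    /// and fmt::format("access{:02}.txt", 12) == "access12.txt".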
+ String file_path_in_backup = fs::path{data_path_in_backup} / filename; + return {file_path_in_backup, ab.toBackupEntry()}; } -AccessRestoreTask::AccessRestoreTask( - const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr restore_coordination_) - : backup(backup_), restore_settings(restore_settings_), restore_coordination(restore_coordination_) +AccessRestorerFromBackup::AccessRestorerFromBackup( + const BackupPtr & backup_, const RestoreSettings & restore_settings_) + : backup(backup_), allow_unresolved_access_dependencies(restore_settings_.allow_unresolved_access_dependencies) { } -AccessRestoreTask::~AccessRestoreTask() = default; +AccessRestorerFromBackup::~AccessRestorerFromBackup() = default; -void AccessRestoreTask::addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs) +void AccessRestorerFromBackup::addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs) { if (!data_paths.emplace(data_path).second) return; - if (!backup->hasFiles(data_path)) + fs::path data_path_in_backup_fs = data_path; + Strings filenames = backup->listFiles(data_path); + if (filenames.empty()) return; - String file_path = fs::path{data_path} / "access.txt"; - if (!backup->fileExists(file_path)) + for (const String & filename : filenames) { - throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File {} in backup is required", - table_name_for_logs.getFullName(), file_path); + if (!filename.starts_with("access") || !filename.ends_with(".txt")) + throw Exception(ErrorCodes::CANNOT_RESTORE_TABLE, "Cannot restore table {}: File name {} doesn't match the wildcard \"access*.txt\"", + table_name_for_logs.getFullName(), String{data_path_in_backup_fs / filename}); } - auto backup_entry = backup->readFile(file_path); - auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, file_path); + ::sort(filenames.begin(), filenames.end()); + + for (const String & filename : filenames) + { + String filepath_in_backup = data_path_in_backup_fs / filename; + auto backup_entry = backup->readFile(filepath_in_backup); + auto ab = AccessEntitiesInBackup::fromBackupEntry(*backup_entry, filepath_in_backup); + + boost::range::copy(ab.entities, std::back_inserter(entities)); + boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end())); + } - boost::range::copy(ab.entities, std::inserter(entities, entities.end())); - boost::range::copy(ab.dependencies, std::inserter(dependencies, dependencies.end())); for (const auto & id : entities | boost::adaptors::map_keys) dependencies.erase(id); } -bool AccessRestoreTask::hasDataPath(const String & data_path) const -{ - return data_paths.contains(data_path); -} - -AccessRightsElements AccessRestoreTask::getRequiredAccess() const +AccessRightsElements AccessRestorerFromBackup::getRequiredAccess() const { return getRequiredAccessToRestore(entities); } -void AccessRestoreTask::restore(AccessControl & access_control) const +std::vector> AccessRestorerFromBackup::getAccessEntities(const AccessControl & access_control) const { - auto old_to_new_ids = resolveDependencies(dependencies, access_control, restore_settings.allow_unresolved_access_dependencies); + auto new_entities = entities; - std::vector> new_entities; - boost::range::copy(entities, std::back_inserter(new_entities)); + auto old_to_new_ids = resolveDependencies(dependencies, access_control, allow_unresolved_access_dependencies); generateRandomIDs(new_entities, old_to_new_ids); - 
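    /// Sketch of the intent (illustrative, not from the patch): generateRandomIDs() gives
    /// every restored entity a fresh UUID and records the old -> new mapping in
    /// old_to_new_ids, so that replaceDependencies() below can rewrite cross-references
    /// between entities (e.g. a user's granted roles) to the new IDs.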
     replaceDependencies(new_entities, old_to_new_ids);
-    access_control.insertFromBackup(new_entities, restore_settings, restore_coordination);
+    return new_entities;
 }
 
 }
diff --git a/src/Access/AccessBackup.h b/src/Access/AccessBackup.h
index 3eab9fa1494..74f889e2c00 100644
--- a/src/Access/AccessBackup.h
+++ b/src/Access/AccessBackup.h
@@ -1,6 +1,6 @@
 #pragma once
 
-#include
+#include
 #include
 #include
@@ -9,47 +9,45 @@ namespace DB
 {
 class AccessControl;
 enum class AccessEntityType;
-class BackupEntriesCollector;
-class RestorerFromBackup;
-class IBackup;
-using BackupPtr = std::shared_ptr<const IBackup>;
-class IRestoreCoordination;
 struct IAccessEntity;
 using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
 class AccessRightsElements;
+class IBackup;
+using BackupPtr = std::shared_ptr<const IBackup>;
+class IBackupEntry;
+using BackupEntryPtr = std::shared_ptr<const IBackupEntry>;
+struct RestoreSettings;
 struct QualifiedTableName;
 
 /// Makes a backup of access entities of a specified type.
-void backupAccessEntities(
-    BackupEntriesCollector & backup_entries_collector,
+std::pair<String, BackupEntryPtr> makeBackupEntryForAccess(
+    const std::vector<std::pair<UUID, AccessEntityPtr>> access_entities,
     const String & data_path_in_backup,
-    const AccessControl & access_control,
-    AccessEntityType type);
+    size_t counter,
+    const AccessControl & access_control);
+
 
 /// Restores access entities from a backup.
-class AccessRestoreTask
+class AccessRestorerFromBackup
 {
 public:
-    AccessRestoreTask(
-        const BackupPtr & backup_, const RestoreSettings & restore_settings_, std::shared_ptr<IRestoreCoordination> restore_coordination_);
-    ~AccessRestoreTask();
+    AccessRestorerFromBackup(const BackupPtr & backup_, const RestoreSettings & restore_settings_);
+    ~AccessRestorerFromBackup();
 
     /// Adds a data path to load access entities from.
     void addDataPath(const String & data_path, const QualifiedTableName & table_name_for_logs);
-    bool hasDataPath(const String & data_path) const;
 
     /// Checks that the current user can do restoring.
     AccessRightsElements getRequiredAccess() const;
 
     /// Returns all access entities loaded from all the paths added by addDataPath().
-    void restore(AccessControl & access_control) const;
+    std::vector<std::pair<UUID, AccessEntityPtr>> getAccessEntities(const AccessControl & access_control) const;
 
 private:
     BackupPtr backup;
-    RestoreSettings restore_settings;
-    std::shared_ptr<IRestoreCoordination> restore_coordination;
-    std::unordered_map<UUID, AccessEntityPtr> entities;
+    bool allow_unresolved_access_dependencies = false;
+    std::vector<std::pair<UUID, AccessEntityPtr>> entities;
     std::unordered_map<UUID, std::pair<String, AccessEntityType>> dependencies;
     std::unordered_set<String> data_paths;
 };
diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp
index b5b22caa400..7152820b5bc 100644
--- a/src/Access/AccessControl.cpp
+++ b/src/Access/AccessControl.cpp
@@ -459,20 +459,9 @@ UUID AccessControl::authenticate(const Credentials & credentials, const Poco::Ne
     }
 }
 
-void AccessControl::backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const
+void AccessControl::restoreFromBackup(RestorerFromBackup & restorer)
 {
-    backupAccessEntities(backup_entries_collector, data_path_in_backup, *this, type);
-}
-
-void AccessControl::restore(RestorerFromBackup & restorer, const String & data_path_in_backup)
-{
-    /// The restorer must already know about `data_path_in_backup`, but let's check.
- restorer.checkPathInBackupIsRegisteredToRestoreAccess(data_path_in_backup); -} - -void AccessControl::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) -{ - MultipleAccessStorage::insertFromBackup(entities_from_backup, restore_settings, restore_coordination); + MultipleAccessStorage::restoreFromBackup(restorer); changes_notifier->sendNotifications(); } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 90ad2895122..22ff0a488f7 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -42,8 +42,6 @@ class ClientInfo; class ExternalAuthenticators; class AccessChangesNotifier; struct Settings; -class BackupEntriesCollector; -class RestorerFromBackup; /// Manages access control entities. @@ -121,8 +119,7 @@ public: UUID authenticate(const Credentials & credentials, const Poco::Net::IPAddress & address) const; /// Makes a backup of access entities. - void backup(BackupEntriesCollector & backup_entries_collector, AccessEntityType type, const String & data_path_in_backup) const; - static void restore(RestorerFromBackup & restorer, const String & data_path_in_backup); + void restoreFromBackup(RestorerFromBackup & restorer) override; void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); @@ -198,8 +195,6 @@ public: /// Gets manager of notifications. AccessChangesNotifier & getChangesNotifier(); - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; - private: class ContextAccessCache; class CustomSettingsPrefixes; diff --git a/src/Access/DiskAccessStorage.cpp b/src/Access/DiskAccessStorage.cpp index 231e325196d..994abc7b53a 100644 --- a/src/Access/DiskAccessStorage.cpp +++ b/src/Access/DiskAccessStorage.cpp @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include @@ -650,19 +651,24 @@ void DiskAccessStorage::deleteAccessEntityOnDisk(const UUID & id) const } -void DiskAccessStorage::insertFromBackup( - const std::vector> & entities_from_backup, - const RestoreSettings & restore_settings, - std::shared_ptr) +void DiskAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/DiskAccessStorage.h b/src/Access/DiskAccessStorage.h index 1bdefbf82f9..d3bd61ff353 100644 --- a/src/Access/DiskAccessStorage.h +++ b/src/Access/DiskAccessStorage.h @@ -30,7 +30,7 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void 
insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index 476b1674ce1..230045c7749 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -2,9 +2,12 @@ #include #include #include +#include +#include #include #include #include +#include #include #include #include @@ -520,29 +523,34 @@ bool IAccessStorage::isAddressAllowed(const User & user, const Poco::Net::IPAddr } -bool IAccessStorage::isRestoreAllowed() const -{ - return isBackupAllowed() && !isReadOnly(); -} - -std::vector> IAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings &) const +void IAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const { if (!isBackupAllowed()) throwBackupNotAllowed(); - auto res = readAllWithIDs(type); - boost::range::remove_erase_if(res, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); - return res; + auto entities = readAllWithIDs(type); + boost::range::remove_erase_if(entities, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); + + auto backup_entry = makeBackupEntryForAccess( + entities, + data_path_in_backup, + backup_entries_collector.getAccessCounter(type), + backup_entries_collector.getContext()->getAccessControl()); + + backup_entries_collector.addBackupEntry(backup_entry); } -void IAccessStorage::insertFromBackup(const std::vector> &, const RestoreSettings &, std::shared_ptr) + +void IAccessStorage::restoreFromBackup(RestorerFromBackup &) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "insertFromBackup() is not implemented in {}", getStorageType()); + + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "restoreFromBackup() is not implemented in {}", getStorageType()); } + UUID IAccessStorage::generateRandomID() { static Poco::UUIDGenerator generator; diff --git a/src/Access/IAccessStorage.h b/src/Access/IAccessStorage.h index 7b43309204d..394d3ed6358 100644 --- a/src/Access/IAccessStorage.h +++ b/src/Access/IAccessStorage.h @@ -18,9 +18,8 @@ struct User; class Credentials; class ExternalAuthenticators; enum class AuthenticationType; -struct BackupSettings; -struct RestoreSettings; -class IRestoreCoordination; +class BackupEntriesCollector; +class RestorerFromBackup; /// Contains entities, i.e. instances of classes derived from IAccessEntity. /// The implementations of this class MUST be thread-safe. @@ -158,11 +157,11 @@ public: /// Returns true if this storage can be stored to or restored from a backup. virtual bool isBackupAllowed() const { return false; } - virtual bool isRestoreAllowed() const; + virtual bool isRestoreAllowed() const { return isBackupAllowed() && !isReadOnly(); } /// Makes a backup of this access storage. 
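    /// (A typical implementation - see IAccessStorage.cpp below - reads all entities of the
    ///  given type, drops those with isBackupAllowed() == false, and serializes the rest via
    ///  makeBackupEntryForAccess(); this is a summary, not normative documentation.)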
- virtual std::vector> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const; - virtual void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination); + virtual void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const; + virtual void restoreFromBackup(RestorerFromBackup & restorer); protected: virtual std::optional findImpl(AccessEntityType type, const String & name) const = 0; diff --git a/src/Access/MemoryAccessStorage.cpp b/src/Access/MemoryAccessStorage.cpp index ad877e263ad..60669532e25 100644 --- a/src/Access/MemoryAccessStorage.cpp +++ b/src/Access/MemoryAccessStorage.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -272,19 +273,24 @@ void MemoryAccessStorage::setAll(const std::vector> & entities_from_backup, - const RestoreSettings & restore_settings, - std::shared_ptr) +void MemoryAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/MemoryAccessStorage.h b/src/Access/MemoryAccessStorage.h index aa4cd08252c..5c8d33ed443 100644 --- a/src/Access/MemoryAccessStorage.h +++ b/src/Access/MemoryAccessStorage.h @@ -29,7 +29,7 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/MultipleAccessStorage.cpp b/src/Access/MultipleAccessStorage.cpp index 6f654f68e57..30c3865c1be 100644 --- a/src/Access/MultipleAccessStorage.cpp +++ b/src/Access/MultipleAccessStorage.cpp @@ -383,40 +383,38 @@ bool MultipleAccessStorage::isRestoreAllowed() const } -std::vector> MultipleAccessStorage::readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const +void MultipleAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const { - std::vector> res; auto storages = getStoragesInternal(); - size_t count = 0; + bool allowed = false; for (const auto & storage : *storages) { if (storage->isBackupAllowed()) { - insertAtEnd(res, storage->readAllForBackup(type, backup_settings)); - ++count; + storage->backup(backup_entries_collector, data_path_in_backup, type); + 
allowed = true; } } - if (!count) + if (!allowed) throwBackupNotAllowed(); - - return res; } - -void MultipleAccessStorage::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) +void MultipleAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { auto storages = getStoragesInternal(); + for (const auto & storage : *storages) { if (storage->isRestoreAllowed()) { - storage->insertFromBackup(entities_from_backup, restore_settings, restore_coordination); + storage->restoreFromBackup(restorer); return; } } - throwRestoreNotAllowed(); + + throwBackupNotAllowed(); } } diff --git a/src/Access/MultipleAccessStorage.h b/src/Access/MultipleAccessStorage.h index 2eacdafd3f3..58cf09fd0ff 100644 --- a/src/Access/MultipleAccessStorage.h +++ b/src/Access/MultipleAccessStorage.h @@ -45,8 +45,8 @@ public: bool isBackupAllowed() const override; bool isRestoreAllowed() const override; - std::vector> readAllForBackup(AccessEntityType type, const BackupSettings & backup_settings) const override; - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override; + void restoreFromBackup(RestorerFromBackup & restorer) override; protected: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/ReplicatedAccessStorage.cpp b/src/Access/ReplicatedAccessStorage.cpp index 6a9d716c2f9..f6c8d0a7153 100644 --- a/src/Access/ReplicatedAccessStorage.cpp +++ b/src/Access/ReplicatedAccessStorage.cpp @@ -2,10 +2,14 @@ #include #include #include +#include +#include +#include #include +#include #include #include -#include +#include #include #include #include @@ -13,6 +17,7 @@ #include #include #include +#include namespace DB @@ -613,19 +618,64 @@ AccessEntityPtr ReplicatedAccessStorage::readImpl(const UUID & id, bool throw_if return entry.entity; } -void ReplicatedAccessStorage::insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) + +void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const +{ + if (!isBackupAllowed()) + throwBackupNotAllowed(); + + auto entities = readAllWithIDs(type); + boost::range::remove_erase_if(entities, [](const std::pair & x) { return !x.second->isBackupAllowed(); }); + + auto backup_entry_with_path = makeBackupEntryForAccess( + entities, + data_path_in_backup, + backup_entries_collector.getAccessCounter(type), + backup_entries_collector.getContext()->getAccessControl()); + + auto backup_coordination = backup_entries_collector.getBackupCoordination(); + backup_coordination->addReplicatedAccessPath(zookeeper_path, backup_entry_with_path.first); + String current_host_id = backup_entries_collector.getBackupSettings().host_id; + backup_coordination->setReplicatedAccessHost(zookeeper_path, current_host_id); + + backup_entries_collector.addPostTask( + [backup_entry = backup_entry_with_path.second, + zookeeper_path = zookeeper_path, + current_host_id, + &backup_entries_collector, + backup_coordination] + { + if (current_host_id != backup_coordination->getReplicatedAccessHost(zookeeper_path)) + return; + + for (const String & path : 
backup_coordination->getReplicatedAccessPaths(zookeeper_path)) + backup_entries_collector.addBackupEntry(path, backup_entry); + }); +} + + +void ReplicatedAccessStorage::restoreFromBackup(RestorerFromBackup & restorer) { if (!isRestoreAllowed()) throwRestoreNotAllowed(); + auto restore_coordination = restorer.getRestoreCoordination(); if (!restore_coordination->acquireReplicatedAccessStorage(zookeeper_path)) return; - bool replace_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kReplace); - bool throw_if_exists = (restore_settings.create_access == RestoreAccessCreationMode::kCreate); + auto entities = restorer.getAccessEntitiesToRestore(); + if (entities.empty()) + return; - for (const auto & [id, entity] : entities_from_backup) - insertWithID(id, entity, replace_if_exists, throw_if_exists); + auto create_access = restorer.getRestoreSettings().create_access; + bool replace_if_exists = (create_access == RestoreAccessCreationMode::kReplace); + bool throw_if_exists = (create_access == RestoreAccessCreationMode::kCreate); + + restorer.addDataRestoreTask([this, entities = std::move(entities), replace_if_exists, throw_if_exists] + { + for (const auto & [id, entity] : entities) + insertWithID(id, entity, replace_if_exists, throw_if_exists); + }); } } diff --git a/src/Access/ReplicatedAccessStorage.h b/src/Access/ReplicatedAccessStorage.h index 7cccdc1793f..6311e2ac7c0 100644 --- a/src/Access/ReplicatedAccessStorage.h +++ b/src/Access/ReplicatedAccessStorage.h @@ -38,7 +38,8 @@ public: bool exists(const UUID & id) const override; bool isBackupAllowed() const override { return backup_allowed; } - void insertFromBackup(const std::vector> & entities_from_backup, const RestoreSettings & restore_settings, std::shared_ptr restore_coordination) override; + void backup(BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, AccessEntityType type) const override; + void restoreFromBackup(RestorerFromBackup & restorer) override; private: String zookeeper_path; diff --git a/src/Backups/BackupCoordinationDistributed.cpp b/src/Backups/BackupCoordinationDistributed.cpp index 9df17bf434e..5b932229e71 100644 --- a/src/Backups/BackupCoordinationDistributed.cpp +++ b/src/Backups/BackupCoordinationDistributed.cpp @@ -145,6 +145,8 @@ void BackupCoordinationDistributed::createRootNodes() zookeeper->createIfNotExists(zookeeper_path, ""); zookeeper->createIfNotExists(zookeeper_path + "/repl_part_names", ""); zookeeper->createIfNotExists(zookeeper_path + "/repl_data_paths", ""); + zookeeper->createIfNotExists(zookeeper_path + "/repl_access_host", ""); + zookeeper->createIfNotExists(zookeeper_path + "/repl_access_paths", ""); zookeeper->createIfNotExists(zookeeper_path + "/file_names", ""); zookeeper->createIfNotExists(zookeeper_path + "/file_infos", ""); zookeeper->createIfNotExists(zookeeper_path + "/archive_suffixes", ""); @@ -245,6 +247,47 @@ void BackupCoordinationDistributed::prepareReplicatedPartNames() const } +void BackupCoordinationDistributed::addReplicatedAccessPath(const String & access_zk_path, const String & file_path) +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_paths/" + escapeForFileName(access_zk_path); + zookeeper->createIfNotExists(path, ""); + path += "/" + escapeForFileName(file_path); + zookeeper->createIfNotExists(path, ""); +} + +Strings BackupCoordinationDistributed::getReplicatedAccessPaths(const String & access_zk_path) const +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + 
"/repl_access_paths/" + escapeForFileName(access_zk_path); + Strings children = zookeeper->getChildren(path); + Strings file_paths; + file_paths.reserve(children.size()); + for (const String & child : children) + file_paths.push_back(unescapeForFileName(child)); + return file_paths; +} + +void BackupCoordinationDistributed::setReplicatedAccessHost(const String & access_zk_path, const String & host_id) +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path); + auto code = zookeeper->tryCreate(path, host_id, zkutil::CreateMode::Persistent); + if ((code != Coordination::Error::ZOK) && (code != Coordination::Error::ZNODEEXISTS)) + throw zkutil::KeeperException(code, path); + + if (code == Coordination::Error::ZNODEEXISTS) + zookeeper->set(path, host_id); +} + +String BackupCoordinationDistributed::getReplicatedAccessHost(const String & access_zk_path) const +{ + auto zookeeper = get_zookeeper(); + String path = zookeeper_path + "/repl_access_host/" + escapeForFileName(access_zk_path); + return zookeeper->get(path); +} + + void BackupCoordinationDistributed::addFileInfo(const FileInfo & file_info, bool & is_data_file_required) { auto zookeeper = get_zookeeper(); diff --git a/src/Backups/BackupCoordinationDistributed.h b/src/Backups/BackupCoordinationDistributed.h index 172c69edb20..813132bd0b8 100644 --- a/src/Backups/BackupCoordinationDistributed.h +++ b/src/Backups/BackupCoordinationDistributed.h @@ -29,6 +29,12 @@ public: void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; Strings getReplicatedDataPaths(const String & table_shared_id) const override; + void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override; + Strings getReplicatedAccessPaths(const String & access_zk_path) const override; + + void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override; + String getReplicatedAccessHost(const String & access_zk_path) const override; + void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 7fd6fec6c33..158988cf8b8 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -56,6 +56,37 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha } +void BackupCoordinationLocal::addReplicatedAccessPath(const String & access_zk_path, const String & file_path) +{ + std::lock_guard lock{mutex}; + replicated_access_paths[access_zk_path].push_back(file_path); +} + +Strings BackupCoordinationLocal::getReplicatedAccessPaths(const String & access_zk_path) const +{ + std::lock_guard lock{mutex}; + auto it = replicated_access_paths.find(access_zk_path); + if (it == replicated_access_paths.end()) + return {}; + return it->second; +} + +void BackupCoordinationLocal::setReplicatedAccessHost(const String & access_zk_path, const String & host_id) +{ + std::lock_guard lock{mutex}; + replicated_access_hosts[access_zk_path] = host_id; +} + +String BackupCoordinationLocal::getReplicatedAccessHost(const String & access_zk_path) const +{ + std::lock_guard lock{mutex}; + auto it = replicated_access_hosts.find(access_zk_path); + if (it == replicated_access_hosts.end()) + return {}; + return it->second; +} + + void BackupCoordinationLocal::addFileInfo(const FileInfo & file_info, bool & 
is_data_file_required) { std::lock_guard lock{mutex}; diff --git a/src/Backups/BackupCoordinationLocal.h b/src/Backups/BackupCoordinationLocal.h index 519c721c208..dcd6505a438 100644 --- a/src/Backups/BackupCoordinationLocal.h +++ b/src/Backups/BackupCoordinationLocal.h @@ -30,6 +30,12 @@ public: void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override; Strings getReplicatedDataPaths(const String & table_shared_id) const override; + void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) override; + Strings getReplicatedAccessPaths(const String & access_zk_path) const override; + + void setReplicatedAccessHost(const String & access_zk_path, const String & host_id) override; + String getReplicatedAccessHost(const String & access_zk_path) const override; + void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override; void updateFileInfo(const FileInfo & file_info) override; @@ -48,6 +54,8 @@ private: mutable std::mutex mutex; BackupCoordinationReplicatedPartNames replicated_part_names TSA_GUARDED_BY(mutex); std::unordered_map replicated_data_paths TSA_GUARDED_BY(mutex); + std::unordered_map replicated_access_paths TSA_GUARDED_BY(mutex); + std::unordered_map replicated_access_hosts TSA_GUARDED_BY(mutex); std::map file_names TSA_GUARDED_BY(mutex); /// Should be ordered alphabetically, see listFiles(). For empty files we assume checksum = 0. std::map file_infos TSA_GUARDED_BY(mutex); /// Information about files. Without empty files. Strings archive_suffixes TSA_GUARDED_BY(mutex); diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index 30f1ecd53cd..e237140cf2b 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -706,6 +707,11 @@ void BackupEntriesCollector::addBackupEntry(const String & file_name, BackupEntr backup_entries.emplace_back(file_name, backup_entry); } +void BackupEntriesCollector::addBackupEntry(const std::pair & backup_entry) +{ + addBackupEntry(backup_entry.first, backup_entry.second); +} + void BackupEntriesCollector::addBackupEntries(const BackupEntries & backup_entries_) { if (current_status == kWritingBackupStatus) @@ -739,4 +745,10 @@ void BackupEntriesCollector::runPostTasks() } } +size_t BackupEntriesCollector::getAccessCounter(AccessEntityType type) +{ + access_counters.resize(static_cast(AccessEntityType::MAX)); + return access_counters[static_cast(type)]++; +} + } diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 3b1260f6c99..0772fe84b26 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -19,6 +19,7 @@ class IBackupCoordination; class IDatabase; using DatabasePtr = std::shared_ptr; struct StorageID; +enum class AccessEntityType; /// Collects backup entries for all databases and tables which should be put to a backup. class BackupEntriesCollector : private boost::noncopyable @@ -42,6 +43,7 @@ public: /// Adds a backup entry which will be later returned by run(). /// These function can be called by implementations of IStorage::backupData() in inherited storage classes. 
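    /// A hypothetical call from a storage, for illustration only (the entry type is assumed):
    ///     backup_entries_collector.addBackupEntry(
    ///         fs::path{data_path_in_backup} / "count.txt",
    ///         std::make_unique<BackupEntryFromMemory>(toString(num_rows)));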
void addBackupEntry(const String & file_name, BackupEntryPtr backup_entry); + void addBackupEntry(const std::pair & backup_entry); void addBackupEntries(const BackupEntries & backup_entries_); void addBackupEntries(BackupEntries && backup_entries_); @@ -50,6 +52,9 @@ public: /// 1) we need to join (in a backup) the data of replicated tables gathered on different hosts. void addPostTask(std::function task); + /// Returns an incremental counter used to backup access control. + size_t getAccessCounter(AccessEntityType type); + private: void calculateRootPathInBackup(); @@ -130,6 +135,7 @@ private: BackupEntries backup_entries; std::queue> post_tasks; + std::vector access_counters; }; } diff --git a/src/Backups/IBackupCoordination.h b/src/Backups/IBackupCoordination.h index 0ae150c2b47..b4c5c7b3d88 100644 --- a/src/Backups/IBackupCoordination.h +++ b/src/Backups/IBackupCoordination.h @@ -44,6 +44,14 @@ public: /// Returns all the data paths in backup added for a replicated table (see also addReplicatedDataPath()). virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0; + /// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage. + virtual void addReplicatedAccessPath(const String & access_zk_path, const String & file_path) = 0; + virtual Strings getReplicatedAccessPaths(const String & access_zk_path) const = 0; + + /// Sets the host id of a host storing access entities of a ReplicatedAccessStorage to backup. + virtual void setReplicatedAccessHost(const String & access_zk_path, const String & host) = 0; + virtual String getReplicatedAccessHost(const String & access_zk_path) const = 0; + struct FileInfo { String file_name; diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 247660bbce4..6013eed7919 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -383,9 +383,9 @@ void RestorerFromBackup::findTableInBackup(const QualifiedTableName & table_name if (!restore_settings.structure_only && isSystemAccessTableName(table_name)) { - if (!access_restore_task) - access_restore_task = std::make_shared(backup, restore_settings, restore_coordination); - access_restore_task->addDataPath(data_path_in_backup, table_name); + if (!access_restorer) + access_restorer = std::make_unique(backup, restore_settings); + access_restorer->addDataPath(data_path_in_backup, table_name); } } @@ -555,8 +555,8 @@ void RestorerFromBackup::checkAccessForObjectsFoundInBackup() const required_access.emplace_back(flags, table_name.database, table_name.table); } - if (access_restore_task) - insertAtEnd(required_access, access_restore_task->getRequiredAccess()); + if (access_restorer) + insertAtEnd(required_access, access_restorer->getRequiredAccess()); /// We convert to AccessRights and back to check access rights in a predictable way /// (some elements could be duplicated or not sorted). 
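    /// An illustrative sketch of that normalization, assuming the usual AccessRights API
    /// (grant() plus getElements()); this is not part of the patch:
    ///     AccessRights normalized;
    ///     normalized.grant(required_access);
    ///     required_access = normalized.getElements();  /// now deduplicated and ordered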
@@ -770,15 +770,9 @@ void RestorerFromBackup::addDataRestoreTasks(DataRestoreTasks && new_tasks)
     insertAtEnd(data_restore_tasks, std::move(new_tasks));
 }
 
-void RestorerFromBackup::checkPathInBackupIsRegisteredToRestoreAccess(const String & path)
-{
-    if (!access_restore_task || !access_restore_task->hasDataPath(path))
-        throw Exception(ErrorCodes::LOGICAL_ERROR, "Path to restore access was not added");
-}
-
 RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks()
 {
-    if (data_restore_tasks.empty() && !access_restore_task)
+    if (data_restore_tasks.empty())
         return {};
 
     LOG_TRACE(log, "Will insert data to tables");
@@ -798,12 +792,20 @@ RestorerFromBackup::DataRestoreTasks RestorerFromBackup::getDataRestoreTasks()
     for (const auto & task : data_restore_tasks)
         res_tasks.push_back([task, storages, table_locks] { task(); });
 
-    if (access_restore_task)
-        res_tasks.push_back([task = access_restore_task, access_control = &context->getAccessControl()] { task->restore(*access_control); });
-
     return res_tasks;
 }
 
+std::vector<std::pair<UUID, AccessEntityPtr>> RestorerFromBackup::getAccessEntitiesToRestore()
+{
+    if (!access_restorer || access_restored)
+        return {};
+
+    /// getAccessEntitiesToRestore() returns the entities only the first time it is called
+    /// (we don't want to restore the same entities again).
+    access_restored = true;
+
+    return access_restorer->getAccessEntities(context->getAccessControl());
+}
+
 void RestorerFromBackup::throwTableIsNotEmpty(const StorageID & storage_id)
 {
     throw Exception(
diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h
index e47aca0e69f..3d814f67713 100644
--- a/src/Backups/RestorerFromBackup.h
+++ b/src/Backups/RestorerFromBackup.h
@@ -15,7 +15,9 @@ class IBackup;
 using BackupPtr = std::shared_ptr<const IBackup>;
 class IRestoreCoordination;
 struct StorageID;
-class AccessRestoreTask;
+class AccessRestorerFromBackup;
+struct IAccessEntity;
+using AccessEntityPtr = std::shared_ptr<const IAccessEntity>;
 
 /// Restores the definition of databases and tables and prepares tasks to restore the data of the tables.
 class RestorerFromBackup : private boost::noncopyable
@@ -56,8 +58,8 @@ public:
     void addDataRestoreTask(DataRestoreTask && new_task);
     void addDataRestoreTasks(DataRestoreTasks && new_tasks);
 
-    /// Checks that a specified path is already registered to be used for restoring access control.
-    void checkPathInBackupIsRegisteredToRestoreAccess(const String & path);
+    /// Returns the list of access entities to restore.
+    std::vector<std::pair<UUID, AccessEntityPtr>> getAccessEntitiesToRestore();
 
     /// Throws an exception that a specified table is already non-empty.
[[noreturn]] static void throwTableIsNotEmpty(const StorageID & storage_id); @@ -116,7 +118,8 @@ private: std::unordered_map database_infos; std::map table_infos; std::vector data_restore_tasks; - std::shared_ptr access_restore_task; + std::unique_ptr access_restorer; + bool access_restored = false; }; } diff --git a/src/Storages/System/StorageSystemQuotas.cpp b/src/Storages/System/StorageSystemQuotas.cpp index 27cf64cbcb4..046db151684 100644 --- a/src/Storages/System/StorageSystemQuotas.cpp +++ b/src/Storages/System/StorageSystemQuotas.cpp @@ -125,14 +125,14 @@ void StorageSystemQuotas::backupData( BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::QUOTA, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::QUOTA); } void StorageSystemQuotas::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemRoles.cpp b/src/Storages/System/StorageSystemRoles.cpp index 22597530835..e5b8d53ce7e 100644 --- a/src/Storages/System/StorageSystemRoles.cpp +++ b/src/Storages/System/StorageSystemRoles.cpp @@ -63,14 +63,14 @@ void StorageSystemRoles::backupData( BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::ROLE, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::ROLE); } void StorageSystemRoles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemRowPolicies.cpp b/src/Storages/System/StorageSystemRowPolicies.cpp index 1e13654d188..064f610730d 100644 --- a/src/Storages/System/StorageSystemRowPolicies.cpp +++ b/src/Storages/System/StorageSystemRowPolicies.cpp @@ -143,14 +143,14 @@ void StorageSystemRowPolicies::backupData( BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::ROW_POLICY, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::ROW_POLICY); } void StorageSystemRowPolicies::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) + 
RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemSettingsProfiles.cpp b/src/Storages/System/StorageSystemSettingsProfiles.cpp index aaf5bedadd0..d03848ba68b 100644 --- a/src/Storages/System/StorageSystemSettingsProfiles.cpp +++ b/src/Storages/System/StorageSystemSettingsProfiles.cpp @@ -90,14 +90,14 @@ void StorageSystemSettingsProfiles::backupData( BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::SETTINGS_PROFILE, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::SETTINGS_PROFILE); } void StorageSystemSettingsProfiles::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } diff --git a/src/Storages/System/StorageSystemUsers.cpp b/src/Storages/System/StorageSystemUsers.cpp index d8dc1722a91..be56abfa3e8 100644 --- a/src/Storages/System/StorageSystemUsers.cpp +++ b/src/Storages/System/StorageSystemUsers.cpp @@ -218,14 +218,14 @@ void StorageSystemUsers::backupData( BackupEntriesCollector & backup_entries_collector, const String & data_path_in_backup, const std::optional & /* partitions */) { const auto & access_control = backup_entries_collector.getContext()->getAccessControl(); - access_control.backup(backup_entries_collector, AccessEntityType::USER, data_path_in_backup); + access_control.backup(backup_entries_collector, data_path_in_backup, AccessEntityType::USER); } void StorageSystemUsers::restoreDataFromBackup( - RestorerFromBackup & restorer, const String & data_path_in_backup, const std::optional & /* partitions */) + RestorerFromBackup & restorer, const String & /* data_path_in_backup */, const std::optional & /* partitions */) { auto & access_control = restorer.getContext()->getAccessControl(); - access_control.restore(restorer, data_path_in_backup); + access_control.restoreFromBackup(restorer); } } From 109c9bcbd54712654e141ab7b4e3e79477d699e5 Mon Sep 17 00:00:00 2001 From: loyispa Date: Thu, 30 Jun 2022 18:37:19 +0800 Subject: [PATCH 096/121] Fix typo --- src/Storages/LiveView/StorageLiveView.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/LiveView/StorageLiveView.cpp b/src/Storages/LiveView/StorageLiveView.cpp index 75b2f981389..6a079aa832f 100644 --- a/src/Storages/LiveView/StorageLiveView.cpp +++ b/src/Storages/LiveView/StorageLiveView.cpp @@ -328,7 +328,7 @@ StorageLiveView::StorageLiveView( blocks_metadata_ptr = std::make_shared(); active_ptr = std::make_shared(true); - periodic_refresh_task = getContext()->getSchedulePool().createTask("LieViewPeriodicRefreshTask", [this]{ periodicRefreshTaskFunc(); }); + periodic_refresh_task = getContext()->getSchedulePool().createTask("LiveViewPeriodicRefreshTask", 
[this]{ periodicRefreshTaskFunc(); }); periodic_refresh_task->deactivate(); } From c38841a044bdbffad927ab820b8959117739d5dc Mon Sep 17 00:00:00 2001 From: Evgeny Kruglov Date: Thu, 30 Jun 2022 12:47:35 +0200 Subject: [PATCH 097/121] Fixes for clickhouse/clickhouse-keeper docker image (#38462) --- docker/keeper/Dockerfile | 2 +- docker/keeper/entrypoint.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/keeper/Dockerfile b/docker/keeper/Dockerfile index 068377e8f8c..282392bd98a 100644 --- a/docker/keeper/Dockerfile +++ b/docker/keeper/Dockerfile @@ -67,7 +67,7 @@ RUN arch=${TARGETARCH:-amd64} \ && chmod ugo+Xrw -R /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper -EXPOSE 2181 10181 44444 +EXPOSE 2181 10181 44444 9181 VOLUME /var/lib/clickhouse /var/log/clickhouse-keeper /etc/clickhouse-keeper diff --git a/docker/keeper/entrypoint.sh b/docker/keeper/entrypoint.sh index 86e56e88aa9..939cd941aeb 100644 --- a/docker/keeper/entrypoint.sh +++ b/docker/keeper/entrypoint.sh @@ -31,7 +31,7 @@ else DO_CHOWN=0 fi -KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/config.yaml}" +KEEPER_CONFIG="${KEEPER_CONFIG:-/etc/clickhouse-keeper/keeper_config.xml}" if [ -f "$KEEPER_CONFIG" ] && ! $gosu test -f "$KEEPER_CONFIG" -a -r "$KEEPER_CONFIG"; then echo "Configuration file '$KEEPER_CONFIG' isn't readable by user with id '$USER'" From f443cf66f0b69136d31a2ca9c6091c30f5557e3d Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 29 Jun 2022 19:19:47 +0200 Subject: [PATCH 098/121] CacheDictionary simplify update queue --- .../CacheDictionaryUpdateQueue.cpp | 24 +++++++++++-------- src/Dictionaries/CacheDictionaryUpdateQueue.h | 6 ++--- 2 files changed, 17 insertions(+), 13 deletions(-) diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.cpp b/src/Dictionaries/CacheDictionaryUpdateQueue.cpp index aee1f0de2f6..1fdaf10c57c 100644 --- a/src/Dictionaries/CacheDictionaryUpdateQueue.cpp +++ b/src/Dictionaries/CacheDictionaryUpdateQueue.cpp @@ -68,9 +68,9 @@ void CacheDictionaryUpdateQueue::waitForCurrentUpdateFinish if (update_queue.isFinished()) throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "CacheDictionaryUpdateQueue finished"); - std::unique_lock update_lock(update_mutex); + std::unique_lock update_lock(update_unit_ptr->update_mutex); - bool result = is_update_finished.wait_for( + bool result = update_unit_ptr->is_update_finished.wait_for( update_lock, std::chrono::milliseconds(configuration.query_wait_timeout_milliseconds), [&] @@ -133,19 +133,23 @@ void CacheDictionaryUpdateQueue::updateThreadFunction() /// Update update_func(unit_to_update); - /// Notify thread about finished updating the bunch of ids - /// where their own ids were included. - std::lock_guard lock(update_mutex); + { + /// Notify thread about finished updating the bunch of ids + /// where their own ids were included. + std::lock_guard lock(unit_to_update->update_mutex); + unit_to_update->is_done = true; + } - unit_to_update->is_done = true; - is_update_finished.notify_all(); + unit_to_update->is_update_finished.notify_all(); } catch (...) 
{ - std::lock_guard lock(update_mutex); + { + std::lock_guard lock(unit_to_update->update_mutex); + unit_to_update->current_exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing) + } - unit_to_update->current_exception = std::current_exception(); // NOLINT(bugprone-throw-keyword-missing) - is_update_finished.notify_all(); + unit_to_update->is_update_finished.notify_all(); } } } diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.h b/src/Dictionaries/CacheDictionaryUpdateQueue.h index d6a195ca7b8..48598cb4548 100644 --- a/src/Dictionaries/CacheDictionaryUpdateQueue.h +++ b/src/Dictionaries/CacheDictionaryUpdateQueue.h @@ -77,6 +77,9 @@ private: std::atomic is_done{false}; std::exception_ptr current_exception{nullptr}; /// NOLINT + mutable std::mutex update_mutex; + mutable std::condition_variable is_update_finished; + /// While UpdateUnit is alive, it is accounted in update_queue size. CurrentMetrics::Increment alive_batch{CurrentMetrics::CacheDictionaryUpdateQueueBatches}; CurrentMetrics::Increment alive_keys; @@ -159,9 +162,6 @@ private: UpdateQueue update_queue; ThreadPool update_pool; - - mutable std::mutex update_mutex; - mutable std::condition_variable is_update_finished; }; extern template class CacheDictionaryUpdateQueue; From a47355877ef35c706603db8c1b251470233bf0cf Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 30 Jun 2022 13:58:26 +0300 Subject: [PATCH 099/121] Add revision() function (#38555) It can be useful to match versions, since in some tables (system.trace_log) there is only revision column. P.S. came to this when was digging into stress reports from CI. P.P.S. case insensitive by analogy with version(). Signed-off-by: Azat Khuzhin --- src/Functions/registerFunctionsMiscellaneous.cpp | 2 ++ src/Functions/serverConstants.cpp | 15 +++++++++++++++ .../01773_case_sensitive_revision.reference | 1 + .../0_stateless/01773_case_sensitive_revision.sql | 1 + 4 files changed, 19 insertions(+) create mode 100644 tests/queries/0_stateless/01773_case_sensitive_revision.reference create mode 100644 tests/queries/0_stateless/01773_case_sensitive_revision.sql diff --git a/src/Functions/registerFunctionsMiscellaneous.cpp b/src/Functions/registerFunctionsMiscellaneous.cpp index 9cd9c70da16..755d38409a6 100644 --- a/src/Functions/registerFunctionsMiscellaneous.cpp +++ b/src/Functions/registerFunctionsMiscellaneous.cpp @@ -40,6 +40,7 @@ void registerFunctionIsNaN(FunctionFactory &); void registerFunctionIfNotFinite(FunctionFactory &); void registerFunctionThrowIf(FunctionFactory &); void registerFunctionVersion(FunctionFactory &); +void registerFunctionRevision(FunctionFactory &); void registerFunctionBuildId(FunctionFactory &); void registerFunctionUptime(FunctionFactory &); void registerFunctionTimezone(FunctionFactory &); @@ -129,6 +130,7 @@ void registerFunctionsMiscellaneous(FunctionFactory & factory) registerFunctionIfNotFinite(factory); registerFunctionThrowIf(factory); registerFunctionVersion(factory); + registerFunctionRevision(factory); registerFunctionBuildId(factory); registerFunctionUptime(factory); registerFunctionTimezone(factory); diff --git a/src/Functions/serverConstants.cpp b/src/Functions/serverConstants.cpp index 12134cf4e4c..e809ec7c298 100644 --- a/src/Functions/serverConstants.cpp +++ b/src/Functions/serverConstants.cpp @@ -6,6 +6,7 @@ #include #include #include +#include #if defined(OS_LINUX) # include @@ -88,6 +89,15 @@ namespace explicit FunctionVersion(ContextPtr context) : FunctionConstantBase(VERSION_STRING, 
context->isDistributed()) {} }; + /// revision() - returns the current revision. + class FunctionRevision : public FunctionConstantBase + { + public: + static constexpr auto name = "revision"; + static FunctionPtr create(ContextPtr context) { return std::make_shared(context); } + explicit FunctionRevision(ContextPtr context) : FunctionConstantBase(ClickHouseRevision::getVersionRevision(), context->isDistributed()) {} + }; + class FunctionZooKeeperSessionUptime : public FunctionConstantBase { public: @@ -151,6 +161,11 @@ void registerFunctionVersion(FunctionFactory & factory) factory.registerFunction(FunctionFactory::CaseInsensitive); } +void registerFunctionRevision(FunctionFactory & factory) +{ + factory.registerFunction(FunctionFactory::CaseInsensitive); +} + void registerFunctionZooKeeperSessionUptime(FunctionFactory & factory) { factory.registerFunction(); diff --git a/tests/queries/0_stateless/01773_case_sensitive_revision.reference b/tests/queries/0_stateless/01773_case_sensitive_revision.reference new file mode 100644 index 00000000000..72749c905a3 --- /dev/null +++ b/tests/queries/0_stateless/01773_case_sensitive_revision.reference @@ -0,0 +1 @@ +1 1 1 diff --git a/tests/queries/0_stateless/01773_case_sensitive_revision.sql b/tests/queries/0_stateless/01773_case_sensitive_revision.sql new file mode 100644 index 00000000000..16970daf61b --- /dev/null +++ b/tests/queries/0_stateless/01773_case_sensitive_revision.sql @@ -0,0 +1 @@ +SELECT revision()=Revision(), REVISION()=Revision(), revisiON()=reVision(); From 95687f2d01e9b17fd19815bc6f6b712ec3075504 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 30 Jun 2022 13:15:35 +0200 Subject: [PATCH 100/121] CacheDictionaryUpdateUnit make update state non atomic --- src/Dictionaries/CacheDictionaryUpdateQueue.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Dictionaries/CacheDictionaryUpdateQueue.h b/src/Dictionaries/CacheDictionaryUpdateQueue.h index 48598cb4548..8db5c4a59df 100644 --- a/src/Dictionaries/CacheDictionaryUpdateQueue.h +++ b/src/Dictionaries/CacheDictionaryUpdateQueue.h @@ -74,12 +74,12 @@ private: template friend class CacheDictionaryUpdateQueue; - std::atomic is_done{false}; - std::exception_ptr current_exception{nullptr}; /// NOLINT - mutable std::mutex update_mutex; mutable std::condition_variable is_update_finished; + bool is_done{false}; + std::exception_ptr current_exception{nullptr}; /// NOLINT + /// While UpdateUnit is alive, it is accounted in update_queue size. 
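// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): FunctionConstantBase is a
// class template -- in the source these classes derive from e.g.
// FunctionConstantBase<FunctionVersion, String, DataTypeString>; the
// angle-bracketed arguments did not survive in this patch text. Adding a
// constant function such as revision() then only takes a name, a create()
// factory and a constructor passing the constant, e.g. (hypothetical
// function):
//
//     class FunctionFortyTwo : public FunctionConstantBase<FunctionFortyTwo, UInt64, DataTypeUInt64>
//     {
//     public:
//         static constexpr auto name = "fortyTwo";
//         static FunctionPtr create(ContextPtr context) { return std::make_shared<FunctionFortyTwo>(context); }
//         explicit FunctionFortyTwo(ContextPtr context) : FunctionConstantBase(42, context->isDistributed()) {}
//     };
// ---------------------------------------------------------------------------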
CurrentMetrics::Increment alive_batch{CurrentMetrics::CacheDictionaryUpdateQueueBatches}; CurrentMetrics::Increment alive_keys; From 1e3b5bfcb75de0cff8bccc3f9342c6fe66a9cf5c Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 30 Jun 2022 11:43:25 +0000 Subject: [PATCH 101/121] Fix test 00233_position_function_family --- src/Common/StringSearcher.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/StringSearcher.h b/src/Common/StringSearcher.h index 7d669ddd369..b8f8a9d3a88 100644 --- a/src/Common/StringSearcher.h +++ b/src/Common/StringSearcher.h @@ -853,7 +853,8 @@ struct StdLibASCIIStringSearcher : public StringSearcherBase else { return std::search( - haystack_start, haystack_end, needle_start, needle_end); + haystack_start, haystack_end, needle_start, needle_end, + [](char c1, char c2) {return c1 == c2;}); } } From d31ca4c4b6debe974f8f70272eb8e755cbbd1738 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 30 Jun 2022 14:49:11 +0200 Subject: [PATCH 102/121] Fixed tests --- src/Columns/ColumnVector.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index 7cfb90d4371..60423e2b0fe 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -633,7 +633,8 @@ namespace while (sse_copy_counter) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_copy), *(reinterpret_cast(data_copy_begin_ptr))); + __m128i copy_batch = _mm_loadu_si128(reinterpret_cast(data_copy_begin_ptr)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data_copy), copy_batch); result_data_copy += 4; data_copy_begin_ptr += 4; --sse_copy_counter; @@ -673,7 +674,8 @@ namespace while (sse_copy_counter) { - _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data), *(reinterpret_cast(data_copy_begin_ptr))); + __m128i copy_batch = _mm_loadu_si128(reinterpret_cast(data_copy_begin_ptr)); + _mm_storeu_si128(reinterpret_cast<__m128i *>(result_data), copy_batch); result_data += 4; data_copy_begin_ptr += 4; --sse_copy_counter; From cbcd740dc19db2ba502af5422bd805188bd7eaaa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Mar=C3=ADn?= Date: Thu, 30 Jun 2022 15:10:09 +0200 Subject: [PATCH 103/121] Adapt some more nodes to avoid issues with pre-22.4 replicas --- src/Storages/StorageReplicatedMergeTree.cpp | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index fac11db2ab9..e44013f39ca 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -598,11 +598,19 @@ void StorageReplicatedMergeTree::createNewZooKeeperNodes() auto zookeeper = getZooKeeper(); std::vector futures; - /// We need to confirm /quorum exists here although it's called under createTableIfNotExists because in older CH releases (pre 22.4) - /// it was created here, so if metadata creation is done by an older replica the node might not exists when reaching this call - futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum", String(), zkutil::CreateMode::Persistent)); - futures.push_back(zookeeper->asyncTryCreateNoThrow(zookeeper_path + "/quorum/parallel", String(), zkutil::CreateMode::Persistent)); + /// These 4 nodes used to be created in createNewZookeeperNodes() and they were moved to createTable() + /// This means that if the first replica creating the table metadata has an older version of CH (22.3 or previous) + /// 
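// ---------------------------------------------------------------------------
// Illustrative sketch (not part of the patch): `is_done` can become a plain
// bool here because, after patch 098, it is only read and written while
// `update_mutex` is held; the mutex already provides the required
// happens-before ordering, so the std::atomic wrapper was redundant:
//
//     /// writer side (update thread):
//     {
//         std::lock_guard lock(update_mutex);
//         is_done = true;
//     }
//     is_update_finished.notify_all();
//
//     /// reader side (querying thread):
//     std::unique_lock lock(update_mutex);
//     is_update_finished.wait(lock, [&] { return is_done; });
// ---------------------------------------------------------------------------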
there will be a time between its calls to `createTable` and `createNewZookeeperNodes` where the nodes won't exist
Strings ready_hosts_results; @@ -314,7 +314,7 @@ Strings BackupCoordinationStatusSync::setImpl(const String & current_host, const { host_with_error = host; error_message = zookeeper->get(zookeeper_path + "/" + zk_node); - return; + return; } auto it = unready_hosts.find(host); if ((it != unready_hosts.end()) && (status == new_status)) diff --git a/src/Backups/BackupCoordinationLocal.cpp b/src/Backups/BackupCoordinationLocal.cpp index 158988cf8b8..a7d5602ca30 100644 --- a/src/Backups/BackupCoordinationLocal.cpp +++ b/src/Backups/BackupCoordinationLocal.cpp @@ -74,7 +74,7 @@ Strings BackupCoordinationLocal::getReplicatedAccessPaths(const String & access_ void BackupCoordinationLocal::setReplicatedAccessHost(const String & access_zk_path, const String & host_id) { std::lock_guard lock{mutex}; - replicated_access_hosts[access_zk_path] = host_id; + replicated_access_hosts[access_zk_path] = host_id; } String BackupCoordinationLocal::getReplicatedAccessHost(const String & access_zk_path) const diff --git a/src/Backups/BackupEntriesCollector.cpp b/src/Backups/BackupEntriesCollector.cpp index e237140cf2b..d5ed9e0da2b 100644 --- a/src/Backups/BackupEntriesCollector.cpp +++ b/src/Backups/BackupEntriesCollector.cpp @@ -28,6 +28,7 @@ namespace ErrorCodes extern const int INCONSISTENT_METADATA_FOR_BACKUP; extern const int CANNOT_BACKUP_TABLE; extern const int TABLE_IS_DROPPED; + extern const int UNKNOWN_TABLE; extern const int LOGICAL_ERROR; } @@ -169,7 +170,7 @@ Strings BackupEntriesCollector::setStatus(const String & new_status, const Strin { auto now = std::chrono::steady_clock::now(); auto end_of_timeout = std::max(now, consistent_metadata_snapshot_start_time + consistent_metadata_snapshot_timeout); - + return backup_coordination->setStatusAndWaitFor( backup_settings.host_id, new_status, @@ -213,7 +214,7 @@ void BackupEntriesCollector::gatherMetadataAndCheckConsistency() { /// Gathered metadata and checked consistency, cool! But we have to check that other hosts cope with that too. auto all_hosts_results = setStatus(new_status, "consistent"); - + std::optional host_with_inconsistency; std::optional inconsistency_error_on_other_host; for (size_t i = 0; i != all_hosts.size(); ++i) @@ -397,7 +398,7 @@ void BackupEntriesCollector::gatherDatabaseMetadata( { throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Couldn't get a create query for database {}", database_name); } - + database_info.create_database_query = create_database_query; const auto & create = create_database_query->as(); @@ -420,7 +421,7 @@ void BackupEntriesCollector::gatherDatabaseMetadata( } database_info.except_table_names.emplace(*table_name); } - + if (all_tables) { database_info.all_tables = all_tables; @@ -437,13 +438,13 @@ void BackupEntriesCollector::gatherTablesMetadata() { const auto & database = database_info.database; bool is_temporary_database = (database_name == DatabaseCatalog::TEMPORARY_DATABASE); - + auto filter_by_table_name = [database_info = &database_info](const String & table_name) { /// We skip inner tables of materialized views. 
if (table_name.starts_with(".inner_id.")) return false; - + if (database_info->tables.contains(table_name)) return true; @@ -464,7 +465,7 @@ void BackupEntriesCollector::gatherTablesMetadata() if (is_temporary_database && !create.temporary) throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a non-temporary create query for {}", tableNameWithTypeToString(database_name, create.getTable(), false)); - + if (!is_temporary_database && (create.getDatabase() != database_name)) throw Exception(ErrorCodes::INCONSISTENT_METADATA_FOR_BACKUP, "Got a create query with unexpected database name {} for {}", backQuoteIfNeed(create.getDatabase()), tableNameWithTypeToString(database_name, create.getTable(), false)); } @@ -579,7 +580,7 @@ bool BackupEntriesCollector::compareWithPrevious(std::optional & inco difference.reserve(databases_metadata.size()); std::set_difference(databases_metadata.begin(), databases_metadata.end(), previous_databases_metadata.begin(), previous_databases_metadata.end(), std::back_inserter(difference)); - + if (!difference.empty()) { inconsistency_error = Exception{ @@ -593,7 +594,7 @@ bool BackupEntriesCollector::compareWithPrevious(std::optional & inco difference.reserve(previous_databases_metadata.size()); std::set_difference(previous_databases_metadata.begin(), previous_databases_metadata.end(), databases_metadata.begin(), databases_metadata.end(), std::back_inserter(difference)); - + if (!difference.empty()) { inconsistency_error = Exception{ @@ -611,7 +612,7 @@ bool BackupEntriesCollector::compareWithPrevious(std::optional & inco difference.reserve(tables_metadata.size()); std::set_difference(tables_metadata.begin(), tables_metadata.end(), previous_tables_metadata.begin(), previous_tables_metadata.end(), std::back_inserter(difference)); - + if (!difference.empty()) { inconsistency_error = Exception{ @@ -625,7 +626,7 @@ bool BackupEntriesCollector::compareWithPrevious(std::optional & inco difference.reserve(previous_tables_metadata.size()); std::set_difference(previous_tables_metadata.begin(), previous_tables_metadata.end(), tables_metadata.begin(), tables_metadata.end(), std::back_inserter(difference)); - + if (!difference.empty()) { inconsistency_error = Exception{ @@ -646,7 +647,7 @@ void BackupEntriesCollector::makeBackupEntriesForDatabasesDefs() { if (!database_info.create_database_query) continue; /// We store CREATE DATABASE queries only if there was BACKUP DATABASE specified. - + LOG_TRACE(log, "Adding definition of database {}", backQuoteIfNeed(database_name)); ASTPtr new_create_query = database_info.create_database_query; diff --git a/src/Backups/BackupEntriesCollector.h b/src/Backups/BackupEntriesCollector.h index 0772fe84b26..9a653ee7e4d 100644 --- a/src/Backups/BackupEntriesCollector.h +++ b/src/Backups/BackupEntriesCollector.h @@ -63,7 +63,7 @@ private: bool tryGatherMetadataAndCompareWithPrevious(std::optional & inconsistency_error); void gatherDatabasesMetadata(); - + void gatherDatabaseMetadata( const String & database_name, bool throw_if_database_not_found, diff --git a/src/Backups/DDLAdjustingForBackupVisitor.cpp b/src/Backups/DDLAdjustingForBackupVisitor.cpp index 2dedc677df8..8223e08f127 100644 --- a/src/Backups/DDLAdjustingForBackupVisitor.cpp +++ b/src/Backups/DDLAdjustingForBackupVisitor.cpp @@ -17,7 +17,7 @@ namespace { /// Precondition: storage.engine && storage.engine->name.starts_with("System")) - /// If this is a definition of a system table we'll remove columns and comment because they're reduntant for backups. 
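// Editor's note (assumption, not in the original): ".inner_id.<uuid>" is the
// naming scheme of a MaterializedView's automatically created inner storage
// table in Atomic databases; such a table is restored through the view that
// owns it, so the filter rejects it here.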
+ /// If this is a definition of a system table we'll remove columns and comment because they're redundant for backups. auto & create = data.create_query->as(); create.reset(create.columns_list); create.reset(create.comment); @@ -105,7 +105,7 @@ void adjustCreateQueryForBackup(ASTPtr ast, const ContextPtr & global_context, s { if (replicated_table_shared_id) *replicated_table_shared_id = {}; - + DDLAdjustingForBackupVisitor::Data data{ast, global_context, replicated_table_shared_id}; DDLAdjustingForBackupVisitor::Visitor{data}.visit(ast); } diff --git a/src/Backups/RestorerFromBackup.cpp b/src/Backups/RestorerFromBackup.cpp index 6013eed7919..5b211bc50a8 100644 --- a/src/Backups/RestorerFromBackup.cpp +++ b/src/Backups/RestorerFromBackup.cpp @@ -638,7 +638,7 @@ void RestorerFromBackup::createTables() create_table_query = create_table_query->clone(); create_table_query->as().if_not_exists = true; } - + LOG_TRACE( log, "Creating {}: {}", diff --git a/src/Backups/RestorerFromBackup.h b/src/Backups/RestorerFromBackup.h index 3d814f67713..ae2f0c76832 100644 --- a/src/Backups/RestorerFromBackup.h +++ b/src/Backups/RestorerFromBackup.h @@ -78,14 +78,14 @@ private: std::vector root_paths_in_backup; void findRootPathsInBackup(); - + void findDatabasesAndTablesInBackup(); void findTableInBackup(const QualifiedTableName & table_name_in_backup, const std::optional & partitions); void findDatabaseInBackup(const String & database_name_in_backup, const std::set & except_table_names); void findEverythingInBackup(const std::set & except_database_names, const std::set & except_table_names); - + void checkAccessForObjectsFoundInBackup() const; - + void createDatabases(); void createTables(); diff --git a/src/Databases/DDLRenamingVisitor.cpp b/src/Databases/DDLRenamingVisitor.cpp index 8dbcc2a24bb..7ea5dbeda83 100644 --- a/src/Databases/DDLRenamingVisitor.cpp +++ b/src/Databases/DDLRenamingVisitor.cpp @@ -41,7 +41,6 @@ namespace create.setDatabase(new_table_name.database); } } - } else if (create.table) { diff --git a/src/Databases/DatabaseReplicated.cpp b/src/Databases/DatabaseReplicated.cpp index 5a0eec10abb..a7ad632efff 100644 --- a/src/Databases/DatabaseReplicated.cpp +++ b/src/Databases/DatabaseReplicated.cpp @@ -928,7 +928,7 @@ std::vector> DatabaseReplicated::getTablesForBackup(const FilterByNameFunction & filter, const ContextPtr &) const { /// Here we read metadata from ZooKeeper. We could do that by simple call of DatabaseAtomic::getTablesForBackup() however - /// reading from ZooKeeper is better because thus we won't be dependant on how fast the replication queue of this database is. + /// reading from ZooKeeper is better because thus we won't be dependent on how fast the replication queue of this database is. 
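// Illustrative example (not part of the patch, assuming the usual definition
// of system.one): after the two reset() calls below, a definition like
//
//     CREATE TABLE system.one (`dummy` UInt8) ENGINE = SystemOne COMMENT '...'
//
// goes into the backup as just
//
//     CREATE TABLE system.one ENGINE = SystemOne
//
// Columns and comment of System* tables are fixed by the engine, so they can
// be regenerated on restore.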
    std::vector<std::pair<ASTPtr, StoragePtr>> res;
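    /// Editor's note (summary, not in the original): `res` collects one
    /// (CREATE query AST, storage) pair per table; the code below fetches
    /// each table's metadata string from ZooKeeper and parses it with
    /// ParserCreateQuery instead of relying on the local metadata copy.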
+ return check_database_and_table_name(database_name, res_id.getTableName()); } return false; } - return checkDatabaseAndTableName(table_id.getDatabaseName(), table_id.getTableName()); + return check_database_and_table_name(table_id.getDatabaseName(), table_id.getTableName()); } void DatabaseCatalog::assertDatabaseExists(const String & database_name) const diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index 68a2a0cbd15..dc80b0aafb6 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -646,7 +646,7 @@ void DataPartStorageOnDisk::backup( String filepath_on_disk = part_path_on_disk / filepath; String filepath_in_backup = part_path_in_backup / filepath; String hardlink_filepath = temp_part_dir / filepath; - + disk->createHardLink(filepath_on_disk, hardlink_filepath); UInt128 file_hash{checksum.file_hash.first, checksum.file_hash.second}; backup_entries.emplace_back( diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index c856786ffb3..6a5b9c2d8e6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -159,7 +159,6 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int CONCURRENT_ACCESS_NOT_SUPPORTED; extern const int CHECKSUM_DOESNT_MATCH; - extern const int INCONSISTENT_METADATA_FOR_BACKUP; } namespace ActionLocks diff --git a/tests/integration/test_backup_restore_new/test.py b/tests/integration/test_backup_restore_new/test.py index 7894daf5bad..a930ddac7df 100644 --- a/tests/integration/test_backup_restore_new/test.py +++ b/tests/integration/test_backup_restore_new/test.py @@ -795,7 +795,7 @@ def test_system_functions(): assert instance.query( "SELECT number, linear_equation(number, 2, 1) FROM numbers(3)" ) == TSV([[0, 1], [1, 3], [2, 5]]) - + assert instance.query("SELECT number, parity_str(number) FROM numbers(3)") == TSV( [[0, "even"], [1, "odd"], [2, "even"]] ) @@ -811,7 +811,9 @@ def test_backup_partition(): instance.query(f"RESTORE TABLE test.table FROM {backup_name}") - assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV([[1, '1'], [4, '4'], [11, '11'], [14, '14'], [21, '21'], [24, '24']]) + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[1, "1"], [4, "4"], [11, "11"], [14, "14"], [21, "21"], [24, "24"]] + ) def test_restore_partition(): @@ -824,4 +826,6 @@ def test_restore_partition(): instance.query(f"RESTORE TABLE test.table PARTITIONS '2', '3' FROM {backup_name}") - assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV([[2, '2'], [3, '3'], [12, '12'], [13, '13'], [22, '22'], [23, '23']]) + assert instance.query("SELECT * FROM test.table ORDER BY x") == TSV( + [[2, "2"], [3, "3"], [12, "12"], [13, "13"], [22, "22"], [23, "23"]] + ) From 4ba4e9b95177f3217d8a17ac8f8fcc6fd7d1910f Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Thu, 30 Jun 2022 09:53:30 -0400 Subject: [PATCH 105/121] Remove 404ing original article --- docs/en/engines/table-engines/special/null.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/docs/en/engines/table-engines/special/null.md b/docs/en/engines/table-engines/special/null.md index 5e775227dab..ca02d8e300b 100644 --- a/docs/en/engines/table-engines/special/null.md +++ b/docs/en/engines/table-engines/special/null.md @@ -10,6 +10,3 @@ When writing to a `Null` table, data is ignored. 
When reading from a `Null` tabl :::note If you are wondering why this is useful, note that you can create a materialized view on a `Null` table. So the data written to the table will end up affecting the view, but original raw data will still be discarded. ::: - - -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/null/) From d2cbdc7c5340f1515972875b2f4ad0da53de4938 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 30 Jun 2022 17:23:53 +0300 Subject: [PATCH 106/121] Update ReplicatedMergeTreeQueue.cpp --- src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index add1ba875aa..f6c80baba05 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1103,7 +1103,9 @@ bool ReplicatedMergeTreeQueue::isCoveredByFuturePartsImpl(const LogEntry & entry continue; /// Parts are not disjoint, so new_part_name either contains or covers future_part. - chassert(future_part.contains(result_part) || result_part.contains(future_part)); + if (!(future_part.contains(result_part) || result_part.contains(future_part))) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Got unexpected non-disjoint parts: {} and {}", future_part_elem.first, new_part_name); + /// We cannot execute `entry` (or upgrade its actual_part_name to `new_part_name`) /// while any covered or covering parts are processed. /// But we also cannot simply return true and postpone entry processing, because it may lead to kind of livelock. From 95c3eff4c78c467dbb7c0b2eb2203b64b4f2261d Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 30 Jun 2022 11:17:38 -0400 Subject: [PATCH 107/121] remove 404ing original article URL --- docs/en/engines/database-engines/postgresql.md | 1 - docs/en/engines/table-engines/log-family/index.md | 1 - docs/en/engines/table-engines/special/file.md | 1 - docs/en/engines/table-engines/special/join.md | 1 - docs/en/engines/table-engines/special/merge.md | 1 - docs/en/engines/table-engines/special/set.md | 1 - docs/en/engines/table-engines/special/url.md | 1 - docs/en/operations/external-authenticators/index.md | 1 - docs/en/operations/system-tables/distributed_ddl_queue.md | 1 - docs/en/operations/system-tables/distribution_queue.md | 1 - docs/en/operations/system-tables/opentelemetry_span_log.md | 1 - docs/en/operations/system-tables/parts_columns.md | 1 - docs/en/operations/system-tables/replication_queue.md | 1 - .../en/sql-reference/aggregate-functions/reference/meanztest.md | 2 -- .../sql-reference/aggregate-functions/reference/welchttest.md | 1 - docs/en/sql-reference/data-types/domains/index.md | 1 - docs/en/sql-reference/data-types/geo.md | 1 - docs/en/sql-reference/data-types/map.md | 1 - docs/en/sql-reference/data-types/simpleaggregatefunction.md | 1 - docs/en/sql-reference/functions/encryption-functions.md | 1 - docs/en/sql-reference/table-functions/mysql.md | 1 - 21 files changed, 22 deletions(-) diff --git a/docs/en/engines/database-engines/postgresql.md b/docs/en/engines/database-engines/postgresql.md index 07181cc23e8..969a326b701 100644 --- a/docs/en/engines/database-engines/postgresql.md +++ b/docs/en/engines/database-engines/postgresql.md @@ -136,4 +136,3 @@ DESCRIBE TABLE test_database.test_table; └────────┴───────────────────┘ ``` -[Original article](https://clickhouse.com/docs/en/database-engines/postgresql/) diff --git 
a/docs/en/engines/table-engines/log-family/index.md b/docs/en/engines/table-engines/log-family/index.md index 8e772341733..4ea2294554a 100644 --- a/docs/en/engines/table-engines/log-family/index.md +++ b/docs/en/engines/table-engines/log-family/index.md @@ -43,4 +43,3 @@ The `TinyLog` engine is the simplest in the family and provides the poorest func The `Log` and `StripeLog` engines support parallel data reading. When reading data, ClickHouse uses multiple threads. Each thread processes a separate data block. The `Log` engine uses a separate file for each column of the table. `StripeLog` stores all the data in one file. As a result, the `StripeLog` engine uses fewer file descriptors, but the `Log` engine provides higher efficiency when reading data. -[Original article](https://clickhouse.com/docs/en/operations/table_engines/log_family/) diff --git a/docs/en/engines/table-engines/special/file.md b/docs/en/engines/table-engines/special/file.md index 5f27bc73e1d..7a53670bebd 100644 --- a/docs/en/engines/table-engines/special/file.md +++ b/docs/en/engines/table-engines/special/file.md @@ -86,4 +86,3 @@ $ echo -e "1,2\n3,4" | clickhouse-local -q "CREATE TABLE table (a Int64, b Int64 - Indices - Replication -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/file/) diff --git a/docs/en/engines/table-engines/special/join.md b/docs/en/engines/table-engines/special/join.md index c95ebe19c31..4e628b8b9b0 100644 --- a/docs/en/engines/table-engines/special/join.md +++ b/docs/en/engines/table-engines/special/join.md @@ -151,4 +151,3 @@ ALTER TABLE id_val_join DELETE WHERE id = 3; └────┴─────┘ ``` -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/join/) diff --git a/docs/en/engines/table-engines/special/merge.md b/docs/en/engines/table-engines/special/merge.md index ab15ad8dc76..d32547a300c 100644 --- a/docs/en/engines/table-engines/special/merge.md +++ b/docs/en/engines/table-engines/special/merge.md @@ -86,4 +86,3 @@ SELECT * FROM WatchLog; - [Virtual columns](../../../engines/table-engines/special/index.md#table_engines-virtual_columns) - [merge](../../../sql-reference/table-functions/merge.md) table function -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/merge/) diff --git a/docs/en/engines/table-engines/special/set.md b/docs/en/engines/table-engines/special/set.md index 46e31af7ff1..f7114f04cea 100644 --- a/docs/en/engines/table-engines/special/set.md +++ b/docs/en/engines/table-engines/special/set.md @@ -20,4 +20,3 @@ When creating a table, the following settings are applied: - [persistent](../../../operations/settings/settings.md#persistent) -[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/set/) diff --git a/docs/en/engines/table-engines/special/url.md b/docs/en/engines/table-engines/special/url.md index 19246b82219..82617e9425d 100644 --- a/docs/en/engines/table-engines/special/url.md +++ b/docs/en/engines/table-engines/special/url.md @@ -89,4 +89,3 @@ SELECT * FROM url_engine_table - Indexes. - Replication. 
-[Original article](https://clickhouse.com/docs/en/operations/table_engines/special/url/) diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index af2ba713ec1..d358267c4f0 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -13,4 +13,3 @@ The following external authenticators and directories are supported: - Kerberos [Authenticator](./kerberos.md#external-authenticators-kerberos) - [SSL X.509 authentication](./ssl-x509.md#ssl-external-authentication) -[Original article](https://clickhouse.com/docs/en/operations/external-authenticators/index/) diff --git a/docs/en/operations/system-tables/distributed_ddl_queue.md b/docs/en/operations/system-tables/distributed_ddl_queue.md index 5a2478b22d9..a35d4a2a5b7 100644 --- a/docs/en/operations/system-tables/distributed_ddl_queue.md +++ b/docs/en/operations/system-tables/distributed_ddl_queue.md @@ -61,4 +61,3 @@ exception_code: ZOK 2 rows in set. Elapsed: 0.025 sec. ``` -[Original article](https://clickhouse.com/docs/en/operations/system_tables/distributed_ddl_queuedistributed_ddl_queue.md) diff --git a/docs/en/operations/system-tables/distribution_queue.md b/docs/en/operations/system-tables/distribution_queue.md index 88d376c7553..896491a458b 100644 --- a/docs/en/operations/system-tables/distribution_queue.md +++ b/docs/en/operations/system-tables/distribution_queue.md @@ -47,4 +47,3 @@ last_exception: - [Distributed table engine](../../engines/table-engines/special/distributed.md) -[Original article](https://clickhouse.com/docs/en/operations/system_tables/distribution_queue) diff --git a/docs/en/operations/system-tables/opentelemetry_span_log.md b/docs/en/operations/system-tables/opentelemetry_span_log.md index 1b3b97af019..a9ca32ae030 100644 --- a/docs/en/operations/system-tables/opentelemetry_span_log.md +++ b/docs/en/operations/system-tables/opentelemetry_span_log.md @@ -50,4 +50,3 @@ attribute.values: [] - [OpenTelemetry](../../operations/opentelemetry.md) -[Original article](https://clickhouse.com/docs/en/operations/system_tables/opentelemetry_span_log) diff --git a/docs/en/operations/system-tables/parts_columns.md b/docs/en/operations/system-tables/parts_columns.md index 0439da79ab3..2f85b912f38 100644 --- a/docs/en/operations/system-tables/parts_columns.md +++ b/docs/en/operations/system-tables/parts_columns.md @@ -145,4 +145,3 @@ column_marks_bytes: 48 - [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md) -[Original article](https://clickhouse.com/docs/en/operations/system_tables/parts_columns) diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md index cb22345c3a2..a7ac748ebbd 100644 --- a/docs/en/operations/system-tables/replication_queue.md +++ b/docs/en/operations/system-tables/replication_queue.md @@ -88,4 +88,3 @@ last_postpone_time: 1970-01-01 03:00:00 - [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md#query-language-system-replicated) -[Original article](https://clickhouse.com/docs/en/operations/system_tables/replication_queue) diff --git a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md index d129e5722bc..0752df05818 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/meanztest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/meanztest.md @@ -66,5 +66,3 @@ 
Result: └──────────────────────────────────────────────────────────────────────────────────┘ ``` - -[Original article](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/meanZTest/) diff --git a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md index 82c09ed606e..0a0278f970e 100644 --- a/docs/en/sql-reference/aggregate-functions/reference/welchttest.md +++ b/docs/en/sql-reference/aggregate-functions/reference/welchttest.md @@ -69,4 +69,3 @@ Result: - [Welch's t-test](https://en.wikipedia.org/wiki/Welch%27s_t-test) - [studentTTest function](studentttest.md#studentttest) -[Original article](https://clickhouse.com/docs/en/sql-reference/aggregate-functions/reference/welchTTest/) diff --git a/docs/en/sql-reference/data-types/domains/index.md b/docs/en/sql-reference/data-types/domains/index.md index f9bd6eea07e..50599db2f47 100644 --- a/docs/en/sql-reference/data-types/domains/index.md +++ b/docs/en/sql-reference/data-types/domains/index.md @@ -27,4 +27,3 @@ You can use domains anywhere corresponding base type can be used, for example: - Can’t implicitly convert string values into domain values when inserting data from another column or table. - Domain adds no constrains on stored values. -[Original article](https://clickhouse.com/docs/en/data_types/domains/) diff --git a/docs/en/sql-reference/data-types/geo.md b/docs/en/sql-reference/data-types/geo.md index c8edf985582..22fc56dbcf5 100644 --- a/docs/en/sql-reference/data-types/geo.md +++ b/docs/en/sql-reference/data-types/geo.md @@ -104,4 +104,3 @@ Result: └─────────────────────────────────────────────────────────────────────────────────────────────────┴─────────────────┘ ``` -[Original article](https://clickhouse.com/docs/en/data-types/geo/) diff --git a/docs/en/sql-reference/data-types/map.md b/docs/en/sql-reference/data-types/map.md index e913a5f34e3..65a0f9cbc52 100644 --- a/docs/en/sql-reference/data-types/map.md +++ b/docs/en/sql-reference/data-types/map.md @@ -108,4 +108,3 @@ Result: - [map()](../../sql-reference/functions/tuple-map-functions.md#function-map) function - [CAST()](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function -[Original article](https://clickhouse.com/docs/en/data-types/map/) diff --git a/docs/en/sql-reference/data-types/simpleaggregatefunction.md b/docs/en/sql-reference/data-types/simpleaggregatefunction.md index d0f604e8d8e..069e2e68671 100644 --- a/docs/en/sql-reference/data-types/simpleaggregatefunction.md +++ b/docs/en/sql-reference/data-types/simpleaggregatefunction.md @@ -39,4 +39,3 @@ Values of the `SimpleAggregateFunction(func, Type)` look and stored the same way CREATE TABLE simple (id UInt64, val SimpleAggregateFunction(sum, Double)) ENGINE=AggregatingMergeTree ORDER BY id; ``` -[Original article](https://clickhouse.com/docs/en/data_types/simpleaggregatefunction/) diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md index 5b3fe2201e6..75f6cf18766 100644 --- a/docs/en/sql-reference/functions/encryption-functions.md +++ b/docs/en/sql-reference/functions/encryption-functions.md @@ -355,4 +355,3 @@ Result: └───────────┘ ``` -[Original article](https://clickhouse.com/docs/en/sql-reference/functions/encryption_functions/) diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 33ddaa89435..60d95b17c4c 100644 --- 
a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -111,4 +111,3 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); - [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md) - [Using MySQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql) -[Original article](https://clickhouse.com/docs/en/sql-reference/table_functions/mysql/) From 7a1346ad22f3e04233098a30ffc7b0c4ac948a74 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 30 Jun 2022 18:27:28 +0300 Subject: [PATCH 108/121] Update 02067_lost_part_s3.sql --- tests/queries/0_stateless/02067_lost_part_s3.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02067_lost_part_s3.sql b/tests/queries/0_stateless/02067_lost_part_s3.sql index 87cbdca1d06..463f80348b2 100644 --- a/tests/queries/0_stateless/02067_lost_part_s3.sql +++ b/tests/queries/0_stateless/02067_lost_part_s3.sql @@ -1,4 +1,4 @@ --- Tags: no-backward-compatibility-check:22.5.1 +-- Tags: no-backward-compatibility-check DROP TABLE IF EXISTS partslost_0; DROP TABLE IF EXISTS partslost_1; From 5c5f05dd4b7bf9d82f3dd9d1c3c4c7dcce4e4f9e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 30 Jun 2022 17:38:10 +0200 Subject: [PATCH 109/121] Clean out randomized integration volumes each run --- tests/integration/runner | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/tests/integration/runner b/tests/integration/runner index d82e73068af..cd07875ad1d 100755 --- a/tests/integration/runner +++ b/tests/integration/runner @@ -346,6 +346,17 @@ if __name__ == "__main__": ) except Exception as ex: print("Volume creationg failed, probably it already exists, exception", ex) + # TODO: this part cleans out stale volumes produced by container name + # randomizer, we should remove it after Sep 2022 + try: + subprocess.check_call( + "docker volume rm $(docker volume ls -q | " + f"grep '{VOLUME_NAME}_.*_volume')", + shell=True, + ) + except Exception as ex: + print("Probably, some stale volumes still there, just continue:", ex) + # TODO END dockerd_internal_volume = f"--volume={VOLUME_NAME}_volume:/var/lib/docker" # If enabled we kill and remove containers before pytest session run. 
@@ -392,7 +403,11 @@ if __name__ == "__main__": command=args.command, ) - containers = subprocess.check_output(f"docker ps -a -q --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", shell=True, universal_newlines=True).splitlines() + containers = subprocess.check_output( + f"docker ps -a -q --filter name={CONTAINER_NAME} --format={{{{.ID}}}}", + shell=True, + universal_newlines=True, + ).splitlines() if containers: print(f"Trying to kill containers name={CONTAINER_NAME} ids={containers}") subprocess.check_call(f"docker kill {' '.join(containers)}", shell=True) From 26749c53fe15fcea8501a379ad9b48c822ea7684 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 30 Jun 2022 12:45:10 -0400 Subject: [PATCH 110/121] fix formatting of code clocks and lists --- .../mergetree-family/mergetree.md | 73 +++++++++++-------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/docs/en/engines/table-engines/mergetree-family/mergetree.md b/docs/en/engines/table-engines/mergetree-family/mergetree.md index 20d9a14b194..3e5a0635339 100644 --- a/docs/en/engines/table-engines/mergetree-family/mergetree.md +++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md @@ -68,40 +68,42 @@ For a description of parameters, see the [CREATE query description](../../../sql `ORDER BY` — The sorting key. - A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. +A tuple of column names or arbitrary expressions. Example: `ORDER BY (CounterID, EventDate)`. - ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause. +ClickHouse uses the sorting key as a primary key if the primary key is not defined explicitly by the `PRIMARY KEY` clause. - Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). +Use the `ORDER BY tuple()` syntax, if you do not need sorting. See [Selecting the Primary Key](#selecting-the-primary-key). #### PARTITION BY `PARTITION BY` — The [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md). Optional. In most cases you don't need partition key, and in most other cases you don't need partition key more granular than by months. Partitioning does not speed up queries (in contrast to the ORDER BY expression). You should never use too granular partitioning. Don't partition your data by client identifiers or names (instead make client identifier or name the first column in the ORDER BY expression). - For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. +For partitioning by month, use the `toYYYYMM(date_column)` expression, where `date_column` is a column with a date of the type [Date](../../../sql-reference/data-types/date.md). The partition names here have the `"YYYYMM"` format. #### PRIMARY KEY `PRIMARY KEY` — The primary key if it [differs from the sorting key](#choosing-a-primary-key-that-differs-from-the-sorting-key). Optional. - By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. +By default the primary key is the same as the sorting key (which is specified by the `ORDER BY` clause). Thus in most cases it is unnecessary to specify a separate `PRIMARY KEY` clause. 
#### SAMPLE BY `SAMPLE BY` — An expression for sampling. Optional. - If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. +If a sampling expression is used, the primary key must contain it. The result of a sampling expression must be an unsigned integer. Example: `SAMPLE BY intHash32(UserID) ORDER BY (CounterID, EventDate, intHash32(UserID))`. #### TTL `TTL` — A list of rules specifying storage duration of rows and defining logic of automatic parts movement [between disks and volumes](#table_engine-mergetree-multiple-volumes). Optional. - Expression must have one `Date` or `DateTime` column as a result. Example: - `TTL date + INTERVAL 1 DAY` +Expression must have one `Date` or `DateTime` column as a result. Example: +``` +TTL date + INTERVAL 1 DAY +``` - Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule. +Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule. - For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) +For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl) ### SETTINGS Additional parameters that control the behavior of the `MergeTree` (optional): @@ -129,7 +131,6 @@ Additional parameters that control the behavior of the `MergeTree` (optional): #### min_merge_bytes_to_use_direct_io `min_merge_bytes_to_use_direct_io` — The minimum data volume for merge operation that is required for using direct I/O access to the storage disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the volume exceeds `min_merge_bytes_to_use_direct_io` bytes, ClickHouse reads and writes the data to the storage disk using the direct I/O interface (`O_DIRECT` option). If `min_merge_bytes_to_use_direct_io = 0`, then direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes. - #### merge_with_ttl_timeout @@ -305,15 +306,29 @@ For `SELECT` queries, ClickHouse analyzes whether an index can be used. An index Thus, it is possible to quickly run queries on one or many ranges of the primary key. In this example, queries will be fast when run for a specific tracking tag, for a specific tag and date range, for a specific tag and date, for multiple tags with a date range, and so on. 
Let’s look at the engine configured as follows: - - ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate) SETTINGS index_granularity=8192 +```sql +ENGINE MergeTree() +PARTITION BY toYYYYMM(EventDate) +ORDER BY (CounterID, EventDate) +SETTINGS index_granularity=8192 +``` In this case, in queries: ``` sql -SELECT count() FROM table WHERE EventDate = toDate(now()) AND CounterID = 34 -SELECT count() FROM table WHERE EventDate = toDate(now()) AND (CounterID = 34 OR CounterID = 42) -SELECT count() FROM table WHERE ((EventDate >= toDate('2014-01-01') AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) AND CounterID IN (101500, 731962, 160656) AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) +SELECT count() FROM table +WHERE EventDate = toDate(now()) +AND CounterID = 34 + +SELECT count() FROM table +WHERE EventDate = toDate(now()) +AND (CounterID = 34 OR CounterID = 42) + +SELECT count() FROM table +WHERE ((EventDate >= toDate('2014-01-01') +AND EventDate <= toDate('2014-01-31')) OR EventDate = toDate('2014-05-01')) +AND CounterID IN (101500, 731962, 160656) +AND (CounterID = 101500 OR EventDate != toDate('2014-05-01')) ``` ClickHouse will use the primary key index to trim improper data and the monthly partitioning key to trim partitions that are in improper date ranges. @@ -376,36 +391,36 @@ SELECT count() FROM table WHERE u64 * i32 == 10 AND u64 * length(s) >= 1234 #### `minmax` - Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like the primary key. +Stores extremes of the specified expression (if the expression is `tuple`, then it stores extremes for each element of `tuple`), uses stored info for skipping blocks of data like the primary key. #### `set(max_rows)` - Stores unique values of the specified expression (no more than `max_rows` rows, `max_rows=0` means “no limits”). Uses the values to check if the `WHERE` expression is not satisfiable on a block of data. +Stores unique values of the specified expression (no more than `max_rows` rows, `max_rows=0` means “no limits”). Uses the values to check if the `WHERE` expression is not satisfiable on a block of data. #### `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions. +Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) that contains all ngrams from a block of data. Works only with datatypes: [String](../../../sql-reference/data-types/string.md), [FixedString](../../../sql-reference/data-types/fixedstring.md) and [Map](../../../sql-reference/data-types/map.md). Can be used for optimization of `EQUALS`, `LIKE` and `IN` expressions. - - `n` — ngram size, - - `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well). - - `number_of_hash_functions` — The number of hash functions used in the Bloom filter. - - `random_seed` — The seed for Bloom filter hash functions. 
+- `n` — ngram size, +- `size_of_bloom_filter_in_bytes` — Bloom filter size in bytes (you can use large values here, for example, 256 or 512, because it can be compressed well). +- `number_of_hash_functions` — The number of hash functions used in the Bloom filter. +- `random_seed` — The seed for Bloom filter hash functions. #### `tokenbf_v1(size_of_bloom_filter_in_bytes, number_of_hash_functions, random_seed)` - The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters. +The same as `ngrambf_v1`, but stores tokens instead of ngrams. Tokens are sequences separated by non-alphanumeric characters. #### `bloom_filter([false_positive])` — Stores a [Bloom filter](https://en.wikipedia.org/wiki/Bloom_filter) for the specified columns. - The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025. +The optional `false_positive` parameter is the probability of receiving a false positive response from the filter. Possible values: (0, 1). Default value: 0.025. - Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`. +Supported data types: `Int*`, `UInt*`, `Float*`, `Enum`, `Date`, `DateTime`, `String`, `FixedString`, `Array`, `LowCardinality`, `Nullable`, `UUID`, `Map`. - For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. +For `Map` data type client can specify if index should be created for keys or values using [mapKeys](../../../sql-reference/functions/tuple-map-functions.md#mapkeys) or [mapValues](../../../sql-reference/functions/tuple-map-functions.md#mapvalues) function. - The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall). +The following functions can use the filter: [equals](../../../sql-reference/functions/comparison-functions.md), [notEquals](../../../sql-reference/functions/comparison-functions.md), [in](../../../sql-reference/functions/in-functions), [notIn](../../../sql-reference/functions/in-functions), [has](../../../sql-reference/functions/array-functions#hasarr-elem), [hasAny](../../../sql-reference/functions/array-functions#hasany), [hasAll](../../../sql-reference/functions/array-functions#hasall). - Example of index creation for `Map` data type +Example of index creation for `Map` data type ``` INDEX map_key_index mapKeys(map_column) TYPE bloom_filter GRANULARITY 1 From 987f6bc8ff29fbfa0f65109336682452f9c9238d Mon Sep 17 00:00:00 2001 From: Yuko Takagi <70714860+yukotakagi@users.noreply.github.com> Date: Thu, 30 Jun 2022 12:03:09 -0600 Subject: [PATCH 111/121] Update README.md Update upcoming meetup. 
--- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 153a0d5ce11..1d0146582a6 100644 --- a/README.md +++ b/README.md @@ -15,5 +15,8 @@ ClickHouse® is an open-source column-oriented database management system that a * [Contacts](https://clickhouse.com/company/#contact) can help to get your questions answered if there are any. ## Upcoming events -* [Paris Meetup](https://www.meetup.com/clickhouse-france-user-group/events/286304312/) Please join us for an evening of talks (in English), food and discussion. Featuring talks of ClickHouse in production and at least one on the deep internals of ClickHouse itself. * [v22.7 Release Webinar](https://clickhouse.com/company/events/v22-7-release-webinar/) Original creator, co-founder, and CTO of ClickHouse Alexey Milovidov will walk us through the highlights of the release, provide live demos, and share vision into what is coming in the roadmap. +* [ClickHouse Meetup at the Cloudflare office in London](https://www.meetup.com/clickhouse-london-user-group/events/286891586/) ClickHouse meetup at the Cloudflare office space in central London +* [ClickHouse Meetup at the Metoda office in Munich](https://www.meetup.com/clickhouse-meetup-munich/events/286891667/) ClickHouse meetup at the Metoda office in Munich + + From bb358617e1e0ef6e0cf43f67b1eeea366986c7bd Mon Sep 17 00:00:00 2001 From: Robert Schulze Date: Thu, 30 Jun 2022 23:35:44 +0200 Subject: [PATCH 112/121] Better naming for stuff related to splitted debug symbols The previous name was slightly misleading, e.g. it is not about "intalling stripped binaries" but about splitting debug symbols from the binary. --- CMakeLists.txt | 6 +++--- cmake/{strip_binary.cmake => split_debug_symbols.cmake} | 2 +- docker/packager/packager | 2 +- docs/en/development/cmake-in-clickhouse.md | 2 +- programs/CMakeLists.txt | 8 ++++---- programs/keeper/CMakeLists.txt | 6 +++--- programs/library-bridge/CMakeLists.txt | 8 ++++---- programs/odbc-bridge/CMakeLists.txt | 8 ++++---- 8 files changed, 21 insertions(+), 21 deletions(-) rename cmake/{strip_binary.cmake => split_debug_symbols.cmake} (98%) diff --git a/CMakeLists.txt b/CMakeLists.txt index c8bb1a2d1ca..4c7b732c68c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -252,10 +252,10 @@ else () endif () # Optionally split binaries and debug symbols. 
---
 CMakeLists.txt                                          | 6 +++---
 cmake/{strip_binary.cmake => split_debug_symbols.cmake} | 2 +-
 docker/packager/packager                                | 2 +-
 docs/en/development/cmake-in-clickhouse.md              | 2 +-
 programs/CMakeLists.txt                                 | 8 ++++----
 programs/keeper/CMakeLists.txt                          | 6 +++---
 programs/library-bridge/CMakeLists.txt                  | 8 ++++----
 programs/odbc-bridge/CMakeLists.txt                     | 8 ++++----
 8 files changed, 21 insertions(+), 21 deletions(-)
 rename cmake/{strip_binary.cmake => split_debug_symbols.cmake} (98%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index c8bb1a2d1ca..4c7b732c68c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -252,10 +252,10 @@ else ()
 endif ()

 # Optionally split binaries and debug symbols.
-option(INSTALL_STRIPPED_BINARIES "Split binaries and debug symbols" OFF)
-if (INSTALL_STRIPPED_BINARIES)
+option(SPLIT_DEBUG_SYMBOLS "Split binaries and debug symbols" OFF)
+if (SPLIT_DEBUG_SYMBOLS)
     message(STATUS "Will split binaries and debug symbols")
-    set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
+    set(SPLITTED_DEBUG_SYMBOLS_DIR "stripped" CACHE STRING "A separate directory for stripped information")
 endif()

 cmake_host_system_information(RESULT AVAILABLE_PHYSICAL_MEMORY QUERY AVAILABLE_PHYSICAL_MEMORY) # Not available under freebsd

diff --git a/cmake/strip_binary.cmake b/cmake/split_debug_symbols.cmake
similarity index 98%
rename from cmake/strip_binary.cmake
rename to cmake/split_debug_symbols.cmake
index 6e38c86fc70..12182ed9c20 100644
--- a/cmake/strip_binary.cmake
+++ b/cmake/split_debug_symbols.cmake
@@ -1,4 +1,4 @@
-macro(clickhouse_strip_binary)
+macro(clickhouse_split_debug_symbols)
     set(oneValueArgs TARGET DESTINATION_DIR BINARY_PATH)

     cmake_parse_arguments(STRIP "" "${oneValueArgs}" "" ${ARGN})

diff --git a/docker/packager/packager b/docker/packager/packager
index 14147b8e069..7c0f046b76c 100755
--- a/docker/packager/packager
+++ b/docker/packager/packager
@@ -202,7 +202,7 @@ def parse_env_variables(
         cmake_flags.append("-DCMAKE_INSTALL_SYSCONFDIR=/etc")
         cmake_flags.append("-DCMAKE_INSTALL_LOCALSTATEDIR=/var")
         if is_release_build(build_type, package_type, sanitizer, split_binary):
-            cmake_flags.append("-DINSTALL_STRIPPED_BINARIES=ON")
+            cmake_flags.append("-DSPLIT_DEBUG_SYMBOLS=ON")
         result.append("WITH_PERFORMANCE=1")
         if is_cross_arm:
             cmake_flags.append("-DBUILD_STANDALONE_KEEPER=1")
diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md
index 5625cf3657d..83279f5f69a 100644
--- a/docs/en/development/cmake-in-clickhouse.md
+++ b/docs/en/development/cmake-in-clickhouse.md
@@ -349,7 +349,7 @@ Note that ClickHouse uses forks of these libraries, see https://github.com/Click
 Only for Linux, x86_64 or aarch64.


-INSTALL_STRIPPED_BINARIES
+SPLIT_DEBUG_SYMBOLS
 OFF
 Build stripped binaries with debug info in separate directory

diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index a2c6eb1a27e..1639af163a9 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -2,7 +2,7 @@ if (USE_CLANG_TIDY)
     set (CMAKE_CXX_CLANG_TIDY "${CLANG_TIDY_PATH}")
 endif ()

-include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake)
+include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake)

 # The `clickhouse` binary is a multi purpose tool that contains multiple execution modes (client, server, etc.),
 # each of them may be built and linked as a separate library.
@@ -511,10 +511,10 @@ else () add_custom_command(TARGET clickhouse POST_BUILD COMMAND ./clickhouse hash-binary > hash && ${OBJCOPY_PATH} --add-section .clickhouse.hash=hash clickhouse COMMENT "Adding section '.clickhouse.hash' to clickhouse binary" VERBATIM) endif() - if (INSTALL_STRIPPED_BINARIES) - clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse) + if (SPLIT_DEBUG_SYMBOLS) + clickhouse_split_debug_symbols(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH clickhouse) else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT}) + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${SPLITTED_DEBUG_SYMBOLS_DIR}) install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() diff --git a/programs/keeper/CMakeLists.txt b/programs/keeper/CMakeLists.txt index c77b335b615..cf6c8a6e975 100644 --- a/programs/keeper/CMakeLists.txt +++ b/programs/keeper/CMakeLists.txt @@ -131,10 +131,10 @@ if (BUILD_STANDALONE_KEEPER) add_dependencies(clickhouse-keeper clickhouse_keeper_configs) set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../) - if (INSTALL_STRIPPED_BINARIES) - clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper) + if (SPLIT_DEBUG_SYMBOLS) + clickhouse_split_debug_symbols(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-keeper) else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() endif() diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 90ce3d8be7f..a80f2568f04 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -1,4 +1,4 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake) +include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake) set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp @@ -24,9 +24,9 @@ target_link_libraries(clickhouse-library-bridge PRIVATE set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..) 
-if (INSTALL_STRIPPED_BINARIES) - clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge) +if (SPLIT_DEBUG_SYMBOLS) + clickhouse_split_debug_symbols(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-library-bridge) else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() diff --git a/programs/odbc-bridge/CMakeLists.txt b/programs/odbc-bridge/CMakeLists.txt index b530e08ca26..f64bec9892f 100644 --- a/programs/odbc-bridge/CMakeLists.txt +++ b/programs/odbc-bridge/CMakeLists.txt @@ -1,4 +1,4 @@ -include(${ClickHouse_SOURCE_DIR}/cmake/strip_binary.cmake) +include(${ClickHouse_SOURCE_DIR}/cmake/split_debug_symbols.cmake) set (CLICKHOUSE_ODBC_BRIDGE_SOURCES ColumnInfoHandler.cpp @@ -39,10 +39,10 @@ if (USE_GDB_ADD_INDEX) add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM) endif() -if (INSTALL_STRIPPED_BINARIES) - clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge) +if (SPLIT_DEBUG_SYMBOLS) + clickhouse_split_debug_symbols(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR} BINARY_PATH ../clickhouse-odbc-bridge) else() - clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT}) + clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${SPLITTED_DEBUG_SYMBOLS_DIR}) install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) endif() From 9e544d38bd6385c8455573e9776c4e25bb5851f4 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 30 Jun 2022 18:51:33 -0300 Subject: [PATCH 113/121] Doc. Fix a note about ClickHouse Keeper --- docs/en/operations/tips.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index f364bc85088..ea6a29177c0 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -128,7 +128,7 @@ You should never use manually written scripts to transfer data between different If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters. -You can run ClickHouse Keeper on the same server as ClickHouse, but do not run ZooKeeper on the same servers as ClickHouse. Because ZooKeeper is very sensitive for latency and ClickHouse may utilize all available system resources. +You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate. Do not run ZooKeeper and ClickHouse Keeper on the same servers as ClickHouse in production environments using the same one disk. 
Because ZooKeeper/Keeper are very sensitive for latency and ClickHouse may utilize all available system resources. You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers. From 7378f0e30f4eb10d91c3d5a72182046ce469472a Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Thu, 30 Jun 2022 22:33:56 +0000 Subject: [PATCH 114/121] Print stacktraces if test queue is full --- tests/clickhouse-test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 8744e8bf95b..f21d1734029 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1594,6 +1594,8 @@ def do_run_tests(jobs, test_suite: TestSuite, parallel): queue.close() except Full: + print("Couldn't put test to the queue within timeout. Server probably hung.") + print_stacktraces() queue.close() pool.join() From f62c5e46f7b96ff7dcbfc1ff71405dcebd5d8f2a Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Fri, 1 Jul 2022 02:47:15 +0300 Subject: [PATCH 115/121] Update CHANGELOG.md --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 94ea0ce2118..dfc51952250 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,7 +34,6 @@ * Add two new settings `input_format_csv_skip_first_lines/input_format_tsv_skip_first_lines` to allow skipping specified number of lines in the beginning of the file in CSV/TSV formats. [#37537](https://github.com/ClickHouse/ClickHouse/pull/37537) ([Kruglov Pavel](https://github.com/Avogar)). * `showCertificate` function shows current server's SSL certificate. [#37540](https://github.com/ClickHouse/ClickHouse/pull/37540) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). * HTTP source for Data Dictionaries in Named Collections is supported. [#37581](https://github.com/ClickHouse/ClickHouse/pull/37581) ([Yakov Olkhovskiy](https://github.com/yakov-olkhovskiy)). -* Added a new window function `nonNegativeDerivative(metric_column, timestamp_column[, INTERVAL x SECOND])`. [#37628](https://github.com/ClickHouse/ClickHouse/pull/37628) ([Andrey Zvonov](https://github.com/zvonand)). * Implemented changing the comment for `ReplicatedMergeTree` tables. [#37416](https://github.com/ClickHouse/ClickHouse/pull/37416) ([Vasily Nemkov](https://github.com/Enmk)). * Added `SYSTEM UNFREEZE` query that deletes the whole backup regardless if the corresponding table is deleted or not. [#36424](https://github.com/ClickHouse/ClickHouse/pull/36424) ([Vadim Volodin](https://github.com/PolyProgrammist)). From 97009ab9a334d4c3ab4df48a7f8273af2cd7c7f4 Mon Sep 17 00:00:00 2001 From: Denny Crane Date: Thu, 30 Jun 2022 21:12:08 -0300 Subject: [PATCH 116/121] Update tips.md --- docs/en/operations/tips.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md index ea6a29177c0..5325311a9e6 100644 --- a/docs/en/operations/tips.md +++ b/docs/en/operations/tips.md @@ -128,7 +128,8 @@ You should never use manually written scripts to transfer data between different If you want to divide an existing ZooKeeper cluster into two, the correct way is to increase the number of its replicas and then reconfigure it as two independent clusters. -You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate. Do not run ZooKeeper and ClickHouse Keeper on the same servers as ClickHouse in production environments using the same one disk. 
Because ZooKeeper/Keeper are very sensitive for latency and ClickHouse may utilize all available system resources.
+You can run ClickHouse Keeper on the same server as ClickHouse in test environments, or in environments with low ingestion rate.
+For production environments, we suggest using separate servers for ClickHouse and ZooKeeper/Keeper, or placing the ClickHouse files and the Keeper files on separate disks, because ZooKeeper/Keeper are very sensitive to disk latency and ClickHouse may utilize all available system resources.

 You can have ZooKeeper observers in an ensemble but ClickHouse servers should not interact with observers.

From bab954c461aa07a75180d6a34fa33a05346f7879 Mon Sep 17 00:00:00 2001
From: lgbo-ustc
Date: Fri, 1 Jul 2022 10:15:35 +0800
Subject: [PATCH 117/121] Update code based on review comments

---
 src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 6 +++---
 src/Interpreters/TranslateQualifiedNamesVisitor.h   | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
index fd71dc01595..b58b90b6d47 100644
--- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
+++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp
@@ -31,12 +31,12 @@ namespace ErrorCodes
     extern const int UNSUPPORTED_JOIN_KEYS;
     extern const int LOGICAL_ERROR;
 }
-bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const String & name, const String & column_name, DataTypePtr column_type)
+bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const std::string_view & name, const String & column_name, DataTypePtr column_type)
 {
     if (name.size() < column_name.size())
         return false;

-    if (std::strncmp(name.data(), column_name.data(), column_name.size()) != 0)
+    if (!name.starts_with(column_name))
         return false;

     if (name.size() == column_name.size())
@@ -49,7 +49,7 @@ bool TranslateQualifiedNamesMatcher::Data::matchColumnName(const String & name,
     {
         const Strings & names = type_tuple->getElementNames();
         const DataTypes & element_types = type_tuple->getElements();
-        String sub_name = name.substr(column_name.size() + 1, name.size() - column_name.size());
+        std::string_view sub_name = name.substr(column_name.size() + 1);
         for (size_t i = 0; i < names.size(); ++i)
         {
             if (matchColumnName(sub_name, names[i], element_types[i]))
diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h
index b1d4d94d01c..e0c2f6b6bc0 100644
--- a/src/Interpreters/TranslateQualifiedNamesVisitor.h
+++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h
@@ -39,7 +39,7 @@ public:
         bool hasTable() const { return !tables.empty(); }
         bool processAsterisks() const { return hasTable() && has_columns; }
         bool unknownColumn(size_t table_pos, const ASTIdentifier & identifier) const;
-        static bool matchColumnName(const String & name, const String & column_name, DataTypePtr column_type);
+        static bool matchColumnName(const std::string_view & name, const String & column_name, DataTypePtr column_type);
     };

     static void visit(ASTPtr & ast, Data & data);

From 974e99fcfe9f7509585dc5891d1ca6c638381f9b Mon Sep 17 00:00:00 2001
From: santrancisco
Date: Fri, 1 Jul 2022 19:07:42 +1000
Subject: [PATCH 118/121] Remove broken client library link

---
 docs/en/interfaces/third-party/client-libraries.md | 1 -
 docs/ru/interfaces/third-party/client-libraries.md | 1 -
 docs/zh/interfaces/third-party/client-libraries.md | 1 -
 3 files changed, 3 deletions(-)

diff --git 
a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md index 705b9ef42c0..8067b18cc35 100644 --- a/docs/en/interfaces/third-party/client-libraries.md +++ b/docs/en/interfaces/third-party/client-libraries.md @@ -51,7 +51,6 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickHouse](https://github.com/IMSMWU/RClickHouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md index 702b53622da..ab2c9419b7f 100644 --- a/docs/ru/interfaces/third-party/client-libraries.md +++ b/docs/ru/interfaces/third-party/client-libraries.md @@ -45,7 +45,6 @@ sidebar_label: "Клиентские библиотеки от сторонни - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickhouse](https://github.com/IMSMWU/RClickhouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md index 52f7d70c0bf..8ed482eee73 100644 --- a/docs/zh/interfaces/third-party/client-libraries.md +++ b/docs/zh/interfaces/third-party/client-libraries.md @@ -46,7 +46,6 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试 - [clickhouse-rs](https://github.com/suharev7/clickhouse-rs) - [Klickhouse](https://github.com/Protryon/klickhouse) - R - - [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r) - [RClickHouse](https://github.com/IMSMWU/RClickHouse) - Java - [clickhouse-client-java](https://github.com/VirtusAI/clickhouse-client-java) From 16ccd3eccdd81371bdf92e6ef3400b68760c1b4e Mon Sep 17 00:00:00 2001 From: Filatenkov Artur <58165623+FArthur-cmd@users.noreply.github.com> Date: Fri, 1 Jul 2022 12:12:31 +0300 Subject: [PATCH 119/121] Update hardware.sh --- benchmark/hardware.sh | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh index e8c9c58aca3..0c3a1396440 100755 --- a/benchmark/hardware.sh +++ b/benchmark/hardware.sh @@ -40,10 +40,16 @@ if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse clie echo "Dataset already downloaded" else echo "Will download the dataset" + if [ "`uname`" = "Darwin" ] + then + ./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query " + CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" + else ./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query " CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" - + fi ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits" fi @@ -63,8 +69,8 @@ QUERY_NUM=1 cat "$QUERIES_FILE" | 
sed "s/{table}/hits/g" | while read query; do sync - if [ "${OS}" = "Darwin" ] - then + if [ "`uname`" = "Darwin" ] + then sudo purge > /dev/null else echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null @@ -90,8 +96,8 @@ echo touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt -if [ "${OS}" = "Darwin" ] -then +if [ "`uname`" = "Darwin" ] +then echo '----Version, build id-----------' ./clickhouse local --query "SELECT format('Version: {}', version())" ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw From 7cc063a1d0e54aa159c6456f46b2849d2e7622c2 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Fri, 1 Jul 2022 12:08:53 +0200 Subject: [PATCH 120/121] Fix strange backport titles issues --- tests/ci/cherry_pick_utils/cherrypick.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/cherry_pick_utils/cherrypick.py b/tests/ci/cherry_pick_utils/cherrypick.py index 92c87800828..c844beaee88 100644 --- a/tests/ci/cherry_pick_utils/cherrypick.py +++ b/tests/ci/cherry_pick_utils/cherrypick.py @@ -165,7 +165,7 @@ class CherryPick: "user.name=robot-clickhouse", ] - title = (self._pr["title"].replace('"', r"\""),) + title = self._pr["title"].replace('"', r"\"") pr_title = f"Backport #{self._pr['number']} to {self.target_branch}: {title}" self._run(git_prefix + ["checkout", "-f", self.backport_branch]) From 2a23c39a789c581778e3e59d44431dbf22109515 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 1 Jul 2022 15:20:33 +0300 Subject: [PATCH 121/121] Remove outdated cmake documentation When it had been added, initially, it was autogenerated - #14711 Later, after documentation rework, it had been removed, in #37441. And this makes documentation out dated, and out dated documentation may provide more harm then help, so let's remove it. Also it has links to the code in github that does not contain commit SHA1, so those links may point in the wrong place. Signed-off-by: Azat Khuzhin --- docs/_includes/cmake_in_clickhouse_footer.md | 121 ---- docs/_includes/cmake_in_clickhouse_header.md | 27 - docs/en/development/cmake-in-clickhouse.md | 545 ------------------- 3 files changed, 693 deletions(-) delete mode 100644 docs/_includes/cmake_in_clickhouse_footer.md delete mode 100644 docs/_includes/cmake_in_clickhouse_header.md delete mode 100644 docs/en/development/cmake-in-clickhouse.md diff --git a/docs/_includes/cmake_in_clickhouse_footer.md b/docs/_includes/cmake_in_clickhouse_footer.md deleted file mode 100644 index bf8411ba815..00000000000 --- a/docs/_includes/cmake_in_clickhouse_footer.md +++ /dev/null @@ -1,121 +0,0 @@ - -## Developer's guide for adding new CMake options - -### Don't be obvious. Be informative. - -Bad: -```cmake -option (ENABLE_TESTS "Enables testing" OFF) -``` - -This description is quite useless as is neither gives the viewer any additional information nor explains the option purpose. - -Better: - -```cmake -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some -pre-conditions, leave a comment above the `option()` line and explain what it does. -The best way would be linking the docs page (if it exists). -The comment is parsed into a separate column (see below). - -Even better: - -```cmake -# implies ${TESTS_ARE_ENABLED} -# see tests/CMakeLists.txt for implementation detail. 
-option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -### If the option's state could produce unwanted (or unusual) result, explicitly warn the user. - -Suppose you have an option that may strip debug symbols from the ClickHouse's part. -This can speed up the linking process, but produces a binary that cannot be debugged. -In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong. -Also, such options should be disabled if applies. - -Bad: -```cmake -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions. - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() - -``` -Better: - -```cmake -# Provides faster linking and lower binary size. -# Tradeoff is the inability to debug some source files with e.g. gdb -# (empty stack frames and no local variables)." -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions." - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - message(WARNING "Not generating debugger info for ClickHouse functions") - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -### In the option's description, explain WHAT the option does rather than WHY it does something. - -The WHY explanation should be placed in the comment. -You may find that the option's name is self-descriptive. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better: - -```cmake -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -### Don't assume other developers know as much as you do. - -In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to -the tool's docs. It won't take much of your time. - -Bad: - -```cmake -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better (combined with the above hint): - -```cmake -# https://clang.llvm.org/docs/ThinLTO.html -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -Other example, bad: - -```cmake -option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) -``` - -Better: - -```cmake -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF) -``` - -### Prefer consistent default values. - -CMake allows you to pass a plethora of values representing boolean `true/false`, e.g. `1, ON, YES, ...`. -Prefer the `ON/OFF` values, if possible. diff --git a/docs/_includes/cmake_in_clickhouse_header.md b/docs/_includes/cmake_in_clickhouse_header.md deleted file mode 100644 index 2f2e0421946..00000000000 --- a/docs/_includes/cmake_in_clickhouse_header.md +++ /dev/null @@ -1,27 +0,0 @@ -# CMake in ClickHouse - -## TL; DR How to make ClickHouse compile and link faster? - -Minimal ClickHouse build example: - -```bash -cmake .. \ - -DCMAKE_C_COMPILER=$(which clang-14) \ - -DCMAKE_CXX_COMPILER=$(which clang++-14) \ - -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_UTILS=OFF \ - -DENABLE_TESTS=OFF -``` - -## CMake files types - -1. 
ClickHouse's source CMake files (located in the root directory and in `/src`). -2. Arch-dependent CMake files (located in `/cmake/*os_name*`). -3. Libraries finders (search for contrib libraries, located in `/contrib/*/CMakeLists.txt`). -3. Contrib build CMake files (used instead of libraries' own CMake files, located in `/cmake/modules`) - -## List of CMake flags - -* This list is auto-generated by [this Python script](https://github.com/clickhouse/clickhouse/blob/master/docs/tools/cmake_in_clickhouse_generator.py). -* The flag name is a link to its position in the code. -* If an option's default value is itself an option, it's also a link to its position in this list. diff --git a/docs/en/development/cmake-in-clickhouse.md b/docs/en/development/cmake-in-clickhouse.md deleted file mode 100644 index 83279f5f69a..00000000000 --- a/docs/en/development/cmake-in-clickhouse.md +++ /dev/null @@ -1,545 +0,0 @@ ---- -sidebar_position: 69 -sidebar_label: CMake in ClickHouse -description: How to make ClickHouse compile and link faster ---- - -# CMake in ClickHouse - -How to make ClickHouse compile and link faster. Minimal ClickHouse build example: - -```bash -cmake .. \ - -DCMAKE_C_COMPILER=$(which clang-13) \ - -DCMAKE_CXX_COMPILER=$(which clang++-13) \ - -DCMAKE_BUILD_TYPE=Debug \ - -DENABLE_UTILS=OFF \ - -DENABLE_TESTS=OFF -``` - -## CMake files types - -1. ClickHouse source CMake files (located in the root directory and in /src). -2. Arch-dependent CMake files (located in /cmake/*os_name*). -3. Libraries finders (search for contrib libraries, located in /contrib/*/CMakeLists.txt). -4. Contrib build CMake files (used instead of libraries' own CMake files, located in /cmake/modules) - -## List of CMake flags -- The flag name is a link to its position in the code. -- If an option's default value is itself an option, it's also a link to its position in this list. - -## ClickHouse modes - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-| Name | Default value | Description | Comment |
-|------|---------------|-------------|---------|
-| ENABLE_CLICKHOUSE_ALL | ON | Enable all ClickHouse modes by default | The clickhouse binary is a multi purpose tool that contains multiple execution modes (client, server, etc.), each of them may be built and linked as a separate library. If you do not know what modes you need, turn this option OFF and enable SERVER and CLIENT only. |
-| ENABLE_CLICKHOUSE_BENCHMARK | ENABLE_CLICKHOUSE_ALL | Queries benchmarking mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark/ |
-| ENABLE_CLICKHOUSE_CLIENT | ENABLE_CLICKHOUSE_ALL | Client mode (interactive tui/shell that connects to the server) | |
-| ENABLE_CLICKHOUSE_COMPRESSOR | ENABLE_CLICKHOUSE_ALL | Data compressor and decompressor | https://clickhouse.com/docs/en/operations/utilities/clickhouse-compressor/ |
-| ENABLE_CLICKHOUSE_COPIER | ENABLE_CLICKHOUSE_ALL | Inter-cluster data copying mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/ |
-| ENABLE_CLICKHOUSE_EXTRACT_FROM_CONFIG | ENABLE_CLICKHOUSE_ALL | Configs processor (extract values etc.) | |
-| ENABLE_CLICKHOUSE_FORMAT | ENABLE_CLICKHOUSE_ALL | Queries pretty-printer and formatter with syntax highlighting | |
-| ENABLE_CLICKHOUSE_GIT_IMPORT | ENABLE_CLICKHOUSE_ALL | A tool to analyze Git repositories | https://presentations.clickhouse.com/matemarketing_2020/ |
-| ENABLE_CLICKHOUSE_INSTALL | OFF | Install ClickHouse without .deb/.rpm/.tgz packages (having the binary only) | |
-| ENABLE_CLICKHOUSE_KEEPER | ENABLE_CLICKHOUSE_ALL | ClickHouse alternative to ZooKeeper | |
-| ENABLE_CLICKHOUSE_KEEPER_CONVERTER | ENABLE_CLICKHOUSE_ALL | Util allows to convert ZooKeeper logs and snapshots into clickhouse-keeper snapshot | |
-| ENABLE_CLICKHOUSE_LIBRARY_BRIDGE | ENABLE_CLICKHOUSE_ALL | HTTP-server working like a proxy to Library dictionary source | |
-| ENABLE_CLICKHOUSE_LOCAL | ENABLE_CLICKHOUSE_ALL | Local files fast processing mode | https://clickhouse.com/docs/en/operations/utilities/clickhouse-local/ |
-| ENABLE_CLICKHOUSE_OBFUSCATOR | ENABLE_CLICKHOUSE_ALL | Table data obfuscator (convert real data to benchmark-ready one) | https://clickhouse.com/docs/en/operations/utilities/clickhouse-obfuscator/ |
-| ENABLE_CLICKHOUSE_ODBC_BRIDGE | ENABLE_CLICKHOUSE_ALL | HTTP-server working like a proxy to ODBC driver | |
-| ENABLE_CLICKHOUSE_SERVER | ENABLE_CLICKHOUSE_ALL | Server mode (main mode) | |
-| ENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER | ENABLE_CLICKHOUSE_ALL | A tool to export table data files to be later put to a static files web server | |
-
-## External libraries
-Note that ClickHouse uses forks of these libraries, see https://github.com/ClickHouse-Extras.
-
-| Name | Default value | Description | Comment |
-|------|---------------|-------------|---------|
-| ENABLE_AVX | 0 | Use AVX instructions on x86_64 | |
-| ENABLE_AVX2 | 0 | Use AVX2 instructions on x86_64 | |
-| ENABLE_AVX2_FOR_SPEC_OP | 0 | Use avx2 instructions for specific operations on x86_64 | |
-| ENABLE_AVX512 | 0 | Use AVX512 instructions on x86_64 | |
-| ENABLE_AVX512_FOR_SPEC_OP | 0 | Use avx512 instructions for specific operations on x86_64 | |
-| ENABLE_BMI | 0 | Use BMI instructions on x86_64 | |
-| ENABLE_CCACHE | ENABLE_CCACHE_BY_DEFAULT | Speedup re-compilations using ccache (external tool) | https://ccache.dev/ |
-| ENABLE_CLANG_TIDY | OFF | Use clang-tidy static analyzer | https://clang.llvm.org/extra/clang-tidy/ |
-| ENABLE_PCLMULQDQ | 1 | Use pclmulqdq instructions on x86_64 | |
-| ENABLE_POPCNT | 1 | Use popcnt instructions on x86_64 | |
-| ENABLE_SSE41 | 1 | Use SSE4.1 instructions on x86_64 | |
-| ENABLE_SSE42 | 1 | Use SSE4.2 instructions on x86_64 | |
-| ENABLE_SSSE3 | 1 | Use SSSE3 instructions on x86_64 | |
-
-## Other flags
-
-| Name | Default value | Description | Comment |
-|------|---------------|-------------|---------|
-| ADD_GDB_INDEX_FOR_GOLD | OFF | Add .gdb-index to resulting binaries for gold linker. | Ignored if lld is used |
-| ARCH_NATIVE | 0 | Add -march=native compiler flag. This makes your binaries non-portable but more performant code may be generated. This option overrides ENABLE_* options for specific instruction set. Highly not recommended to use. | |
-| BUILD_STANDALONE_KEEPER | OFF | Build keeper as small standalone binary | |
-| CLICKHOUSE_SPLIT_BINARY | OFF | Make several binaries (clickhouse-server, clickhouse-client etc.) instead of one bundled | |
-| COMPILER_PIPE | ON | -pipe compiler option | Less /tmp usage, more RAM usage. |
-| ENABLE_BUILD_PATH_MAPPING | ON | Enable remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). It's to generate reproducible builds. See https://reproducible-builds.org/docs/build-path | Reproducible builds. If turned ON, remap file source paths in debug info, predefined preprocessor macros and __builtin_FILE(). |
-| ENABLE_CHECK_HEAVY_BUILDS | OFF | Don't allow C++ translation units to compile too long or to take too much memory while compiling. | Take care to add prlimit in command line before ccache, or else ccache thinks that prlimit is compiler, and clang++ is its input file, and refuses to work with multiple inputs, e.g in ccache log: [2021-03-31T18:06:32.655327 36900] Command line: /usr/bin/ccache prlimit --as=10000000000 --data=5000000000 --cpu=600 /usr/bin/clang++-11 - ...... std=gnu++2a -MD -MT src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -MF src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o.d -o src/CMakeFiles/dbms.dir/Storages/MergeTree/IMergeTreeDataPart.cpp.o -c ../src/Storages/MergeTree/IMergeTreeDataPart.cpp [2021-03-31T18:06:32.656704 36900] Multiple input files: /usr/bin/clang++-11 and ../src/Storages/MergeTree/IMergeTreeDataPart.cpp Another way would be to use --ccache-skip option before clang++-11 to make ccache ignore it. |
-| ENABLE_COLORED_BUILD | ON | Enable colored diagnostics in build log. | |
-| ENABLE_EXAMPLES | OFF | Build all example programs in 'examples' subdirectories | |
-| ENABLE_FUZZING | OFF | Fuzzy testing using libfuzzer | |
-| ENABLE_LIBRARIES | ON | Enable all external libraries by default | Turns on all external libs like s3, kafka, ODBC, ... |
-| ENABLE_MULTITARGET_CODE | ON | Enable platform-dependent code | ClickHouse developers may use platform-dependent code under some macro (e.g. ifdef ENABLE_MULTITARGET). If turned ON, this option defines such macro. See src/Functions/TargetSpecific.h |
-| ENABLE_TESTS | ON | Provide unit_test_dbms target with Google.Test unit tests | If turned ON, assumes the user has either the system GTest library or the bundled one. |
-| ENABLE_THINLTO | ON | Clang-specific link time optimization | https://clang.llvm.org/docs/ThinLTO.html Applies to clang only. Disabled when building with tests or sanitizers. |
-| FAIL_ON_UNSUPPORTED_OPTIONS_COMBINATION | ON | Stop/Fail CMake configuration if some ENABLE_XXX option is defined (either ON or OFF) but is not possible to satisfy | If turned off: e.g. when ENABLE_FOO is ON, but FOO tool was not found, the CMake will continue. |
-| GLIBC_COMPATIBILITY | ON | Enable compatibility with older glibc libraries. | Only for Linux, x86_64 or aarch64. |
-| SPLIT_DEBUG_SYMBOLS | OFF | Build stripped binaries with debug info in separate directory | |
-| LINKER_NAME | OFF | Linker name or full path | Example values: lld-10, gold. |
-| PARALLEL_COMPILE_JOBS | "" | Maximum number of concurrent compilation jobs | 1 if not set |
-| PARALLEL_LINK_JOBS | "" | Maximum number of concurrent link jobs | 1 if not set |
-| SANITIZE | "" | Enable one of the code sanitizers | Possible values: address (ASan), memory (MSan), thread (TSan), undefined (UBSan), "" (no sanitizing) |
-| SPLIT_SHARED_LIBRARIES | OFF | Keep all internal libraries as separate .so files | DEVELOPER ONLY. Faster linking if turned on. |
-| STRIP_DEBUG_SYMBOLS_FUNCTIONS | STRIP_DSF_DEFAULT | Do not generate debugger info for ClickHouse functions | Provides faster linking and lower binary size. Tradeoff is the inability to debug some source files with e.g. gdb (empty stack frames and no local variables). |
-| USE_DEBUG_HELPERS | USE_DEBUG_HELPERS | Enable debug helpers | |
-| USE_STATIC_LIBRARIES | ON | Disable to use shared libraries | |
-| USE_UNWIND | ENABLE_LIBRARIES | Enable libunwind (better stacktraces) | |
-| WERROR | OFF | Enable -Werror compiler option | Using system libs can cause a lot of warnings in includes (on macro expansion). |
-| WITH_COVERAGE | OFF | Profile the resulting binary/binaries | Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc |
- -## Developer's guide for adding new CMake options - -#### Don't be obvious. Be informative. - -Bad: - -``` -option (ENABLE_TESTS "Enables testing" OFF) -``` - -This description is quite useless as it neither gives the viewer any additional information nor explains the option purpose. - -Better: - -``` -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -If the option's purpose can't be guessed by its name, or the purpose guess may be misleading, or option has some -pre-conditions, leave a comment above the option() line and explain what it does. -The best way would be linking the docs page (if it exists). -The comment is parsed into a separate column (see below). - -Even better: - -``` -# implies ${TESTS_ARE_ENABLED} -# see tests/CMakeLists.txt for implementation detail. -option(ENABLE_TESTS "Provide unit_test_dbms target with Google.test unit tests" OFF) -``` - -#### If the option's state could produce unwanted (or unusual) result, explicitly warn the user. - -Suppose you have an option that may strip debug symbols from the ClickHouse part. -This can speed up the linking process, but produces a binary that cannot be debugged. -In that case, prefer explicitly raising a warning telling the developer that he may be doing something wrong. -Also, such options should be disabled if applies. - -Bad: - -``` -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions. - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -Better: - -``` -# Provides faster linking and lower binary size. -# Tradeoff is the inability to debug some source files with e.g. gdb -# (empty stack frames and no local variables)." -option(STRIP_DEBUG_SYMBOLS_FUNCTIONS - "Do not generate debugger info for ClickHouse functions." - ${STRIP_DSF_DEFAULT}) - -if (STRIP_DEBUG_SYMBOLS_FUNCTIONS) - message(WARNING "Not generating debugger info for ClickHouse functions") - target_compile_options(clickhouse_functions PRIVATE "-g0") -endif() -``` - -#### In the option's description, explain WHAT the option does rather than WHY it does something. -The WHY explanation should be placed in the comment. You may find that the option's name is self-descriptive. - -Bad: - -``` -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better: - -``` -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). -``` - -#### Don't assume other developers know as much as you do. -In ClickHouse, there are many tools used that an ordinary developer may not know. If you are in doubt, give a link to -the tool's docs. It won't take much of your time. - -Bad: - -``` -option(ENABLE_THINLTO "Enable Thin LTO. Only applicable for clang. It's also suppressed when building with tests or sanitizers." ON) -``` - -Better (combined with the above hint): - -``` -# https://clang.llvm.org/docs/ThinLTO.html -# Only applicable for clang. -# Turned off when building with tests or sanitizers. -option(ENABLE_THINLTO "Clang-specific link time optimisation" ON). 
-``` - -Other example, bad: - -``` -option (USE_INCLUDE_WHAT_YOU_USE "Use 'include-what-you-use' tool" OFF) -``` - -Better: - -``` -# https://github.com/include-what-you-use/include-what-you-use -option (USE_INCLUDE_WHAT_YOU_USE "Reduce unneeded #include s (external tool)" OFF) -``` - -#### Prefer consistent default values. -CMake allows you to pass a plethora of values representing boolean true/false, e.g. 1, ON, YES, .... - -Prefer the ON/OFF values, if possible. -