From 4f9920c71ccb5edeeff15686e8cb75a07287d98c Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Fri, 24 Feb 2023 17:53:17 +0800 Subject: [PATCH 01/30] optimize performance of nullable String And Number column serializeValueIntoArena --- .../aggregate_with_serialized_method.xml | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 tests/performance/aggregate_with_serialized_method.xml diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml new file mode 100644 index 00000000000..52c7a0ddd3f --- /dev/null +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -0,0 +1,31 @@ + + + 8 + 0 + 4 + + + + CREATE TABLE t_nullable + ( + key_string1 Nullable(String), + key_string2 Nullable(String), + key_string3 Nullable(String), + key_int64_1 Nullable(Int64), + key_int64_2 Nullable(Int64), + key_int64_3 Nullable(Int64), + key_int64_4 Nullable(Int64), + key_int64_5 Nullable(Int64), + m1 Int64, + m2 Int64, + ) + ENGINE = MergeTree + ORDER BY tuple() + + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000) + OPTIMIZE TABLE t_nullable FINAL + select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null + select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null + + drop table if exists t_nullable + \ No newline at end of file From 43e0481ac040922edfc519a5bd0cf6cd781924cd Mon Sep 17 00:00:00 2001 From: LiuNeng <1398775315@qq.com> Date: Tue, 25 Apr 2023 11:38:50 +0800 Subject: [PATCH 02/30] optimize agg with multiple string key --- src/Columns/ColumnNullable.cpp | 47 +++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 7 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 2eb2ff0bf69..08f707d0b30 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -137,18 +137,51 @@ void ColumnNullable::insertData(const char * pos, size_t length) StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const { const auto & arr = getNullMapData(); + const bool is_null = arr[n]; static constexpr auto s = sizeof(arr[0]); + char * pos; + if (const ColumnString * string_col = checkAndGetColumn(getNestedColumn())) + { + auto data = string_col->getDataAt(n); + size_t string_size = data.size + 1; + auto memory_size = is_null ? s : s + sizeof(string_size) + string_size; + pos = arena.allocContinue(memory_size, begin); + memcpy(pos, &arr[n], s); + if (!is_null) + { + memcpy(pos + s, &string_size, sizeof(string_size)); + memcpy(pos + s + sizeof(string_size), data.data, string_size); + } + return StringRef(pos, memory_size); + } + else if (getNestedColumn().valuesHaveFixedSize()) + { + auto col = getNestedColumnPtr(); + auto data = col->getDataAt(n); + auto size = col->sizeOfValueIfFixed(); + auto memory_size = is_null ? 
s : s + size; + pos = arena.allocContinue(memory_size, begin); + memcpy(pos, &arr[n], s); + if (!is_null) + { + memcpy(pos + s, data.data, size); + } + return StringRef(pos, memory_size); + } + else + { + pos = arena.allocContinue(s, begin); + memcpy(pos, &arr[n], s); - auto * pos = arena.allocContinue(s, begin); - memcpy(pos, &arr[n], s); + if (arr[n]) + return StringRef(pos, s); - if (arr[n]) - return StringRef(pos, s); + auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin); - auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin); + /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back. + return StringRef(nested_ref.data - s, nested_ref.size + s); + } - /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back. - return StringRef(nested_ref.data - s, nested_ref.size + s); } const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos) From 035dbdaf220d4bfdedc88711aae799145362221d Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 26 Jun 2023 13:42:24 +0800 Subject: [PATCH 03/30] remove numbers optimization. It will decrease performance --- src/Columns/ColumnNullable.cpp | 14 -------------- .../aggregate_with_serialized_method.xml | 14 +++----------- 2 files changed, 3 insertions(+), 25 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 08f707d0b30..48b3740fa97 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -154,20 +154,6 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char } return StringRef(pos, memory_size); } - else if (getNestedColumn().valuesHaveFixedSize()) - { - auto col = getNestedColumnPtr(); - auto data = col->getDataAt(n); - auto size = col->sizeOfValueIfFixed(); - auto memory_size = is_null ? 
s : s + size; - pos = arena.allocContinue(memory_size, begin); - memcpy(pos, &arr[n], s); - if (!is_null) - { - memcpy(pos + s, data.data, size); - } - return StringRef(pos, memory_size); - } else { pos = arena.allocContinue(s, begin); diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml index 52c7a0ddd3f..3c0ad4a7223 100644 --- a/tests/performance/aggregate_with_serialized_method.xml +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -11,21 +11,13 @@ key_string1 Nullable(String), key_string2 Nullable(String), key_string3 Nullable(String), - key_int64_1 Nullable(Int64), - key_int64_2 Nullable(Int64), - key_int64_3 Nullable(Int64), - key_int64_4 Nullable(Int64), - key_int64_5 Nullable(Int64), m1 Int64, m2 Int64, ) - ENGINE = MergeTree - ORDER BY tuple() + ENGINE = Memory - insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000) - OPTIMIZE TABLE t_nullable FINAL - select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null - select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000) + select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 drop table if exists t_nullable \ No newline at end of file From f96b9b7512222ba71f48c905ac2d181515e99774 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Wed, 28 Jun 2023 15:04:43 +0800 Subject: [PATCH 04/30] optimize fixed size column --- src/Columns/ColumnNullable.cpp | 17 +++++++++++++++-- .../aggregate_with_serialized_method.xml | 10 ++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 48b3740fa97..02a3de5ae55 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -140,9 +140,9 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const bool is_null = arr[n]; static constexpr auto s = sizeof(arr[0]); char * pos; - if (const ColumnString * string_col = checkAndGetColumn(getNestedColumn())) + if (isString(nested_column->getDataType())) { - auto data = string_col->getDataAt(n); + auto data = nested_column->getDataAt(n); size_t string_size = data.size + 1; auto memory_size = is_null ? s : s + sizeof(string_size) + string_size; pos = arena.allocContinue(memory_size, begin); @@ -154,6 +154,19 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char } return StringRef(pos, memory_size); } + else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType())) + { + auto data = nested_column->getDataAt(n); + auto size = data.size; + auto memory_size = is_null ? 
s : s + size; + pos = arena.allocContinue(memory_size, begin); + memcpy(pos, &arr[n], s); + if (!is_null) + { + memcpy(pos + s, data.data, size); + } + return StringRef(pos, memory_size); + } else { pos = arena.allocContinue(s, begin); diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml index 3c0ad4a7223..a280dae67aa 100644 --- a/tests/performance/aggregate_with_serialized_method.xml +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -11,13 +11,19 @@ key_string1 Nullable(String), key_string2 Nullable(String), key_string3 Nullable(String), + key_int64_1 Nullable(Int64), + key_int64_2 Nullable(Int64), + key_int64_3 Nullable(Int64), + key_int64_4 Nullable(Int64), + key_int64_5 Nullable(Int64), m1 Int64, - m2 Int64, + m2 Int64 ) ENGINE = Memory - insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000) + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000) select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 + select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2 drop table if exists t_nullable \ No newline at end of file From 62dffd0be232469a0440beb91a16efd40a398583 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Wed, 28 Jun 2023 22:42:54 +0800 Subject: [PATCH 05/30] optimize conditions --- src/Columns/ColumnNullable.cpp | 6 ++++-- src/Columns/ColumnNullable.h | 3 +++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 02a3de5ae55..3cf1f158031 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -34,6 +34,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn { /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. 
nested_column = getNestedColumn().convertToFullColumnIfConst(); + is_string = isString(nested_column->getDataType()); + is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()); if (!getNestedColumn().canBeInsideNullable()) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName()); @@ -140,7 +142,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const bool is_null = arr[n]; static constexpr auto s = sizeof(arr[0]); char * pos; - if (isString(nested_column->getDataType())) + if (is_string) { auto data = nested_column->getDataAt(n); size_t string_size = data.size + 1; @@ -154,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char } return StringRef(pos, memory_size); } - else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType())) + else if (is_number_or_fixed_string) { auto data = nested_column->getDataAt(n); auto size = data.size; diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index bc95eca69b9..e569b989c35 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -212,6 +212,9 @@ public: private: WrappedPtr nested_column; WrappedPtr null_map; + // optimize serializeValueIntoArena + bool is_string; + bool is_number_or_fixed_string; template void applyNullMapImpl(const NullMap & map); From 81f0d175285c08ce96d619771d29555b84b8c7fd Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Thu, 29 Jun 2023 10:25:36 +0800 Subject: [PATCH 06/30] change param name --- src/Columns/ColumnNullable.cpp | 4 ++-- src/Columns/ColumnNullable.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 3cf1f158031..9045851d790 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -35,7 +35,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. 
nested_column = getNestedColumn().convertToFullColumnIfConst(); is_string = isString(nested_column->getDataType()); - is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()); + is_fixed_size_column = nested_column->valuesHaveFixedSize(); if (!getNestedColumn().canBeInsideNullable()) throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName()); @@ -156,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char } return StringRef(pos, memory_size); } - else if (is_number_or_fixed_string) + else if (is_fixed_size_column) { auto data = nested_column->getDataAt(n); auto size = data.size; diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index e569b989c35..4f37650ffe3 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -214,7 +214,7 @@ private: WrappedPtr null_map; // optimize serializeValueIntoArena bool is_string; - bool is_number_or_fixed_string; + bool is_fixed_size_column; template void applyNullMapImpl(const NullMap & map); From 594b38229f05d6c3a1182f7efdd21ca1efa4b6b4 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Wed, 5 Jul 2023 13:53:12 +0800 Subject: [PATCH 07/30] another version --- src/Columns/ColumnAggregateFunction.cpp | 2 +- src/Columns/ColumnAggregateFunction.h | 2 +- src/Columns/ColumnArray.cpp | 2 +- src/Columns/ColumnArray.h | 2 +- src/Columns/ColumnCompressed.h | 2 +- src/Columns/ColumnConst.h | 2 +- src/Columns/ColumnDecimal.cpp | 21 +++++++++++++++++-- src/Columns/ColumnDecimal.h | 2 +- src/Columns/ColumnFixedString.cpp | 23 ++++++++++++++++++--- src/Columns/ColumnFixedString.h | 2 +- src/Columns/ColumnFunction.h | 2 +- src/Columns/ColumnLowCardinality.cpp | 2 +- src/Columns/ColumnLowCardinality.h | 2 +- src/Columns/ColumnMap.cpp | 2 +- src/Columns/ColumnMap.h | 2 +- src/Columns/ColumnNullable.cpp | 27 ++++--------------------- src/Columns/ColumnNullable.h | 2 +- src/Columns/ColumnObject.h | 2 +- src/Columns/ColumnSparse.cpp | 2 +- src/Columns/ColumnSparse.h | 2 +- src/Columns/ColumnString.cpp | 23 ++++++++++++++++----- src/Columns/ColumnString.h | 3 ++- src/Columns/ColumnTuple.cpp | 2 +- src/Columns/ColumnTuple.h | 2 +- src/Columns/ColumnUnique.h | 4 ++-- src/Columns/ColumnVector.cpp | 23 ++++++++++++++++++--- src/Columns/ColumnVector.h | 2 +- src/Columns/IColumn.h | 2 +- src/Columns/IColumnDummy.h | 2 +- 29 files changed, 107 insertions(+), 61 deletions(-) diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp index 62ec324455e..3ebb30df87e 100644 --- a/src/Columns/ColumnAggregateFunction.cpp +++ b/src/Columns/ColumnAggregateFunction.cpp @@ -524,7 +524,7 @@ void ColumnAggregateFunction::insertDefault() pushBackAndCreateState(data, arena, func.get()); } -StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const +StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin, const UInt8 *) const { WriteBufferFromArena out(arena, begin); func->serialize(data[n], out, version); diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h index f9ce45708c9..7c7201e585a 100644 --- a/src/Columns/ColumnAggregateFunction.h +++ b/src/Columns/ColumnAggregateFunction.h @@ -162,7 +162,7 @@ public: void insertDefault() override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const 
override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * src_arena) override; diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp index 74512d1669b..1cb8188bce6 100644 --- a/src/Columns/ColumnArray.cpp +++ b/src/Columns/ColumnArray.cpp @@ -205,7 +205,7 @@ void ColumnArray::insertData(const char * pos, size_t length) } -StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { size_t array_size = sizeAt(n); size_t offset = offsetAt(n); diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h index f011d9a607b..2a9bfa405e5 100644 --- a/src/Columns/ColumnArray.h +++ b/src/Columns/ColumnArray.h @@ -77,7 +77,7 @@ public: StringRef getDataAt(size_t n) const override; bool isDefaultAt(size_t n) const override; void insertData(const char * pos, size_t length) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h index bfe7cdb4924..b780fbbf37a 100644 --- a/src/Columns/ColumnCompressed.h +++ b/src/Columns/ColumnCompressed.h @@ -88,7 +88,7 @@ public: void insertData(const char *, size_t) override { throwMustBeDecompressed(); } void insertDefault() override { throwMustBeDecompressed(); } void popBack(size_t) override { throwMustBeDecompressed(); } - StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); } + StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeDecompressed(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); } const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); } diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h index f769dd6cc2a..dc84e0c2402 100644 --- a/src/Columns/ColumnConst.h +++ b/src/Columns/ColumnConst.h @@ -151,7 +151,7 @@ public: s -= n; } - StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin) const override + StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin, const UInt8 *) const override { return data->serializeValueIntoArena(0, arena, begin); } diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp index 8e5792934cf..142ee6c271d 100644 --- a/src/Columns/ColumnDecimal.cpp +++ b/src/Columns/ColumnDecimal.cpp @@ -59,9 +59,26 @@ bool ColumnDecimal::hasEqualValues() const } template -StringRef ColumnDecimal::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnDecimal::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const { - auto * pos = arena.allocContinue(sizeof(T), begin); + constexpr size_t null_bit_size = sizeof(UInt8); + StringRef res; + char * pos; + if (null_bit) + { + 
res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
+        pos = arena.allocContinue(res.size, begin);
+        res.data = pos;
+        memcpy(pos, null_bit, null_bit_size);
+        if (*null_bit) return res;
+        pos += null_bit_size;
+    }
+    else
+    {
+        res.size = sizeof(T);
+        pos = arena.allocContinue(res.size, begin);
+        res.data = pos;
+    }
     memcpy(pos, &data[n], sizeof(T));
-    return StringRef(pos, sizeof(T));
+    return res;
 }
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 03e0b9be558..fb24ae4554b 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -80,7 +80,7 @@ public:
     Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo<Float64>(data[n], scale); }
 
-    StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+    StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
     const char * deserializeAndInsertFromArena(const char * pos) override;
     const char * skipSerializedInArena(const char * pos) const override;
     void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 24b5c435ecd..a18e5c522a1 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -86,11 +86,28 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
         memset(chars.data() + old_size + length, 0, n - length);
 }
 
-StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
+StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 * null_bit) const
 {
-    auto * pos = arena.allocContinue(n, begin);
+    constexpr size_t null_bit_size = sizeof(UInt8);
+    StringRef res;
+    char * pos;
+    if (null_bit)
+    {
+        res.size = * null_bit ?
null_bit_size : null_bit_size + n; + pos = arena.allocContinue(res.size, begin); + res.data = pos; + memcpy(pos, null_bit, null_bit_size); + if (*null_bit) return res; + pos += null_bit_size; + } + else + { + res.size = n; + pos = arena.allocContinue(res.size, begin); + res.data = pos; + } memcpy(pos, &chars[n * index], n); - return StringRef(pos, n); + return res; } const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos) diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h index 39497e3403e..445432b7b28 100644 --- a/src/Columns/ColumnFixedString.h +++ b/src/Columns/ColumnFixedString.h @@ -115,7 +115,7 @@ public: chars.resize_assume_reserved(chars.size() - n * elems); } - StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h index a1f6245c2bd..c21e88744e0 100644 --- a/src/Columns/ColumnFunction.h +++ b/src/Columns/ColumnFunction.h @@ -96,7 +96,7 @@ public: throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName()); } - StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override + StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot serialize from {}", getName()); } diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp index 9269ea4ee4d..41358a4e538 100644 --- a/src/Columns/ColumnLowCardinality.cpp +++ b/src/Columns/ColumnLowCardinality.cpp @@ -255,7 +255,7 @@ void ColumnLowCardinality::insertData(const char * pos, size_t length) idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length)); } -StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin); } diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h index dcd07ff3b34..91bd5945fd9 100644 --- a/src/Columns/ColumnLowCardinality.h +++ b/src/Columns/ColumnLowCardinality.h @@ -87,7 +87,7 @@ public: void popBack(size_t n) override { idx.popBack(n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp index 797700e87b0..ddcde43ca23 100644 --- a/src/Columns/ColumnMap.cpp +++ b/src/Columns/ColumnMap.cpp @@ -111,7 +111,7 @@ void ColumnMap::popBack(size_t n) nested->popBack(n); } -StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { return nested->serializeValueIntoArena(n, arena, begin); } diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h index e5bc26127df..fde8a7e0e67 100644 --- a/src/Columns/ColumnMap.h +++ b/src/Columns/ColumnMap.h @@ -58,7 +58,7 @@ 
public: void insert(const Field & x) override; void insertDefault() override; void popBack(size_t n) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index 9045851d790..ce0876647b9 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -136,38 +136,19 @@ void ColumnNullable::insertData(const char * pos, size_t length) } } -StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { const auto & arr = getNullMapData(); - const bool is_null = arr[n]; static constexpr auto s = sizeof(arr[0]); char * pos; if (is_string) { - auto data = nested_column->getDataAt(n); - size_t string_size = data.size + 1; - auto memory_size = is_null ? s : s + sizeof(string_size) + string_size; - pos = arena.allocContinue(memory_size, begin); - memcpy(pos, &arr[n], s); - if (!is_null) - { - memcpy(pos + s, &string_size, sizeof(string_size)); - memcpy(pos + s + sizeof(string_size), data.data, string_size); - } - return StringRef(pos, memory_size); + const auto * column_string = static_cast(nested_column.get()); + return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]); } else if (is_fixed_size_column) { - auto data = nested_column->getDataAt(n); - auto size = data.size; - auto memory_size = is_null ? 
s : s + size; - pos = arena.allocContinue(memory_size, begin); - memcpy(pos, &arr[n], s); - if (!is_null) - { - memcpy(pos + s, data.data, size); - } - return StringRef(pos, memory_size); + return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]); } else { diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 4f37650ffe3..679f51d5900 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -62,7 +62,7 @@ public: StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr void insertData(const char * pos, size_t length) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h index bc5a6b69bb0..36a33a8f10f 100644 --- a/src/Columns/ColumnObject.h +++ b/src/Columns/ColumnObject.h @@ -244,7 +244,7 @@ public: StringRef getDataAt(size_t) const override { throwMustBeConcrete(); } bool isDefaultAt(size_t) const override { throwMustBeConcrete(); } void insertData(const char *, size_t) override { throwMustBeConcrete(); } - StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); } + StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeConcrete(); } const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); } const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); } void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); } diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp index 4f76a9be4b9..057c0cd7112 100644 --- a/src/Columns/ColumnSparse.cpp +++ b/src/Columns/ColumnSparse.cpp @@ -150,7 +150,7 @@ void ColumnSparse::insertData(const char * pos, size_t length) insertSingleValue([&](IColumn & column) { column.insertData(pos, length); }); } -StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { return values->serializeValueIntoArena(getValueIndex(n), arena, begin); } diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h index 26e05655f60..48c7422dd27 100644 --- a/src/Columns/ColumnSparse.h +++ b/src/Columns/ColumnSparse.h @@ -78,7 +78,7 @@ public: /// Will insert null value if pos=nullptr void insertData(const char * pos, size_t length) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char *) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp index 38c7b2c0dd6..50fe90ad8ef 100644 --- a/src/Columns/ColumnString.cpp +++ b/src/Columns/ColumnString.cpp @@ -213,17 +213,30 @@ ColumnPtr 
ColumnString::permute(const Permutation & perm, size_t limit) const } -StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const { size_t string_size = sizeAt(n); size_t offset = offsetAt(n); - + constexpr size_t null_bit_size = sizeof(UInt8); StringRef res; - res.size = sizeof(string_size) + string_size; - char * pos = arena.allocContinue(res.size, begin); + char * pos; + if (null_bit) + { + res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(string_size) + string_size; + pos = arena.allocContinue(res.size, begin); + res.data = pos; + memcpy(pos, null_bit, null_bit_size); + if (*null_bit) return res; + pos += null_bit_size; + } + else + { + res.size = sizeof(string_size) + string_size; + pos = arena.allocContinue(res.size, begin); + res.data = pos; + } memcpy(pos, &string_size, sizeof(string_size)); memcpy(pos + sizeof(string_size), &chars[offset], string_size); - res.data = pos; return res; } diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 08c876a803d..e8e5ebbcbf9 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -11,6 +11,7 @@ #include #include #include +#include class Collator; @@ -168,7 +169,7 @@ public: offsets.resize_assume_reserved(offsets.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp index 9702d275114..d8992125be4 100644 --- a/src/Columns/ColumnTuple.cpp +++ b/src/Columns/ColumnTuple.cpp @@ -171,7 +171,7 @@ void ColumnTuple::popBack(size_t n) column->popBack(n); } -StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { StringRef res(begin, 0); for (const auto & column : columns) diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h index e7dee9b8ff9..79099f4c098 100644 --- a/src/Columns/ColumnTuple.h +++ b/src/Columns/ColumnTuple.h @@ -61,7 +61,7 @@ public: void insertFrom(const IColumn & src_, size_t n) override; void insertDefault() override; void popBack(size_t n) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash) const override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 377255d80c7..d2fc69d7fb8 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -79,7 +79,7 @@ public: Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); } bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef 
serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash_func) const override { @@ -373,7 +373,7 @@ size_t ColumnUnique::uniqueInsertData(const char * pos, size_t lengt } template -StringRef ColumnUnique::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnUnique::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const { if (is_nullable) { diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp index f2fe343a371..a9b8c0ccacb 100644 --- a/src/Columns/ColumnVector.cpp +++ b/src/Columns/ColumnVector.cpp @@ -49,11 +49,28 @@ namespace ErrorCodes } template -StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const +StringRef ColumnVector::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const { - auto * pos = arena.allocContinue(sizeof(T), begin); + constexpr size_t null_bit_size = sizeof(UInt8); + StringRef res; + char * pos; + if (null_bit) + { + res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T); + pos = arena.allocContinue(res.size, begin); + res.data = pos; + memcpy(pos, null_bit, null_bit_size); + if (*null_bit) return res; + pos += null_bit_size; + } + else + { + res.size = sizeof(T); + pos = arena.allocContinue(res.size, begin); + res.data = pos; + } unalignedStore(pos, data[n]); - return StringRef(pos, sizeof(T)); + return res; } template diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index b8ebff2a5d5..7bb69656c5a 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -174,7 +174,7 @@ public: data.resize_assume_reserved(data.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h index b4eaf5c28f5..12ac1102efd 100644 --- a/src/Columns/IColumn.h +++ b/src/Columns/IColumn.h @@ -218,7 +218,7 @@ public: * For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes. * Parameter begin should be used with Arena::allocContinue. */ - virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const = 0; + virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const = 0; /// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method. /// Returns pointer to the position after the read data. diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h index 82d4c857b29..4cadae2bc3d 100644 --- a/src/Columns/IColumnDummy.h +++ b/src/Columns/IColumnDummy.h @@ -57,7 +57,7 @@ public: ++s; } - StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const override + StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const override { /// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous. 
char * res = arena.allocContinue(1, begin); From 2997fe0677813edd622a5b8f2fe7f4ae17591b03 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Wed, 5 Jul 2023 18:30:54 +0800 Subject: [PATCH 08/30] add default value for compile --- src/Columns/ColumnNullable.h | 2 +- src/Columns/ColumnString.h | 2 +- src/Columns/ColumnUnique.h | 2 +- src/Columns/ColumnVector.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 679f51d5900..8064ce014d3 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -62,7 +62,7 @@ public: StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr void insertData(const char * pos, size_t length) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index e8e5ebbcbf9..907dc83caeb 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -169,7 +169,7 @@ public: offsets.resize_assume_reserved(offsets.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index d2fc69d7fb8..69f4818e6be 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -79,7 +79,7 @@ public: Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); } bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash_func) const override { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 7bb69656c5a..232769a5295 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -174,7 +174,7 @@ public: data.resize_assume_reserved(data.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; const char * deserializeAndInsertFromArena(const char * pos) override; From f33367cd8b50089d33ad3dc431157396f369fb12 Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 7 Aug 2023 13:37:24 +0800 Subject: [PATCH 09/30] add more test --- src/Columns/ColumnNullable.h | 2 +- src/Columns/ColumnString.h | 2 +- src/Columns/ColumnUnique.h | 2 +- 
src/Columns/ColumnVector.h | 2 +- src/Columns/tests/gtest_column_unique.cpp | 6 +++--- tests/performance/aggregate_with_serialized_method.xml | 4 +++- 6 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h index 8064ce014d3..719fa698acc 100644 --- a/src/Columns/ColumnNullable.h +++ b/src/Columns/ColumnNullable.h @@ -62,7 +62,7 @@ public: StringRef getDataAt(size_t) const override; /// Will insert null value if pos=nullptr void insertData(const char * pos, size_t length) override; - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * deserializeAndInsertFromArena(const char * pos) override; const char * skipSerializedInArena(const char * pos) const override; void insertRangeFrom(const IColumn & src, size_t start, size_t length) override; diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h index 907dc83caeb..e8e5ebbcbf9 100644 --- a/src/Columns/ColumnString.h +++ b/src/Columns/ColumnString.h @@ -169,7 +169,7 @@ public: offsets.resize_assume_reserved(offsets.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h index 69f4818e6be..882d17b1649 100644 --- a/src/Columns/ColumnUnique.h +++ b/src/Columns/ColumnUnique.h @@ -79,7 +79,7 @@ public: Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); } bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); } bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * skipSerializedInArena(const char * pos) const override; void updateHashWithValue(size_t n, SipHash & hash_func) const override { diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h index 232769a5295..7bb69656c5a 100644 --- a/src/Columns/ColumnVector.h +++ b/src/Columns/ColumnVector.h @@ -174,7 +174,7 @@ public: data.resize_assume_reserved(data.size() - n); } - StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override; + StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override; const char * deserializeAndInsertFromArena(const char * pos) override; diff --git a/src/Columns/tests/gtest_column_unique.cpp b/src/Columns/tests/gtest_column_unique.cpp index 15208da70fb..ab2cb42b603 100644 --- a/src/Columns/tests/gtest_column_unique.cpp +++ b/src/Columns/tests/gtest_column_unique.cpp @@ -117,7 +117,7 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const const char * pos = nullptr; for (size_t i = 0; i < num_values; ++i) { - auto ref = column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos); + auto ref = 
column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos, nullptr); const char * new_pos; column_unique->uniqueDeserializeAndInsertFromArena(ref.data, new_pos); ASSERT_EQ(new_pos - ref.data, ref.size) << "Deserialized data has different sizes at position " << i; @@ -140,8 +140,8 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const const char * pos_lc = nullptr; for (size_t i = 0; i < num_values; ++i) { - auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string); - auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc); + auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string, nullptr); + auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc, nullptr); ASSERT_EQ(ref_string, ref_lc) << "Serialized data is different from pattern at position " << i; } } diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml index a280dae67aa..4c4ef0438ae 100644 --- a/tests/performance/aggregate_with_serialized_method.xml +++ b/tests/performance/aggregate_with_serialized_method.xml @@ -21,9 +21,11 @@ ) ENGINE = Memory - insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000) + insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000) select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3 select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2 + select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5 + select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10 drop table if exists t_nullable \ No newline at end of file From 8a8330131644c106771055de7f67f761d01e00cd Mon Sep 17 00:00:00 2001 From: liuneng <1398775315@qq.com> Date: Mon, 7 Aug 2023 14:25:15 +0800 Subject: [PATCH 10/30] optimize --- src/Columns/ColumnNullable.cpp | 92 ++++++++++++++----- src/Columns/ColumnNullable.h | 4 +- .../aggregate_with_serialized_method.xml | 1 + 3 files changed, 72 insertions(+), 25 deletions(-) diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp index ce0876647b9..ea95016a766 100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -4,6 +4,10 @@ #include #include #include +#include "ColumnDecimal.h" +#include "ColumnFixedString.h" +#include "ColumnsDateTime.h" +#include "ColumnsNumber.h" #include #include #include @@ -34,8 +38,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn { /// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it. 
nested_column = getNestedColumn().convertToFullColumnIfConst();
-    is_string = isString(nested_column->getDataType());
-    is_fixed_size_column = nested_column->valuesHaveFixedSize();
+    nested_type = nested_column->getDataType();
 
     if (!getNestedColumn().canBeInsideNullable())
         throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -141,29 +144,72 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
     const auto & arr = getNullMapData();
     static constexpr auto s = sizeof(arr[0]);
     char * pos;
-    if (is_string)
-    {
-        const auto * column_string = static_cast<const ColumnString *>(nested_column.get());
-        return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]);
-    }
-    else if (is_fixed_size_column)
-    {
-        return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]);
-    }
-    else
-    {
-        pos = arena.allocContinue(s, begin);
-        memcpy(pos, &arr[n], s);
-
-        if (arr[n])
-            return StringRef(pos, s);
-
-        auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
-
-        /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
-        return StringRef(nested_ref.data - s, nested_ref.size + s);
-    }
-
+
+    switch (nested_type)
+    {
+        case TypeIndex::UInt8:
+            return static_cast<const ColumnUInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt16:
+            return static_cast<const ColumnUInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt32:
+            return static_cast<const ColumnUInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt64:
+            return static_cast<const ColumnUInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt128:
+            return static_cast<const ColumnUInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt256:
+            return static_cast<const ColumnUInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int8:
+            return static_cast<const ColumnInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int16:
+            return static_cast<const ColumnInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int32:
+            return static_cast<const ColumnInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int64:
+            return static_cast<const ColumnInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int128:
+            return static_cast<const ColumnInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int256:
+            return static_cast<const ColumnInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Float32:
+            return static_cast<const ColumnFloat32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Float64:
+            return static_cast<const ColumnFloat64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Date:
+            return static_cast<const ColumnDate *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Date32:
+            return static_cast<const ColumnDate32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::DateTime:
+            return static_cast<const ColumnDateTime *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::DateTime64:
+            return static_cast<const ColumnDateTime64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::String:
+            return static_cast<const ColumnString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::FixedString:
+            return static_cast<const ColumnFixedString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal32:
+            return static_cast<const ColumnDecimal<Decimal32> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal64:
+            return static_cast<const ColumnDecimal<Decimal64> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal128:
+            return static_cast<const ColumnDecimal<Decimal128> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal256:
+            return static_cast<const ColumnDecimal<Decimal256> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UUID:
+            return static_cast<const ColumnUUID *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::IPv4:
+            return static_cast<const ColumnIPv4 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::IPv6:
+            return static_cast<const ColumnIPv6 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        default:
+            pos = arena.allocContinue(s, begin);
+            memcpy(pos, &arr[n], s);
+            if (arr[n])
+                return StringRef(pos, s);
+            auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
+            /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+            return StringRef(nested_ref.data - s, nested_ref.size + s);
+    }
 }
 
 const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 719fa698acc..b57fdf3064d 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -6,6 +6,7 @@
 #include
 #include
+#include "Core/TypeId.h"
 
 #include "config.h"
 
@@ -213,8 +214,7 @@ private:
     WrappedPtr nested_column;
     WrappedPtr null_map;
     // optimize serializeValueIntoArena
-    bool is_string;
-    bool is_fixed_size_column;
+    TypeIndex nested_type;
 
     template <bool negative>
     void applyNullMapImpl(const NullMap & map);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 4c4ef0438ae..91763c69bb9 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -26,6 +26,7 @@
     select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2
     select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5
     select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10
+    select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10
 
     drop table if exists t_nullable
\ No newline at end of file

From 65aeb0563f020dcc4035f3903dfded305329975b Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Tue, 8 Aug 2023 10:07:45 +0800
Subject: [PATCH 11/30] fix include

---
 src/Columns/ColumnNullable.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index ea95016a766..fcd95e5c963
100644 --- a/src/Columns/ColumnNullable.cpp +++ b/src/Columns/ColumnNullable.cpp @@ -4,10 +4,10 @@ #include #include #include -#include "ColumnDecimal.h" -#include "ColumnFixedString.h" -#include "ColumnsDateTime.h" -#include "ColumnsNumber.h" +#include +#include +#include +#include #include #include #include From c1b94b4a3febdd2fbb22f1c2b8aa17b0089137d9 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 8 Aug 2023 00:04:03 +0200 Subject: [PATCH 12/30] fixes for detach/attach partition --- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +- src/Storages/MergeTree/MergeTreeData.cpp | 71 +++++++++++++----- .../MergeTree/MergeTreeDataPartInMemory.cpp | 6 +- .../MergeTree/MergeTreeDataPartInMemory.h | 3 +- .../ReplicatedMergeTreeCleanupThread.cpp | 3 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +- .../MergeTree/ReplicatedMergeTreeSink.h | 2 +- src/Storages/StorageMergeTree.cpp | 3 +- src/Storages/StorageReplicatedMergeTree.cpp | 5 +- .../test.py | 54 ++++++++------ .../02443_detach_attach_partition.reference | 4 + .../02443_detach_attach_partition.sh | 74 +++++++++++++++++++ 13 files changed, 188 insertions(+), 54 deletions(-) create mode 100644 tests/queries/0_stateless/02443_detach_attach_partition.reference create mode 100755 tests/queries/0_stateless/02443_detach_attach_partition.sh diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 6d7b6b39a40..b05c3d15f24 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1780,7 +1780,8 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix) part_is_probably_removed_from_disk = true; } -DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const +DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/, + const DiskTransactionPtr & disk_transaction) const { /// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times. /// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts. 
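The change running through the hunks of this patch gives makeCloneInDetached an optional disk transaction. The parameter defaults to empty, so existing call sites keep the old clone-immediately behavior, while the detach path can pass a transaction and have every per-part clone commit or roll back as one batch. A minimal sketch of that default-argument pattern follows; DiskTransaction, PartStorage, and cloneInDetached are illustrative stand-ins, not ClickHouse's actual IDiskTransaction / IDataPartStorage interfaces.

    #include <memory>
    #include <string>
    #include <utility>
    #include <vector>

    // Stand-in for a disk transaction: queues filesystem operations and
    // applies them together on commit().
    struct DiskTransaction
    {
        std::vector<std::pair<std::string, std::string>> hardlinks;

        void createHardLink(std::string from, std::string to)
        {
            hardlinks.emplace_back(std::move(from), std::move(to));
        }

        void commit()
        {
            // apply every queued operation, then clear the queue
            hardlinks.clear();
        }
    };

    using DiskTransactionPtr = std::shared_ptr<DiskTransaction>;

    // Stand-in for a part's storage.
    struct PartStorage
    {
        std::string relative_path;

        // The trailing parameter defaults to empty: old callers are
        // unaffected, new callers pass a transaction so the clone is
        // deferred into it.
        void cloneInDetached(const std::string & prefix, const DiskTransactionPtr & tx = {}) const
        {
            const std::string target = "detached/" + prefix + relative_path;
            if (tx)
            {
                // Deferred: becomes visible only when the caller commits.
                tx->createHardLink(relative_path, target);
            }
            else
            {
                // Immediate one-off operation, the pre-patch behavior.
                DiskTransaction local;
                local.createHardLink(relative_path, target);
                local.commit();
            }
        }
    };

The intended usage is then: build one transaction, call cloneInDetached for each part of the partition, and commit once at the end.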
@@ -1795,7 +1796,8 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix IDataPartStorage::ClonePartParams params { .copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication, - .make_source_readonly = true + .make_source_readonly = true, + .external_transaction = disk_transaction }; return getDataPartStorage().freeze( storage.relative_data_path, diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 9243c91987b..1df091ab1a3 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -368,7 +368,8 @@ public: virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists); /// Makes clone of a part in detached/ directory via hard links - virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const; + virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction = {}) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 0cfcd815cce..ed9127de977 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2619,8 +2619,50 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (detached_parts.empty()) return 0; - PartsTemporaryRename renamed_parts(*this, "detached/"); + auto get_last_touched_time = [&](const DetachedPartInfo & part_info) -> time_t + { + auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; + time_t last_change_time = part_info.disk->getLastChanged(path); + time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); + return std::max(last_change_time, last_modification_time); + }; + time_t ttl_seconds = getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; + + size_t unfinished_deleting_parts = 0; + time_t current_time = time(nullptr); + for (const auto & part_info : detached_parts) + { + if (!part_info.dir_name.starts_with("deleting_")) + continue; + + time_t startup_time = current_time + static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); + time_t last_touch_time = get_last_touched_time(part_info); + + /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED) + bool had_restart = last_touch_time < startup_time; + bool ttl_expired = last_touch_time + ttl_seconds <= current_time; + if (!had_restart && !ttl_expired) + continue; + + /// We were trying to delete this detached part but did not finish deleting, probably because the server crashed + LOG_INFO(log, "Removing detached part {} that we failed to remove previously", part_info.dir_name); + try + { + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / part_info.dir_name / "", part_info.dir_name); + ++unfinished_deleting_parts; + } + catch (...) 
+ { + tryLogCurrentException(log); + } + } + + if (!getSettings()->merge_tree_enable_clear_old_broken_detached) + return unfinished_deleting_parts; + + const auto full_path = fs::path(relative_data_path) / "detached"; + size_t removed_count = 0; for (const auto & part_info : detached_parts) { if (!part_info.valid_name || part_info.prefix.empty()) @@ -2635,31 +2677,24 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (!can_be_removed_by_timeout) continue; - time_t current_time = time(nullptr); - ssize_t threshold = current_time - getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds; - auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name; - time_t last_change_time = part_info.disk->getLastChanged(path); - time_t last_modification_time = part_info.disk->getLastModified(path).epochTime(); - time_t last_touch_time = std::max(last_change_time, last_modification_time); + ssize_t threshold = current_time - ttl_seconds; + time_t last_touch_time = get_last_touched_time(part_info); if (last_touch_time == 0 || last_touch_time >= threshold) continue; - renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk); - } + const String & old_name = part_info.dir_name; + String new_name = "deleting_" + part_info.dir_name; + part_info.disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name); - LOG_INFO(log, "Will clean up {} detached parts", renamed_parts.old_and_new_names.size()); - - renamed_parts.tryRenameAll(); - - for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names) - { - removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); + removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name); LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name); - old_name.clear(); + ++removed_count; } - return renamed_parts.old_and_new_names.size(); + LOG_INFO(log, "Cleaned up {} detached parts", removed_count); + + return removed_count + unfinished_deleting_parts; } size_t MergeTreeData::clearOldWriteAheadLogs() diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index ba300b110d7..7654791c997 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -17,6 +17,7 @@ namespace DB namespace ErrorCodes { extern const int DIRECTORY_ALREADY_EXISTS; + extern const int NOT_IMPLEMENTED; } MergeTreeDataPartInMemory::MergeTreeDataPartInMemory( @@ -138,8 +139,11 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction) const { + if (disk_transaction) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions"); String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false); return flushToDisk(detached_path, metadata_snapshot); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 81549eeed3e..29506a54fdc 100644 --- 
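The hunk above replaces PartsTemporaryRename with an explicit rename-then-remove protocol: a detached directory is first renamed to deleting_<name>, then removed, so a crash between the two steps leaves a deleting_ marker that the next scan (the unfinished_deleting_parts loop) can finish off regardless of the broken-parts TTL. A hedged sketch of the protocol in plain std::filesystem, names invented:

    #include <filesystem>
    #include <string>

    namespace fs = std::filesystem;

    // Phase 1 commits the intent by renaming (atomic on POSIX within one filesystem);
    // phase 2 does the actual removal and may be interrupted by a crash.
    void removeWithMarker(const fs::path & detached, const std::string & name)
    {
        const fs::path marked = detached / ("deleting_" + name);
        fs::rename(detached / name, marked); // phase 1
        fs::remove_all(marked);              // phase 2
    }

    // Recovery pass: anything still carrying the marker is an interrupted deletion.
    void sweepUnfinished(const fs::path & detached)
    {
        for (const auto & entry : fs::directory_iterator(detached))
            if (entry.path().filename().string().starts_with("deleting_"))
                fs::remove_all(entry.path());
    }

The startup-time check in the code above exists because a fresh deleting_ directory may belong to a deletion that is still in progress; only markers older than the last restart, or past the TTL, are safe to sweep.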
a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -42,7 +42,8 @@ public: bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); } String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; - DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override; + DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, + const DiskTransactionPtr & disk_transaction = {}) const override; std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 07cfced8362..b72c148a4e8 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -149,8 +149,7 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate() /// do it under share lock cleaned_other += storage.clearOldWriteAheadLogs(); cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds()); - if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached) - cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); + cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory(); } /// This is loose condition: no problem if we actually had lost leadership at this moment diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index 0db3464a637..bf0acef89c2 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -633,8 +633,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa delayed_chunk.reset(); } -template -void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) +template<> +bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::MutableDataPartPtr & part) { /// NOTE: No delay in this case. That's Ok. auto origin_zookeeper = storage.getZooKeeper(); @@ -649,8 +649,10 @@ void ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData: try { part->version.setCreationTID(Tx::PrehistoricTID, nullptr); - commitPart(zookeeper, part, BlockIDsType(), replicas_num, true); + String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; + bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot())); + return deduplicated; } catch (...) 
{ diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h index 868590efa25..4a192a822f5 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h @@ -56,7 +56,7 @@ public: String getName() const override { return "ReplicatedMergeTreeSink"; } /// For ATTACHing existing data on filesystem. - void writeExistingPart(MergeTreeData::MutableDataPartPtr & part); + bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part); /// For proper deduplication in MaterializedViews bool lastBlockIsDuplicate() const override diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index ad9013d9f13..542701aeb98 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1379,8 +1379,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign cleared_count += clearOldWriteAheadLogs(); cleared_count += clearOldMutations(); cleared_count += clearEmptyParts(); - if (getSettings()->merge_tree_enable_clear_old_broken_detached) - cleared_count += clearOldBrokenPartsFromDetachedDirectory(); + cleared_count += clearOldBrokenPartsFromDetachedDirectory(); return cleared_count; /// TODO maybe take into account number of cleared objects when calculating backoff }, common_assignee_trigger, getStorageID()), /* need_trigger */ false); diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 7fce373e26b..72c939f9e82 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -6133,8 +6133,9 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition( MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts); /// TODO Allow to use quorum here. 
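For the ATTACH path changed below, deduplication hinges on the block id being a pure function of the data: partition id plus the part's total checksum, so attaching byte-identical parts twice produces the same id and the second ZooKeeper commit is rejected. A small sketch of deriving such an id (FNV-1a stands in for the real checksums, which come from the part's files):

    #include <cstdint>
    #include <cstdio>
    #include <string>
    #include <string_view>

    // FNV-1a, used here only as an illustrative stand-in checksum.
    uint64_t fnv1a(std::string_view bytes)
    {
        uint64_t h = 1469598103934665603ULL;
        for (unsigned char c : bytes)
        {
            h ^= c;
            h *= 1099511628211ULL;
        }
        return h;
    }

    // Same shape as the block_id built above: "<partition_id>_<checksum in hex>".
    std::string makeBlockId(const std::string & partition_id, std::string_view part_bytes)
    {
        char hex[17];
        std::snprintf(hex, sizeof(hex), "%016llx", static_cast<unsigned long long>(fnv1a(part_bytes)));
        return partition_id + "_" + hex;
    }

    int main()
    {
        // Identical content yields an identical id, so the duplicate commit is dropped.
        return makeBlockId("all", "rows...") == makeBlockId("all", "rows...") ? 0 : 1;
    }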
- ReplicatedMergeTreeSink output(*this, metadata_snapshot, 0, 0, 0, false, false, false, query_context, - /*is_attach*/true); + ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0, + /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate, + /* majority_quorum */ false, query_context, /*is_attach*/true); for (size_t i = 0; i < loaded_parts.size(); ++i) { diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index 9a70ebe0d48..e7341deae35 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -57,27 +57,28 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): ] ) - node.exec_in_container(["mkdir", f"{path_to_detached}../unexpected_all_42_1337_5"]) - node.exec_in_container( - [ - "touch", - "-t", - "1312031429.30", - f"{path_to_detached}../unexpected_all_42_1337_5", - ] - ) - result = node.exec_in_container( - ["stat", f"{path_to_detached}../unexpected_all_42_1337_5"] - ) - print(result) - assert "Modify: 2013-12-03" in result - node.exec_in_container( - [ - "mv", - f"{path_to_detached}../unexpected_all_42_1337_5", - f"{path_to_detached}unexpected_all_42_1337_5", - ] - ) + for name in ['unexpected_all_42_1337_5', 'deleting_all_123_456_7', 'tmp-fetch_all_12_34_5']: + node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"]) + node.exec_in_container( + [ + "touch", + "-t", + "1312031429.30", + f"{path_to_detached}../{name}", + ] + ) + result = node.exec_in_container( + ["stat", f"{path_to_detached}../{name}"] + ) + print(result) + assert "Modify: 2013-12-03" in result + node.exec_in_container( + [ + "mv", + f"{path_to_detached}../{name}", + f"{path_to_detached}{name}", + ] + ) result = node.query( f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0} @@ -87,6 +88,10 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): node.query(f"DETACH TABLE {table}") node.query(f"ATTACH TABLE {table}") + node.wait_for_log_line( + "Removing detached part deleting_all_123_456_7", timeout=90, look_behind_lines=1000000 + ) + result = node.exec_in_container(["ls", path_to_detached]) print(result) assert f"{expect_broken_prefix}_all_3_3_0" in result @@ -94,6 +99,7 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "trash" in result assert "broken_all_fake" in result assert "unexpected_all_42_1337_5" in result + assert "deleting_all_123_456_7" not in result time.sleep(15) assert node.contains_in_log( @@ -106,7 +112,13 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "all_1_1_0" in result assert "trash" in result assert "broken_all_fake" in result + assert "tmp-fetch_all_12_34_5" in result assert "unexpected_all_42_1337_5" not in result + assert "deleting_all_123_456_7" not in result + + node.query(f"ALTER TABLE {table} DROP DETACHED PART 'tmp-fetch_all_12_34_5'", settings={"allow_drop_detached": 1}) + result = node.exec_in_container(["ls", path_to_detached]) + assert "tmp-fetch_all_12_34_5" not in result node.query(f"DROP TABLE {table} SYNC") diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference new file mode 100644 index 00000000000..77cfb77479d --- /dev/null +++ 
b/tests/queries/0_stateless/02443_detach_attach_partition.reference @@ -0,0 +1,4 @@ +default begin inserts +default end inserts +30 465 +30 465 diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh new file mode 100755 index 00000000000..c983d5d56d3 --- /dev/null +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -0,0 +1,74 @@ +#!/usr/bin/env bash +# Tags: race, zookeeper, no-parallel + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh +# shellcheck source=./replication.lib +. "$CURDIR"/replication.lib + + +$CLICKHOUSE_CLIENT -n -q " + DROP TABLE IF EXISTS alter_table0; + DROP TABLE IF EXISTS alter_table1; + + CREATE TABLE alter_table0 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a; + CREATE TABLE alter_table1 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a; +" || exit 1 + +function thread_detach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) DETACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} +function thread_attach() +{ + while true; do + $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) ATTACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||: + done +} + +function insert() +{ + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) VALUES ($RANDOM, $i)" +} + +thread_detach & PID_1=$! +thread_attach & PID_2=$! +thread_detach & PID_3=$! +thread_attach & PID_4=$! + +function do_inserts() +{ + for i in {1..30}; do + while ! 
insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done + done +} + +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'begin inserts'" +do_inserts 2>&1| grep -Fa "Exception: " | grep -Fv "was cancelled by concurrent ALTER PARTITION" +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'end inserts'" + +kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4 +wait + +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'"; +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'"; +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" + +engine=$($CLICKHOUSE_CLIENT -q "SELECT engine FROM system.tables WHERE database=currentDatabase() AND table='alter_table0'") +if [[ "$engine" == "ReplicatedMergeTree" ]]; then + # ReplicatedMergeTree may duplicate data on ATTACH PARTITION (when one replica has a merged part and another replica has source parts only) + $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL DEDUPLICATE" + $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +fi + +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table0" +$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table1" + +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table0" +$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table1" From 3d59ebe108016a83bba161751f728b08d5f94d70 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 10 Aug 2023 20:11:22 +0200 Subject: [PATCH 13/30] fix --- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- .../MergeTree/MergeTreeDataPartInMemory.h | 2 +- .../MergeTree/ReplicatedMergeTreeSink.cpp | 5 ++- .../test.py | 45 ++++++++++--------- 5 files changed, 31 insertions(+), 25 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 1df091ab1a3..195fdbc4d05 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -369,7 +369,7 @@ public: /// Makes clone of a part in detached/ directory via hard links virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction = {}) const; + const DiskTransactionPtr & disk_transaction = {}) const; /// NOLINT /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. 
"detached") on another disk MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index ed9127de977..395b480a84f 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2636,7 +2636,7 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory() if (!part_info.dir_name.starts_with("deleting_")) continue; - time_t startup_time = current_time + static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); + time_t startup_time = current_time - static_cast(Context::getGlobalContextInstance()->getUptimeSeconds()); time_t last_touch_time = get_last_touched_time(part_info); /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 29506a54fdc..95a17cbf589 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -43,7 +43,7 @@ public: String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction = {}) const override; + const DiskTransactionPtr & disk_transaction = {}) const override; /// NOLINT std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp index bf0acef89c2..fa5a40cf27a 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp @@ -651,7 +651,10 @@ bool ReplicatedMergeTreeSinkImpl::writeExistingPart(MergeTreeData::Mutabl part->version.setCreationTID(Tx::PrehistoricTID, nullptr); String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : ""; bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second; - PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot())); + + /// Set a special error code if the block is duplicate + int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0; + PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error)); return deduplicated; } catch (...) 
diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py index e7341deae35..bdf993ddedf 100644 --- a/tests/integration/test_broken_detached_part_clean_up/test.py +++ b/tests/integration/test_broken_detached_part_clean_up/test.py @@ -57,7 +57,11 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): ] ) - for name in ['unexpected_all_42_1337_5', 'deleting_all_123_456_7', 'tmp-fetch_all_12_34_5']: + for name in [ + "unexpected_all_42_1337_5", + "deleting_all_123_456_7", + "covered-by-broken_all_12_34_5", + ]: node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"]) node.exec_in_container( [ @@ -67,9 +71,7 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): f"{path_to_detached}../{name}", ] ) - result = node.exec_in_container( - ["stat", f"{path_to_detached}../{name}"] - ) + result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"]) print(result) assert "Modify: 2013-12-03" in result node.exec_in_container( @@ -89,21 +91,19 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): node.query(f"ATTACH TABLE {table}") node.wait_for_log_line( - "Removing detached part deleting_all_123_456_7", timeout=90, look_behind_lines=1000000 + "Removing detached part deleting_all_123_456_7", + timeout=90, + look_behind_lines=1000000, ) - - result = node.exec_in_container(["ls", path_to_detached]) - print(result) - assert f"{expect_broken_prefix}_all_3_3_0" in result - assert "all_1_1_0" in result - assert "trash" in result - assert "broken_all_fake" in result - assert "unexpected_all_42_1337_5" in result - assert "deleting_all_123_456_7" not in result - - time.sleep(15) - assert node.contains_in_log( - "Removed broken detached part unexpected_all_42_1337_5 due to a timeout" + node.wait_for_log_line( + f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout", + timeout=10, + look_behind_lines=1000000, + ) + node.wait_for_log_line( + "Removed broken detached part unexpected_all_42_1337_5 due to a timeout", + timeout=10, + look_behind_lines=1000000, ) result = node.exec_in_container(["ls", path_to_detached]) @@ -112,13 +112,16 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix): assert "all_1_1_0" in result assert "trash" in result assert "broken_all_fake" in result - assert "tmp-fetch_all_12_34_5" in result + assert "covered-by-broken_all_12_34_5" in result assert "unexpected_all_42_1337_5" not in result assert "deleting_all_123_456_7" not in result - node.query(f"ALTER TABLE {table} DROP DETACHED PART 'tmp-fetch_all_12_34_5'", settings={"allow_drop_detached": 1}) + node.query( + f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'", + settings={"allow_drop_detached": 1}, + ) result = node.exec_in_container(["ls", path_to_detached]) - assert "tmp-fetch_all_12_34_5" not in result + assert "covered-by-broken_all_12_34_5" not in result node.query(f"DROP TABLE {table} SYNC") From 5a8b8203b2df3f7c9c054d7f0435b35c6d06f008 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Thu, 10 Aug 2023 23:22:51 +0200 Subject: [PATCH 14/30] fix --- src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 2 +- src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 3 ++- src/Storages/MergeTree/MergeTreeDataPartInMemory.h | 2 +- src/Storages/StorageMergeTree.cpp | 6 +++--- src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++---- 6 files changed, 12 
insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 195fdbc4d05..49aa2e1e7f1 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -369,7 +369,7 @@ public: /// Makes clone of a part in detached/ directory via hard links virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction = {}) const; /// NOLINT + const DiskTransactionPtr & disk_transaction) const; /// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const; diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 395b480a84f..5ec52d4162e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -4066,7 +4066,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach) { LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name); - part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr()); + part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); DataPartsLock lock = lockParts(); part_to_detach->is_unexpected_local_part = true; diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp index 7654791c997..d8034e62802 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp @@ -139,7 +139,8 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String & return new_data_part_storage; } -DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, +DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, + const StorageMetadataPtr & metadata_snapshot, const DiskTransactionPtr & disk_transaction) const { if (disk_transaction) diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h index 95a17cbf589..95f7b796f9a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h +++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h @@ -43,7 +43,7 @@ public: String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; } void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override; DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot, - const DiskTransactionPtr & disk_transaction = {}) const override; /// NOLINT + const DiskTransactionPtr & disk_transaction) const override; std::optional getColumnModificationTime(const String & /* column_name */) const override { return {}; } MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const; diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 542701aeb98..9506d6f1075 100644 --- 
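Dropping the = {} default (and the NOLINT that silenced clang-tidy about it) from the virtual makeCloneInDetached sidesteps a classic C++ trap: default arguments are bound statically from the caller's declared type, while the call itself dispatches dynamically. A self-contained illustration with toy classes, nothing from ClickHouse:

    #include <iostream>

    struct Base
    {
        virtual ~Base() = default;
        virtual void clone(int mode = 1) const { std::cout << "Base body, mode=" << mode << '\n'; }
    };

    struct Derived : Base
    {
        // A different default on the override is legal, but a trap.
        void clone(int mode = 2) const override { std::cout << "Derived body, mode=" << mode << '\n'; }
    };

    int main()
    {
        Derived d;
        Base & b = d;
        b.clone(); // prints "Derived body, mode=1": Derived's body with Base's default
        d.clone(); // prints "Derived body, mode=2"
    }

Forcing every caller to pass the argument explicitly (the /*disk_transaction*/ {} spelling used throughout this patch) keeps behavior identical at every call site no matter which static type the call goes through.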
a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1817,7 +1817,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt { auto metadata_snapshot = getInMemoryMetadataPtr(); LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } { @@ -1902,7 +1902,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont { auto metadata_snapshot = getInMemoryMetadataPtr(); LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -1944,7 +1944,7 @@ void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool de for (const auto & part : parts_to_remove) { LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("", metadata_snapshot); + part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 72c939f9e82..bc2cff80c59 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2098,7 +2098,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) if (auto part_to_detach = part.getPartIfItWasActive()) { LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); - part_to_detach->makeCloneInDetached("", metadata_snapshot); + part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } } @@ -2828,7 +2828,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo for (const auto & part : parts_to_remove_from_working_set) { LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); - part->makeCloneInDetached("clone", metadata_snapshot); + part->makeCloneInDetached("clone", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -3794,12 +3794,12 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n chassert(!broken_part); chassert(!storage_init); part->was_removed_as_broken = true; - part->makeCloneInDetached("broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); broken_part = part; } else { - part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr()); + part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {}); } detached_parts.push_back(part->name); } From 44403458556ef1037b69a5ae49eb9cc9cba16456 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Sun, 13 Aug 2023 17:09:11 +0200 Subject: [PATCH 15/30] fix --- .../02443_detach_attach_partition.reference | 4 ++-- .../0_stateless/02443_detach_attach_partition.sh | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference index 77cfb77479d..70930ea6d9a 100644 --- a/tests/queries/0_stateless/02443_detach_attach_partition.reference +++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference @@ -1,4 +1,4 @@ default begin inserts default end inserts -30 465 -30 465 +20 210 +20 210 diff --git 
a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index c983d5d56d3..36bc3309924 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -31,7 +31,7 @@ function thread_attach() function insert() { - $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) VALUES ($RANDOM, $i)" + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $i" 2>/dev/null } thread_detach & PID_1=$! @@ -41,7 +41,7 @@ thread_attach & PID_4=$! function do_inserts() { - for i in {1..30}; do + for i in {1..20}; do while ! insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done done } @@ -55,8 +55,10 @@ wait $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" -$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'"; -$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'"; +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null +$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" +$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" From bd0e8792886ac2a02ad45eb2a48b935aa89fb5fe Mon Sep 17 00:00:00 2001 From: Filipp Ozinov Date: Sun, 13 Aug 2023 22:48:35 +0400 Subject: [PATCH 16/30] Add note about skip indexes Related to #53350 --- docs/en/engines/database-engines/materialized-mysql.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md index f7cc52e622e..b7e567c7b6c 100644 --- a/docs/en/engines/database-engines/materialized-mysql.md +++ b/docs/en/engines/database-engines/materialized-mysql.md @@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo * Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl). * Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs). * Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias). - * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes) + * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL is using `SELECT ... FINAL` by default) * Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here. `INDEX ... 
TYPE hypothesis` as [described in the v21.12 blog post]](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/) From f8b1d7474dffa024ff692bec35578c5172aeea8a Mon Sep 17 00:00:00 2001 From: vdimir Date: Mon, 14 Aug 2023 12:46:23 +0000 Subject: [PATCH 17/30] Update test_distributed_inter_server_secret to pass with analyzer --- tests/analyzer_integration_broken_tests.txt | 18 ----- .../test.py | 68 +++++++------------ 2 files changed, 25 insertions(+), 61 deletions(-) diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt index 68822fbf311..3cc4869aa62 100644 --- a/tests/analyzer_integration_broken_tests.txt +++ b/tests/analyzer_integration_broken_tests.txt @@ -5,24 +5,6 @@ test_distributed_ddl/test.py::test_default_database[configs_secure] test_distributed_ddl/test.py::test_on_server_fail[configs] test_distributed_ddl/test.py::test_on_server_fail[configs_secure] test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-] -test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo] test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors test_distributed_load_balancing/test.py::test_load_balancing_default test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority] diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py index 36ac07a550a..1aeaddcf3c5 100644 --- a/tests/integration/test_distributed_inter_server_secret/test.py +++ b/tests/integration/test_distributed_inter_server_secret/test.py @@ -110,10 +110,6 @@ def start_cluster(): cluster.shutdown() -def query_with_id(node, id_, query, **kwargs): - return node.query("WITH '{}' AS __id {}".format(id_, query), **kwargs) - - # @return -- [user, initial_user] def get_query_user_info(node, query_pattern): node.query("SYSTEM 
FLUSH LOGS") @@ -334,7 +330,7 @@ def test_secure_disagree_insert(): @users def test_user_insecure_cluster(user, password): id_ = "query-dist_insecure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_insecure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password) assert get_query_user_info(n1, id_) == [ user, user, @@ -345,7 +341,7 @@ def test_user_insecure_cluster(user, password): @users def test_user_secure_cluster(user, password): id_ = "query-dist_secure-" + user - query_with_id(n1, id_, "SELECT * FROM dist_secure", user=user, password=password) + n1.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(n2, id_) == [user, user] @@ -353,16 +349,14 @@ def test_user_secure_cluster(user, password): @users def test_per_user_inline_settings_insecure_cluster(user, password): id_ = "query-ddl-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_insecure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_insecure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -372,16 +366,14 @@ def test_per_user_inline_settings_insecure_cluster(user, password): @users def test_per_user_inline_settings_secure_cluster(user, password): id_ = "query-ddl-settings-dist_secure-" + user - query_with_id( - n1, - id_, - """ - SELECT * FROM dist_secure - SETTINGS - prefer_localhost_replica=0, - max_memory_usage_for_user=1e9, - max_untracked_memory=0 - """, + n1.query( + f""" + SELECT *, '{id_}' FROM dist_secure + SETTINGS + prefer_localhost_replica=0, + max_memory_usage_for_user=1e9, + max_untracked_memory=0 + """, user=user, password=password, ) @@ -393,10 +385,8 @@ def test_per_user_inline_settings_secure_cluster(user, password): @users def test_per_user_protocol_settings_insecure_cluster(user, password): id_ = "query-protocol-settings-dist_insecure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_insecure", + n1.query( + f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password, settings={ @@ -411,10 +401,8 @@ def test_per_user_protocol_settings_insecure_cluster(user, password): @users def test_per_user_protocol_settings_secure_cluster(user, password): id_ = "query-protocol-settings-dist_secure-" + user - query_with_id( - n1, - id_, - "SELECT * FROM dist_secure", + n1.query( + f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password, settings={ @@ -431,8 +419,8 @@ def test_per_user_protocol_settings_secure_cluster(user, password): @users def test_user_secure_cluster_with_backward(user, password): id_ = "with-backward-query-dist_secure-" + user - query_with_id( - n1, id_, "SELECT * FROM dist_secure_backward", user=user, password=password + n1.query( + f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password ) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] @@ -441,13 +429,7 @@ def test_user_secure_cluster_with_backward(user, password): @users def test_user_secure_cluster_from_backward(user, password): id_ = "from-backward-query-dist_secure-" + user - query_with_id( - backward, - id_, - "SELECT * FROM dist_secure_backward", - user=user, - password=password, - ) + backward.query(f"SELECT *, '{id_}' FROM dist_secure", 
user=user, password=password) assert get_query_user_info(n1, id_) == [user, user] assert get_query_user_info(backward, id_) == [user, user] From 368f6d7b1390b98ccac2610eb88a4237abcab439 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Mon, 14 Aug 2023 20:46:41 +0200 Subject: [PATCH 18/30] fix --- src/Functions/transform.cpp | 4 ++++ tests/queries/0_stateless/02443_detach_attach_partition.sh | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp index 16326dd5a44..62ab51abd76 100644 --- a/src/Functions/transform.cpp +++ b/src/Functions/transform.cpp @@ -776,8 +776,12 @@ namespace UInt64 key = 0; auto * dst = reinterpret_cast(&key); const auto ref = cache.from_column->getDataAt(i); + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunreachable-code" if constexpr (std::endian::native == std::endian::big) dst += sizeof(key) - ref.size; +#pragma clang diagnostic pop memcpy(dst, ref.data, ref.size); table[key] = i; diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index 36bc3309924..13ea966dbf5 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -55,7 +55,7 @@ wait $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" -$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" +while ! $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; do sleep 0.5; done $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" From bf40767f10e16d9fd6c5b29a8af1ae81c93694fc Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 15 Aug 2023 14:27:49 +0200 Subject: [PATCH 19/30] fix another race --- src/Storages/MergeTree/MergeTreeData.cpp | 25 +++++++++++++------ src/Storages/StorageMergeTree.cpp | 12 ++++++--- src/Storages/StorageReplicatedMergeTree.cpp | 4 ++- ..._replace_partition_from_table_zookeeper.sh | 20 --------------- .../00933_ttl_replicated_zookeeper.sh | 16 ------------ ...034_move_partition_from_table_zookeeper.sh | 17 ------------- .../02443_detach_attach_partition.sh | 2 +- .../0_stateless/02482_load_parts_refcounts.sh | 17 ------------- tests/queries/shell_config.sh | 20 +++++++++++++++ 9 files changed, 51 insertions(+), 82 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 561eef28c78..4026be31286 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5832,18 +5832,21 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const { const String source_dir = "detached/"; - std::map name_to_disk; - /// Let's compose a list of parts that should be added. 
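The transform.cpp hunk above packs a short string into a UInt64 so it can serve as a hash-table key; on a big-endian target the destination pointer must be shifted so the bytes land in the low-order end of the integer, which is the branch the new pragma marks as intentionally unreachable on little-endian builds. A sketch of the same packing (C++20 for std::endian; simplified, not the original function):

    #include <bit>
    #include <cstdint>
    #include <cstring>
    #include <string_view>

    // Pack up to 8 bytes of a string into a UInt64 key; assumes ref.size() <= 8.
    uint64_t packKey(std::string_view ref)
    {
        uint64_t key = 0;
        auto * dst = reinterpret_cast<char *>(&key);
        // On big-endian machines the low-order bytes sit at the END of the object
        // representation, so shift the write position to keep key values portable.
        if constexpr (std::endian::native == std::endian::big)
            dst += sizeof(key) - ref.size();
        std::memcpy(dst, ref.data(), ref.size());
        return key;
    }

    int main()
    {
        // Equal strings pack to equal keys; different strings to different ones.
        return (packKey("ab") == packKey("ab") && packKey("ab") != packKey("ba")) ? 0 : 1;
    }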
if (attach_part) { const String part_id = partition->as().value.safeGet(); validateDetachedPartName(part_id); - auto disk = getDiskForDetachedPart(part_id); - renamed_parts.addPart(part_id, "attaching_" + part_id, disk); - - if (MergeTreePartInfo::tryParsePartName(part_id, format_version)) - name_to_disk[part_id] = getDiskForDetachedPart(part_id); + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_id); + } + else + { + auto disk = getDiskForDetachedPart(part_id); + renamed_parts.addPart(part_id, "attaching_" + part_id, disk); + } } else { @@ -5860,6 +5863,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { + if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_info.dir_name)) + { + LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, " + "probably it's being detached right now", part_info.dir_name); + continue; + } LOG_DEBUG(log, "Found part {}", part_info.dir_name); active_parts.add(part_info.dir_name); } @@ -5870,6 +5879,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const for (const auto & part_info : detached_parts) { const String containing_part = active_parts.getContainingPart(part_info.dir_name); + if (containing_part.empty()) + continue; LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name); diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 9506d6f1075..03bb1b554eb 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1816,7 +1816,9 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt if (detach) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } @@ -1901,7 +1903,9 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont for (const auto & part : parts) { auto metadata_snapshot = getInMemoryMetadataPtr(); - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } @@ -1943,7 +1947,9 @@ void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool de /// NOTE: no race with background cleanup until we hold pointers to parts for (const auto & part : parts_to_remove) { - LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory()); + String part_dir = part->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 
bc2cff80c59..6b4ee3334c7 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -2097,7 +2097,9 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry) { if (auto part_to_detach = part.getPartIfItWasActive()) { - LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory()); + String part_dir = part_to_detach->getDataPartStorage().getPartDirectory(); + LOG_INFO(log, "Detaching {}", part_dir); + auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir); part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {}); } } diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh index c32b6d04a42..334025cba28 100755 --- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh @@ -11,26 +11,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - local query="$1" && shift - - local retry=0 - until [ $retry -ge 5 ] - do - local result - result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$((retry + 1)) - sleep 3 - fi - done - echo "Query '$query' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;" diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh index 22d9e0690b3..d06037fb836 100755 --- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh @@ -5,22 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl1" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl2" diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh index e0a84323dbd..39c5742e7a7 100755 --- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh +++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh @@ -7,23 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" 
== 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;" $CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;" diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index 13ea966dbf5..5a3f1b64065 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -55,7 +55,7 @@ wait $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0" $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" -while ! $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; do sleep 0.5; done +query_with_retry "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1" $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh index 4d588dabeb9..fe3cee1359e 100755 --- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh +++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh @@ -5,23 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh . "$CURDIR"/../shell_config.sh -function query_with_retry -{ - retry=0 - until [ $retry -ge 5 ] - do - result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1) - if [ "$?" == 0 ]; then - echo -n "$result" - return - else - retry=$(($retry + 1)) - sleep 3 - fi - done - echo "Query '$1' failed with '$result'" -} - $CLICKHOUSE_CLIENT -n --query " DROP TABLE IF EXISTS load_parts_refcounts SYNC; diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index ef70c82aefc..12bc0002191 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -155,3 +155,23 @@ function random_str() local n=$1 && shift tr -cd '[:lower:]' < /dev/urandom | head -c"$n" } + +function query_with_retry +{ + local query="$1" && shift + + local retry=0 + until [ $retry -ge 5 ] + do + local result + result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)" + if [ "$?" 
== 0 ]; then
+            echo -n "$result"
+            return
+        else
+            retry=$((retry + 1))
+            sleep 3
+        fi
+    done
+    echo "Query '$query' failed with '$result'"
+}

From 2aa211acc2af778728f87a0cf36be8efb68243b3 Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:26:39 +0000
Subject: [PATCH 20/30] Added integration test for session log

---
 .../test.py                                   |   5 +-
 tests/integration/test_session_log/.gitignore |   1 +
 .../integration/test_session_log/__init__.py  |   0
 .../test_session_log/configs/log.xml          |   9 +
 .../test_session_log/configs/ports.xml        |   9 +
 .../test_session_log/configs/session_log.xml  |   9 +
 .../test_session_log/configs/users.xml        |  23 ++
 .../protos/clickhouse_grpc.proto              |   1 +
 tests/integration/test_session_log/test.py    | 292 ++++++++++++++++++
 9 files changed, 345 insertions(+), 4 deletions(-)
 create mode 100644 tests/integration/test_session_log/.gitignore
 create mode 100644 tests/integration/test_session_log/__init__.py
 create mode 100644 tests/integration/test_session_log/configs/log.xml
 create mode 100644 tests/integration/test_session_log/configs/ports.xml
 create mode 100644 tests/integration/test_session_log/configs/session_log.xml
 create mode 100644 tests/integration/test_session_log/configs/users.xml
 create mode 120000 tests/integration/test_session_log/protos/clickhouse_grpc.proto
 create mode 100644 tests/integration/test_session_log/test.py

diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py
index c5c33b1cddb..5eaef09bf6d 100755
--- a/tests/integration/test_profile_max_sessions_for_user/test.py
+++ b/tests/integration/test_profile_max_sessions_for_user/test.py
@@ -28,10 +28,7 @@ proto_dir = os.path.join(SCRIPT_DIR, "./protos")
 gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
 os.makedirs(gen_dir, exist_ok=True)
 run_and_check(
-    "python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} \
-        {proto_dir}/clickhouse_grpc.proto".format(
-        proto_dir=proto_dir, gen_dir=gen_dir
-    ),
+    f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
     shell=True,
 )

diff --git a/tests/integration/test_session_log/.gitignore b/tests/integration/test_session_log/.gitignore
new file mode 100644
index 00000000000..edf565ec632
--- /dev/null
+++ b/tests/integration/test_session_log/.gitignore
@@ -0,0 +1 @@
+_gen
diff --git a/tests/integration/test_session_log/__init__.py b/tests/integration/test_session_log/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_session_log/configs/log.xml b/tests/integration/test_session_log/configs/log.xml
new file mode 100644
index 00000000000..7a079b81e69
--- /dev/null
+++ b/tests/integration/test_session_log/configs/log.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+    </logger>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_session_log/configs/ports.xml b/tests/integration/test_session_log/configs/ports.xml
new file mode 100644
index 00000000000..fbaefc16b3a
--- /dev/null
+++ b/tests/integration/test_session_log/configs/ports.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <postgresql_port>5433</postgresql_port>
+    <mysql_port>9001</mysql_port>
+    <grpc_port>9100</grpc_port>
+
+    <grpc>
+        <enable_ssl>false</enable_ssl>
+    </grpc>
+</clickhouse>
\ No newline at end of file
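query_with_retry, now shared through shell_config.sh instead of being copy-pasted into each test, is a plain bounded-retry loop: up to five attempts, three seconds apart, returning the first successful result. The same shape in C++, generic over the operation (attempt count and delay mirror the shell version):

    #include <chrono>
    #include <functional>
    #include <optional>
    #include <string>
    #include <thread>

    // Run `op` up to `max_attempts` times, sleeping `delay` between failures;
    // returns the first success, or std::nullopt when every attempt failed.
    std::optional<std::string> withRetry(
        const std::function<std::optional<std::string>()> & op,
        int max_attempts = 5,
        std::chrono::seconds delay = std::chrono::seconds(3))
    {
        for (int attempt = 0; attempt < max_attempts; ++attempt)
        {
            if (auto result = op())
                return result;
            std::this_thread::sleep_for(delay);
        }
        return std::nullopt; // caller reports "Query ... failed"
    }

    int main()
    {
        int calls = 0;
        auto flaky = [&]() -> std::optional<std::string>
        {
            return ++calls < 3 ? std::nullopt : std::optional<std::string>("ok");
        };
        // Zero delay here just to keep the demo fast; the shell version sleeps 3s.
        return withRetry(flaky, 5, std::chrono::seconds(0)).has_value() ? 0 : 1;
    }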
b/tests/integration/test_session_log/configs/session_log.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <session_log>
+        <database>system</database>
+        <table>session_log</table>
+
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </session_log>
+</clickhouse>
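With a session_log section like the one above, the server flushes login, logout and login-failure events into the system.session_log table every 7500 ms, partitioned by month; the tests in this patch always run SYSTEM FLUSH LOGS before asserting so the counts are deterministic. A minimal, illustrative way to inspect the mechanism (the user name is a placeholder; the columns are the ones the tests select):

    -- Force the in-memory log buffer to be written out, then aggregate events.
    SYSTEM FLUSH LOGS;
    SELECT type, auth_type, interface, count()
    FROM system.session_log
    WHERE user = 'some_user'  -- 'some_user' is a hypothetical placeholder
    GROUP BY type, auth_type, interface;
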
diff --git a/tests/integration/test_session_log/configs/users.xml b/tests/integration/test_session_log/configs/users.xml new file mode 100644 index 00000000000..0416dfadc8a --- /dev/null +++ b/tests/integration/test_session_log/configs/users.xml @@ -0,0 +1,23 @@ + + + + 0 + + + + + + + pass + + + pass + + + pass + + + pass + + + \ No newline at end of file diff --git a/tests/integration/test_session_log/protos/clickhouse_grpc.proto b/tests/integration/test_session_log/protos/clickhouse_grpc.proto new file mode 120000 index 00000000000..25d15f11e3b --- /dev/null +++ b/tests/integration/test_session_log/protos/clickhouse_grpc.proto @@ -0,0 +1 @@ +../../../../src/Server/grpc_protos/clickhouse_grpc.proto \ No newline at end of file diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py new file mode 100644 index 00000000000..b860cde1df6 --- /dev/null +++ b/tests/integration/test_session_log/test.py @@ -0,0 +1,292 @@ +import os + +import grpc +import pymysql.connections +import psycopg2 as py_psql +import pytest +import random +import logging +import sys +import threading + +from helpers.cluster import ClickHouseCluster, run_and_check + +POSTGRES_SERVER_PORT = 5433 +MYSQL_SERVER_PORT = 9001 +GRPC_PORT = 9100 +SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +DEFAULT_ENCODING = "utf-8" + +# Use grpcio-tools to generate *pb2.py files from *.proto. +proto_dir = os.path.join(SCRIPT_DIR, "./protos") +gen_dir = os.path.join(SCRIPT_DIR, "./_gen") +os.makedirs(gen_dir, exist_ok=True) +run_and_check( + f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto", + shell=True, +) + +sys.path.append(gen_dir) + +import clickhouse_grpc_pb2 +import clickhouse_grpc_pb2_grpc + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "node", + main_configs=[ + "configs/ports.xml", + "configs/log.xml", + "configs/session_log.xml", + ], + user_configs=["configs/users.xml"], + # Bug in TSAN reproduces in this test https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387 + env_variables={ + "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="") + }, + with_postgres=True, +) + + +def grpc_get_url(): + return f"{instance.ip_address}:{GRPC_PORT}" + + +def grpc_create_insecure_channel(): + channel = grpc.insecure_channel(grpc_get_url()) + grpc.channel_ready_future(channel).result(timeout=2) + return channel + + +session_id_counter = 0 + + +def next_session_id(): + global session_id_counter + session_id = session_id_counter + session_id_counter += 1 + return str(session_id) + + +def grpc_query(query, user_, pass_, raise_exception): + try: + query_info = clickhouse_grpc_pb2.QueryInfo( + query=query, + session_id=next_session_id(), + user_name=user_, + password=pass_, + ) + channel = grpc_create_insecure_channel() + stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel) + result = stub.ExecuteQuery(query_info) + if result and result.HasField("exception"): + raise Exception(result.exception.display_text) + + return result.output.decode(DEFAULT_ENCODING) + except Exception: + assert raise_exception + + +def postgres_query(query, user_, pass_, raise_exception): + try: + connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}" + 
cluster.exec_in_container(cluster.postgres_id, + [ + "/usr/bin/psql", + connection_string, + "--no-align", + "--field-separator=' '", + "-c", + query + ], + shell=True + ) + except Exception: + assert raise_exception + + +def mysql_query(query, user_, pass_, raise_exception): + try: + client = pymysql.connections.Connection( + host=instance.ip_address, + user=user_, + password=pass_, + database="default", + port=MYSQL_SERVER_PORT, + ) + cursor = client.cursor(pymysql.cursors.DictCursor) + if raise_exception: + with pytest.raises(Exception): + cursor.execute(query) + else: + cursor.execute(query) + cursor.fetchall() + except Exception: + assert raise_exception + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +def test_grpc_session(started_cluster): + grpc_query("SELECT 1", "grpc_user", "pass", False) + grpc_query("SELECT 2", "grpc_user", "wrong_pass", True) + grpc_query("SELECT 3", "wrong_grpc_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "grpc_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'" + ) + assert logout_records == "grpc_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "grpc_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_mysql_session(started_cluster): + mysql_query("SELECT 1", "mysql_user", "pass", False) + mysql_query("SELECT 2", "mysql_user", "wrong_pass", True) + mysql_query("SELECT 3", "wrong_mysql_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "mysql_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'" + ) + assert logout_records == "mysql_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "mysql_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_postgres_session(started_cluster): + postgres_query("SELECT 1", "postgres_user", "pass", False) + postgres_query("SELECT 2", "postgres_user", 
"wrong_pass", True) + postgres_query("SELECT 3", "wrong_postgres_user", "pass", True) + + instance.query("SYSTEM FLUSH LOGS") + login_success_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'" + ) + assert login_success_records == "postgres_user\t1\t1\n" + logout_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'" + ) + assert logout_records == "postgres_user\t1\t1\n" + login_failure_records = instance.query( + "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'" + ) + assert login_failure_records == "postgres_user\t1\t1\n" + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "1\n" + + +def test_parallel_sessions(started_cluster): + thread_list = [] + for _ in range(10): + # Sleep time does not significantly matter here, + # test should pass even without sleeping. + for function in [postgres_query, grpc_query, mysql_query]: + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "pass", + False, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "parallel_user", + "wrong_pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + thread = threading.Thread( + target=function, + args=( + f"SELECT sleep({random.uniform(0.03, 0.04)})", + "wrong_parallel_user", + "pass", + True, + ), + ) + thread.start() + thread_list.append(thread) + + for thread in thread_list: + thread.join() + + instance.query("SYSTEM FLUSH LOGS") + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'" + ) + assert port_0_sessions == "90\n" + + port_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0" + ) + assert port_0_sessions == "0\n" + + address_0_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')" + ) + assert address_0_sessions == "0\n" + + grpc_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'" + ) + assert grpc_sessions == "30\n" + + mysql_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'" + ) + assert mysql_sessions == "30\n" + + postgres_sessions = instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'" + ) + assert postgres_sessions == "30\n" + + logins_and_logouts = instance.query( + f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')" + ) + assert logins_and_logouts == "30\n" + + logout_failure_sessions = 
instance.query( + f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'" + ) + assert logout_failure_sessions == "30\n" From 0e1728801eccb11a9cadf181fc3f555a4e39e125 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 15 Aug 2023 13:31:53 +0000 Subject: [PATCH 21/30] black run --- tests/integration/test_session_log/test.py | 28 +++++++++++----------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py index b860cde1df6..bb7cafa4ee6 100644 --- a/tests/integration/test_session_log/test.py +++ b/tests/integration/test_session_log/test.py @@ -2,10 +2,8 @@ import os import grpc import pymysql.connections -import psycopg2 as py_psql import pytest import random -import logging import sys import threading @@ -92,19 +90,20 @@ def grpc_query(query, user_, pass_, raise_exception): def postgres_query(query, user_, pass_, raise_exception): try: connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}" - cluster.exec_in_container(cluster.postgres_id, - [ - "/usr/bin/psql", - connection_string, - "--no-align", - "--field-separator=' '", - "-c", - query - ], - shell=True - ) + cluster.exec_in_container( + cluster.postgres_id, + [ + "/usr/bin/psql", + connection_string, + "--no-align", + "--field-separator=' '", + "-c", + query, + ], + shell=True, + ) except Exception: - assert raise_exception + assert raise_exception def mysql_query(query, user_, pass_, raise_exception): @@ -126,6 +125,7 @@ def mysql_query(query, user_, pass_, raise_exception): except Exception: assert raise_exception + @pytest.fixture(scope="module") def started_cluster(): try: From 0fd28bf3309a65e5c0204c814bef0a5f13dada9d Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 15 Aug 2023 13:39:34 +0000 Subject: [PATCH 22/30] added remote session log test --- .../02834_remote_session_log.reference | 13 +++++ .../0_stateless/02834_remote_session_log.sh | 56 +++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 tests/queries/0_stateless/02834_remote_session_log.reference create mode 100755 tests/queries/0_stateless/02834_remote_session_log.sh diff --git a/tests/queries/0_stateless/02834_remote_session_log.reference b/tests/queries/0_stateless/02834_remote_session_log.reference new file mode 100644 index 00000000000..e2680982ab0 --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.reference @@ -0,0 +1,13 @@ +0 +0 +0 +0 +client_port 0 connections: +0 +client_address '::' connections: +0 +login failures: +0 +TCP Login and logout count is equal +HTTP Login and logout count is equal +MySQL Login and logout count is equal diff --git a/tests/queries/0_stateless/02834_remote_session_log.sh b/tests/queries/0_stateless/02834_remote_session_log.sh new file mode 100755 index 00000000000..3bedfb6c9ee --- /dev/null +++ b/tests/queries/0_stateless/02834_remote_session_log.sh @@ -0,0 +1,56 @@ +#!/usr/bin/env bash +# Tags: no-fasttest + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +readonly PID=$$ +readonly TEST_USER=$"02834_USER_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \ + -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" + +${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" +${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass" + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "client_port 0 connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0" + +echo "client_address '::' connections:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')" + +echo "login failures:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'" + +# remote(...) function sometimes reuses old cached sessions for query execution. +# This makes LoginSuccess/Logout entries count unstable, but success and logouts must always match. + +for interface in 'TCP' 'HTTP' 'MySQL' +do + LOGIN_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'"` + CORRESPONDING_LOGOUT_RECORDS_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')"` + + if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then + echo "${interface} Login and logout count is equal" + else + TOTAL_LOGOUT_COUNT=`${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'"` + echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. 
TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}" + fi +done + +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" From cbf9f88b90f69a08bd51377338d2a679e629cd82 Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 15 Aug 2023 13:42:42 +0000 Subject: [PATCH 23/30] Added concurrent session session_log tests --- .../02833_concurrrent_sessions.reference | 34 +++++ .../0_stateless/02833_concurrrent_sessions.sh | 138 ++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 tests/queries/0_stateless/02833_concurrrent_sessions.reference create mode 100755 tests/queries/0_stateless/02833_concurrrent_sessions.sh diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrrent_sessions.reference new file mode 100644 index 00000000000..bfe507e8eac --- /dev/null +++ b/tests/queries/0_stateless/02833_concurrrent_sessions.reference @@ -0,0 +1,34 @@ +sessions: +150 +port_0_sessions: +0 +address_0_sessions: +0 +tcp_sessions +60 +http_sessions +30 +http_with_session_id_sessions +30 +my_sql_sessions +30 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 +Corresponding LoginSuccess/Logout +10 +LoginFailure +10 diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrrent_sessions.sh new file mode 100755 index 00000000000..26b48462a76 --- /dev/null +++ b/tests/queries/0_stateless/02833_concurrrent_sessions.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# Tags: no-fasttest, no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +# Each user uses a separate thread. +readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users +readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" ) +readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" ) +readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}") +readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" ) + +readonly TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" +readonly ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )" + +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +for user in "${ALL_USERS[@]}"; do + ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}" + ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}"; +done + +# All _session functions execute in separate threads. +# These functions try to create a session with successful login and logout. 
+# Sleep a small, random amount of time to make concurrency more intense. +# and try to login with an invalid password. +function tcp_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass" + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid' + done +} + +function http_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4" + done +} + +function http_with_session_id_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}" + + # login failure + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6" + done +} + +function mysql_session() +{ + local user=$1 + local i=0 + while (( (i++) < 10 )); do + # login logout + ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')" + + # login failure + ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)" + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; +export -f mysql_session; + +for user in "${TCP_USERS[@]}"; do + timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_USERS[@]}"; do + timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 & +done + +for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do + timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 & +done + +for user in "${MYSQL_USERS[@]}"; do + timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 & +done + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0" + +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')" + +echo "tcp_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'" +echo "http_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "http_with_session_id_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'" +echo "my_sql_sessions" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'" + +for user in "${ALL_USERS[@]}"; do + 
${CLICKHOUSE_CLIENT} -q "DROP USER ${user}" + echo "Corresponding LoginSuccess/Logout" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')" + echo "LoginFailure" + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'" + done From 4b5874b512802022e4c5581e17c9ed86c505129e Mon Sep 17 00:00:00 2001 From: Alexey Gerasimchuck Date: Tue, 15 Aug 2023 13:45:06 +0000 Subject: [PATCH 24/30] added drop user during session test --- .../02835_drop_user_during_session.reference | 8 ++ .../02835_drop_user_during_session.sh | 114 ++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 tests/queries/0_stateless/02835_drop_user_during_session.reference create mode 100755 tests/queries/0_stateless/02835_drop_user_during_session.sh diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.reference b/tests/queries/0_stateless/02835_drop_user_during_session.reference new file mode 100644 index 00000000000..7252faab8c6 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.reference @@ -0,0 +1,8 @@ +port_0_sessions: +0 +address_0_sessions: +0 +Corresponding LoginSuccess/Logout +9 +LoginFailure +0 diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh new file mode 100755 index 00000000000..347ebd22f96 --- /dev/null +++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh @@ -0,0 +1,114 @@ +#!/usr/bin/env bash +# Tags: no-debug + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +readonly PID=$$ + +readonly TEST_USER="02835_USER_${PID}" +readonly TEST_ROLE="02835_ROLE_${PID}" +readonly TEST_PROFILE="02835_PROFILE_${PID}" +readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface" + +function tcp_session() +{ + local user=$1 + ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.numbers" --user="${user}" +} + +function http_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +function http_with_session_id_session() +{ + local user=$1 + ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers" +} + +# Busy-waits until user $1, specified amount of queries ($2) will run simultaneously. 
+function wait_for_queries_start() +{ + local user=$1 + local queries_count=$2 + # 10 seconds waiting + counter=0 retries=100 + while [[ $counter -lt $retries ]]; do + result=$($CLICKHOUSE_CLIENT --query "SELECT COUNT(*) FROM system.processes WHERE user = '${user}'") + if [[ $result == "${queries_count}" ]]; then + break; + fi + sleep 0.1 + ((++counter)) + done +} + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" +${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'" + +# DROP USE CASE +${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +export -f tcp_session; +export -f http_session; +export -f http_with_session_id_session; + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP ROLE CASE +${CLICKHOUSE_CLIENT} -q "CREATE ROLE IF NOT EXISTS ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} DEFAULT ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP ROLE ${TEST_ROLE}" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +# DROP PROFILE CASE +${CLICKHOUSE_CLIENT} -q "CREATE SETTINGS PROFILE IF NOT EXISTS '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}" + +timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 & +timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 & + +wait_for_queries_start $TEST_USER 3 +${CLICKHOUSE_CLIENT} -q "DROP SETTINGS PROFILE '${TEST_PROFILE}'" +${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}" + +${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null & + +wait + +${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS" + +echo "port_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_port = 0" +echo "address_0_sessions:" +${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_address = toIPv6('::')" +echo "Corresponding LoginSuccess/Logout" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS}, FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout')" +echo "LoginFailure" +${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginFailure'" From 387ce81895d0d9a6a8e994bf24801b00dc3af049 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 16 Aug 2023 00:46:53 +0200 Subject: [PATCH 25/30] Clean all containers properly --- tests/ci/install_check.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 010b0dab408..700550bf077 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -191,6 +191,9 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults: retcode = process.wait() if retcode == 0: status = OK + subprocess.check_call( + f"docker kill -s 9 {container_id}", shell=True + ) break status = FAIL @@ -198,8 +201,8 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults: archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz" compress_fast(LOGS_PATH, archive_path) logs.append(archive_path) + subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) - subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True) test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs)) return test_results From 790475385acc5b722460e5b9581f637ac6ff9b1e Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 16 Aug 2023 00:47:39 +0200 Subject: [PATCH 26/30] Improve downloading: skip dbg, do not pull images on --no-download --- tests/ci/install_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 700550bf077..2ca947192da 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -279,7 +279,7 @@ def main(): sys.exit(0) docker_images = { - name: get_image_with_version(REPORTS_PATH, name) + name: get_image_with_version(REPORTS_PATH, name, args.download) for name in (RPM_IMAGE, DEB_IMAGE) } prepare_test_scripts() @@ -296,6 +296,8 @@ def main(): is_match = is_match or path.endswith(".rpm") if args.tgz: is_match = is_match or path.endswith(".tgz") + # We don't need debug packages, so let's filter them out + is_match = is_match and "-dbg" not in path return is_match download_builds_filter( From 3cd9fa395d2d3483e9e71274076cf151ef8ff682 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 16 Aug 2023 00:51:44 +0200 Subject: [PATCH 27/30] Add test for systemd + /etc/default/clickhouse --- tests/ci/install_check.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index 2ca947192da..b08e94c52b4 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -50,8 +50,11 @@ def prepare_test_scripts(): server_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse systemctl start clickhouse-server -clickhouse-client -q 'SELECT version()'""" +clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" keeper_test = r"""#!/bin/bash set -e trap "bash -ex /packages/preserve_logs.sh" ERR From 651a45b04d1cc4ec0b8be5b0fbb3068b09813fce Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 16 Aug 2023 00:57:22 +0200 Subject: [PATCH 28/30] Add tests for initd start --- docker/test/install/deb/Dockerfile | 1 + tests/ci/install_check.py | 12 ++++++++++++ 2 files changed, 13 insertions(+) diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile index 9614473c69b..e9c928b1fe7 100644 --- a/docker/test/install/deb/Dockerfile +++ b/docker/test/install/deb/Dockerfile @@ -12,6 +12,7 @@ ENV \ # install systemd packages RUN apt-get update && \ apt-get install -y --no-install-recommends \ + sudo \ systemd \ && \ apt-get clean && \ diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py index b08e94c52b4..a5788e2af3f 100644 --- a/tests/ci/install_check.py +++ b/tests/ci/install_check.py @@ -54,6 +54,14 @@ test_env='TEST_THE_DEFAULT_PARAMETER=15' echo "$test_env" >> /etc/default/clickhouse systemctl start clickhouse-server clickhouse-client -q 'SELECT version()' +grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" + initd_test = r"""#!/bin/bash +set -e +trap "bash -ex /packages/preserve_logs.sh" ERR +test_env='TEST_THE_DEFAULT_PARAMETER=15' +echo "$test_env" >> /etc/default/clickhouse +/etc/init.d/clickhouse-server start +clickhouse-client -q 'SELECT version()' grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ""" keeper_test = r"""#!/bin/bash set -e @@ -105,6 +113,7 @@ chmod a+rw -R /tests_logs exit 1 """ (TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8") + (TEMP_PATH / "initd_test.sh").write_text(initd_test, encoding="utf-8") (TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8") (TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8") (TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8") @@ -115,6 +124,9 @@ def test_install_deb(image: DockerImage) -> TestResults: "Install server deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-{server,client,common}*deb bash -ex /packages/server_test.sh""", + "Run server init.d": r"""#!/bin/bash -ex +apt-get install /packages/clickhouse-{server,client,common}*deb +bash -ex /packages/initd_test.sh""", "Install keeper deb": r"""#!/bin/bash -ex apt-get install /packages/clickhouse-keeper*deb bash -ex /packages/keeper_test.sh""", From 428a05a560dd9561f1729c38b963250b980c2f19 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Wed, 16 Aug 2023 14:04:14 +0300 Subject: [PATCH 29/30] Follow-up: Do not send logs to CI if the credentials are not set (#53456) * Follow-up * Automatic style fix * Update tests/ci/ast_fuzzer_check.py * Update tests/ci/functional_test_check.py * Update tests/ci/stress_check.py * Automatic style fix --------- Co-authored-by: robot-clickhouse Co-authored-by: Alexander Tokmakov --- tests/ci/ast_fuzzer_check.py | 2 +- tests/ci/functional_test_check.py | 2 +- tests/ci/stress_check.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 1a75d02bef4..fecf207589e 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -146,7 +146,7 @@ def main(): "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" ) - if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST": + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): subprocess.check_call( f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{main_log_path}'", shell=True, diff --git 
a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index 22210390b09..2bab330bd66 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -394,7 +394,7 @@ def main(): ci_logs_password = os.getenv( "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" ) - if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST": + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): subprocess.check_call( f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", shell=True, diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 9c18bcbfe40..21c3178faab 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -209,7 +209,7 @@ def run_stress_test(docker_image_name): ci_logs_password = os.getenv( "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD" ) - if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST": + if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""): subprocess.check_call( f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'", shell=True, From d5ed014ec4e4a2a0c49ac95a193aa0c15a511f4c Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 16 Aug 2023 22:56:32 +0300 Subject: [PATCH 30/30] Fix flaky test `02443_detach_attach_partition` (#53478) * fix flaky test * empty commit --- .../02443_detach_attach_partition.reference | 4 ++-- .../02443_detach_attach_partition.sh | 18 ++++++++++++++---- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference index 70930ea6d9a..77cfb77479d 100644 --- a/tests/queries/0_stateless/02443_detach_attach_partition.reference +++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference @@ -1,4 +1,4 @@ default begin inserts default end inserts -20 210 -20 210 +30 465 +30 465 diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh index 5a3f1b64065..ae104b833e3 100755 --- a/tests/queries/0_stateless/02443_detach_attach_partition.sh +++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -# Tags: race, zookeeper, no-parallel +# Tags: race, zookeeper, long CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # shellcheck source=../shell_config.sh @@ -29,9 +29,19 @@ function thread_attach() done } +insert_type=$(($RANDOM % 3)) +$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'insert_type $insert_type' FORMAT Null" + function insert() { - $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $i" 2>/dev/null + # Fault injection may lead to duplicates + if [[ "$insert_type" -eq 0 ]]; then + $CLICKHOUSE_CLIENT --insert_deduplication_token=$1 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + elif [[ "$insert_type" -eq 1 ]]; then + $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $1, $1" 2>/dev/null + else + $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null + fi } thread_detach & PID_1=$! @@ -41,8 +51,8 @@ thread_attach & PID_4=$! function do_inserts() { - for i in {1..20}; do - while ! 
insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done + for i in {1..30}; do + while ! insert $i; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done done }
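The insert_type dispatch above exists because keeper fault injection can make a successful insert look failed, so a blind retry may write the same data twice: insert_type 0 pins an explicit deduplication token, insert_type 1 relies on identical data blocks hashing to the same checksum, and insert_type 2 turns fault injection off. A rough sketch of why the token variant makes retries idempotent (illustrative values; assumes a Replicated MergeTree table such as the test's alter_table0):

    -- First attempt: may report a timeout even though the block was committed.
    INSERT INTO alter_table0 SETTINGS insert_deduplication_token = 'token-7' VALUES (123, 7);
    -- Retrying with the same token lets the server recognize the block
    -- and skip it, so the final row count stays correct.
    INSERT INTO alter_table0 SETTINGS insert_deduplication_token = 'token-7' VALUES (123, 7);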