From 4f9920c71ccb5edeeff15686e8cb75a07287d98c Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Fri, 24 Feb 2023 17:53:17 +0800
Subject: [PATCH 01/30] optimize performance of nullable String and Number
 column serializeValueIntoArena
---
.../aggregate_with_serialized_method.xml | 31 +++++++++++++++++++
1 file changed, 31 insertions(+)
create mode 100644 tests/performance/aggregate_with_serialized_method.xml
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
new file mode 100644
index 00000000000..52c7a0ddd3f
--- /dev/null
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -0,0 +1,31 @@
+<test>
+    <settings>
+        <max_threads>8</max_threads>
+        <allow_experimental_projection_optimization>0</allow_experimental_projection_optimization>
+        <max_insert_threads>4</max_insert_threads>
+    </settings>
+
+    <create_query>
+        CREATE TABLE t_nullable
+        (
+            key_string1 Nullable(String),
+            key_string2 Nullable(String),
+            key_string3 Nullable(String),
+            key_int64_1 Nullable(Int64),
+            key_int64_2 Nullable(Int64),
+            key_int64_3 Nullable(Int64),
+            key_int64_4 Nullable(Int64),
+            key_int64_5 Nullable(Int64),
+            m1 Int64,
+            m2 Int64,
+        )
+        ENGINE = MergeTree
+        ORDER BY tuple()
+    </create_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
+    <query>select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null</query>
+    <query>select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null</query>
+
+    <drop_query>drop table if exists t_nullable</drop_query>
+</test>
\ No newline at end of file
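
Background for the series: when a GROUP BY uses several keys, the aggregator
falls back to the "serialized" method and concatenates each row's key columns
into one arena-backed key via IColumn::serializeValueIntoArena. For a Nullable
column the row is serialized as a 1-byte null flag followed, for non-NULL rows,
by the nested value. A minimal standalone sketch of that layout, assuming an
Int64 nested type (illustrative names, not ClickHouse code):

    // Models the per-row arena layout: [null flag][raw value bytes if not NULL].
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    static size_t serializeNullableInt64(std::vector<uint8_t> & arena, uint8_t is_null, int64_t value)
    {
        size_t start = arena.size();
        arena.push_back(is_null);                  // the null map byte always comes first
        if (!is_null)
        {
            uint8_t raw[sizeof(value)];
            std::memcpy(raw, &value, sizeof(value));
            arena.insert(arena.end(), raw, raw + sizeof(raw));  // then the raw value
        }
        return arena.size() - start;               // 1 byte for NULL, 9 bytes otherwise
    }

    int main()
    {
        std::vector<uint8_t> arena;
        std::printf("NULL row: %zu byte(s)\n", serializeNullableInt64(arena, 1, 0));
        std::printf("non-NULL row: %zu byte(s)\n", serializeNullableInt64(arena, 0, 42));
        return 0;
    }
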
From 43e0481ac040922edfc519a5bd0cf6cd781924cd Mon Sep 17 00:00:00 2001
From: LiuNeng <1398775315@qq.com>
Date: Tue, 25 Apr 2023 11:38:50 +0800
Subject: [PATCH 02/30] optimize aggregation with multiple String keys
---
src/Columns/ColumnNullable.cpp | 47 +++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 7 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 2eb2ff0bf69..08f707d0b30 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -137,18 +137,51 @@ void ColumnNullable::insertData(const char * pos, size_t length)
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
     const auto & arr = getNullMapData();
+    const bool is_null = arr[n];
     static constexpr auto s = sizeof(arr[0]);
+    char * pos;
+    if (const ColumnString * string_col = checkAndGetColumn<ColumnString>(getNestedColumn()))
+    {
+        auto data = string_col->getDataAt(n);
+        size_t string_size = data.size + 1;
+        auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
+        pos = arena.allocContinue(memory_size, begin);
+        memcpy(pos, &arr[n], s);
+        if (!is_null)
+        {
+            memcpy(pos + s, &string_size, sizeof(string_size));
+            memcpy(pos + s + sizeof(string_size), data.data, string_size);
+        }
+        return StringRef(pos, memory_size);
+    }
+    else if (getNestedColumn().valuesHaveFixedSize())
+    {
+        auto col = getNestedColumnPtr();
+        auto data = col->getDataAt(n);
+        auto size = col->sizeOfValueIfFixed();
+        auto memory_size = is_null ? s : s + size;
+        pos = arena.allocContinue(memory_size, begin);
+        memcpy(pos, &arr[n], s);
+        if (!is_null)
+        {
+            memcpy(pos + s, data.data, size);
+        }
+        return StringRef(pos, memory_size);
+    }
-    auto * pos = arena.allocContinue(s, begin);
-    memcpy(pos, &arr[n], s);
-
-    if (arr[n])
-        return StringRef(pos, s);
-
-    auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
-
-    /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
-    return StringRef(nested_ref.data - s, nested_ref.size + s);
+    else
+    {
+        pos = arena.allocContinue(s, begin);
+        memcpy(pos, &arr[n], s);
+
+        if (arr[n])
+            return StringRef(pos, s);
+
+        auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
+
+        /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+        return StringRef(nested_ref.data - s, nested_ref.size + s);
+    }
 }
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
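
What the string fast path above buys: the generic path pays one allocContinue
for the flag and then a second, virtual serializeValueIntoArena call for the
nested value, while the new branch reserves flag + length + bytes with a single
allocContinue and copies once. A hedged standalone model of the resulting
Nullable(String) layout (string_size includes ColumnString's terminating zero,
hence data.size + 1 in the patch):

    // [1-byte flag][8-byte length][length bytes, including the trailing zero].
    #include <cstdint>
    #include <cstring>
    #include <string>
    #include <vector>

    static size_t serializeNullableString(std::vector<uint8_t> & arena, uint8_t is_null, const std::string & s)
    {
        size_t start = arena.size();
        arena.push_back(is_null);                          // null flag first
        if (!is_null)
        {
            uint64_t string_size = s.size() + 1;           // + terminating zero, as in ColumnString
            uint8_t len[sizeof(string_size)];
            std::memcpy(len, &string_size, sizeof(string_size));
            arena.insert(arena.end(), len, len + sizeof(len));   // length prefix
            arena.insert(arena.end(), s.begin(), s.end());       // payload
            arena.push_back('\0');                               // trailing zero byte
        }
        return arena.size() - start;
    }
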
From 035dbdaf220d4bfdedc88711aae799145362221d Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 26 Jun 2023 13:42:24 +0800
Subject: [PATCH 03/30] remove numbers optimization; it decreases
 performance
---
src/Columns/ColumnNullable.cpp | 14 --------------
.../aggregate_with_serialized_method.xml | 14 +++-----------
2 files changed, 3 insertions(+), 25 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 08f707d0b30..48b3740fa97 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -154,20 +154,6 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (getNestedColumn().valuesHaveFixedSize())
- {
- auto col = getNestedColumnPtr();
- auto data = col->getDataAt(n);
- auto size = col->sizeOfValueIfFixed();
- auto memory_size = is_null ? s : s + size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, data.data, size);
- }
- return StringRef(pos, memory_size);
- }
else
{
pos = arena.allocContinue(s, begin);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 52c7a0ddd3f..3c0ad4a7223 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -11,21 +11,13 @@
             key_string1 Nullable(String),
             key_string2 Nullable(String),
             key_string3 Nullable(String),
-            key_int64_1 Nullable(Int64),
-            key_int64_2 Nullable(Int64),
-            key_int64_3 Nullable(Int64),
-            key_int64_4 Nullable(Int64),
-            key_int64_5 Nullable(Int64),
             m1 Int64,
             m2 Int64,
         )
-        ENGINE = MergeTree
-        ORDER BY tuple()
+        ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
-    <fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
-    <query>select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null</query>
-    <query>select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null</query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>
 
     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
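
The message above only records that the numbers fast path made things slower;
one plausible cause is that it replaced a straight memcpy of a known width with
per-row virtual getDataAt()/sizeOfValueIfFixed() calls. A hedged micro-benchmark
skeleton for checking that kind of regression (results are hardware- and
compiler-dependent; this only shows how one could measure it):

    #include <chrono>
    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct ISource { virtual const void * dataAt(size_t i) const = 0; virtual ~ISource() = default; };
    struct Int64Source : ISource
    {
        std::vector<int64_t> data;
        const void * dataAt(size_t i) const override { return &data[i]; }
    };

    int main()
    {
        Int64Source src;
        src.data.assign(1 << 20, 42);
        std::vector<uint8_t> out(src.data.size() * sizeof(int64_t));

        auto t0 = std::chrono::steady_clock::now();
        const ISource & base = src;                    // per-row virtual dispatch
        for (size_t i = 0; i < src.data.size(); ++i)
            std::memcpy(out.data() + i * 8, base.dataAt(i), 8);
        auto t1 = std::chrono::steady_clock::now();
        for (size_t i = 0; i < src.data.size(); ++i)   // direct, inlinable access
            std::memcpy(out.data() + i * 8, &src.data[i], 8);
        auto t2 = std::chrono::steady_clock::now();

        auto us = [](auto d) { return std::chrono::duration_cast<std::chrono::microseconds>(d).count(); };
        std::printf("virtual: %lld us, direct: %lld us\n", (long long) us(t1 - t0), (long long) us(t2 - t1));
        return 0;
    }
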
From f96b9b7512222ba71f48c905ac2d181515e99774 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 28 Jun 2023 15:04:43 +0800
Subject: [PATCH 04/30] optimize fixed size column
---
src/Columns/ColumnNullable.cpp | 17 +++++++++++++++--
.../aggregate_with_serialized_method.xml | 10 ++++++++--
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 48b3740fa97..02a3de5ae55 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -140,9 +140,9 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
- if (const ColumnString * string_col = checkAndGetColumn<ColumnString>(getNestedColumn()))
+ if (isString(nested_column->getDataType()))
{
- auto data = string_col->getDataAt(n);
+ auto data = nested_column->getDataAt(n);
size_t string_size = data.size + 1;
auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
pos = arena.allocContinue(memory_size, begin);
@@ -154,6 +154,19 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
+ else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()))
+ {
+ auto data = nested_column->getDataAt(n);
+ auto size = data.size;
+ auto memory_size = is_null ? s : s + size;
+ pos = arena.allocContinue(memory_size, begin);
+ memcpy(pos, &arr[n], s);
+ if (!is_null)
+ {
+ memcpy(pos + s, data.data, size);
+ }
+ return StringRef(pos, memory_size);
+ }
else
{
pos = arena.allocContinue(s, begin);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 3c0ad4a7223..a280dae67aa 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -11,13 +11,19 @@
             key_string1 Nullable(String),
             key_string2 Nullable(String),
             key_string3 Nullable(String),
+            key_int64_1 Nullable(Int64),
+            key_int64_2 Nullable(Int64),
+            key_int64_3 Nullable(Int64),
+            key_int64_4 Nullable(Int64),
+            key_int64_5 Nullable(Int64),
             m1 Int64,
-            m2 Int64,
+            m2 Int64
         )
         ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
     <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>
+    <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>
 
     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
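
Whatever layout the serialize side writes, deserializeAndInsertFromArena must
consume byte-for-byte, so the fixed-size format stays "flag, then raw value
bytes only when the flag is 0" on both sides. A standalone decode sketch of
that contract for Int64 (illustrative; assumes a little-endian host, as the
raw memcpy layout does):

    #include <cstdint>
    #include <cstring>

    static const uint8_t * deserializeNullableInt64(const uint8_t * pos, bool & is_null, int64_t & value)
    {
        is_null = (*pos != 0);
        ++pos;                                   // consume the null flag
        if (!is_null)
        {
            std::memcpy(&value, pos, sizeof(value));
            pos += sizeof(value);                // consume the 8 value bytes
        }
        return pos;                              // points past this row's data
    }

    int main()
    {
        const uint8_t encoded[9] = {0, 42, 0, 0, 0, 0, 0, 0, 0};  // flag = 0, value = 42
        bool is_null = true;
        int64_t value = 0;
        deserializeNullableInt64(encoded, is_null, value);
        return (!is_null && value == 42) ? 0 : 1;
    }
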
From 62dffd0be232469a0440beb91a16efd40a398583 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 28 Jun 2023 22:42:54 +0800
Subject: [PATCH 05/30] optimize conditions
---
src/Columns/ColumnNullable.cpp | 6 ++++--
src/Columns/ColumnNullable.h | 3 +++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 02a3de5ae55..3cf1f158031 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -34,6 +34,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
+ is_string = isString(nested_column->getDataType());
+ is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType());
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -140,7 +142,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
- if (isString(nested_column->getDataType()))
+ if (is_string)
{
auto data = nested_column->getDataAt(n);
size_t string_size = data.size + 1;
@@ -154,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()))
+ else if (is_number_or_fixed_string)
{
auto data = nested_column->getDataAt(n);
auto size = data.size;
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index bc95eca69b9..e569b989c35 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -212,6 +212,9 @@ public:
private:
WrappedPtr nested_column;
WrappedPtr null_map;
+ // optimize serializeValueIntoArena
+ bool is_string;
+ bool is_number_or_fixed_string;
template <bool negative>
void applyNullMapImpl(const NullMap & map);
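
The gain here is classic loop-invariant hoisting: isString()/isNumber()/
isFixedString() were evaluated for every serialized row even though the nested
type is fixed at construction, so the checks collapse to booleans cached in the
constructor. A minimal sketch of the pattern (illustrative names only):

    #include <cstddef>

    struct NullableSerializerSketch
    {
        bool is_string;                 // computed once, like ColumnNullable's new members
        bool is_fixed_size;

        NullableSerializerSketch(bool is_string_, bool is_fixed_size_)
            : is_string(is_string_), is_fixed_size(is_fixed_size_) {}

        void serializeRow(size_t /*row*/) const
        {
            if (is_string)              // per-row cost is now one predictable branch
            { /* string fast path */ }
            else if (is_fixed_size)
            { /* fixed-size fast path */ }
            else
            { /* generic fallback */ }
        }
    };
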
From 81f0d175285c08ce96d619771d29555b84b8c7fd Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Thu, 29 Jun 2023 10:25:36 +0800
Subject: [PATCH 06/30] change param name
---
src/Columns/ColumnNullable.cpp | 4 ++--
src/Columns/ColumnNullable.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 3cf1f158031..9045851d790 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -35,7 +35,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
is_string = isString(nested_column->getDataType());
- is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType());
+ is_fixed_size_column = nested_column->valuesHaveFixedSize();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -156,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (is_number_or_fixed_string)
+ else if (is_fixed_size_column)
{
auto data = nested_column->getDataAt(n);
auto size = data.size;
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index e569b989c35..4f37650ffe3 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -214,7 +214,7 @@ private:
WrappedPtr null_map;
// optimize serializeValueIntoArena
bool is_string;
- bool is_number_or_fixed_string;
+ bool is_fixed_size_column;
template <bool negative>
void applyNullMapImpl(const NullMap & map);
From 594b38229f05d6c3a1182f7efdd21ca1efa4b6b4 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 5 Jul 2023 13:53:12 +0800
Subject: [PATCH 07/30] another version
---
src/Columns/ColumnAggregateFunction.cpp | 2 +-
src/Columns/ColumnAggregateFunction.h | 2 +-
src/Columns/ColumnArray.cpp | 2 +-
src/Columns/ColumnArray.h | 2 +-
src/Columns/ColumnCompressed.h | 2 +-
src/Columns/ColumnConst.h | 2 +-
src/Columns/ColumnDecimal.cpp | 21 +++++++++++++++++--
src/Columns/ColumnDecimal.h | 2 +-
src/Columns/ColumnFixedString.cpp | 23 ++++++++++++++++++---
src/Columns/ColumnFixedString.h | 2 +-
src/Columns/ColumnFunction.h | 2 +-
src/Columns/ColumnLowCardinality.cpp | 2 +-
src/Columns/ColumnLowCardinality.h | 2 +-
src/Columns/ColumnMap.cpp | 2 +-
src/Columns/ColumnMap.h | 2 +-
src/Columns/ColumnNullable.cpp | 27 ++++---------------------
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnObject.h | 2 +-
src/Columns/ColumnSparse.cpp | 2 +-
src/Columns/ColumnSparse.h | 2 +-
src/Columns/ColumnString.cpp | 23 ++++++++++++++++-----
src/Columns/ColumnString.h | 3 ++-
src/Columns/ColumnTuple.cpp | 2 +-
src/Columns/ColumnTuple.h | 2 +-
src/Columns/ColumnUnique.h | 4 ++--
src/Columns/ColumnVector.cpp | 23 ++++++++++++++++++---
src/Columns/ColumnVector.h | 2 +-
src/Columns/IColumn.h | 2 +-
src/Columns/IColumnDummy.h | 2 +-
29 files changed, 107 insertions(+), 61 deletions(-)
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 62ec324455e..3ebb30df87e 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -524,7 +524,7 @@ void ColumnAggregateFunction::insertDefault()
pushBackAndCreateState(data, arena, func.get());
}
-StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
+StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin, const UInt8 *) const
{
WriteBufferFromArena out(arena, begin);
func->serialize(data[n], out, version);
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index f9ce45708c9..7c7201e585a 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -162,7 +162,7 @@ public:
void insertDefault() override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * src_arena) override;
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 74512d1669b..1cb8188bce6 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -205,7 +205,7 @@ void ColumnArray::insertData(const char * pos, size_t length)
}
-StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index f011d9a607b..2a9bfa405e5 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -77,7 +77,7 @@ public:
StringRef getDataAt(size_t n) const override;
bool isDefaultAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
index bfe7cdb4924..b780fbbf37a 100644
--- a/src/Columns/ColumnCompressed.h
+++ b/src/Columns/ColumnCompressed.h
@@ -88,7 +88,7 @@ public:
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeDecompressed(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h
index f769dd6cc2a..dc84e0c2402 100644
--- a/src/Columns/ColumnConst.h
+++ b/src/Columns/ColumnConst.h
@@ -151,7 +151,7 @@ public:
s -= n;
}
- StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin) const override
+ StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin, const UInt8 *) const override
{
return data->serializeValueIntoArena(0, arena, begin);
}
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index 8e5792934cf..142ee6c271d 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -59,9 +59,26 @@ bool ColumnDecimal<T>::hasEqualValues() const
}
template <is_decimal T>
-StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(sizeof(T), begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &data[n], sizeof(T));
- return StringRef(pos, sizeof(T));
+ return res;
}
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 03e0b9be558..fb24ae4554b 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -80,7 +80,7 @@ public:
Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo<Float64>(data[n], scale); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 24b5c435ecd..a18e5c522a1 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -86,11 +86,28 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
memset(chars.data() + old_size + length, 0, n - length);
}
-StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
+StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(n, begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + n;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = n;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &chars[n * index], n);
- return StringRef(pos, n);
+ return res;
}
const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h
index 39497e3403e..445432b7b28 100644
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@@ -115,7 +115,7 @@ public:
chars.resize_assume_reserved(chars.size() - n * elems);
}
- StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h
index a1f6245c2bd..c21e88744e0 100644
--- a/src/Columns/ColumnFunction.h
+++ b/src/Columns/ColumnFunction.h
@@ -96,7 +96,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot serialize from {}", getName());
}
diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp
index 9269ea4ee4d..41358a4e538 100644
--- a/src/Columns/ColumnLowCardinality.cpp
+++ b/src/Columns/ColumnLowCardinality.cpp
@@ -255,7 +255,7 @@ void ColumnLowCardinality::insertData(const char * pos, size_t length)
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
}
-StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}
diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h
index dcd07ff3b34..91bd5945fd9 100644
--- a/src/Columns/ColumnLowCardinality.h
+++ b/src/Columns/ColumnLowCardinality.h
@@ -87,7 +87,7 @@ public:
void popBack(size_t n) override { idx.popBack(n); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp
index 797700e87b0..ddcde43ca23 100644
--- a/src/Columns/ColumnMap.cpp
+++ b/src/Columns/ColumnMap.cpp
@@ -111,7 +111,7 @@ void ColumnMap::popBack(size_t n)
nested->popBack(n);
}
-StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return nested->serializeValueIntoArena(n, arena, begin);
}
diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h
index e5bc26127df..fde8a7e0e67 100644
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@@ -58,7 +58,7 @@ public:
void insert(const Field & x) override;
void insertDefault() override;
void popBack(size_t n) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 9045851d790..ce0876647b9 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -136,38 +136,19 @@ void ColumnNullable::insertData(const char * pos, size_t length)
}
}
-StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
const auto & arr = getNullMapData();
- const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
if (is_string)
{
- auto data = nested_column->getDataAt(n);
- size_t string_size = data.size + 1;
- auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, &string_size, sizeof(string_size));
- memcpy(pos + s + sizeof(string_size), data.data, string_size);
- }
- return StringRef(pos, memory_size);
+ const auto * column_string = static_cast<const ColumnString *>(nested_column.get());
+ return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]);
}
else if (is_fixed_size_column)
{
- auto data = nested_column->getDataAt(n);
- auto size = data.size;
- auto memory_size = is_null ? s : s + size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, data.data, size);
- }
- return StringRef(pos, memory_size);
+ return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]);
}
else
{
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 4f37650ffe3..679f51d5900 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h
index bc5a6b69bb0..36a33a8f10f 100644
--- a/src/Columns/ColumnObject.h
+++ b/src/Columns/ColumnObject.h
@@ -244,7 +244,7 @@ public:
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp
index 4f76a9be4b9..057c0cd7112 100644
--- a/src/Columns/ColumnSparse.cpp
+++ b/src/Columns/ColumnSparse.cpp
@@ -150,7 +150,7 @@ void ColumnSparse::insertData(const char * pos, size_t length)
insertSingleValue([&](IColumn & column) { column.insertData(pos, length); });
}
-StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return values->serializeValueIntoArena(getValueIndex(n), arena, begin);
}
diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h
index 26e05655f60..48c7422dd27 100644
--- a/src/Columns/ColumnSparse.h
+++ b/src/Columns/ColumnSparse.h
@@ -78,7 +78,7 @@ public:
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 38c7b2c0dd6..50fe90ad8ef 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -213,17 +213,30 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
}
-StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
-
+ constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
- res.size = sizeof(string_size) + string_size;
- char * pos = arena.allocContinue(res.size, begin);
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(string_size) + string_size;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(string_size) + string_size;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &string_size, sizeof(string_size));
memcpy(pos + sizeof(string_size), &chars[offset], string_size);
- res.data = pos;
return res;
}
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index 08c876a803d..e8e5ebbcbf9 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
class Collator;
@@ -168,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index 9702d275114..d8992125be4 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -171,7 +171,7 @@ void ColumnTuple::popBack(size_t n)
column->popBack(n);
}
-StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
StringRef res(begin, 0);
for (const auto & column : columns)
diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h
index e7dee9b8ff9..79099f4c098 100644
--- a/src/Columns/ColumnTuple.h
+++ b/src/Columns/ColumnTuple.h
@@ -61,7 +61,7 @@ public:
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index 377255d80c7..d2fc69d7fb8 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
@@ -373,7 +373,7 @@ size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t lengt
}
template <typename ColumnType>
-StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
if (is_nullable)
{
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index f2fe343a371..a9b8c0ccacb 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -49,11 +49,28 @@ namespace ErrorCodes
}
template <typename T>
-StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(sizeof(T), begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
unalignedStore<T>(pos, data[n]);
- return StringRef(pos, sizeof(T));
+ return res;
}
template <typename T>
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index b8ebff2a5d5..7bb69656c5a 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h
index b4eaf5c28f5..12ac1102efd 100644
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@@ -218,7 +218,7 @@ public:
* For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes.
* Parameter begin should be used with Arena::allocContinue.
*/
- virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const = 0;
+ virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const = 0;
/// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method.
/// Returns pointer to the position after the read data.
diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h
index 82d4c857b29..4cadae2bc3d 100644
--- a/src/Columns/IColumnDummy.h
+++ b/src/Columns/IColumnDummy.h
@@ -57,7 +57,7 @@ public:
++s;
}
- StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const override
+ StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const override
{
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
char * res = arena.allocContinue(1, begin);
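
Patch 07 inverts the responsibility: rather than ColumnNullable writing the
flag and then delegating, every leaf serializeValueIntoArena now accepts an
optional null_bit and, when it is non-null, emits [flag] or [flag][value] in
one contiguous allocation and returns the combined StringRef. A standalone
sketch of that contract for a fixed-width type (illustrative, not the real API):

    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct Ref { size_t offset; size_t size; };    // offset into the arena, not a raw pointer

    template <typename T>
    static Ref serializeWithNullBit(std::vector<uint8_t> & arena, const T & value, const uint8_t * null_bit)
    {
        size_t start = arena.size();
        if (null_bit)
        {
            arena.push_back(*null_bit);            // flag goes first
            if (*null_bit)
                return {start, 1};                 // NULL rows carry nothing but the flag
        }
        const uint8_t * raw = reinterpret_cast<const uint8_t *>(&value);
        arena.insert(arena.end(), raw, raw + sizeof(T));
        return {start, arena.size() - start};      // flag (if any) + value, one contiguous chunk
    }
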
From 2997fe0677813edd622a5b8f2fe7f4ae17591b03 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 5 Jul 2023 18:30:54 +0800
Subject: [PATCH 08/30] add default parameter value to fix compilation
---
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnString.h | 2 +-
src/Columns/ColumnUnique.h | 2 +-
src/Columns/ColumnVector.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 679f51d5900..8064ce014d3 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index e8e5ebbcbf9..907dc83caeb 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -169,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index d2fc69d7fb8..69f4818e6be 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 7bb69656c5a..232769a5295 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
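
Why patch 08's defaulted overrides are only a stopgap (patch 09 removes them
again and passes nullptr explicitly at the call sites): in C++, default
arguments are bound statically, so the default that applies depends on the
static type of the expression, not on the dynamic type of the object. A
self-contained illustration of the pitfall:

    #include <cstdio>

    struct Base
    {
        virtual void f(int x = 1) { std::printf("Base %d\n", x); }
        virtual ~Base() = default;
    };

    struct Derived : Base
    {
        void f(int x = 2) override { std::printf("Derived %d\n", x); }
    };

    int main()
    {
        Derived d;
        Base & b = d;
        b.f();   // prints "Derived 1": Derived's body runs with Base's default argument
        d.f();   // prints "Derived 2"
        return 0;
    }
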
From f33367cd8b50089d33ad3dc431157396f369fb12 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 7 Aug 2023 13:37:24 +0800
Subject: [PATCH 09/30] add more tests
---
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnString.h | 2 +-
src/Columns/ColumnUnique.h | 2 +-
src/Columns/ColumnVector.h | 2 +-
src/Columns/tests/gtest_column_unique.cpp | 6 +++---
tests/performance/aggregate_with_serialized_method.xml | 4 +++-
6 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 8064ce014d3..719fa698acc 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index 907dc83caeb..e8e5ebbcbf9 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -169,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index 69f4818e6be..882d17b1649 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 232769a5295..7bb69656c5a 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/tests/gtest_column_unique.cpp b/src/Columns/tests/gtest_column_unique.cpp
index 15208da70fb..ab2cb42b603 100644
--- a/src/Columns/tests/gtest_column_unique.cpp
+++ b/src/Columns/tests/gtest_column_unique.cpp
@@ -117,7 +117,7 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const
const char * pos = nullptr;
for (size_t i = 0; i < num_values; ++i)
{
- auto ref = column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos);
+ auto ref = column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos, nullptr);
const char * new_pos;
column_unique->uniqueDeserializeAndInsertFromArena(ref.data, new_pos);
ASSERT_EQ(new_pos - ref.data, ref.size) << "Deserialized data has different sizes at position " << i;
@@ -140,8 +140,8 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const
const char * pos_lc = nullptr;
for (size_t i = 0; i < num_values; ++i)
{
- auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string);
- auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc);
+ auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string, nullptr);
+ auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc, nullptr);
ASSERT_EQ(ref_string, ref_lc) << "Serialized data is different from pattern at position " << i;
}
}
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index a280dae67aa..4c4ef0438ae 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -21,9 +21,11 @@
         )
         ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
     <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>
     <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>
+    <query>select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5</query>
+    <query>select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10</query>
 
     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
From 8a8330131644c106771055de7f67f761d01e00cd Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 7 Aug 2023 14:25:15 +0800
Subject: [PATCH 10/30] optimize
---
src/Columns/ColumnNullable.cpp | 92 ++++++++++++++-----
src/Columns/ColumnNullable.h | 4 +-
.../aggregate_with_serialized_method.xml | 1 +
3 files changed, 72 insertions(+), 25 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index ce0876647b9..ea95016a766 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -4,6 +4,10 @@
#include
#include
#include
+#include "ColumnDecimal.h"
+#include "ColumnFixedString.h"
+#include "ColumnsDateTime.h"
+#include "ColumnsNumber.h"
#include
#include
#include
@@ -34,8 +38,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
- is_string = isString(nested_column->getDataType());
- is_fixed_size_column = nested_column->valuesHaveFixedSize();
+ nested_type = nested_column->getDataType();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -141,29 +144,72 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
     const auto & arr = getNullMapData();
     static constexpr auto s = sizeof(arr[0]);
     char * pos;
-    if (is_string)
+
+    switch (nested_type)
     {
-        const auto * column_string = static_cast<const ColumnString *>(nested_column.get());
-        return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt8:
+            return static_cast<const ColumnUInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt16:
+            return static_cast<const ColumnUInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt32:
+            return static_cast<const ColumnUInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt64:
+            return static_cast<const ColumnUInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt128:
+            return static_cast<const ColumnUInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UInt256:
+            return static_cast<const ColumnUInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int8:
+            return static_cast<const ColumnInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int16:
+            return static_cast<const ColumnInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int32:
+            return static_cast<const ColumnInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int64:
+            return static_cast<const ColumnInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int128:
+            return static_cast<const ColumnInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Int256:
+            return static_cast<const ColumnInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Float32:
+            return static_cast<const ColumnFloat32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Float64:
+            return static_cast<const ColumnFloat64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Date:
+            return static_cast<const ColumnDate *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Date32:
+            return static_cast<const ColumnDate32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::DateTime:
+            return static_cast<const ColumnDateTime *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::DateTime64:
+            return static_cast<const ColumnDateTime64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::String:
+            return static_cast<const ColumnString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::FixedString:
+            return static_cast<const ColumnFixedString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal32:
+            return static_cast<const ColumnDecimal<Decimal32> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal64:
+            return static_cast<const ColumnDecimal<Decimal64> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal128:
+            return static_cast<const ColumnDecimal<Decimal128> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::Decimal256:
+            return static_cast<const ColumnDecimal<Decimal256> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::UUID:
+            return static_cast<const ColumnUUID *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::IPv4:
+            return static_cast<const ColumnIPv4 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        case TypeIndex::IPv6:
+            return static_cast<const ColumnIPv6 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+        default:
+            pos = arena.allocContinue(s, begin);
+            memcpy(pos, &arr[n], s);
+            if (arr[n])
+                return StringRef(pos, s);
+            auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
+            /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+            return StringRef(nested_ref.data - s, nested_ref.size + s);
     }
- else if (is_fixed_size_column)
- {
- return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]);
- }
- else
- {
- pos = arena.allocContinue(s, begin);
- memcpy(pos, &arr[n], s);
-
- if (arr[n])
- return StringRef(pos, s);
-
- auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
-
- /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
- return StringRef(nested_ref.data - s, nested_ref.size + s);
- }
-
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 719fa698acc..b57fdf3064d 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -6,6 +6,7 @@
#include
#include
+#include "Core/TypeId.h"
#include "config.h"
@@ -213,8 +214,7 @@ private:
WrappedPtr nested_column;
WrappedPtr null_map;
// optimize serializeValueIntoArena
- bool is_string;
- bool is_fixed_size_column;
+ TypeIndex nested_type;
template <bool negative>
void applyNullMapImpl(const NullMap & map);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 4c4ef0438ae..91763c69bb9 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -26,6 +26,7 @@
     <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>
     <query>select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5</query>
     <query>select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10</query>
+    <query>select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10</query>
 
     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
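
Patch 10 in miniature: the nested column's TypeIndex is cached once, and the
per-row code switches on it so that every call goes through a static_cast to
the concrete column type. The compiler then sees a non-virtual callee it can
inline, instead of per-row virtual dispatch. A hedged standalone sketch of the
structure (illustrative types, not ClickHouse's):

    #include <cstddef>
    #include <cstdint>

    enum class Tag : uint8_t { Int64Col, Float64Col, Other };

    struct Int64Col   { int64_t v;  size_t serializedSize() const { return sizeof(v); } };
    struct Float64Col { double v;   size_t serializedSize() const { return sizeof(v); } };

    // The tag is computed once per column; each row then takes an inlinable
    // direct call instead of a virtual one.
    static size_t serializedSizeByTag(Tag tag, const void * col)
    {
        switch (tag)
        {
            case Tag::Int64Col:
                return static_cast<const Int64Col *>(col)->serializedSize();
            case Tag::Float64Col:
                return static_cast<const Float64Col *>(col)->serializedSize();
            case Tag::Other:
                break;                              // generic (virtual) fallback would go here
        }
        return 0;
    }
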
From 65aeb0563f020dcc4035f3903dfded305329975b Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Tue, 8 Aug 2023 10:07:45 +0800
Subject: [PATCH 11/30] fix include
---
src/Columns/ColumnNullable.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index ea95016a766..fcd95e5c963 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -4,10 +4,10 @@
#include
#include
#include
-#include "ColumnDecimal.h"
-#include "ColumnFixedString.h"
-#include "ColumnsDateTime.h"
-#include "ColumnsNumber.h"
+#include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnsDateTime.h>
+#include <Columns/ColumnsNumber.h>
#include
#include
#include
From c1b94b4a3febdd2fbb22f1c2b8aa17b0089137d9 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 8 Aug 2023 00:04:03 +0200
Subject: [PATCH 12/30] fixes for detach/attach partition
---
src/Storages/MergeTree/IMergeTreeDataPart.cpp | 6 +-
src/Storages/MergeTree/IMergeTreeDataPart.h | 3 +-
src/Storages/MergeTree/MergeTreeData.cpp | 71 +++++++++++++-----
.../MergeTree/MergeTreeDataPartInMemory.cpp | 6 +-
.../MergeTree/MergeTreeDataPartInMemory.h | 3 +-
.../ReplicatedMergeTreeCleanupThread.cpp | 3 +-
.../MergeTree/ReplicatedMergeTreeSink.cpp | 8 +-
.../MergeTree/ReplicatedMergeTreeSink.h | 2 +-
src/Storages/StorageMergeTree.cpp | 3 +-
src/Storages/StorageReplicatedMergeTree.cpp | 5 +-
.../test.py | 54 ++++++++------
.../02443_detach_attach_partition.reference | 4 +
.../02443_detach_attach_partition.sh | 74 +++++++++++++++++++
13 files changed, 188 insertions(+), 54 deletions(-)
create mode 100644 tests/queries/0_stateless/02443_detach_attach_partition.reference
create mode 100755 tests/queries/0_stateless/02443_detach_attach_partition.sh
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
index 6d7b6b39a40..b05c3d15f24 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp
@@ -1780,7 +1780,8 @@ void IMergeTreeDataPart::renameToDetached(const String & prefix)
part_is_probably_removed_from_disk = true;
}
-DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/) const
+DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & /*metadata_snapshot*/,
+ const DiskTransactionPtr & disk_transaction) const
{
/// Avoid unneeded duplicates of broken parts if we try to detach the same broken part multiple times.
/// Otherwise it may pollute detached/ with dirs with _tryN suffix and we will fail to remove broken part after 10 attempts.
@@ -1795,7 +1796,8 @@ DataPartStoragePtr IMergeTreeDataPart::makeCloneInDetached(const String & prefix
IDataPartStorage::ClonePartParams params
{
.copy_instead_of_hardlink = isStoredOnRemoteDiskWithZeroCopySupport() && storage.supportsReplication() && storage_settings->allow_remote_fs_zero_copy_replication,
- .make_source_readonly = true
+ .make_source_readonly = true,
+ .external_transaction = disk_transaction
};
return getDataPartStorage().freeze(
storage.relative_data_path,
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 9243c91987b..1df091ab1a3 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -368,7 +368,8 @@ public:
virtual void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists);
/// Makes clone of a part in detached/ directory via hard links
- virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const;
+ virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
+ const DiskTransactionPtr & disk_transaction = {}) const;
/// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk
MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const;
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 0cfcd815cce..ed9127de977 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2619,8 +2619,50 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory()
if (detached_parts.empty())
return 0;
- PartsTemporaryRename renamed_parts(*this, "detached/");
+ auto get_last_touched_time = [&](const DetachedPartInfo & part_info) -> time_t
+ {
+ auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name;
+ time_t last_change_time = part_info.disk->getLastChanged(path);
+ time_t last_modification_time = part_info.disk->getLastModified(path).epochTime();
+ return std::max(last_change_time, last_modification_time);
+ };
+ time_t ttl_seconds = getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds;
+
+ size_t unfinished_deleting_parts = 0;
+ time_t current_time = time(nullptr);
+ for (const auto & part_info : detached_parts)
+ {
+ if (!part_info.dir_name.starts_with("deleting_"))
+ continue;
+
+ time_t startup_time = current_time + static_cast<time_t>(Context::getGlobalContextInstance()->getUptimeSeconds());
+ time_t last_touch_time = get_last_touched_time(part_info);
+
+ /// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED)
+ bool had_restart = last_touch_time < startup_time;
+ bool ttl_expired = last_touch_time + ttl_seconds <= current_time;
+ if (!had_restart && !ttl_expired)
+ continue;
+
+ /// We were trying to delete this detached part but did not finish deleting, probably because the server crashed
+ LOG_INFO(log, "Removing detached part {} that we failed to remove previously", part_info.dir_name);
+ try
+ {
+ removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / part_info.dir_name / "", part_info.dir_name);
+ ++unfinished_deleting_parts;
+ }
+ catch (...)
+ {
+ tryLogCurrentException(log);
+ }
+ }
+
+ if (!getSettings()->merge_tree_enable_clear_old_broken_detached)
+ return unfinished_deleting_parts;
+
+ const auto full_path = fs::path(relative_data_path) / "detached";
+ size_t removed_count = 0;
for (const auto & part_info : detached_parts)
{
if (!part_info.valid_name || part_info.prefix.empty())
@@ -2635,31 +2677,24 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory()
if (!can_be_removed_by_timeout)
continue;
- time_t current_time = time(nullptr);
- ssize_t threshold = current_time - getSettings()->merge_tree_clear_old_broken_detached_parts_ttl_timeout_seconds;
- auto path = fs::path(relative_data_path) / "detached" / part_info.dir_name;
- time_t last_change_time = part_info.disk->getLastChanged(path);
- time_t last_modification_time = part_info.disk->getLastModified(path).epochTime();
- time_t last_touch_time = std::max(last_change_time, last_modification_time);
+ ssize_t threshold = current_time - ttl_seconds;
+ time_t last_touch_time = get_last_touched_time(part_info);
if (last_touch_time == 0 || last_touch_time >= threshold)
continue;
- renamed_parts.addPart(part_info.dir_name, "deleting_" + part_info.dir_name, part_info.disk);
- }
+ const String & old_name = part_info.dir_name;
+ String new_name = "deleting_" + part_info.dir_name;
+ part_info.disk->moveFile(fs::path(full_path) / old_name, fs::path(full_path) / new_name);
- LOG_INFO(log, "Will clean up {} detached parts", renamed_parts.old_and_new_names.size());
-
- renamed_parts.tryRenameAll();
-
- for (auto & [old_name, new_name, disk] : renamed_parts.old_and_new_names)
- {
- removeDetachedPart(disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name);
+ removeDetachedPart(part_info.disk, fs::path(relative_data_path) / "detached" / new_name / "", old_name);
LOG_WARNING(log, "Removed broken detached part {} due to a timeout for broken detached parts", old_name);
- old_name.clear();
+ ++removed_count;
}
- return renamed_parts.old_and_new_names.size();
+ LOG_INFO(log, "Cleaned up {} detached parts", removed_count);
+
+ return removed_count + unfinished_deleting_parts;
}
size_t MergeTreeData::clearOldWriteAheadLogs()
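The rewritten cleanup above makes two passes: first it retries `deleting_`-prefixed directories whose removal never finished (for example after a crash), then it applies the usual TTL-based removal of broken detached parts. A free-standing sketch of the retry decision (C++20), using plain values rather than ClickHouse's DetachedPartInfo/IDisk objects; note that PATCH 13 below corrects the `+` in the startup-time computation to `-`, which is what this sketch uses:

// Stand-alone sketch of the retry decision from the hunk above.
#include <ctime>
#include <string>

bool should_retry_unfinished_delete(const std::string & dir_name,
                                    std::time_t last_touch_time,
                                    std::time_t current_time,
                                    std::time_t uptime_seconds,
                                    std::time_t ttl_seconds)
{
    if (!dir_name.starts_with("deleting_"))
        return false; // only directories whose deletion already began
    std::time_t startup_time = current_time - uptime_seconds;
    bool had_restart = last_touch_time < startup_time; // last touched before this server start
    bool ttl_expired = last_touch_time + ttl_seconds <= current_time;
    // If neither holds, it is probably being deleted right now
    // (for example, by a concurrent ALTER ... DROP DETACHED).
    return had_restart || ttl_expired;
}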
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
index ba300b110d7..7654791c997 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
@@ -17,6 +17,7 @@ namespace DB
namespace ErrorCodes
{
extern const int DIRECTORY_ALREADY_EXISTS;
+ extern const int NOT_IMPLEMENTED;
}
MergeTreeDataPartInMemory::MergeTreeDataPartInMemory(
@@ -138,8 +139,11 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String &
return new_data_part_storage;
}
-DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const
+DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
+ const DiskTransactionPtr & disk_transaction) const
{
+ if (disk_transaction)
+ throw Exception(ErrorCodes::NOT_IMPLEMENTED, "InMemory parts are not compatible with disk transactions");
String detached_path = *getRelativePathForDetachedPart(prefix, /* broken */ false);
return flushToDisk(detached_path, metadata_snapshot);
}
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
index 81549eeed3e..29506a54fdc 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
@@ -42,7 +42,8 @@ public:
bool hasColumnFiles(const NameAndTypePair & column) const override { return !!getColumnPosition(column.getNameInStorage()); }
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
- DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot) const override;
+ DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
+ const DiskTransactionPtr & disk_transaction = {}) const override;
std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
index 07cfced8362..b72c148a4e8 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp
@@ -149,8 +149,7 @@ Float32 ReplicatedMergeTreeCleanupThread::iterate()
/// do it under share lock
cleaned_other += storage.clearOldWriteAheadLogs();
cleaned_part_like += storage.clearOldTemporaryDirectories(storage.getSettings()->temporary_directories_lifetime.totalSeconds());
- if (storage.getSettings()->merge_tree_enable_clear_old_broken_detached)
- cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory();
+ cleaned_part_like += storage.clearOldBrokenPartsFromDetachedDirectory();
}
/// This is loose condition: no problem if we actually had lost leadership at this moment
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index 0db3464a637..bf0acef89c2 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -633,8 +633,8 @@ void ReplicatedMergeTreeSinkImpl::finishDelayedChunk(const ZooKeeperWithFa
delayed_chunk.reset();
}
-template<bool async_insert>
-void ReplicatedMergeTreeSinkImpl<async_insert>::writeExistingPart(MergeTreeData::MutableDataPartPtr & part)
+template<>
+bool ReplicatedMergeTreeSinkImpl<false>::writeExistingPart(MergeTreeData::MutableDataPartPtr & part)
{
/// NOTE: No delay in this case. That's Ok.
auto origin_zookeeper = storage.getZooKeeper();
@@ -649,8 +649,10 @@ void ReplicatedMergeTreeSinkImpl<async_insert>::writeExistingPart(MergeTreeData:
try
{
part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
- commitPart(zookeeper, part, BlockIDsType(), replicas_num, true);
+ String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : "";
+ bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second;
PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()));
+ return deduplicated;
}
catch (...)
{
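With deduplication enabled, the block id above is derived from the partition id plus the part's total checksum, so re-attaching identical data collapses to a single committed block. A toy illustration — the names are placeholders, not the sink's API:

// Toy illustration: identical parts yield identical block ids, and a
// second commit with an already-seen id is reported as a duplicate.
#include <set>
#include <string>

std::string make_block_id(const std::string & partition_id,
                          const std::string & checksum_hex,
                          bool deduplicate)
{
    return deduplicate ? partition_id + "_" + checksum_hex : "";
}

/// Returns true if the block was deduplicated (id seen before).
bool commit_block(std::set<std::string> & committed, const std::string & block_id)
{
    if (block_id.empty())
        return false; // deduplication disabled, always a fresh insert
    return !committed.insert(block_id).second;
}

PATCH 13 below builds on this by reporting the duplicate case to the part log via a dedicated error code.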
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
index 868590efa25..4a192a822f5 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.h
@@ -56,7 +56,7 @@ public:
String getName() const override { return "ReplicatedMergeTreeSink"; }
/// For ATTACHing existing data on filesystem.
- void writeExistingPart(MergeTreeData::MutableDataPartPtr & part);
+ bool writeExistingPart(MergeTreeData::MutableDataPartPtr & part);
/// For proper deduplication in MaterializedViews
bool lastBlockIsDuplicate() const override
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index ad9013d9f13..542701aeb98 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -1379,8 +1379,7 @@ bool StorageMergeTree::scheduleDataProcessingJob(BackgroundJobsAssignee & assign
cleared_count += clearOldWriteAheadLogs();
cleared_count += clearOldMutations();
cleared_count += clearEmptyParts();
- if (getSettings()->merge_tree_enable_clear_old_broken_detached)
- cleared_count += clearOldBrokenPartsFromDetachedDirectory();
+ cleared_count += clearOldBrokenPartsFromDetachedDirectory();
return cleared_count;
/// TODO maybe take into account number of cleared objects when calculating backoff
}, common_assignee_trigger, getStorageID()), /* need_trigger */ false);
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 7fce373e26b..72c939f9e82 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -6133,8 +6133,9 @@ PartitionCommandsResultInfo StorageReplicatedMergeTree::attachPartition(
MutableDataPartsVector loaded_parts = tryLoadPartsToAttach(partition, attach_part, query_context, renamed_parts);
/// TODO Allow to use quorum here.
- ReplicatedMergeTreeSink output(*this, metadata_snapshot, 0, 0, 0, false, false, false, query_context,
- /*is_attach*/true);
+ ReplicatedMergeTreeSink output(*this, metadata_snapshot, /* quorum */ 0, /* quorum_timeout_ms */ 0, /* max_parts_per_block */ 0,
+ /* quorum_parallel */ false, query_context->getSettingsRef().insert_deduplicate,
+ /* majority_quorum */ false, query_context, /*is_attach*/true);
for (size_t i = 0; i < loaded_parts.size(); ++i)
{
diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py
index 9a70ebe0d48..e7341deae35 100644
--- a/tests/integration/test_broken_detached_part_clean_up/test.py
+++ b/tests/integration/test_broken_detached_part_clean_up/test.py
@@ -57,27 +57,28 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
]
)
- node.exec_in_container(["mkdir", f"{path_to_detached}../unexpected_all_42_1337_5"])
- node.exec_in_container(
- [
- "touch",
- "-t",
- "1312031429.30",
- f"{path_to_detached}../unexpected_all_42_1337_5",
- ]
- )
- result = node.exec_in_container(
- ["stat", f"{path_to_detached}../unexpected_all_42_1337_5"]
- )
- print(result)
- assert "Modify: 2013-12-03" in result
- node.exec_in_container(
- [
- "mv",
- f"{path_to_detached}../unexpected_all_42_1337_5",
- f"{path_to_detached}unexpected_all_42_1337_5",
- ]
- )
+ for name in ['unexpected_all_42_1337_5', 'deleting_all_123_456_7', 'tmp-fetch_all_12_34_5']:
+ node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"])
+ node.exec_in_container(
+ [
+ "touch",
+ "-t",
+ "1312031429.30",
+ f"{path_to_detached}../{name}",
+ ]
+ )
+ result = node.exec_in_container(
+ ["stat", f"{path_to_detached}../{name}"]
+ )
+ print(result)
+ assert "Modify: 2013-12-03" in result
+ node.exec_in_container(
+ [
+ "mv",
+ f"{path_to_detached}../{name}",
+ f"{path_to_detached}{name}",
+ ]
+ )
result = node.query(
f"CHECK TABLE {table}", settings={"check_query_single_value_result": 0}
@@ -87,6 +88,10 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
node.query(f"DETACH TABLE {table}")
node.query(f"ATTACH TABLE {table}")
+ node.wait_for_log_line(
+ "Removing detached part deleting_all_123_456_7", timeout=90, look_behind_lines=1000000
+ )
+
result = node.exec_in_container(["ls", path_to_detached])
print(result)
assert f"{expect_broken_prefix}_all_3_3_0" in result
@@ -94,6 +99,7 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
assert "trash" in result
assert "broken_all_fake" in result
assert "unexpected_all_42_1337_5" in result
+ assert "deleting_all_123_456_7" not in result
time.sleep(15)
assert node.contains_in_log(
@@ -106,7 +112,13 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
assert "all_1_1_0" in result
assert "trash" in result
assert "broken_all_fake" in result
+ assert "tmp-fetch_all_12_34_5" in result
assert "unexpected_all_42_1337_5" not in result
+ assert "deleting_all_123_456_7" not in result
+
+ node.query(f"ALTER TABLE {table} DROP DETACHED PART 'tmp-fetch_all_12_34_5'", settings={"allow_drop_detached": 1})
+ result = node.exec_in_container(["ls", path_to_detached])
+ assert "tmp-fetch_all_12_34_5" not in result
node.query(f"DROP TABLE {table} SYNC")
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference
new file mode 100644
index 00000000000..77cfb77479d
--- /dev/null
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference
@@ -0,0 +1,4 @@
+default begin inserts
+default end inserts
+30 465
+30 465
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
new file mode 100755
index 00000000000..c983d5d56d3
--- /dev/null
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -0,0 +1,74 @@
+#!/usr/bin/env bash
+# Tags: race, zookeeper, no-parallel
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+# shellcheck source=./replication.lib
+. "$CURDIR"/replication.lib
+
+
+$CLICKHOUSE_CLIENT -n -q "
+ DROP TABLE IF EXISTS alter_table0;
+ DROP TABLE IF EXISTS alter_table1;
+
+ CREATE TABLE alter_table0 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r1') ORDER BY a;
+ CREATE TABLE alter_table1 (a UInt8, b Int16) ENGINE = ReplicatedMergeTree('/clickhouse/tables/$CLICKHOUSE_TEST_ZOOKEEPER_PREFIX/alter_table', 'r2') ORDER BY a;
+" || exit 1
+
+function thread_detach()
+{
+ while true; do
+ $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) DETACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||:
+ done
+}
+function thread_attach()
+{
+ while true; do
+ $CLICKHOUSE_CLIENT -mn -q "ALTER TABLE alter_table$(($RANDOM % 2)) ATTACH PARTITION ID 'all'; SELECT sleep($RANDOM / 32000) format Null;" 2>/dev/null ||:
+ done
+}
+
+function insert()
+{
+ $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) VALUES ($RANDOM, $i)"
+}
+
+thread_detach & PID_1=$!
+thread_attach & PID_2=$!
+thread_detach & PID_3=$!
+thread_attach & PID_4=$!
+
+function do_inserts()
+{
+ for i in {1..30}; do
+ while ! insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
+ done
+}
+
+$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'begin inserts'"
+do_inserts 2>&1| grep -Fa "Exception: " | grep -Fv "was cancelled by concurrent ALTER PARTITION"
+$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'end inserts'"
+
+kill -TERM $PID_1 && kill -TERM $PID_2 && kill -TERM $PID_3 && kill -TERM $PID_4
+wait
+
+$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
+$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
+$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'";
+$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'";
+$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
+$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
+
+engine=$($CLICKHOUSE_CLIENT -q "SELECT engine FROM system.tables WHERE database=currentDatabase() AND table='alter_table0'")
+if [[ "$engine" == "ReplicatedMergeTree" ]]; then
+ # ReplicatedMergeTree may duplicate data on ATTACH PARTITION (when one replica has a merged part and another replica has source parts only)
+ $CLICKHOUSE_CLIENT -q "OPTIMIZE TABLE alter_table0 FINAL DEDUPLICATE"
+ $CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
+fi
+
+$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table0"
+$CLICKHOUSE_CLIENT -q "SELECT count(), sum(b) FROM alter_table1"
+
+$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table0"
+$CLICKHOUSE_CLIENT -q "DROP TABLE alter_table1"
From 3d59ebe108016a83bba161751f728b08d5f94d70 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 10 Aug 2023 20:11:22 +0200
Subject: [PATCH 13/30] fix
---
src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +-
src/Storages/MergeTree/MergeTreeData.cpp | 2 +-
.../MergeTree/MergeTreeDataPartInMemory.h | 2 +-
.../MergeTree/ReplicatedMergeTreeSink.cpp | 5 ++-
.../test.py | 45 ++++++++++---------
5 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 1df091ab1a3..195fdbc4d05 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -369,7 +369,7 @@ public:
/// Makes clone of a part in detached/ directory via hard links
virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
- const DiskTransactionPtr & disk_transaction = {}) const;
+ const DiskTransactionPtr & disk_transaction = {}) const; /// NOLINT
/// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk
MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const;
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index ed9127de977..395b480a84f 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2636,7 +2636,7 @@ size_t MergeTreeData::clearOldBrokenPartsFromDetachedDirectory()
if (!part_info.dir_name.starts_with("deleting_"))
continue;
- time_t startup_time = current_time + static_cast<time_t>(Context::getGlobalContextInstance()->getUptimeSeconds());
+ time_t startup_time = current_time - static_cast<time_t>(Context::getGlobalContextInstance()->getUptimeSeconds());
time_t last_touch_time = get_last_touched_time(part_info);
/// Maybe it's being deleted right now (for example, in ALTER DROP DETACHED)
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
index 29506a54fdc..95a17cbf589 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
@@ -43,7 +43,7 @@ public:
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
- const DiskTransactionPtr & disk_transaction = {}) const override;
+ const DiskTransactionPtr & disk_transaction = {}) const override; /// NOLINT
std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
index bf0acef89c2..fa5a40cf27a 100644
--- a/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
+++ b/src/Storages/MergeTree/ReplicatedMergeTreeSink.cpp
@@ -651,7 +651,10 @@ bool ReplicatedMergeTreeSinkImpl<false>::writeExistingPart(MergeTreeData::Mutabl
part->version.setCreationTID(Tx::PrehistoricTID, nullptr);
String block_id = deduplicate ? fmt::format("{}_{}", part->info.partition_id, part->checksums.getTotalChecksumHex()) : "";
bool deduplicated = commitPart(zookeeper, part, block_id, replicas_num, /* writing_existing_part */ true).second;
- PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()));
+
+ /// Set a special error code if the block is duplicate
+ int error = (deduplicate && deduplicated) ? ErrorCodes::INSERT_WAS_DEDUPLICATED : 0;
+ PartLog::addNewPart(storage.getContext(), PartLog::PartLogEntry(part, watch.elapsed(), profile_events_scope.getSnapshot()), ExecutionStatus(error));
return deduplicated;
}
catch (...)
diff --git a/tests/integration/test_broken_detached_part_clean_up/test.py b/tests/integration/test_broken_detached_part_clean_up/test.py
index e7341deae35..bdf993ddedf 100644
--- a/tests/integration/test_broken_detached_part_clean_up/test.py
+++ b/tests/integration/test_broken_detached_part_clean_up/test.py
@@ -57,7 +57,11 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
]
)
- for name in ['unexpected_all_42_1337_5', 'deleting_all_123_456_7', 'tmp-fetch_all_12_34_5']:
+ for name in [
+ "unexpected_all_42_1337_5",
+ "deleting_all_123_456_7",
+ "covered-by-broken_all_12_34_5",
+ ]:
node.exec_in_container(["mkdir", f"{path_to_detached}../{name}"])
node.exec_in_container(
[
@@ -67,9 +71,7 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
f"{path_to_detached}../{name}",
]
)
- result = node.exec_in_container(
- ["stat", f"{path_to_detached}../{name}"]
- )
+ result = node.exec_in_container(["stat", f"{path_to_detached}../{name}"])
print(result)
assert "Modify: 2013-12-03" in result
node.exec_in_container(
@@ -89,21 +91,19 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
node.query(f"ATTACH TABLE {table}")
node.wait_for_log_line(
- "Removing detached part deleting_all_123_456_7", timeout=90, look_behind_lines=1000000
+ "Removing detached part deleting_all_123_456_7",
+ timeout=90,
+ look_behind_lines=1000000,
)
-
- result = node.exec_in_container(["ls", path_to_detached])
- print(result)
- assert f"{expect_broken_prefix}_all_3_3_0" in result
- assert "all_1_1_0" in result
- assert "trash" in result
- assert "broken_all_fake" in result
- assert "unexpected_all_42_1337_5" in result
- assert "deleting_all_123_456_7" not in result
-
- time.sleep(15)
- assert node.contains_in_log(
- "Removed broken detached part unexpected_all_42_1337_5 due to a timeout"
+ node.wait_for_log_line(
+ f"Removed broken detached part {expect_broken_prefix}_all_3_3_0 due to a timeout",
+ timeout=10,
+ look_behind_lines=1000000,
+ )
+ node.wait_for_log_line(
+ "Removed broken detached part unexpected_all_42_1337_5 due to a timeout",
+ timeout=10,
+ look_behind_lines=1000000,
)
result = node.exec_in_container(["ls", path_to_detached])
@@ -112,13 +112,16 @@ def remove_broken_detached_part_impl(table, node, expect_broken_prefix):
assert "all_1_1_0" in result
assert "trash" in result
assert "broken_all_fake" in result
- assert "tmp-fetch_all_12_34_5" in result
+ assert "covered-by-broken_all_12_34_5" in result
assert "unexpected_all_42_1337_5" not in result
assert "deleting_all_123_456_7" not in result
- node.query(f"ALTER TABLE {table} DROP DETACHED PART 'tmp-fetch_all_12_34_5'", settings={"allow_drop_detached": 1})
+ node.query(
+ f"ALTER TABLE {table} DROP DETACHED PART 'covered-by-broken_all_12_34_5'",
+ settings={"allow_drop_detached": 1},
+ )
result = node.exec_in_container(["ls", path_to_detached])
- assert "tmp-fetch_all_12_34_5" not in result
+ assert "covered-by-broken_all_12_34_5" not in result
node.query(f"DROP TABLE {table} SYNC")
From 5a8b8203b2df3f7c9c054d7f0435b35c6d06f008 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Thu, 10 Aug 2023 23:22:51 +0200
Subject: [PATCH 14/30] fix
---
src/Storages/MergeTree/IMergeTreeDataPart.h | 2 +-
src/Storages/MergeTree/MergeTreeData.cpp | 2 +-
src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp | 3 ++-
src/Storages/MergeTree/MergeTreeDataPartInMemory.h | 2 +-
src/Storages/StorageMergeTree.cpp | 6 +++---
src/Storages/StorageReplicatedMergeTree.cpp | 8 ++++----
6 files changed, 12 insertions(+), 11 deletions(-)
diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h
index 195fdbc4d05..49aa2e1e7f1 100644
--- a/src/Storages/MergeTree/IMergeTreeDataPart.h
+++ b/src/Storages/MergeTree/IMergeTreeDataPart.h
@@ -369,7 +369,7 @@ public:
/// Makes clone of a part in detached/ directory via hard links
virtual DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
- const DiskTransactionPtr & disk_transaction = {}) const; /// NOLINT
+ const DiskTransactionPtr & disk_transaction) const;
/// Makes full clone of part in specified subdirectory (relative to storage data directory, e.g. "detached") on another disk
MutableDataPartStoragePtr makeCloneOnDisk(const DiskPtr & disk, const String & directory_name) const;
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 395b480a84f..5ec52d4162e 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -4066,7 +4066,7 @@ void MergeTreeData::restoreAndActivatePart(const DataPartPtr & part, DataPartsLo
void MergeTreeData::outdateUnexpectedPartAndCloneToDetached(const DataPartPtr & part_to_detach)
{
LOG_INFO(log, "Cloning part {} to unexpected_{} and making it obsolete.", part_to_detach->getDataPartStorage().getPartDirectory(), part_to_detach->name);
- part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr());
+ part_to_detach->makeCloneInDetached("unexpected", getInMemoryMetadataPtr(), /*disk_transaction*/ {});
DataPartsLock lock = lockParts();
part_to_detach->is_unexpected_local_part = true;
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
index 7654791c997..d8034e62802 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.cpp
@@ -139,7 +139,8 @@ MutableDataPartStoragePtr MergeTreeDataPartInMemory::flushToDisk(const String &
return new_data_part_storage;
}
-DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
+DataPartStoragePtr MergeTreeDataPartInMemory::makeCloneInDetached(const String & prefix,
+ const StorageMetadataPtr & metadata_snapshot,
const DiskTransactionPtr & disk_transaction) const
{
if (disk_transaction)
diff --git a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
index 95a17cbf589..95f7b796f9a 100644
--- a/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
+++ b/src/Storages/MergeTree/MergeTreeDataPartInMemory.h
@@ -43,7 +43,7 @@ public:
String getFileNameForColumn(const NameAndTypePair & /* column */) const override { return ""; }
void renameTo(const String & new_relative_path, bool remove_new_dir_if_exists) override;
DataPartStoragePtr makeCloneInDetached(const String & prefix, const StorageMetadataPtr & metadata_snapshot,
- const DiskTransactionPtr & disk_transaction = {}) const override; /// NOLINT
+ const DiskTransactionPtr & disk_transaction) const override;
std::optional<time_t> getColumnModificationTime(const String & /* column_name */) const override { return {}; }
MutableDataPartStoragePtr flushToDisk(const String & new_relative_path, const StorageMetadataPtr & metadata_snapshot) const;
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 542701aeb98..9506d6f1075 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -1817,7 +1817,7 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
- part->makeCloneInDetached("", metadata_snapshot);
+ part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
{
@@ -1902,7 +1902,7 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
{
auto metadata_snapshot = getInMemoryMetadataPtr();
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
- part->makeCloneInDetached("", metadata_snapshot);
+ part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
@@ -1944,7 +1944,7 @@ void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool de
for (const auto & part : parts_to_remove)
{
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
- part->makeCloneInDetached("", metadata_snapshot);
+ part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index 72c939f9e82..bc2cff80c59 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2098,7 +2098,7 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
if (auto part_to_detach = part.getPartIfItWasActive())
{
LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory());
- part_to_detach->makeCloneInDetached("", metadata_snapshot);
+ part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
}
@@ -2828,7 +2828,7 @@ void StorageReplicatedMergeTree::cloneReplica(const String & source_replica, Coo
for (const auto & part : parts_to_remove_from_working_set)
{
LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
- part->makeCloneInDetached("clone", metadata_snapshot);
+ part->makeCloneInDetached("clone", metadata_snapshot, /*disk_transaction*/ {});
}
}
@@ -3794,12 +3794,12 @@ void StorageReplicatedMergeTree::removePartAndEnqueueFetch(const String & part_n
chassert(!broken_part);
chassert(!storage_init);
part->was_removed_as_broken = true;
- part->makeCloneInDetached("broken", getInMemoryMetadataPtr());
+ part->makeCloneInDetached("broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {});
broken_part = part;
}
else
{
- part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr());
+ part->makeCloneInDetached("covered-by-broken", getInMemoryMetadataPtr(), /*disk_transaction*/ {});
}
detached_parts.push_back(part->name);
}
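A small sketch of the API tightening in this patch, under the assumption that the goal is to force call sites to state their intent: removing the defaulted argument turns a forgotten transaction into a compile error rather than a silent non-transactional clone. Types and names below are stand-ins:

// Stand-in types; only the shape of the signature matters here.
#include <memory>

struct DiskTransaction {};
using DiskTransactionPtr = std::shared_ptr<DiskTransaction>;

// Before: "const DiskTransactionPtr & disk_transaction = {}" let call
// sites silently fall through to the non-transactional path.
// After: every caller spells it out, as in the hunks above.
void makeCloneInDetached(const char * /*prefix*/, const DiskTransactionPtr & disk_transaction)
{
    if (disk_transaction)
    {
        /* clone within the transaction */
    }
    /* otherwise clone directly */
}

void caller()
{
    makeCloneInDetached("broken", /*disk_transaction*/ {});
}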
From 44403458556ef1037b69a5ae49eb9cc9cba16456 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Sun, 13 Aug 2023 17:09:11 +0200
Subject: [PATCH 15/30] fix
---
.../02443_detach_attach_partition.reference | 4 ++--
.../0_stateless/02443_detach_attach_partition.sh | 10 ++++++----
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference
index 77cfb77479d..70930ea6d9a 100644
--- a/tests/queries/0_stateless/02443_detach_attach_partition.reference
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference
@@ -1,4 +1,4 @@
default begin inserts
default end inserts
-30 465
-30 465
+20 210
+20 210
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
index c983d5d56d3..36bc3309924 100755
--- a/tests/queries/0_stateless/02443_detach_attach_partition.sh
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -31,7 +31,7 @@ function thread_attach()
function insert()
{
- $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) VALUES ($RANDOM, $i)"
+ $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $i" 2>/dev/null
}
thread_detach & PID_1=$!
@@ -41,7 +41,7 @@ thread_attach & PID_4=$!
function do_inserts()
{
- for i in {1..30}; do
+ for i in {1..20}; do
while ! insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
done
}
@@ -55,8 +55,10 @@ wait
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
-$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'";
-$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'";
+$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'"
+$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null
+$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
+$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'"
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
From bd0e8792886ac2a02ad45eb2a48b935aa89fb5fe Mon Sep 17 00:00:00 2001
From: Filipp Ozinov
Date: Sun, 13 Aug 2023 22:48:35 +0400
Subject: [PATCH 16/30] Add note about skip indexes
Related to #53350
---
docs/en/engines/database-engines/materialized-mysql.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md
index f7cc52e622e..b7e567c7b6c 100644
--- a/docs/en/engines/database-engines/materialized-mysql.md
+++ b/docs/en/engines/database-engines/materialized-mysql.md
@@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo
* Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).
* Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs).
* Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias).
- * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes)
+ * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable the `use_skip_indexes_if_final` setting for them to work (MaterializedMySQL uses `SELECT ... FINAL` by default)
* Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are
disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here.
`INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)
From f8b1d7474dffa024ff692bec35578c5172aeea8a Mon Sep 17 00:00:00 2001
From: vdimir
Date: Mon, 14 Aug 2023 12:46:23 +0000
Subject: [PATCH 17/30] Update test_distributed_inter_server_secret to pass
with analyzer
---
tests/analyzer_integration_broken_tests.txt | 18 -----
.../test.py | 68 +++++++------------
2 files changed, 25 insertions(+), 61 deletions(-)
diff --git a/tests/analyzer_integration_broken_tests.txt b/tests/analyzer_integration_broken_tests.txt
index 68822fbf311..3cc4869aa62 100644
--- a/tests/analyzer_integration_broken_tests.txt
+++ b/tests/analyzer_integration_broken_tests.txt
@@ -5,24 +5,6 @@ test_distributed_ddl/test.py::test_default_database[configs_secure]
test_distributed_ddl/test.py::test_on_server_fail[configs]
test_distributed_ddl/test.py::test_on_server_fail[configs_secure]
test_distributed_insert_backward_compatibility/test.py::test_distributed_in_tuple
-test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[default-]
-test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[nopass-]
-test_distributed_inter_server_secret/test.py::test_per_user_inline_settings_secure_cluster[pass-foo]
-test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[default-]
-test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[nopass-]
-test_distributed_inter_server_secret/test.py::test_per_user_protocol_settings_secure_cluster[pass-foo]
-test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[default-]
-test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[nopass-]
-test_distributed_inter_server_secret/test.py::test_user_insecure_cluster[pass-foo]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster[default-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster[nopass-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster[pass-foo]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[default-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[nopass-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_from_backward[pass-foo]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[default-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[nopass-]
-test_distributed_inter_server_secret/test.py::test_user_secure_cluster_with_backward[pass-foo]
test_distributed_load_balancing/test.py::test_distributed_replica_max_ignored_errors
test_distributed_load_balancing/test.py::test_load_balancing_default
test_distributed_load_balancing/test.py::test_load_balancing_priority_round_robin[dist_priority]
diff --git a/tests/integration/test_distributed_inter_server_secret/test.py b/tests/integration/test_distributed_inter_server_secret/test.py
index 36ac07a550a..1aeaddcf3c5 100644
--- a/tests/integration/test_distributed_inter_server_secret/test.py
+++ b/tests/integration/test_distributed_inter_server_secret/test.py
@@ -110,10 +110,6 @@ def start_cluster():
cluster.shutdown()
-def query_with_id(node, id_, query, **kwargs):
- return node.query("WITH '{}' AS __id {}".format(id_, query), **kwargs)
-
-
# @return -- [user, initial_user]
def get_query_user_info(node, query_pattern):
node.query("SYSTEM FLUSH LOGS")
@@ -334,7 +330,7 @@ def test_secure_disagree_insert():
@users
def test_user_insecure_cluster(user, password):
id_ = "query-dist_insecure-" + user
- query_with_id(n1, id_, "SELECT * FROM dist_insecure", user=user, password=password)
+ n1.query(f"SELECT *, '{id_}' FROM dist_insecure", user=user, password=password)
assert get_query_user_info(n1, id_) == [
user,
user,
@@ -345,7 +341,7 @@ def test_user_insecure_cluster(user, password):
@users
def test_user_secure_cluster(user, password):
id_ = "query-dist_secure-" + user
- query_with_id(n1, id_, "SELECT * FROM dist_secure", user=user, password=password)
+ n1.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password)
assert get_query_user_info(n1, id_) == [user, user]
assert get_query_user_info(n2, id_) == [user, user]
@@ -353,16 +349,14 @@ def test_user_secure_cluster(user, password):
@users
def test_per_user_inline_settings_insecure_cluster(user, password):
id_ = "query-ddl-settings-dist_insecure-" + user
- query_with_id(
- n1,
- id_,
- """
- SELECT * FROM dist_insecure
- SETTINGS
- prefer_localhost_replica=0,
- max_memory_usage_for_user=1e9,
- max_untracked_memory=0
- """,
+ n1.query(
+ f"""
+ SELECT *, '{id_}' FROM dist_insecure
+ SETTINGS
+ prefer_localhost_replica=0,
+ max_memory_usage_for_user=1e9,
+ max_untracked_memory=0
+ """,
user=user,
password=password,
)
@@ -372,16 +366,14 @@ def test_per_user_inline_settings_insecure_cluster(user, password):
@users
def test_per_user_inline_settings_secure_cluster(user, password):
id_ = "query-ddl-settings-dist_secure-" + user
- query_with_id(
- n1,
- id_,
- """
- SELECT * FROM dist_secure
- SETTINGS
- prefer_localhost_replica=0,
- max_memory_usage_for_user=1e9,
- max_untracked_memory=0
- """,
+ n1.query(
+ f"""
+ SELECT *, '{id_}' FROM dist_secure
+ SETTINGS
+ prefer_localhost_replica=0,
+ max_memory_usage_for_user=1e9,
+ max_untracked_memory=0
+ """,
user=user,
password=password,
)
@@ -393,10 +385,8 @@ def test_per_user_inline_settings_secure_cluster(user, password):
@users
def test_per_user_protocol_settings_insecure_cluster(user, password):
id_ = "query-protocol-settings-dist_insecure-" + user
- query_with_id(
- n1,
- id_,
- "SELECT * FROM dist_insecure",
+ n1.query(
+ f"SELECT *, '{id_}' FROM dist_insecure",
user=user,
password=password,
settings={
@@ -411,10 +401,8 @@ def test_per_user_protocol_settings_insecure_cluster(user, password):
@users
def test_per_user_protocol_settings_secure_cluster(user, password):
id_ = "query-protocol-settings-dist_secure-" + user
- query_with_id(
- n1,
- id_,
- "SELECT * FROM dist_secure",
+ n1.query(
+ f"SELECT *, '{id_}' FROM dist_secure",
user=user,
password=password,
settings={
@@ -431,8 +419,8 @@ def test_per_user_protocol_settings_secure_cluster(user, password):
@users
def test_user_secure_cluster_with_backward(user, password):
id_ = "with-backward-query-dist_secure-" + user
- query_with_id(
- n1, id_, "SELECT * FROM dist_secure_backward", user=user, password=password
+ n1.query(
+ f"SELECT *, '{id_}' FROM dist_secure_backward", user=user, password=password
)
assert get_query_user_info(n1, id_) == [user, user]
assert get_query_user_info(backward, id_) == [user, user]
@@ -441,13 +429,7 @@ def test_user_secure_cluster_with_backward(user, password):
@users
def test_user_secure_cluster_from_backward(user, password):
id_ = "from-backward-query-dist_secure-" + user
- query_with_id(
- backward,
- id_,
- "SELECT * FROM dist_secure_backward",
- user=user,
- password=password,
- )
+ backward.query(f"SELECT *, '{id_}' FROM dist_secure", user=user, password=password)
assert get_query_user_info(n1, id_) == [user, user]
assert get_query_user_info(backward, id_) == [user, user]
From 368f6d7b1390b98ccac2610eb88a4237abcab439 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 14 Aug 2023 20:46:41 +0200
Subject: [PATCH 18/30] fix
---
src/Functions/transform.cpp | 4 ++++
tests/queries/0_stateless/02443_detach_attach_partition.sh | 2 +-
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/src/Functions/transform.cpp b/src/Functions/transform.cpp
index 16326dd5a44..62ab51abd76 100644
--- a/src/Functions/transform.cpp
+++ b/src/Functions/transform.cpp
@@ -776,8 +776,12 @@ namespace
UInt64 key = 0;
auto * dst = reinterpret_cast<char *>(&key);
const auto ref = cache.from_column->getDataAt(i);
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunreachable-code"
if constexpr (std::endian::native == std::endian::big)
dst += sizeof(key) - ref.size;
+#pragma clang diagnostic pop
memcpy(dst, ref.data, ref.size);
table[key] = i;
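The pragmas above exist because on a little-endian target the `if constexpr` branch is provably unreachable and clang warns about it. A self-contained C++20 sketch of the packing logic itself:

// Pack up to 8 bytes into a uint64_t key. On little-endian targets the
// value's significant bytes start at offset 0 of the integer; on
// big-endian targets they occupy the tail, so the destination pointer
// is advanced before the copy.
#include <bit>
#include <cstdint>
#include <cstring>

uint64_t pack_key(const char * data, size_t size) // requires size <= 8
{
    uint64_t key = 0;
    auto * dst = reinterpret_cast<char *>(&key);
    if constexpr (std::endian::native == std::endian::big)
        dst += sizeof(key) - size;
    std::memcpy(dst, data, size);
    return key;
}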
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
index 36bc3309924..13ea966dbf5 100755
--- a/tests/queries/0_stateless/02443_detach_attach_partition.sh
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -55,7 +55,7 @@ wait
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
-$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'"
+while ! $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; do sleep 0.5; done
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'"
From bf40767f10e16d9fd6c5b29a8af1ae81c93694fc Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 15 Aug 2023 14:27:49 +0200
Subject: [PATCH 19/30] fix another race
---
src/Storages/MergeTree/MergeTreeData.cpp | 25 +++++++++++++------
src/Storages/StorageMergeTree.cpp | 12 ++++++---
src/Storages/StorageReplicatedMergeTree.cpp | 4 ++-
..._replace_partition_from_table_zookeeper.sh | 20 ---------------
.../00933_ttl_replicated_zookeeper.sh | 16 ------------
...034_move_partition_from_table_zookeeper.sh | 17 -------------
.../02443_detach_attach_partition.sh | 2 +-
.../0_stateless/02482_load_parts_refcounts.sh | 17 -------------
tests/queries/shell_config.sh | 20 +++++++++++++++
9 files changed, 51 insertions(+), 82 deletions(-)
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index 561eef28c78..4026be31286 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -5832,18 +5832,21 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const
{
const String source_dir = "detached/";
- std::map<String, DiskPtr> name_to_disk;
-
/// Let's compose a list of parts that should be added.
if (attach_part)
{
const String part_id = partition->as<ASTLiteral &>().value.safeGet<String>();
validateDetachedPartName(part_id);
- auto disk = getDiskForDetachedPart(part_id);
- renamed_parts.addPart(part_id, "attaching_" + part_id, disk);
-
- if (MergeTreePartInfo::tryParsePartName(part_id, format_version))
- name_to_disk[part_id] = getDiskForDetachedPart(part_id);
+ if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_id))
+ {
+ LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, "
+ "probably it's being detached right now", part_id);
+ }
+ else
+ {
+ auto disk = getDiskForDetachedPart(part_id);
+ renamed_parts.addPart(part_id, "attaching_" + part_id, disk);
+ }
}
else
{
@@ -5860,6 +5863,12 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const
for (const auto & part_info : detached_parts)
{
+ if (temporary_parts.contains(String(DETACHED_DIR_NAME) + "/" + part_info.dir_name))
+ {
+ LOG_WARNING(log, "Will not try to attach part {} because its directory is temporary, "
+ "probably it's being detached right now", part_info.dir_name);
+ continue;
+ }
LOG_DEBUG(log, "Found part {}", part_info.dir_name);
active_parts.add(part_info.dir_name);
}
@@ -5870,6 +5879,8 @@ MergeTreeData::MutableDataPartsVector MergeTreeData::tryLoadPartsToAttach(const
for (const auto & part_info : detached_parts)
{
const String containing_part = active_parts.getContainingPart(part_info.dir_name);
+ if (containing_part.empty())
+ continue;
LOG_DEBUG(log, "Found containing part {} for part {}", containing_part, part_info.dir_name);
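The `temporary_parts.contains(...)` guard above assumes a registry of directory names that are claimed while a detach is still in flight, so a concurrent ATTACH skips them instead of racing on a half-moved directory. A hedged sketch of such a holder-based registry (ClickHouse's actual TemporaryParts differs in detail):

// RAII-style registry: a name is "temporary" for as long as its holder
// lives; readers check membership under the same mutex.
#include <mutex>
#include <set>
#include <string>

class TemporaryParts
{
    std::mutex m;
    std::set<std::string> parts;

public:
    bool contains(const std::string & name)
    {
        std::lock_guard lock(m);
        return parts.contains(name);
    }

    class Holder
    {
        friend class TemporaryParts;
        TemporaryParts * owner = nullptr;
        std::string name;
        Holder(TemporaryParts * owner_, std::string name_) : owner(owner_), name(std::move(name_)) {}
    public:
        Holder(Holder && other) noexcept : owner(other.owner), name(std::move(other.name)) { other.owner = nullptr; }
        ~Holder()
        {
            if (!owner)
                return;
            std::lock_guard lock(owner->m);
            owner->parts.erase(name); // released once the detach finishes
        }
    };

    Holder hold(std::string name)
    {
        std::lock_guard lock(m);
        parts.insert(name);
        return Holder(this, std::move(name));
    }
};

The detach path would call hold("detached/" + part_dir) before cloning, matching the getTemporaryPartDirectoryHolder calls in the hunks above.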
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index 9506d6f1075..03bb1b554eb 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -1816,7 +1816,9 @@ void StorageMergeTree::dropPart(const String & part_name, bool detach, ContextPt
if (detach)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
- LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
+ String part_dir = part->getDataPartStorage().getPartDirectory();
+ LOG_INFO(log, "Detaching {}", part_dir);
+ auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir);
part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
@@ -1901,7 +1903,9 @@ void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, Cont
for (const auto & part : parts)
{
auto metadata_snapshot = getInMemoryMetadataPtr();
- LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
+ String part_dir = part->getDataPartStorage().getPartDirectory();
+ LOG_INFO(log, "Detaching {}", part_dir);
+ auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir);
part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
@@ -1943,7 +1947,9 @@ void StorageMergeTree::dropPartsImpl(DataPartsVector && parts_to_remove, bool de
/// NOTE: no race with background cleanup until we hold pointers to parts
for (const auto & part : parts_to_remove)
{
- LOG_INFO(log, "Detaching {}", part->getDataPartStorage().getPartDirectory());
+ String part_dir = part->getDataPartStorage().getPartDirectory();
+ LOG_INFO(log, "Detaching {}", part_dir);
+ auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir);
part->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp
index bc2cff80c59..6b4ee3334c7 100644
--- a/src/Storages/StorageReplicatedMergeTree.cpp
+++ b/src/Storages/StorageReplicatedMergeTree.cpp
@@ -2097,7 +2097,9 @@ void StorageReplicatedMergeTree::executeDropRange(const LogEntry & entry)
{
if (auto part_to_detach = part.getPartIfItWasActive())
{
- LOG_INFO(log, "Detaching {}", part_to_detach->getDataPartStorage().getPartDirectory());
+ String part_dir = part_to_detach->getDataPartStorage().getPartDirectory();
+ LOG_INFO(log, "Detaching {}", part_dir);
+ auto holder = getTemporaryPartDirectoryHolder(String(DETACHED_DIR_NAME) + "/" + part_dir);
part_to_detach->makeCloneInDetached("", metadata_snapshot, /*disk_transaction*/ {});
}
}
diff --git a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
index c32b6d04a42..334025cba28 100755
--- a/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
+++ b/tests/queries/0_stateless/00626_replace_partition_from_table_zookeeper.sh
@@ -11,26 +11,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-function query_with_retry
-{
- local query="$1" && shift
-
- local retry=0
- until [ $retry -ge 5 ]
- do
- local result
- result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)"
- if [ "$?" == 0 ]; then
- echo -n "$result"
- return
- else
- retry=$((retry + 1))
- sleep 3
- fi
- done
- echo "Query '$query' failed with '$result'"
-}
-
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r1;"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst_r2;"
diff --git a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh
index 22d9e0690b3..d06037fb836 100755
--- a/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh
+++ b/tests/queries/0_stateless/00933_ttl_replicated_zookeeper.sh
@@ -5,22 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-function query_with_retry
-{
- retry=0
- until [ $retry -ge 5 ]
- do
- result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1)
- if [ "$?" == 0 ]; then
- echo -n "$result"
- return
- else
- retry=$(($retry + 1))
- sleep 3
- fi
- done
- echo "Query '$1' failed with '$result'"
-}
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl1"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS ttl_repl2"
diff --git a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh
index e0a84323dbd..39c5742e7a7 100755
--- a/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh
+++ b/tests/queries/0_stateless/01034_move_partition_from_table_zookeeper.sh
@@ -7,23 +7,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-function query_with_retry
-{
- retry=0
- until [ $retry -ge 5 ]
- do
- result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1)
- if [ "$?" == 0 ]; then
- echo -n "$result"
- return
- else
- retry=$(($retry + 1))
- sleep 3
- fi
- done
- echo "Query '$1' failed with '$result'"
-}
-
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS src;"
$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS dst;"
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
index 13ea966dbf5..5a3f1b64065 100755
--- a/tests/queries/0_stateless/02443_detach_attach_partition.sh
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -55,7 +55,7 @@ wait
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table0"
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
-while ! $CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null; do sleep 0.5; done
+query_with_retry "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" 2>/dev/null;
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'" 2>/dev/null
$CLICKHOUSE_CLIENT -q "SYSTEM SYNC REPLICA alter_table1"
$CLICKHOUSE_CLIENT -q "ALTER TABLE alter_table1 ATTACH PARTITION ID 'all'"
diff --git a/tests/queries/0_stateless/02482_load_parts_refcounts.sh b/tests/queries/0_stateless/02482_load_parts_refcounts.sh
index 4d588dabeb9..fe3cee1359e 100755
--- a/tests/queries/0_stateless/02482_load_parts_refcounts.sh
+++ b/tests/queries/0_stateless/02482_load_parts_refcounts.sh
@@ -5,23 +5,6 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
-function query_with_retry
-{
- retry=0
- until [ $retry -ge 5 ]
- do
- result=$($CLICKHOUSE_CLIENT $2 --query="$1" 2>&1)
- if [ "$?" == 0 ]; then
- echo -n "$result"
- return
- else
- retry=$(($retry + 1))
- sleep 3
- fi
- done
- echo "Query '$1' failed with '$result'"
-}
-
$CLICKHOUSE_CLIENT -n --query "
DROP TABLE IF EXISTS load_parts_refcounts SYNC;
diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh
index ef70c82aefc..12bc0002191 100644
--- a/tests/queries/shell_config.sh
+++ b/tests/queries/shell_config.sh
@@ -155,3 +155,23 @@ function random_str()
local n=$1 && shift
tr -cd '[:lower:]' < /dev/urandom | head -c"$n"
}
+
+function query_with_retry
+{
+ local query="$1" && shift
+
+ local retry=0
+ until [ $retry -ge 5 ]
+ do
+ local result
+ result="$($CLICKHOUSE_CLIENT "$@" --query="$query" 2>&1)"
+ if [ "$?" == 0 ]; then
+ echo -n "$result"
+ return
+ else
+ retry=$((retry + 1))
+ sleep 3
+ fi
+ done
+ echo "Query '$query' failed with '$result'"
+}
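A quick usage sketch for the now-shared helper (illustrative only, not part of the patch): the first argument is the query text, and any remaining arguments are forwarded to clickhouse-client before --query, so per-call flags follow the query:

    # retry a flaky DDL up to 5 times, forwarding an extra client flag
    query_with_retry "ALTER TABLE alter_table0 ATTACH PARTITION ID 'all'" --receive_timeout=10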
From 2aa211acc2af778728f87a0cf36be8efb68243b3 Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:26:39 +0000
Subject: [PATCH 20/30] Added integration test for session log
---
.../test.py | 5 +-
tests/integration/test_session_log/.gitignore | 1 +
.../integration/test_session_log/__init__.py | 0
.../test_session_log/configs/log.xml | 9 +
.../test_session_log/configs/ports.xml | 9 +
.../test_session_log/configs/session_log.xml | 9 +
.../test_session_log/configs/users.xml | 23 ++
.../protos/clickhouse_grpc.proto | 1 +
tests/integration/test_session_log/test.py | 292 ++++++++++++++++++
9 files changed, 345 insertions(+), 4 deletions(-)
create mode 100644 tests/integration/test_session_log/.gitignore
create mode 100644 tests/integration/test_session_log/__init__.py
create mode 100644 tests/integration/test_session_log/configs/log.xml
create mode 100644 tests/integration/test_session_log/configs/ports.xml
create mode 100644 tests/integration/test_session_log/configs/session_log.xml
create mode 100644 tests/integration/test_session_log/configs/users.xml
create mode 120000 tests/integration/test_session_log/protos/clickhouse_grpc.proto
create mode 100644 tests/integration/test_session_log/test.py
diff --git a/tests/integration/test_profile_max_sessions_for_user/test.py b/tests/integration/test_profile_max_sessions_for_user/test.py
index c5c33b1cddb..5eaef09bf6d 100755
--- a/tests/integration/test_profile_max_sessions_for_user/test.py
+++ b/tests/integration/test_profile_max_sessions_for_user/test.py
@@ -28,10 +28,7 @@ proto_dir = os.path.join(SCRIPT_DIR, "./protos")
gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
os.makedirs(gen_dir, exist_ok=True)
run_and_check(
- "python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} \
- {proto_dir}/clickhouse_grpc.proto".format(
- proto_dir=proto_dir, gen_dir=gen_dir
- ),
+ f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
shell=True,
)
diff --git a/tests/integration/test_session_log/.gitignore b/tests/integration/test_session_log/.gitignore
new file mode 100644
index 00000000000..edf565ec632
--- /dev/null
+++ b/tests/integration/test_session_log/.gitignore
@@ -0,0 +1 @@
+_gen
diff --git a/tests/integration/test_session_log/__init__.py b/tests/integration/test_session_log/__init__.py
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/tests/integration/test_session_log/configs/log.xml b/tests/integration/test_session_log/configs/log.xml
new file mode 100644
index 00000000000..7a079b81e69
--- /dev/null
+++ b/tests/integration/test_session_log/configs/log.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <logger>
+        <level>trace</level>
+        <log>/var/log/clickhouse-server/clickhouse-server.log</log>
+        <errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>
+        <size>1000M</size>
+        <count>10</count>
+    </logger>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_session_log/configs/ports.xml b/tests/integration/test_session_log/configs/ports.xml
new file mode 100644
index 00000000000..fbaefc16b3a
--- /dev/null
+++ b/tests/integration/test_session_log/configs/ports.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <postgresql_port>5433</postgresql_port>
+    <mysql_port>9001</mysql_port>
+    <grpc_port>9100</grpc_port>
+    <grpc replace="replace">
+        <!-- Enable if you want very detailed logs -->
+        <verbose_logs>false</verbose_logs>
+    </grpc>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_session_log/configs/session_log.xml b/tests/integration/test_session_log/configs/session_log.xml
new file mode 100644
index 00000000000..a0e4e3e2216
--- /dev/null
+++ b/tests/integration/test_session_log/configs/session_log.xml
@@ -0,0 +1,9 @@
+<clickhouse>
+    <session_log>
+        <database>system</database>
+        <table>session_log</table>
+
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </session_log>
+</clickhouse>
diff --git a/tests/integration/test_session_log/configs/users.xml b/tests/integration/test_session_log/configs/users.xml
new file mode 100644
index 00000000000..0416dfadc8a
--- /dev/null
+++ b/tests/integration/test_session_log/configs/users.xml
@@ -0,0 +1,23 @@
+<clickhouse>
+    <profiles>
+        <default>
+            <function_sleep_max_microseconds_per_block>0</function_sleep_max_microseconds_per_block>
+        </default>
+    </profiles>
+    <users>
+        <default>
+        </default>
+        <grpc_user>
+            <password>pass</password>
+        </grpc_user>
+        <mysql_user>
+            <password>pass</password>
+        </mysql_user>
+        <postgres_user>
+            <password>pass</password>
+        </postgres_user>
+        <parallel_user>
+            <password>pass</password>
+        </parallel_user>
+    </users>
+</clickhouse>
\ No newline at end of file
diff --git a/tests/integration/test_session_log/protos/clickhouse_grpc.proto b/tests/integration/test_session_log/protos/clickhouse_grpc.proto
new file mode 120000
index 00000000000..25d15f11e3b
--- /dev/null
+++ b/tests/integration/test_session_log/protos/clickhouse_grpc.proto
@@ -0,0 +1 @@
+../../../../src/Server/grpc_protos/clickhouse_grpc.proto
\ No newline at end of file
diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py
new file mode 100644
index 00000000000..b860cde1df6
--- /dev/null
+++ b/tests/integration/test_session_log/test.py
@@ -0,0 +1,292 @@
+import os
+
+import grpc
+import pymysql.connections
+import psycopg2 as py_psql
+import pytest
+import random
+import logging
+import sys
+import threading
+
+from helpers.cluster import ClickHouseCluster, run_and_check
+
+POSTGRES_SERVER_PORT = 5433
+MYSQL_SERVER_PORT = 9001
+GRPC_PORT = 9100
+SESSION_LOG_MATCHING_FIELDS = "auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
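+# These fields are shared by a LoginSuccess record and its Logout record, so an
+# INTERSECT over them counts matched login/logout pairs.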
+
+SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
+DEFAULT_ENCODING = "utf-8"
+
+# Use grpcio-tools to generate *pb2.py files from *.proto.
+proto_dir = os.path.join(SCRIPT_DIR, "./protos")
+gen_dir = os.path.join(SCRIPT_DIR, "./_gen")
+os.makedirs(gen_dir, exist_ok=True)
+run_and_check(
+ f"python3 -m grpc_tools.protoc -I{proto_dir} --python_out={gen_dir} --grpc_python_out={gen_dir} {proto_dir}/clickhouse_grpc.proto",
+ shell=True,
+)
+
+sys.path.append(gen_dir)
+
+import clickhouse_grpc_pb2
+import clickhouse_grpc_pb2_grpc
+
+cluster = ClickHouseCluster(__file__)
+instance = cluster.add_instance(
+ "node",
+ main_configs=[
+ "configs/ports.xml",
+ "configs/log.xml",
+ "configs/session_log.xml",
+ ],
+ user_configs=["configs/users.xml"],
+    # A bug in TSAN reproduces in this test: https://github.com/grpc/grpc/issues/29550#issuecomment-1188085387
+ env_variables={
+ "TSAN_OPTIONS": "report_atomic_races=0 " + os.getenv("TSAN_OPTIONS", default="")
+ },
+ with_postgres=True,
+)
+
+
+def grpc_get_url():
+ return f"{instance.ip_address}:{GRPC_PORT}"
+
+
+def grpc_create_insecure_channel():
+ channel = grpc.insecure_channel(grpc_get_url())
+ grpc.channel_ready_future(channel).result(timeout=2)
+ return channel
+
+
+session_id_counter = 0
+
+
+def next_session_id():
+ global session_id_counter
+ session_id = session_id_counter
+ session_id_counter += 1
+ return str(session_id)
+
+
+def grpc_query(query, user_, pass_, raise_exception):
+ try:
+ query_info = clickhouse_grpc_pb2.QueryInfo(
+ query=query,
+ session_id=next_session_id(),
+ user_name=user_,
+ password=pass_,
+ )
+ channel = grpc_create_insecure_channel()
+ stub = clickhouse_grpc_pb2_grpc.ClickHouseStub(channel)
+ result = stub.ExecuteQuery(query_info)
+ if result and result.HasField("exception"):
+ raise Exception(result.exception.display_text)
+
+ return result.output.decode(DEFAULT_ENCODING)
+ except Exception:
+ assert raise_exception
+
+
+def postgres_query(query, user_, pass_, raise_exception):
+ try:
+ connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}"
+ cluster.exec_in_container(cluster.postgres_id,
+ [
+ "/usr/bin/psql",
+ connection_string,
+ "--no-align",
+ "--field-separator=' '",
+ "-c",
+ query
+ ],
+ shell=True
+ )
+ except Exception:
+ assert raise_exception
+
+
+def mysql_query(query, user_, pass_, raise_exception):
+ try:
+ client = pymysql.connections.Connection(
+ host=instance.ip_address,
+ user=user_,
+ password=pass_,
+ database="default",
+ port=MYSQL_SERVER_PORT,
+ )
+ cursor = client.cursor(pymysql.cursors.DictCursor)
+ if raise_exception:
+ with pytest.raises(Exception):
+ cursor.execute(query)
+ else:
+ cursor.execute(query)
+ cursor.fetchall()
+ except Exception:
+ assert raise_exception
+
+@pytest.fixture(scope="module")
+def started_cluster():
+ try:
+ cluster.start()
+ yield cluster
+ finally:
+ cluster.shutdown()
+
+
+def test_grpc_session(started_cluster):
+ grpc_query("SELECT 1", "grpc_user", "pass", False)
+ grpc_query("SELECT 2", "grpc_user", "wrong_pass", True)
+ grpc_query("SELECT 3", "wrong_grpc_user", "pass", True)
+
+ instance.query("SYSTEM FLUSH LOGS")
+ login_success_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginSuccess'"
+ )
+ assert login_success_records == "grpc_user\t1\t1\n"
+ logout_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'Logout'"
+ )
+ assert logout_records == "grpc_user\t1\t1\n"
+ login_failure_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='grpc_user' AND type = 'LoginFailure'"
+ )
+ assert login_failure_records == "grpc_user\t1\t1\n"
+ logins_and_logouts = instance.query(
+ f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'grpc_user' AND type = 'Logout')"
+ )
+ assert logins_and_logouts == "1\n"
+
+
+def test_mysql_session(started_cluster):
+ mysql_query("SELECT 1", "mysql_user", "pass", False)
+ mysql_query("SELECT 2", "mysql_user", "wrong_pass", True)
+ mysql_query("SELECT 3", "wrong_mysql_user", "pass", True)
+
+ instance.query("SYSTEM FLUSH LOGS")
+ login_success_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginSuccess'"
+ )
+ assert login_success_records == "mysql_user\t1\t1\n"
+ logout_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'Logout'"
+ )
+ assert logout_records == "mysql_user\t1\t1\n"
+ login_failure_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='mysql_user' AND type = 'LoginFailure'"
+ )
+ assert login_failure_records == "mysql_user\t1\t1\n"
+ logins_and_logouts = instance.query(
+ f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'mysql_user' AND type = 'Logout')"
+ )
+ assert logins_and_logouts == "1\n"
+
+
+def test_postgres_session(started_cluster):
+ postgres_query("SELECT 1", "postgres_user", "pass", False)
+ postgres_query("SELECT 2", "postgres_user", "wrong_pass", True)
+ postgres_query("SELECT 3", "wrong_postgres_user", "pass", True)
+
+ instance.query("SYSTEM FLUSH LOGS")
+ login_success_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginSuccess'"
+ )
+ assert login_success_records == "postgres_user\t1\t1\n"
+ logout_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'Logout'"
+ )
+ assert logout_records == "postgres_user\t1\t1\n"
+ login_failure_records = instance.query(
+ "SELECT user, client_port <> 0, client_address <> toIPv6('::') FROM system.session_log WHERE user='postgres_user' AND type = 'LoginFailure'"
+ )
+ assert login_failure_records == "postgres_user\t1\t1\n"
+ logins_and_logouts = instance.query(
+ f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'postgres_user' AND type = 'Logout')"
+ )
+ assert logins_and_logouts == "1\n"
+
+
+def test_parallel_sessions(started_cluster):
+ thread_list = []
+ for _ in range(10):
+        # The sleep time does not significantly matter here;
+        # the test should pass even without sleeping.
+ for function in [postgres_query, grpc_query, mysql_query]:
+ thread = threading.Thread(
+ target=function,
+ args=(
+ f"SELECT sleep({random.uniform(0.03, 0.04)})",
+ "parallel_user",
+ "pass",
+ False,
+ ),
+ )
+ thread.start()
+ thread_list.append(thread)
+ thread = threading.Thread(
+ target=function,
+ args=(
+ f"SELECT sleep({random.uniform(0.03, 0.04)})",
+ "parallel_user",
+ "wrong_pass",
+ True,
+ ),
+ )
+ thread.start()
+ thread_list.append(thread)
+ thread = threading.Thread(
+ target=function,
+ args=(
+ f"SELECT sleep({random.uniform(0.03, 0.04)})",
+ "wrong_parallel_user",
+ "pass",
+ True,
+ ),
+ )
+ thread.start()
+ thread_list.append(thread)
+
+ for thread in thread_list:
+ thread.join()
+
+ instance.query("SYSTEM FLUSH LOGS")
+    total_sessions = instance.query(
+        f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user'"
+    )
+    assert total_sessions == "90\n"
+
+ port_0_sessions = instance.query(
+ f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_port = 0"
+ )
+ assert port_0_sessions == "0\n"
+
+ address_0_sessions = instance.query(
+ f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND client_address = toIPv6('::')"
+ )
+ assert address_0_sessions == "0\n"
+
+ grpc_sessions = instance.query(
+ f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'gRPC'"
+ )
+ assert grpc_sessions == "30\n"
+
+ mysql_sessions = instance.query(
+ f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'MySQL'"
+ )
+ assert mysql_sessions == "30\n"
+
+ postgres_sessions = instance.query(
+ f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND interface = 'PostgreSQL'"
+ )
+ assert postgres_sessions == "30\n"
+
+ logins_and_logouts = instance.query(
+ f"SELECT COUNT(*) FROM (SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginSuccess' INTERSECT SELECT {SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = 'parallel_user' AND type = 'Logout')"
+ )
+ assert logins_and_logouts == "30\n"
+
+    login_failure_sessions = instance.query(
+        f"SELECT COUNT(*) FROM system.session_log WHERE user = 'parallel_user' AND type = 'LoginFailure'"
+    )
+    assert login_failure_sessions == "30\n"
From 0e1728801eccb11a9cadf181fc3f555a4e39e125 Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:31:53 +0000
Subject: [PATCH 21/30] black run
---
tests/integration/test_session_log/test.py | 28 +++++++++++-----------
1 file changed, 14 insertions(+), 14 deletions(-)
diff --git a/tests/integration/test_session_log/test.py b/tests/integration/test_session_log/test.py
index b860cde1df6..bb7cafa4ee6 100644
--- a/tests/integration/test_session_log/test.py
+++ b/tests/integration/test_session_log/test.py
@@ -2,10 +2,8 @@ import os
import grpc
import pymysql.connections
-import psycopg2 as py_psql
import pytest
import random
-import logging
import sys
import threading
@@ -92,19 +90,20 @@ def grpc_query(query, user_, pass_, raise_exception):
def postgres_query(query, user_, pass_, raise_exception):
try:
connection_string = f"host={instance.hostname} port={POSTGRES_SERVER_PORT} dbname=default user={user_} password={pass_}"
- cluster.exec_in_container(cluster.postgres_id,
- [
- "/usr/bin/psql",
- connection_string,
- "--no-align",
- "--field-separator=' '",
- "-c",
- query
- ],
- shell=True
- )
+ cluster.exec_in_container(
+ cluster.postgres_id,
+ [
+ "/usr/bin/psql",
+ connection_string,
+ "--no-align",
+ "--field-separator=' '",
+ "-c",
+ query,
+ ],
+ shell=True,
+ )
except Exception:
- assert raise_exception
+ assert raise_exception
def mysql_query(query, user_, pass_, raise_exception):
@@ -126,6 +125,7 @@ def mysql_query(query, user_, pass_, raise_exception):
except Exception:
assert raise_exception
+
@pytest.fixture(scope="module")
def started_cluster():
try:
From 0fd28bf3309a65e5c0204c814bef0a5f13dada9d Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:39:34 +0000
Subject: [PATCH 22/30] added remote session log test
---
.../02834_remote_session_log.reference | 13 +++++
.../0_stateless/02834_remote_session_log.sh | 56 +++++++++++++++++++
2 files changed, 69 insertions(+)
create mode 100644 tests/queries/0_stateless/02834_remote_session_log.reference
create mode 100755 tests/queries/0_stateless/02834_remote_session_log.sh
diff --git a/tests/queries/0_stateless/02834_remote_session_log.reference b/tests/queries/0_stateless/02834_remote_session_log.reference
new file mode 100644
index 00000000000..e2680982ab0
--- /dev/null
+++ b/tests/queries/0_stateless/02834_remote_session_log.reference
@@ -0,0 +1,13 @@
+0
+0
+0
+0
+client_port 0 connections:
+0
+client_address '::' connections:
+0
+login failures:
+0
+TCP Login and logout count is equal
+HTTP Login and logout count is equal
+MySQL Login and logout count is equal
diff --git a/tests/queries/0_stateless/02834_remote_session_log.sh b/tests/queries/0_stateless/02834_remote_session_log.sh
new file mode 100755
index 00000000000..3bedfb6c9ee
--- /dev/null
+++ b/tests/queries/0_stateless/02834_remote_session_log.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+readonly TEST_USER="02834_USER_${PID}"
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"
+
+${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
+ -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')"
+
+${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
+ -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')"
+
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "client_port 0 connections:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0"
+
+echo "client_address '::' connections:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')"
+
+echo "login failures:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'"
+
+# The remote(...) function sometimes reuses cached sessions for query execution.
+# This makes the LoginSuccess/Logout counts unstable, but logins and logouts must always match.
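+# The INTERSECT query below counts sessions whose matching fields appear in both
+# a LoginSuccess and a Logout record, instead of comparing raw row counts.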
+
+for interface in 'TCP' 'HTTP' 'MySQL'
+do
+    LOGIN_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'")
+    CORRESPONDING_LOGOUT_RECORDS_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')")
+
+ if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then
+ echo "${interface} Login and logout count is equal"
+ else
+        TOTAL_LOGOUT_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'")
+ echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}"
+ fi
+done
+
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
From cbf9f88b90f69a08bd51377338d2a679e629cd82 Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:42:42 +0000
Subject: [PATCH 23/30] Added concurrent sessions session_log tests
---
.../02833_concurrrent_sessions.reference | 34 +++++
.../0_stateless/02833_concurrrent_sessions.sh | 138 ++++++++++++++++++
2 files changed, 172 insertions(+)
create mode 100644 tests/queries/0_stateless/02833_concurrrent_sessions.reference
create mode 100755 tests/queries/0_stateless/02833_concurrrent_sessions.sh
diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrrent_sessions.reference
new file mode 100644
index 00000000000..bfe507e8eac
--- /dev/null
+++ b/tests/queries/0_stateless/02833_concurrrent_sessions.reference
@@ -0,0 +1,34 @@
+sessions:
+150
+port_0_sessions:
+0
+address_0_sessions:
+0
+tcp_sessions
+60
+http_sessions
+30
+http_with_session_id_sessions
+30
+my_sql_sessions
+30
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrrent_sessions.sh
new file mode 100755
index 00000000000..26b48462a76
--- /dev/null
+++ b/tests/queries/0_stateless/02833_concurrrent_sessions.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, no-debug
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+
+# Each user uses a separate thread.
+readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users
+readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" )
+readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" )
+readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}")
+readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" )
+
+readonly TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
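+# The sed pipelines above turn each bash array into a quoted, comma-separated
+# SQL list, e.g. "u_1 u_2" -> 'u_1','u_2', for use in "user IN (...)" filters.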
+
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+for user in "${ALL_USERS[@]}"; do
+ ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'"
+ ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}"
+ ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}";
+done
+
+# All _session functions execute in separate threads.
+# On each iteration they create a session with a successful login and logout,
+# sleeping a small, random amount of time to make concurrency more intense,
+# and then try to log in with an invalid password.
+function tcp_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass"
+ # login failure
+ ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid'
+ done
+}
+
+function http_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})"
+
+ # login failure
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4"
+ done
+}
+
+function http_with_session_id_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+        ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep(0.01${RANDOM})"
+
+ # login failure
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6"
+ done
+}
+
+function mysql_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')"
+
+ # login failure
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)"
+ done
+}
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
+
+export -f tcp_session;
+export -f http_session;
+export -f http_with_session_id_session;
+export -f mysql_session;
+
+for user in "${TCP_USERS[@]}"; do
+ timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${HTTP_USERS[@]}"; do
+ timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do
+ timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${MYSQL_USERS[@]}"; do
+ timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 &
+done
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
+
+echo "port_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0"
+
+echo "address_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')"
+
+echo "tcp_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'"
+echo "http_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
+echo "http_with_session_id_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
+echo "my_sql_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'"
+
+for user in "${ALL_USERS[@]}"; do
+ ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}"
+ echo "Corresponding LoginSuccess/Logout"
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')"
+ echo "LoginFailure"
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'"
+ done
From 4b5874b512802022e4c5581e17c9ed86c505129e Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:45:06 +0000
Subject: [PATCH 24/30] added drop user during session test
---
.../02835_drop_user_during_session.reference | 8 ++
.../02835_drop_user_during_session.sh | 114 ++++++++++++++++++
2 files changed, 122 insertions(+)
create mode 100644 tests/queries/0_stateless/02835_drop_user_during_session.reference
create mode 100755 tests/queries/0_stateless/02835_drop_user_during_session.sh
diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.reference b/tests/queries/0_stateless/02835_drop_user_during_session.reference
new file mode 100644
index 00000000000..7252faab8c6
--- /dev/null
+++ b/tests/queries/0_stateless/02835_drop_user_during_session.reference
@@ -0,0 +1,8 @@
+port_0_sessions:
+0
+address_0_sessions:
+0
+Corresponding LoginSuccess/Logout
+9
+LoginFailure
+0
diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh
new file mode 100755
index 00000000000..347ebd22f96
--- /dev/null
+++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Tags: no-debug
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+
+readonly TEST_USER="02835_USER_${PID}"
+readonly TEST_ROLE="02835_ROLE_${PID}"
+readonly TEST_PROFILE="02835_PROFILE_${PID}"
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+function tcp_session()
+{
+ local user=$1
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.numbers" --user="${user}"
+}
+
+function http_session()
+{
+ local user=$1
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers"
+}
+
+function http_with_session_id_session()
+{
+ local user=$1
+    ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass&session_id=${user}" -d "SELECT COUNT(*) FROM system.numbers"
+}
+
+# Busy-waits until the specified number of queries ($2) from user $1 are running simultaneously.
+function wait_for_queries_start()
+{
+ local user=$1
+ local queries_count=$2
+    # Wait for up to 10 seconds (100 tries, 0.1s apart).
+    counter=0 retries=100
+ while [[ $counter -lt $retries ]]; do
+ result=$($CLICKHOUSE_CLIENT --query "SELECT COUNT(*) FROM system.processes WHERE user = '${user}'")
+ if [[ $result == "${queries_count}" ]]; then
+ break;
+ fi
+ sleep 0.1
+ ((++counter))
+ done
+}
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"
+
+# DROP USER CASE
+${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+export -f tcp_session;
+export -f http_session;
+export -f http_with_session_id_session;
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+# DROP ROLE CASE
+${CLICKHOUSE_CLIENT} -q "CREATE ROLE IF NOT EXISTS ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} DEFAULT ROLE ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP ROLE ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+# DROP PROFILE CASE
+${CLICKHOUSE_CLIENT} -q "CREATE SETTINGS PROFILE IF NOT EXISTS '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} SETTINGS PROFILE '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP SETTINGS PROFILE '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "port_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_port = 0"
+echo "address_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_address = toIPv6('::')"
+echo "Corresponding LoginSuccess/Logout"
+${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout')"
+echo "LoginFailure"
+${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginFailure'"
From 387ce81895d0d9a6a8e994bf24801b00dc3af049 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Wed, 16 Aug 2023 00:46:53 +0200
Subject: [PATCH 25/30] Clean all containers properly
---
tests/ci/install_check.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py
index 010b0dab408..700550bf077 100644
--- a/tests/ci/install_check.py
+++ b/tests/ci/install_check.py
@@ -191,6 +191,9 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults:
retcode = process.wait()
if retcode == 0:
status = OK
+ subprocess.check_call(
+ f"docker kill -s 9 {container_id}", shell=True
+ )
break
status = FAIL
@@ -198,8 +201,8 @@ def test_install(image: DockerImage, tests: Dict[str, str]) -> TestResults:
archive_path = TEMP_PATH / f"{container_name}-{retry}.tar.gz"
compress_fast(LOGS_PATH, archive_path)
logs.append(archive_path)
+ subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True)
- subprocess.check_call(f"docker kill -s 9 {container_id}", shell=True)
test_results.append(TestResult(name, status, stopwatch.duration_seconds, logs))
return test_results
From 790475385acc5b722460e5b9581f637ac6ff9b1e Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Wed, 16 Aug 2023 00:47:39 +0200
Subject: [PATCH 26/30] Improve downloading: skip dbg, do not pull images on
--no-download
---
tests/ci/install_check.py | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py
index 700550bf077..2ca947192da 100644
--- a/tests/ci/install_check.py
+++ b/tests/ci/install_check.py
@@ -279,7 +279,7 @@ def main():
sys.exit(0)
docker_images = {
- name: get_image_with_version(REPORTS_PATH, name)
+ name: get_image_with_version(REPORTS_PATH, name, args.download)
for name in (RPM_IMAGE, DEB_IMAGE)
}
prepare_test_scripts()
@@ -296,6 +296,8 @@ def main():
is_match = is_match or path.endswith(".rpm")
if args.tgz:
is_match = is_match or path.endswith(".tgz")
+ # We don't need debug packages, so let's filter them out
+ is_match = is_match and "-dbg" not in path
return is_match
download_builds_filter(
From 3cd9fa395d2d3483e9e71274076cf151ef8ff682 Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Wed, 16 Aug 2023 00:51:44 +0200
Subject: [PATCH 27/30] Add test for systemd + /etc/default/clickhouse
---
tests/ci/install_check.py | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py
index 2ca947192da..b08e94c52b4 100644
--- a/tests/ci/install_check.py
+++ b/tests/ci/install_check.py
@@ -50,8 +50,11 @@ def prepare_test_scripts():
server_test = r"""#!/bin/bash
set -e
trap "bash -ex /packages/preserve_logs.sh" ERR
+test_env='TEST_THE_DEFAULT_PARAMETER=15'
+echo "$test_env" >> /etc/default/clickhouse
systemctl start clickhouse-server
-clickhouse-client -q 'SELECT version()'"""
+clickhouse-client -q 'SELECT version()'
+grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ"""
keeper_test = r"""#!/bin/bash
set -e
trap "bash -ex /packages/preserve_logs.sh" ERR
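The grep on /proc/<pid>/environ works because environ is a NUL-separated blob, so a plain substring match still finds the injected variable. The same check can be written more readably (a sketch using the same assumed paths, outside the patch):

    # print the server's environment one variable per line, then look for the marker
    pid=$(cat /var/run/clickhouse-server/clickhouse-server.pid)
    tr '\0' '\n' < "/proc/${pid}/environ" | grep '^TEST_THE_DEFAULT_PARAMETER='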
From 651a45b04d1cc4ec0b8be5b0fbb3068b09813fce Mon Sep 17 00:00:00 2001
From: "Mikhail f. Shiryaev"
Date: Wed, 16 Aug 2023 00:57:22 +0200
Subject: [PATCH 28/30] Add tests for initd start
---
docker/test/install/deb/Dockerfile | 1 +
tests/ci/install_check.py | 12 ++++++++++++
2 files changed, 13 insertions(+)
diff --git a/docker/test/install/deb/Dockerfile b/docker/test/install/deb/Dockerfile
index 9614473c69b..e9c928b1fe7 100644
--- a/docker/test/install/deb/Dockerfile
+++ b/docker/test/install/deb/Dockerfile
@@ -12,6 +12,7 @@ ENV \
# install systemd packages
RUN apt-get update && \
apt-get install -y --no-install-recommends \
+ sudo \
systemd \
&& \
apt-get clean && \
diff --git a/tests/ci/install_check.py b/tests/ci/install_check.py
index b08e94c52b4..a5788e2af3f 100644
--- a/tests/ci/install_check.py
+++ b/tests/ci/install_check.py
@@ -54,6 +54,14 @@ test_env='TEST_THE_DEFAULT_PARAMETER=15'
echo "$test_env" >> /etc/default/clickhouse
systemctl start clickhouse-server
clickhouse-client -q 'SELECT version()'
+grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ"""
+ initd_test = r"""#!/bin/bash
+set -e
+trap "bash -ex /packages/preserve_logs.sh" ERR
+test_env='TEST_THE_DEFAULT_PARAMETER=15'
+echo "$test_env" >> /etc/default/clickhouse
+/etc/init.d/clickhouse-server start
+clickhouse-client -q 'SELECT version()'
grep "$test_env" /proc/$(cat /var/run/clickhouse-server/clickhouse-server.pid)/environ"""
keeper_test = r"""#!/bin/bash
set -e
@@ -105,6 +113,7 @@ chmod a+rw -R /tests_logs
exit 1
"""
(TEMP_PATH / "server_test.sh").write_text(server_test, encoding="utf-8")
+ (TEMP_PATH / "initd_test.sh").write_text(initd_test, encoding="utf-8")
(TEMP_PATH / "keeper_test.sh").write_text(keeper_test, encoding="utf-8")
(TEMP_PATH / "binary_test.sh").write_text(binary_test, encoding="utf-8")
(TEMP_PATH / "preserve_logs.sh").write_text(preserve_logs, encoding="utf-8")
@@ -115,6 +124,9 @@ def test_install_deb(image: DockerImage) -> TestResults:
"Install server deb": r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-{server,client,common}*deb
bash -ex /packages/server_test.sh""",
+ "Run server init.d": r"""#!/bin/bash -ex
+apt-get install /packages/clickhouse-{server,client,common}*deb
+bash -ex /packages/initd_test.sh""",
"Install keeper deb": r"""#!/bin/bash -ex
apt-get install /packages/clickhouse-keeper*deb
bash -ex /packages/keeper_test.sh""",
From 428a05a560dd9561f1729c38b963250b980c2f19 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Wed, 16 Aug 2023 14:04:14 +0300
Subject: [PATCH 29/30] Follow-up: Do not send logs to CI if the credentials
are not set (#53456)
* Follow-up
* Automatic style fix
* Update tests/ci/ast_fuzzer_check.py
* Update tests/ci/functional_test_check.py
* Update tests/ci/stress_check.py
* Automatic style fix
---------
Co-authored-by: robot-clickhouse
Co-authored-by: Alexander Tokmakov
---
tests/ci/ast_fuzzer_check.py | 2 +-
tests/ci/functional_test_check.py | 2 +-
tests/ci/stress_check.py | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py
index 1a75d02bef4..fecf207589e 100644
--- a/tests/ci/ast_fuzzer_check.py
+++ b/tests/ci/ast_fuzzer_check.py
@@ -146,7 +146,7 @@ def main():
"CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
)
- if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST":
+ if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""):
subprocess.check_call(
f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{main_log_path}'",
shell=True,
diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py
index 22210390b09..2bab330bd66 100644
--- a/tests/ci/functional_test_check.py
+++ b/tests/ci/functional_test_check.py
@@ -394,7 +394,7 @@ def main():
ci_logs_password = os.getenv(
"CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
)
- if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST":
+ if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""):
subprocess.check_call(
f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'",
shell=True,
diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py
index 9c18bcbfe40..21c3178faab 100644
--- a/tests/ci/stress_check.py
+++ b/tests/ci/stress_check.py
@@ -209,7 +209,7 @@ def run_stress_test(docker_image_name):
ci_logs_password = os.getenv(
"CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
)
- if ci_logs_host != "CLICKHOUSE_CI_LOGS_HOST":
+ if ci_logs_host not in ("CLICKHOUSE_CI_LOGS_HOST", ""):
subprocess.check_call(
f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'",
shell=True,
From d5ed014ec4e4a2a0c49ac95a193aa0c15a511f4c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 16 Aug 2023 22:56:32 +0300
Subject: [PATCH 30/30] Fix flaky test `02443_detach_attach_partition` (#53478)
* fix flaky test
* empty commit
---
.../02443_detach_attach_partition.reference | 4 ++--
.../02443_detach_attach_partition.sh | 18 ++++++++++++++----
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference
index 70930ea6d9a..77cfb77479d 100644
--- a/tests/queries/0_stateless/02443_detach_attach_partition.reference
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference
@@ -1,4 +1,4 @@
default begin inserts
default end inserts
-20 210
-20 210
+30 465
+30 465
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
index 5a3f1b64065..ae104b833e3 100755
--- a/tests/queries/0_stateless/02443_detach_attach_partition.sh
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Tags: race, zookeeper, no-parallel
+# Tags: race, zookeeper, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@@ -29,9 +29,19 @@ function thread_attach()
done
}
+insert_type=$(($RANDOM % 3))
+$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'insert_type $insert_type' FORMAT Null"
+
function insert()
{
- $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $i" 2>/dev/null
+ # Fault injection may lead to duplicates
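+    # Mode 0 pins an explicit deduplication token, mode 1 makes the inserted
+    # row deterministic so retries deduplicate by content, and mode 2 turns
+    # keeper fault injection off so retries are not needed at all.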
+ if [[ "$insert_type" -eq 0 ]]; then
+ $CLICKHOUSE_CLIENT --insert_deduplication_token=$1 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null
+ elif [[ "$insert_type" -eq 1 ]]; then
+ $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $1, $1" 2>/dev/null
+ else
+ $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null
+ fi
}
thread_detach & PID_1=$!
@@ -41,8 +51,8 @@ thread_attach & PID_4=$!
function do_inserts()
{
- for i in {1..20}; do
- while ! insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
+ for i in {1..30}; do
+ while ! insert $i; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
done
}