From 4f9920c71ccb5edeeff15686e8cb75a07287d98c Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Fri, 24 Feb 2023 17:53:17 +0800
Subject: [PATCH 01/51] optimize performance of nullable String And Number
column serializeValueIntoArena
---
.../aggregate_with_serialized_method.xml | 31 +++++++++++++++++++
1 file changed, 31 insertions(+)
create mode 100644 tests/performance/aggregate_with_serialized_method.xml
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
new file mode 100644
index 00000000000..52c7a0ddd3f
--- /dev/null
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -0,0 +1,31 @@
+<test>
+    <settings>
+        <max_insert_threads>8</max_insert_threads>
+        <allow_experimental_projection_optimization>0</allow_experimental_projection_optimization>
+        <max_threads>4</max_threads>
+    </settings>
+
+    <create_query>
+ CREATE TABLE t_nullable
+ (
+ key_string1 Nullable(String),
+ key_string2 Nullable(String),
+ key_string3 Nullable(String),
+ key_int64_1 Nullable(Int64),
+ key_int64_2 Nullable(Int64),
+ key_int64_3 Nullable(Int64),
+ key_int64_4 Nullable(Int64),
+ key_int64_5 Nullable(Int64),
+ m1 Int64,
+ m2 Int64,
+ )
+ ENGINE = MergeTree
+ ORDER BY tuple()
+    </create_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
+    <query>select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null</query>
+    <query>select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null</query>
+
+    <drop_query>drop table if exists t_nullable</drop_query>
+</test>
\ No newline at end of file
From 43e0481ac040922edfc519a5bd0cf6cd781924cd Mon Sep 17 00:00:00 2001
From: LiuNeng <1398775315@qq.com>
Date: Tue, 25 Apr 2023 11:38:50 +0800
Subject: [PATCH 02/51] optimize agg with multiple string key
---
src/Columns/ColumnNullable.cpp | 47 +++++++++++++++++++++++++++++-----
1 file changed, 40 insertions(+), 7 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 2eb2ff0bf69..08f707d0b30 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -137,18 +137,51 @@ void ColumnNullable::insertData(const char * pos, size_t length)
StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
{
const auto & arr = getNullMapData();
+ const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
+ char * pos;
+ if (const ColumnString * string_col = checkAndGetColumn<ColumnString>(getNestedColumn()))
+ {
+ auto data = string_col->getDataAt(n);
+ size_t string_size = data.size + 1;
+ auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
+ pos = arena.allocContinue(memory_size, begin);
+ memcpy(pos, &arr[n], s);
+ if (!is_null)
+ {
+ memcpy(pos + s, &string_size, sizeof(string_size));
+ memcpy(pos + s + sizeof(string_size), data.data, string_size);
+ }
+ return StringRef(pos, memory_size);
+ }
+ else if (getNestedColumn().valuesHaveFixedSize())
+ {
+ auto col = getNestedColumnPtr();
+ auto data = col->getDataAt(n);
+ auto size = col->sizeOfValueIfFixed();
+ auto memory_size = is_null ? s : s + size;
+ pos = arena.allocContinue(memory_size, begin);
+ memcpy(pos, &arr[n], s);
+ if (!is_null)
+ {
+ memcpy(pos + s, data.data, size);
+ }
+ return StringRef(pos, memory_size);
+ }
+ else
+ {
+ pos = arena.allocContinue(s, begin);
+ memcpy(pos, &arr[n], s);
- auto * pos = arena.allocContinue(s, begin);
- memcpy(pos, &arr[n], s);
+ if (arr[n])
+ return StringRef(pos, s);
- if (arr[n])
- return StringRef(pos, s);
+ auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
- auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
+ /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+ return StringRef(nested_ref.data - s, nested_ref.size + s);
+ }
- /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
- return StringRef(nested_ref.data - s, nested_ref.size + s);
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
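
The layout this patch writes for the string fast path is worth spelling out: a non-null Nullable(String) row becomes [1-byte null flag][sizeof(size_t) length][string bytes including the trailing zero] in a single allocContinue call (the `data.size + 1` accounts for the terminating zero byte ClickHouse strings carry), while a null row is just the flag byte. Because the old two-allocation path produced the same contiguous bytes, deserializeAndInsertFromArena needs no change. A minimal standalone sketch of the layout, toy code rather than the real column classes:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    // Toy re-implementation of the serialized layout; the "arena" is a byte vector.
    static std::vector<char> serializeRow(const std::string & value, bool is_null)
    {
        std::vector<char> out;
        out.push_back(is_null ? 1 : 0);                        // null-map byte
        if (!is_null)
        {
            uint64_t size = value.size() + 1;                  // payload + trailing zero
            out.resize(1 + sizeof(size) + size);
            std::memcpy(out.data() + 1, &size, sizeof(size));
            std::memcpy(out.data() + 1 + sizeof(size), value.c_str(), size);
        }
        return out;
    }

    int main()
    {
        std::printf("null row:  %zu bytes\n", serializeRow("", true).size());     // 1
        std::printf("'abc' row: %zu bytes\n", serializeRow("abc", false).size()); // 1 + 8 + 4
    }
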
From 035dbdaf220d4bfdedc88711aae799145362221d Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 26 Jun 2023 13:42:24 +0800
Subject: [PATCH 03/51] remove numbers optimization. It will decrease
performance
---
src/Columns/ColumnNullable.cpp | 14 --------------
.../aggregate_with_serialized_method.xml | 14 +++-----------
2 files changed, 3 insertions(+), 25 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 08f707d0b30..48b3740fa97 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -154,20 +154,6 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (getNestedColumn().valuesHaveFixedSize())
- {
- auto col = getNestedColumnPtr();
- auto data = col->getDataAt(n);
- auto size = col->sizeOfValueIfFixed();
- auto memory_size = is_null ? s : s + size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, data.data, size);
- }
- return StringRef(pos, memory_size);
- }
else
{
pos = arena.allocContinue(s, begin);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 52c7a0ddd3f..3c0ad4a7223 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -11,21 +11,13 @@
 key_string1 Nullable(String),
 key_string2 Nullable(String),
 key_string3 Nullable(String),
- key_int64_1 Nullable(Int64),
- key_int64_2 Nullable(Int64),
- key_int64_3 Nullable(Int64),
- key_int64_4 Nullable(Int64),
- key_int64_5 Nullable(Int64),
 m1 Int64,
 m2 Int64,
 )
- ENGINE = MergeTree
- ORDER BY tuple()
+ ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
-    <fill_query>OPTIMIZE TABLE t_nullable FINAL</fill_query>
-    <query>select min(m1) from t_nullable group by key_string1,key_string2,key_string3 format Null</query>
-    <query>select min(m1) from t_nullable group by key_int64_1,key_int64_2,key_string3 format Null</query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>

     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
From f96b9b7512222ba71f48c905ac2d181515e99774 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 28 Jun 2023 15:04:43 +0800
Subject: [PATCH 04/51] optimize fixed size column
---
src/Columns/ColumnNullable.cpp | 17 +++++++++++++++--
.../aggregate_with_serialized_method.xml | 10 ++++++++--
2 files changed, 23 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 48b3740fa97..02a3de5ae55 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -140,9 +140,9 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
- if (const ColumnString * string_col = checkAndGetColumn<ColumnString>(getNestedColumn()))
+ if (isString(nested_column->getDataType()))
{
- auto data = string_col->getDataAt(n);
+ auto data = nested_column->getDataAt(n);
size_t string_size = data.size + 1;
auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
pos = arena.allocContinue(memory_size, begin);
@@ -154,6 +154,19 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
+ else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()))
+ {
+ auto data = nested_column->getDataAt(n);
+ auto size = data.size;
+ auto memory_size = is_null ? s : s + size;
+ pos = arena.allocContinue(memory_size, begin);
+ memcpy(pos, &arr[n], s);
+ if (!is_null)
+ {
+ memcpy(pos + s, data.data, size);
+ }
+ return StringRef(pos, memory_size);
+ }
else
{
pos = arena.allocContinue(s, begin);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 3c0ad4a7223..a280dae67aa 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -11,13 +11,19 @@
key_string1 Nullable(String),
key_string2 Nullable(String),
key_string3 Nullable(String),
+ key_int64_1 Nullable(Int64),
+ key_int64_2 Nullable(Int64),
+ key_int64_3 Nullable(Int64),
+ key_int64_4 Nullable(Int64),
+ key_int64_5 Nullable(Int64),
m1 Int64,
- m2 Int64,
+ m2 Int64
)
ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
     <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>
+    <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>

     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
From 62dffd0be232469a0440beb91a16efd40a398583 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 28 Jun 2023 22:42:54 +0800
Subject: [PATCH 05/51] optimize conditions
---
src/Columns/ColumnNullable.cpp | 6 ++++--
src/Columns/ColumnNullable.h | 3 +++
2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 02a3de5ae55..3cf1f158031 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -34,6 +34,8 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
+ is_string = isString(nested_column->getDataType());
+ is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType());
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -140,7 +142,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
- if (isString(nested_column->getDataType()))
+ if (is_string)
{
auto data = nested_column->getDataAt(n);
size_t string_size = data.size + 1;
@@ -154,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType()))
+ else if (is_number_or_fixed_string)
{
auto data = nested_column->getDataAt(n);
auto size = data.size;
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index bc95eca69b9..e569b989c35 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -212,6 +212,9 @@ public:
private:
WrappedPtr nested_column;
WrappedPtr null_map;
+ // optimize serializeValueIntoArena
+ bool is_string;
+ bool is_number_or_fixed_string;
template <bool negative>
void applyNullMapImpl(const NullMap & map);
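
serializeValueIntoArena runs once per row for every serialized aggregation key column, so even a cheap type test multiplies by the row count; this patch computes the test once in the constructor so the hot loop only reads a member flag (patch 06 below renames the second flag and widens it to valuesHaveFixedSize()). A toy sketch of the hoisting pattern, with made-up class names rather than the real IColumn hierarchy:

    #include <cstdio>
    #include <memory>

    struct INested
    {
        virtual ~INested() = default;
        virtual bool isString() const = 0;
    };

    struct StringNested final : INested
    {
        bool isString() const override { return true; }
    };

    struct NullableSketch
    {
        std::unique_ptr<INested> nested;
        bool nested_is_string;   // cached once at construction

        explicit NullableSketch(std::unique_ptr<INested> nested_)
            : nested(std::move(nested_))
            , nested_is_string(nested->isString())   // hoisted out of the per-row path
        {
        }

        void serializeRow() const
        {
            if (nested_is_string)                    // plain member load per row
                std::puts("string fast path");
            else
                std::puts("generic path");
        }
    };

    int main()
    {
        NullableSketch col(std::make_unique<StringNested>());
        col.serializeRow();
    }
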
From 81f0d175285c08ce96d619771d29555b84b8c7fd Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Thu, 29 Jun 2023 10:25:36 +0800
Subject: [PATCH 06/51] change param name
---
src/Columns/ColumnNullable.cpp | 4 ++--
src/Columns/ColumnNullable.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 3cf1f158031..9045851d790 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -35,7 +35,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
is_string = isString(nested_column->getDataType());
- is_number_or_fixed_string = isNumber(nested_column->getDataType()) || isFixedString(nested_column->getDataType());
+ is_fixed_size_column = nested_column->valuesHaveFixedSize();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -156,7 +156,7 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
}
return StringRef(pos, memory_size);
}
- else if (is_number_or_fixed_string)
+ else if (is_fixed_size_column)
{
auto data = nested_column->getDataAt(n);
auto size = data.size;
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index e569b989c35..4f37650ffe3 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -214,7 +214,7 @@ private:
WrappedPtr null_map;
// optimize serializeValueIntoArena
bool is_string;
- bool is_number_or_fixed_string;
+ bool is_fixed_size_column;
template <bool negative>
void applyNullMapImpl(const NullMap & map);
From 594b38229f05d6c3a1182f7efdd21ca1efa4b6b4 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 5 Jul 2023 13:53:12 +0800
Subject: [PATCH 07/51] another version
---
src/Columns/ColumnAggregateFunction.cpp | 2 +-
src/Columns/ColumnAggregateFunction.h | 2 +-
src/Columns/ColumnArray.cpp | 2 +-
src/Columns/ColumnArray.h | 2 +-
src/Columns/ColumnCompressed.h | 2 +-
src/Columns/ColumnConst.h | 2 +-
src/Columns/ColumnDecimal.cpp | 21 +++++++++++++++++--
src/Columns/ColumnDecimal.h | 2 +-
src/Columns/ColumnFixedString.cpp | 23 ++++++++++++++++++---
src/Columns/ColumnFixedString.h | 2 +-
src/Columns/ColumnFunction.h | 2 +-
src/Columns/ColumnLowCardinality.cpp | 2 +-
src/Columns/ColumnLowCardinality.h | 2 +-
src/Columns/ColumnMap.cpp | 2 +-
src/Columns/ColumnMap.h | 2 +-
src/Columns/ColumnNullable.cpp | 27 ++++---------------------
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnObject.h | 2 +-
src/Columns/ColumnSparse.cpp | 2 +-
src/Columns/ColumnSparse.h | 2 +-
src/Columns/ColumnString.cpp | 23 ++++++++++++++++-----
src/Columns/ColumnString.h | 3 ++-
src/Columns/ColumnTuple.cpp | 2 +-
src/Columns/ColumnTuple.h | 2 +-
src/Columns/ColumnUnique.h | 4 ++--
src/Columns/ColumnVector.cpp | 23 ++++++++++++++++++---
src/Columns/ColumnVector.h | 2 +-
src/Columns/IColumn.h | 2 +-
src/Columns/IColumnDummy.h | 2 +-
29 files changed, 107 insertions(+), 61 deletions(-)
diff --git a/src/Columns/ColumnAggregateFunction.cpp b/src/Columns/ColumnAggregateFunction.cpp
index 62ec324455e..3ebb30df87e 100644
--- a/src/Columns/ColumnAggregateFunction.cpp
+++ b/src/Columns/ColumnAggregateFunction.cpp
@@ -524,7 +524,7 @@ void ColumnAggregateFunction::insertDefault()
pushBackAndCreateState(data, arena, func.get());
}
-StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin) const
+StringRef ColumnAggregateFunction::serializeValueIntoArena(size_t n, Arena & arena, const char *& begin, const UInt8 *) const
{
WriteBufferFromArena out(arena, begin);
func->serialize(data[n], out, version);
diff --git a/src/Columns/ColumnAggregateFunction.h b/src/Columns/ColumnAggregateFunction.h
index f9ce45708c9..7c7201e585a 100644
--- a/src/Columns/ColumnAggregateFunction.h
+++ b/src/Columns/ColumnAggregateFunction.h
@@ -162,7 +162,7 @@ public:
void insertDefault() override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * src_arena) override;
diff --git a/src/Columns/ColumnArray.cpp b/src/Columns/ColumnArray.cpp
index 74512d1669b..1cb8188bce6 100644
--- a/src/Columns/ColumnArray.cpp
+++ b/src/Columns/ColumnArray.cpp
@@ -205,7 +205,7 @@ void ColumnArray::insertData(const char * pos, size_t length)
}
-StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnArray::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
size_t array_size = sizeAt(n);
size_t offset = offsetAt(n);
diff --git a/src/Columns/ColumnArray.h b/src/Columns/ColumnArray.h
index f011d9a607b..2a9bfa405e5 100644
--- a/src/Columns/ColumnArray.h
+++ b/src/Columns/ColumnArray.h
@@ -77,7 +77,7 @@ public:
StringRef getDataAt(size_t n) const override;
bool isDefaultAt(size_t n) const override;
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnCompressed.h b/src/Columns/ColumnCompressed.h
index bfe7cdb4924..b780fbbf37a 100644
--- a/src/Columns/ColumnCompressed.h
+++ b/src/Columns/ColumnCompressed.h
@@ -88,7 +88,7 @@ public:
void insertData(const char *, size_t) override { throwMustBeDecompressed(); }
void insertDefault() override { throwMustBeDecompressed(); }
void popBack(size_t) override { throwMustBeDecompressed(); }
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeDecompressed(); }
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeDecompressed(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeDecompressed(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeDecompressed(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeDecompressed(); }
diff --git a/src/Columns/ColumnConst.h b/src/Columns/ColumnConst.h
index f769dd6cc2a..dc84e0c2402 100644
--- a/src/Columns/ColumnConst.h
+++ b/src/Columns/ColumnConst.h
@@ -151,7 +151,7 @@ public:
s -= n;
}
- StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin) const override
+ StringRef serializeValueIntoArena(size_t, Arena & arena, char const *& begin, const UInt8 *) const override
{
return data->serializeValueIntoArena(0, arena, begin);
}
diff --git a/src/Columns/ColumnDecimal.cpp b/src/Columns/ColumnDecimal.cpp
index 8e5792934cf..142ee6c271d 100644
--- a/src/Columns/ColumnDecimal.cpp
+++ b/src/Columns/ColumnDecimal.cpp
@@ -59,9 +59,26 @@ bool ColumnDecimal<T>::hasEqualValues() const
}
template <is_decimal T>
-StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnDecimal<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(sizeof(T), begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &data[n], sizeof(T));
- return StringRef(pos, sizeof(T));
+ return res;
}
diff --git a/src/Columns/ColumnDecimal.h b/src/Columns/ColumnDecimal.h
index 03e0b9be558..fb24ae4554b 100644
--- a/src/Columns/ColumnDecimal.h
+++ b/src/Columns/ColumnDecimal.h
@@ -80,7 +80,7 @@ public:
Float64 getFloat64(size_t n) const final { return DecimalUtils::convertTo(data[n], scale); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnFixedString.cpp b/src/Columns/ColumnFixedString.cpp
index 24b5c435ecd..a18e5c522a1 100644
--- a/src/Columns/ColumnFixedString.cpp
+++ b/src/Columns/ColumnFixedString.cpp
@@ -86,11 +86,28 @@ void ColumnFixedString::insertData(const char * pos, size_t length)
memset(chars.data() + old_size + length, 0, n - length);
}
-StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const
+StringRef ColumnFixedString::serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(n, begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + n;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = n;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &chars[n * index], n);
- return StringRef(pos, n);
+ return res;
}
const char * ColumnFixedString::deserializeAndInsertFromArena(const char * pos)
diff --git a/src/Columns/ColumnFixedString.h b/src/Columns/ColumnFixedString.h
index 39497e3403e..445432b7b28 100644
--- a/src/Columns/ColumnFixedString.h
+++ b/src/Columns/ColumnFixedString.h
@@ -115,7 +115,7 @@ public:
chars.resize_assume_reserved(chars.size() - n * elems);
}
- StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t index, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnFunction.h b/src/Columns/ColumnFunction.h
index a1f6245c2bd..c21e88744e0 100644
--- a/src/Columns/ColumnFunction.h
+++ b/src/Columns/ColumnFunction.h
@@ -96,7 +96,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot insert into {}", getName());
}
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override
{
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Cannot serialize from {}", getName());
}
diff --git a/src/Columns/ColumnLowCardinality.cpp b/src/Columns/ColumnLowCardinality.cpp
index 9269ea4ee4d..41358a4e538 100644
--- a/src/Columns/ColumnLowCardinality.cpp
+++ b/src/Columns/ColumnLowCardinality.cpp
@@ -255,7 +255,7 @@ void ColumnLowCardinality::insertData(const char * pos, size_t length)
idx.insertPosition(dictionary.getColumnUnique().uniqueInsertData(pos, length));
}
-StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnLowCardinality::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return getDictionary().serializeValueIntoArena(getIndexes().getUInt(n), arena, begin);
}
diff --git a/src/Columns/ColumnLowCardinality.h b/src/Columns/ColumnLowCardinality.h
index dcd07ff3b34..91bd5945fd9 100644
--- a/src/Columns/ColumnLowCardinality.h
+++ b/src/Columns/ColumnLowCardinality.h
@@ -87,7 +87,7 @@ public:
void popBack(size_t n) override { idx.popBack(n); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnMap.cpp b/src/Columns/ColumnMap.cpp
index 797700e87b0..ddcde43ca23 100644
--- a/src/Columns/ColumnMap.cpp
+++ b/src/Columns/ColumnMap.cpp
@@ -111,7 +111,7 @@ void ColumnMap::popBack(size_t n)
nested->popBack(n);
}
-StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnMap::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return nested->serializeValueIntoArena(n, arena, begin);
}
diff --git a/src/Columns/ColumnMap.h b/src/Columns/ColumnMap.h
index e5bc26127df..fde8a7e0e67 100644
--- a/src/Columns/ColumnMap.h
+++ b/src/Columns/ColumnMap.h
@@ -58,7 +58,7 @@ public:
void insert(const Field & x) override;
void insertDefault() override;
void popBack(size_t n) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index 9045851d790..ce0876647b9 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -136,38 +136,19 @@ void ColumnNullable::insertData(const char * pos, size_t length)
}
}
-StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
const auto & arr = getNullMapData();
- const bool is_null = arr[n];
static constexpr auto s = sizeof(arr[0]);
char * pos;
if (is_string)
{
- auto data = nested_column->getDataAt(n);
- size_t string_size = data.size + 1;
- auto memory_size = is_null ? s : s + sizeof(string_size) + string_size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, &string_size, sizeof(string_size));
- memcpy(pos + s + sizeof(string_size), data.data, string_size);
- }
- return StringRef(pos, memory_size);
+ const auto * column_string = static_cast<const ColumnString *>(nested_column.get());
+ return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]);
}
else if (is_fixed_size_column)
{
- auto data = nested_column->getDataAt(n);
- auto size = data.size;
- auto memory_size = is_null ? s : s + size;
- pos = arena.allocContinue(memory_size, begin);
- memcpy(pos, &arr[n], s);
- if (!is_null)
- {
- memcpy(pos + s, data.data, size);
- }
- return StringRef(pos, memory_size);
+ return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]);
}
else
{
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 4f37650ffe3..679f51d5900 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnObject.h b/src/Columns/ColumnObject.h
index bc5a6b69bb0..36a33a8f10f 100644
--- a/src/Columns/ColumnObject.h
+++ b/src/Columns/ColumnObject.h
@@ -244,7 +244,7 @@ public:
StringRef getDataAt(size_t) const override { throwMustBeConcrete(); }
bool isDefaultAt(size_t) const override { throwMustBeConcrete(); }
void insertData(const char *, size_t) override { throwMustBeConcrete(); }
- StringRef serializeValueIntoArena(size_t, Arena &, char const *&) const override { throwMustBeConcrete(); }
+ StringRef serializeValueIntoArena(size_t, Arena &, char const *&, const UInt8 *) const override { throwMustBeConcrete(); }
const char * deserializeAndInsertFromArena(const char *) override { throwMustBeConcrete(); }
const char * skipSerializedInArena(const char *) const override { throwMustBeConcrete(); }
void updateHashWithValue(size_t, SipHash &) const override { throwMustBeConcrete(); }
diff --git a/src/Columns/ColumnSparse.cpp b/src/Columns/ColumnSparse.cpp
index 4f76a9be4b9..057c0cd7112 100644
--- a/src/Columns/ColumnSparse.cpp
+++ b/src/Columns/ColumnSparse.cpp
@@ -150,7 +150,7 @@ void ColumnSparse::insertData(const char * pos, size_t length)
insertSingleValue([&](IColumn & column) { column.insertData(pos, length); });
}
-StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnSparse::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
return values->serializeValueIntoArena(getValueIndex(n), arena, begin);
}
diff --git a/src/Columns/ColumnSparse.h b/src/Columns/ColumnSparse.h
index 26e05655f60..48c7422dd27 100644
--- a/src/Columns/ColumnSparse.h
+++ b/src/Columns/ColumnSparse.h
@@ -78,7 +78,7 @@ public:
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char *) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.cpp b/src/Columns/ColumnString.cpp
index 38c7b2c0dd6..50fe90ad8ef 100644
--- a/src/Columns/ColumnString.cpp
+++ b/src/Columns/ColumnString.cpp
@@ -213,17 +213,30 @@ ColumnPtr ColumnString::permute(const Permutation & perm, size_t limit) const
}
-StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnString::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
size_t string_size = sizeAt(n);
size_t offset = offsetAt(n);
-
+ constexpr size_t null_bit_size = sizeof(UInt8);
StringRef res;
- res.size = sizeof(string_size) + string_size;
- char * pos = arena.allocContinue(res.size, begin);
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(string_size) + string_size;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(string_size) + string_size;
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
memcpy(pos, &string_size, sizeof(string_size));
memcpy(pos + sizeof(string_size), &chars[offset], string_size);
- res.data = pos;
return res;
}
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index 08c876a803d..e8e5ebbcbf9 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -11,6 +11,7 @@
#include
#include
#include
+#include
class Collator;
@@ -168,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnTuple.cpp b/src/Columns/ColumnTuple.cpp
index 9702d275114..d8992125be4 100644
--- a/src/Columns/ColumnTuple.cpp
+++ b/src/Columns/ColumnTuple.cpp
@@ -171,7 +171,7 @@ void ColumnTuple::popBack(size_t n)
column->popBack(n);
}
-StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnTuple::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
StringRef res(begin, 0);
for (const auto & column : columns)
diff --git a/src/Columns/ColumnTuple.h b/src/Columns/ColumnTuple.h
index e7dee9b8ff9..79099f4c098 100644
--- a/src/Columns/ColumnTuple.h
+++ b/src/Columns/ColumnTuple.h
@@ -61,7 +61,7 @@ public:
void insertFrom(const IColumn & src_, size_t n) override;
void insertDefault() override;
void popBack(size_t n) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash) const override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index 377255d80c7..d2fc69d7fb8 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
@@ -373,7 +373,7 @@ size_t ColumnUnique<ColumnType>::uniqueInsertData(const char * pos, size_t lengt
}
template <typename ColumnType>
-StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnUnique<ColumnType>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const
{
if (is_nullable)
{
diff --git a/src/Columns/ColumnVector.cpp b/src/Columns/ColumnVector.cpp
index f2fe343a371..a9b8c0ccacb 100644
--- a/src/Columns/ColumnVector.cpp
+++ b/src/Columns/ColumnVector.cpp
@@ -49,11 +49,28 @@ namespace ErrorCodes
}
template <typename T>
-StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const
+StringRef ColumnVector<T>::serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const
{
- auto * pos = arena.allocContinue(sizeof(T), begin);
+ constexpr size_t null_bit_size = sizeof(UInt8);
+ StringRef res;
+ char * pos;
+ if (null_bit)
+ {
+ res.size = * null_bit ? null_bit_size : null_bit_size + sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ memcpy(pos, null_bit, null_bit_size);
+ if (*null_bit) return res;
+ pos += null_bit_size;
+ }
+ else
+ {
+ res.size = sizeof(T);
+ pos = arena.allocContinue(res.size, begin);
+ res.data = pos;
+ }
unalignedStore(pos, data[n]);
- return StringRef(pos, sizeof(T));
+ return res;
}
template <typename T>
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index b8ebff2a5d5..7bb69656c5a 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/IColumn.h b/src/Columns/IColumn.h
index b4eaf5c28f5..12ac1102efd 100644
--- a/src/Columns/IColumn.h
+++ b/src/Columns/IColumn.h
@@ -218,7 +218,7 @@ public:
* For example, to obtain unambiguous representation of Array of strings, strings data should be interleaved with their sizes.
* Parameter begin should be used with Arena::allocContinue.
*/
- virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin) const = 0;
+ virtual StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const = 0;
/// Deserializes a value that was serialized using IColumn::serializeValueIntoArena method.
/// Returns pointer to the position after the read data.
diff --git a/src/Columns/IColumnDummy.h b/src/Columns/IColumnDummy.h
index 82d4c857b29..4cadae2bc3d 100644
--- a/src/Columns/IColumnDummy.h
+++ b/src/Columns/IColumnDummy.h
@@ -57,7 +57,7 @@ public:
++s;
}
- StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin) const override
+ StringRef serializeValueIntoArena(size_t /*n*/, Arena & arena, char const *& begin, const UInt8 *) const override
{
/// Has to put one useless byte into Arena, because serialization into zero number of bytes is ambiguous.
char * res = arena.allocContinue(1, begin);
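
The signature change threads an optional pointer into the null map through every column: when `null_bit` is non-null, the concrete column writes the one-byte flag itself and, for a null row, skips the payload entirely, so flag and value land in one contiguous allocation instead of the two allocContinue calls ColumnNullable used to make. A self-contained sketch of that contract for a fixed-size value, using a toy byte-vector arena rather than the real Arena class:

    #include <cstdint>
    #include <cstring>
    #include <vector>

    // Serialize one fixed-size value; null_bit == nullptr means "not nullable".
    template <typename T>
    size_t serializeFixed(std::vector<char> & arena, const T & value, const uint8_t * null_bit)
    {
        size_t start = arena.size();
        if (null_bit)
        {
            arena.push_back(static_cast<char>(*null_bit));  // flag byte written by the callee
            if (*null_bit)
                return arena.size() - start;                // null row: flag only, no payload
        }
        arena.resize(arena.size() + sizeof(T));
        std::memcpy(arena.data() + arena.size() - sizeof(T), &value, sizeof(T));
        return arena.size() - start;
    }

    int main()
    {
        std::vector<char> arena;
        uint8_t not_null = 0, null_row = 1;
        size_t a = serializeFixed<int64_t>(arena, 42, &not_null);  // 9 bytes: flag + value
        size_t b = serializeFixed<int64_t>(arena, 0, &null_row);   // 1 byte: flag only
        size_t c = serializeFixed<int64_t>(arena, 7, nullptr);     // 8 bytes: value only
        return (a == 9 && b == 1 && c == 8) ? 0 : 1;
    }
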
From 2997fe0677813edd622a5b8f2fe7f4ae17591b03 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Wed, 5 Jul 2023 18:30:54 +0800
Subject: [PATCH 08/51] add default value for compile
---
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnString.h | 2 +-
src/Columns/ColumnUnique.h | 2 +-
src/Columns/ColumnVector.h | 2 +-
4 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 679f51d5900..8064ce014d3 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index e8e5ebbcbf9..907dc83caeb 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -169,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index d2fc69d7fb8..69f4818e6be 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 *) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 7bb69656c5a..232769a5295 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
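
The `= nullptr` defaults keep the existing three-argument call sites compiling, but default arguments on virtual functions are bound to the static type at the call site rather than the dynamic type, a classic trap, which is presumably why patch 09 below removes the defaults from the overrides again and passes nullptr explicitly in the tests. A self-contained illustration of the pitfall:

    #include <cstdio>

    struct Base
    {
        virtual ~Base() = default;
        virtual void f(int x = 1) const { std::printf("Base::f(%d)\n", x); }
    };

    struct Derived : Base
    {
        void f(int x = 2) const override { std::printf("Derived::f(%d)\n", x); }
    };

    int main()
    {
        Derived d;
        const Base & b = d;
        b.f();   // Derived::f(1): Derived's body, but Base's default argument
        d.f();   // Derived::f(2): default taken from the static type Derived
    }
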
From f33367cd8b50089d33ad3dc431157396f369fb12 Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 7 Aug 2023 13:37:24 +0800
Subject: [PATCH 09/51] add more test
---
src/Columns/ColumnNullable.h | 2 +-
src/Columns/ColumnString.h | 2 +-
src/Columns/ColumnUnique.h | 2 +-
src/Columns/ColumnVector.h | 2 +-
src/Columns/tests/gtest_column_unique.cpp | 6 +++---
tests/performance/aggregate_with_serialized_method.xml | 4 +++-
6 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 8064ce014d3..719fa698acc 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -62,7 +62,7 @@ public:
StringRef getDataAt(size_t) const override;
/// Will insert null value if pos=nullptr
void insertData(const char * pos, size_t length) override;
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
const char * skipSerializedInArena(const char * pos) const override;
void insertRangeFrom(const IColumn & src, size_t start, size_t length) override;
diff --git a/src/Columns/ColumnString.h b/src/Columns/ColumnString.h
index 907dc83caeb..e8e5ebbcbf9 100644
--- a/src/Columns/ColumnString.h
+++ b/src/Columns/ColumnString.h
@@ -169,7 +169,7 @@ public:
offsets.resize_assume_reserved(offsets.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/ColumnUnique.h b/src/Columns/ColumnUnique.h
index 69f4818e6be..882d17b1649 100644
--- a/src/Columns/ColumnUnique.h
+++ b/src/Columns/ColumnUnique.h
@@ -79,7 +79,7 @@ public:
Float32 getFloat32(size_t n) const override { return getNestedColumn()->getFloat32(n); }
bool getBool(size_t n) const override { return getNestedColumn()->getBool(n); }
bool isNullAt(size_t n) const override { return is_nullable && n == getNullValueIndex(); }
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * skipSerializedInArena(const char * pos) const override;
void updateHashWithValue(size_t n, SipHash & hash_func) const override
{
diff --git a/src/Columns/ColumnVector.h b/src/Columns/ColumnVector.h
index 232769a5295..7bb69656c5a 100644
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@@ -174,7 +174,7 @@ public:
data.resize_assume_reserved(data.size() - n);
}
- StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit = nullptr) const override;
+ StringRef serializeValueIntoArena(size_t n, Arena & arena, char const *& begin, const UInt8 * null_bit) const override;
const char * deserializeAndInsertFromArena(const char * pos) override;
diff --git a/src/Columns/tests/gtest_column_unique.cpp b/src/Columns/tests/gtest_column_unique.cpp
index 15208da70fb..ab2cb42b603 100644
--- a/src/Columns/tests/gtest_column_unique.cpp
+++ b/src/Columns/tests/gtest_column_unique.cpp
@@ -117,7 +117,7 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const
const char * pos = nullptr;
for (size_t i = 0; i < num_values; ++i)
{
- auto ref = column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos);
+ auto ref = column_unique_pattern->serializeValueIntoArena(idx->getUInt(i), arena, pos, nullptr);
const char * new_pos;
column_unique->uniqueDeserializeAndInsertFromArena(ref.data, new_pos);
ASSERT_EQ(new_pos - ref.data, ref.size) << "Deserialized data has different sizes at position " << i;
@@ -140,8 +140,8 @@ void column_unique_unique_deserialize_from_arena_impl(ColumnType & column, const
const char * pos_lc = nullptr;
for (size_t i = 0; i < num_values; ++i)
{
- auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string);
- auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc);
+ auto ref_string = column.serializeValueIntoArena(i, arena_string, pos_string, nullptr);
+ auto ref_lc = column_unique->serializeValueIntoArena(idx->getUInt(i), arena_lc, pos_lc, nullptr);
ASSERT_EQ(ref_string, ref_lc) << "Serialized data is different from pattern at position " << i;
}
}
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index a280dae67aa..4c4ef0438ae 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -21,9 +21,11 @@
 )
 ENGINE = Memory
     </create_query>
-    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%1000+1, number%1000+2, number%1000+3, number%1000+4,number%1000+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
+    <fill_query>insert into t_nullable select ['aaaaaa','bbaaaa','ccaaaa','ddaaaa'][number % 101 + 1], ['aa','bb','cc','dd'][number % 100 + 1], ['aa','bb','cc','dd'][number % 102 + 1], number%10+1, number%10+2, number%10+3, number%10+4,number%10+5, number%6000+1, number%5000+2 from numbers_mt(20000000)</fill_query>
     <query>select key_string1,key_string2,key_string3, min(m1) from t_nullable group by key_string1,key_string2,key_string3</query>
     <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>
+    <query>select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5</query>
+    <query>select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10</query>

     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
From 8a8330131644c106771055de7f67f761d01e00cd Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Mon, 7 Aug 2023 14:25:15 +0800
Subject: [PATCH 10/51] optimize
---
src/Columns/ColumnNullable.cpp | 92 ++++++++++++++-----
src/Columns/ColumnNullable.h | 4 +-
.../aggregate_with_serialized_method.xml | 1 +
3 files changed, 72 insertions(+), 25 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index ce0876647b9..ea95016a766 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -4,6 +4,10 @@
#include
#include
#include
+#include "ColumnDecimal.h"
+#include "ColumnFixedString.h"
+#include "ColumnsDateTime.h"
+#include "ColumnsNumber.h"
#include
#include
#include
@@ -34,8 +38,7 @@ ColumnNullable::ColumnNullable(MutableColumnPtr && nested_column_, MutableColumn
{
/// ColumnNullable cannot have constant nested column. But constant argument could be passed. Materialize it.
nested_column = getNestedColumn().convertToFullColumnIfConst();
- is_string = isString(nested_column->getDataType());
- is_fixed_size_column = nested_column->valuesHaveFixedSize();
+ nested_type = nested_column->getDataType();
if (!getNestedColumn().canBeInsideNullable())
throw Exception(ErrorCodes::ILLEGAL_COLUMN, "{} cannot be inside Nullable column", getNestedColumn().getName());
@@ -141,29 +144,72 @@ StringRef ColumnNullable::serializeValueIntoArena(size_t n, Arena & arena, char
const auto & arr = getNullMapData();
static constexpr auto s = sizeof(arr[0]);
char * pos;
- if (is_string)
+
+ switch (nested_type)
{
- const auto * column_string = static_cast<const ColumnString *>(nested_column.get());
- return column_string->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt8:
+ return static_cast<const ColumnUInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt16:
+ return static_cast<const ColumnUInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt32:
+ return static_cast<const ColumnUInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt64:
+ return static_cast<const ColumnUInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt128:
+ return static_cast<const ColumnUInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UInt256:
+ return static_cast<const ColumnUInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int8:
+ return static_cast<const ColumnInt8 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int16:
+ return static_cast<const ColumnInt16 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int32:
+ return static_cast<const ColumnInt32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int64:
+ return static_cast<const ColumnInt64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int128:
+ return static_cast<const ColumnInt128 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Int256:
+ return static_cast<const ColumnInt256 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Float32:
+ return static_cast<const ColumnFloat32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Float64:
+ return static_cast<const ColumnFloat64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Date:
+ return static_cast<const ColumnDate *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Date32:
+ return static_cast<const ColumnDate32 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::DateTime:
+ return static_cast<const ColumnDateTime *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::DateTime64:
+ return static_cast<const ColumnDateTime64 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::String:
+ return static_cast<const ColumnString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::FixedString:
+ return static_cast<const ColumnFixedString *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Decimal32:
+ return static_cast<const ColumnDecimal<Decimal32> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Decimal64:
+ return static_cast<const ColumnDecimal<Decimal64> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Decimal128:
+ return static_cast<const ColumnDecimal<Decimal128> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::Decimal256:
+ return static_cast<const ColumnDecimal<Decimal256> *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::UUID:
+ return static_cast<const ColumnUUID *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::IPv4:
+ return static_cast<const ColumnIPv4 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ case TypeIndex::IPv6:
+ return static_cast<const ColumnIPv6 *>(nested_column.get())->serializeValueIntoArena(n, arena, begin, &arr[n]);
+ default:
+ pos = arena.allocContinue(s, begin);
+ memcpy(pos, &arr[n], s);
+ if (arr[n])
+ return StringRef(pos, s);
+ auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
+ /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
+ return StringRef(nested_ref.data - s, nested_ref.size + s);
}
- else if (is_fixed_size_column)
- {
- return nested_column->serializeValueIntoArena(n, arena, begin, &arr[n]);
- }
- else
- {
- pos = arena.allocContinue(s, begin);
- memcpy(pos, &arr[n], s);
-
- if (arr[n])
- return StringRef(pos, s);
-
- auto nested_ref = getNestedColumn().serializeValueIntoArena(n, arena, begin);
-
- /// serializeValueIntoArena may reallocate memory. Have to use ptr from nested_ref.data and move it back.
- return StringRef(nested_ref.data - s, nested_ref.size + s);
- }
-
}
const char * ColumnNullable::deserializeAndInsertFromArena(const char * pos)
diff --git a/src/Columns/ColumnNullable.h b/src/Columns/ColumnNullable.h
index 719fa698acc..b57fdf3064d 100644
--- a/src/Columns/ColumnNullable.h
+++ b/src/Columns/ColumnNullable.h
@@ -6,6 +6,7 @@
#include
#include
+#include "Core/TypeId.h"
#include "config.h"
@@ -213,8 +214,7 @@ private:
WrappedPtr nested_column;
WrappedPtr null_map;
// optimize serializeValueIntoArena
- bool is_string;
- bool is_fixed_size_column;
+ TypeIndex nested_type;
template
void applyNullMapImpl(const NullMap & map);
diff --git a/tests/performance/aggregate_with_serialized_method.xml b/tests/performance/aggregate_with_serialized_method.xml
index 4c4ef0438ae..91763c69bb9 100644
--- a/tests/performance/aggregate_with_serialized_method.xml
+++ b/tests/performance/aggregate_with_serialized_method.xml
@@ -26,6 +26,7 @@
     <query>select key_string3,key_int64_1,key_int64_2, min(m1) from t_nullable group by key_string3,key_int64_1,key_int64_2</query>
     <query>select key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5, min(m1) from t_nullable group by key_int64_1,key_int64_2,key_int64_3,key_int64_4,key_int64_5</query>
     <query>select toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5), min(m1) from t_nullable group by toFloat64(key_int64_1),toFloat64(key_int64_2),toFloat64(key_int64_3),toFloat64(key_int64_4),toFloat64(key_int64_5) limit 10</query>
+    <query>select toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3), min(m1) from t_nullable group by toDecimal64(key_int64_1, 3),toDecimal64(key_int64_2, 3),toDecimal64(key_int64_3, 3),toDecimal64(key_int64_4, 3),toDecimal64(key_int64_5, 3) limit 10</query>

     <drop_query>drop table if exists t_nullable</drop_query>
 </test>
\ No newline at end of file
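
The switch trades the cached bools for one cached TypeIndex and a static_cast to the concrete column class per supported type; since the concrete ClickHouse column classes are final, each case gives the compiler a known call target it can devirtualize and inline, instead of one virtual serializeValueIntoArena call per row. A generic sketch of the dispatch pattern with toy types:

    #include <cstdio>

    enum class Tag { Int64, Float64, Other };

    struct IThing
    {
        virtual ~IThing() = default;
        virtual void serialize() const = 0;
    };

    struct Int64Thing final : IThing
    {
        void serialize() const override { std::puts("int64 path"); }
    };

    struct Float64Thing final : IThing
    {
        void serialize() const override { std::puts("float64 path"); }
    };

    // Each case casts to a final class, so the call can be devirtualized.
    void dispatch(const IThing * t, Tag tag)
    {
        switch (tag)
        {
            case Tag::Int64:
                static_cast<const Int64Thing *>(t)->serialize();
                break;
            case Tag::Float64:
                static_cast<const Float64Thing *>(t)->serialize();
                break;
            default:
                t->serialize();   // fallback: ordinary virtual dispatch
        }
    }

    int main()
    {
        Int64Thing i;
        dispatch(&i, Tag::Int64);
    }
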
From 65aeb0563f020dcc4035f3903dfded305329975b Mon Sep 17 00:00:00 2001
From: liuneng <1398775315@qq.com>
Date: Tue, 8 Aug 2023 10:07:45 +0800
Subject: [PATCH 11/51] fix include
---
src/Columns/ColumnNullable.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/Columns/ColumnNullable.cpp b/src/Columns/ColumnNullable.cpp
index ea95016a766..fcd95e5c963 100644
--- a/src/Columns/ColumnNullable.cpp
+++ b/src/Columns/ColumnNullable.cpp
@@ -4,10 +4,10 @@
#include
#include
#include
-#include "ColumnDecimal.h"
-#include "ColumnFixedString.h"
-#include "ColumnsDateTime.h"
-#include "ColumnsNumber.h"
+#include <Columns/ColumnDecimal.h>
+#include <Columns/ColumnFixedString.h>
+#include <Columns/ColumnsDateTime.h>
+#include <Columns/ColumnsNumber.h>
#include
#include
#include
From 9f9b794feb84129a4f9a07d015fc2c137131d247 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 00:15:22 +0200
Subject: [PATCH 12/51] Export logs from CI in performance (preparation)
---
docker/test/performance-comparison/Dockerfile | 5 ++---
docker/test/performance-comparison/compare.sh | 2 ++
docker/test/performance-comparison/entrypoint.sh | 8 ++------
3 files changed, 6 insertions(+), 9 deletions(-)
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index cfd7c613868..1cc644ba0b1 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -56,10 +56,9 @@ COPY * /
# node #0 should be less stable because of system interruptions. We bind
# randomly to node 1 or 0 to gather some statistics on that. We have to bind
# both servers and the tmpfs on which the database is stored. How to do it
-# through Yandex Sandbox API is unclear, but by default tmpfs uses
+# is unclear, but by default tmpfs uses
# 'process allocation policy', not sure which process but hopefully the one that
-# writes to it, so just bind the downloader script as well. We could also try to
-# remount it with proper options in Sandbox task.
+# writes to it, so just bind the downloader script as well.
# https://www.kernel.org/doc/Documentation/filesystems/tmpfs.txt
# Double-escaped backslashes are a tribute to the engineering wonder of docker --
# it gives '/bin/sh: 1: [bash,: not found' otherwise.
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index f949e66ab17..6814ffc5efd 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -71,6 +71,8 @@ function configure
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
+
+
set -m # Spawn temporary servers in their own process groups
local setup_left_server_opts=(
diff --git a/docker/test/performance-comparison/entrypoint.sh b/docker/test/performance-comparison/entrypoint.sh
index 74571777be0..fb5e6bd2a7a 100755
--- a/docker/test/performance-comparison/entrypoint.sh
+++ b/docker/test/performance-comparison/entrypoint.sh
@@ -130,7 +130,7 @@ then
git -C right/ch diff --name-only "$base" pr -- :!tests/performance :!docker/test/performance-comparison | tee other-changed-files.txt
fi
-# Set python output encoding so that we can print queries with Russian letters.
+# Set python output encoding so that we can print queries with non-ASCII letters.
export PYTHONIOENCODING=utf-8
# By default, use the main comparison script from the tested package, so that we
@@ -151,11 +151,7 @@ export PATH
export REF_PR
export REF_SHA
-# Try to collect some core dumps. I've seen two patterns in Sandbox:
-# 1) |/home/zomb-sandbox/venv/bin/python /home/zomb-sandbox/client/sandbox/bin/coredumper.py %e %p %g %u %s %P %c
-# Not sure what this script does (puts them to sandbox resources, logs some messages?),
-# and it's not accessible from inside docker anyway.
-# 2) something like %e.%p.core.dmp. The dump should end up in the workspace directory.
+# Try to collect some core dumps.
# At least we remove the ulimit and then try to pack some common file names into output.
ulimit -c unlimited
cat /proc/sys/kernel/core_pattern
From ec779ca246e4f4944cdf4bddda99ca40d2035275 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 00:31:48 +0200
Subject: [PATCH 13/51] Export logs from CI in performance (part 2)
---
docker/test/base/setup_export_logs.sh | 13 +++----
docker/test/performance-comparison/compare.sh | 35 +++++++++++++++++--
2 files changed, 39 insertions(+), 9 deletions(-)
diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh
index 12fae855b03..fdaf22a5d59 100755
--- a/docker/test/base/setup_export_logs.sh
+++ b/docker/test/base/setup_export_logs.sh
@@ -7,6 +7,7 @@
# Pre-configured destination cluster, where to export the data
CLUSTER=${CLUSTER:=system_logs_export}
+LOCAL_PARAMETERS=$1
EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
@@ -15,13 +16,13 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "}
CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""}
# Create all configured system logs:
-clickhouse-client --query "SYSTEM FLUSH LOGS"
+clickhouse-client $LOCAL_PARAMETERS --query "SYSTEM FLUSH LOGS"
# For each system log table:
-clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
+clickhouse-client $LOCAL_PARAMETERS --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
- hash=$(clickhouse-client --query "
+ hash=$(clickhouse-client $LOCAL_PARAMETERS --query "
SELECT sipHash64(groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
@@ -29,7 +30,7 @@ do
")
# Create the destination table with adapted name and structure:
- statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
+ statement=$(clickhouse-client $LOCAL_PARAMETERS --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
s/^\($/('"$EXTRA_COLUMNS"'/;
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
@@ -43,7 +44,7 @@ do
echo "Creating table system.${table}_sender" >&2
# Create Distributed table and materialized view to watch on the original table:
- clickhouse-client --query "
+ clickhouse-client $LOCAL_PARAMETERS --query "
CREATE TABLE system.${table}_sender
ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash})
EMPTY AS
@@ -53,7 +54,7 @@ do
echo "Creating materialized view system.${table}_watcher" >&2
- clickhouse-client --query "
+ clickhouse-client $LOCAL_PARAMETERS --query "
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}
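The sipHash64(groupArray((name, type))) query above fingerprints a table's structure; the hash becomes part of the destination table name, so a schema change routes logs into a fresh table instead of colliding with the old one. The same idea as a self-contained C++ sketch, with std::hash standing in for sipHash64 (assumption: only the (name, type) pairs and their order matter):

```cpp
#include <cstdint>
#include <functional>
#include <string>
#include <utility>
#include <vector>

/// Combine the hashes of all (name, type) column pairs into one fingerprint,
/// so any schema change yields a different destination-table suffix.
uint64_t structureHash(const std::vector<std::pair<std::string, std::string>> & columns)
{
    uint64_t h = 0;
    for (const auto & [name, type] : columns)
        // boost-style hash combine; column order matters, as in the SQL above
        h ^= std::hash<std::string>{}(name + ':' + type) + 0x9e3779b97f4a7c15ULL + (h << 6) + (h >> 2);
    return h;
}
```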
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 6814ffc5efd..816bdef51c3 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -63,6 +63,22 @@ function left_or_right()
function configure
{
+ # Set up a cluster for logs export to ClickHouse Cloud
+ # Note: these variables are provided to the Docker run command by the Python script in tests/ci
+ if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
+ then
+ echo "
+remote_servers:
+ system_logs_export:
+ shard:
+ replica:
+ secure: 1
+ user: ci
+ host: '${CLICKHOUSE_CI_LOGS_HOST}'
+ password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
+" > right/config/config.d/system_logs_export.yaml
+ fi
+
# Use the new config for both servers, so that we can change it in a PR.
rm right/config/config.d/text_log.xml ||:
cp -rv right/config left ||:
@@ -71,8 +87,6 @@ function configure
while pkill -f clickhouse-serv ; do echo . ; sleep 1 ; done
echo all killed
-
-
set -m # Spawn temporary servers in their own process groups
local setup_left_server_opts=(
@@ -92,7 +106,22 @@ function configure
set +m
wait_for_server $LEFT_SERVER_PORT $left_pid
- echo Server for setup started
+ echo "Server for setup started"
+
+ # Initialize export of system logs to ClickHouse Cloud
+ # Note: it is set up for the "left" server, and its database is then cloned to the "right" server.
+ if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
+ then
+ export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
+ export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
+
+ ./setup_export_logs.sh "--port $LEFT_SERVER_PORT"
+
+ # Unset variables after use
+ export CONNECTION_PARAMETERS=''
+ export CLICKHOUSE_CI_LOGS_HOST=''
+ export CLICKHOUSE_CI_LOGS_PASSWORD=''
+ fi
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
From 32778e04071f08262d134b407e64d554d171bb01 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 00:34:01 +0200
Subject: [PATCH 14/51] Export logs from CI in performance (part 3)
---
tests/ci/performance_comparison_check.py | 42 ++++++++++++++++++++++--
1 file changed, 40 insertions(+), 2 deletions(-)
diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py
index 70d37b24c4e..975ca26b7e8 100644
--- a/tests/ci/performance_comparison_check.py
+++ b/tests/ci/performance_comparison_check.py
@@ -1,6 +1,5 @@
#!/usr/bin/env python3
-
import os
import logging
import sys
@@ -20,11 +19,15 @@ from get_robot_token import get_best_robot_token, get_parameter_from_ssm
from pr_info import PRInfo
from s3_helper import S3Helper
from tee_popen import TeePopen
+from clickhouse_helper import get_instance_type
+from stopwatch import Stopwatch
IMAGE_NAME = "clickhouse/performance-comparison"
def get_run_command(
+ check_start_time,
+ check_name,
workspace,
result_path,
repo_tests_path,
@@ -33,12 +36,26 @@ def get_run_command(
additional_env,
image,
):
+ instance_type = get_instance_type()
+
+ envs = [
+ "-e CLICKHOUSE_CI_LOGS_HOST",
+ "-e CLICKHOUSE_CI_LOGS_PASSWORD",
+ f"-e CHECK_START_TIME='{check_start_time}'",
+ f"-e CHECK_NAME='{check_name}'",
+ f"-e INSTANCE_TYPE='{instance_type}'",
+ f"-e PR_TO_TEST={pr_to_test}",
+ f"-e SHA_TO_TEST={sha_to_test}",
+ ]
+
+ env_str = " ".join(envs)
+
return (
f"docker run --privileged --volume={workspace}:/workspace "
f"--volume={result_path}:/output "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
- f"-e PR_TO_TEST={pr_to_test} -e SHA_TO_TEST={sha_to_test} {additional_env} "
+ f"{envs} {additional_env} "
f"{image}"
)
@@ -62,6 +79,9 @@ class RamDrive:
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
+
+ stopwatch = Stopwatch()
+
temp_path = os.getenv("TEMP_PATH", os.path.abspath("."))
repo_path = os.getenv("REPO_COPY", os.path.abspath("../../"))
repo_tests_path = os.path.join(repo_path, "tests")
@@ -157,6 +177,8 @@ if __name__ == "__main__":
docker_env += "".join([f" -e {name}" for name in env_extra])
run_command = get_run_command(
+ stopwatch.start_time_str,
+ check_name,
result_path,
result_path,
repo_tests_path,
@@ -180,6 +202,22 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
+ # Clean up the run log from the credentials of the CI logs database.
+ # Note: a malicious user can still print them by splitting the value into parts.
+ # But we will be warned when a malicious user modifies the CI script.
+ # Although they can also print them from inside tests.
+ # Nevertheless, the credentials of the CI logs have limited scope
+ # and do not provide access to sensitive info.
+
+ ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST")
+ ci_logs_password = os.getenv(
+ "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
+ )
+ subprocess.check_call(
+ f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'",
+ shell=True,
+ )
+
paths = {
"compare.log": os.path.join(result_path, "compare.log"),
"output.7z": os.path.join(result_path, "output.7z"),
From 221dd53d37bd555c27a2a001eb87d5142759149d Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 00:36:50 +0200
Subject: [PATCH 15/51] Fixup
---
tests/ci/performance_comparison_check.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py
index 975ca26b7e8..3fd66410697 100644
--- a/tests/ci/performance_comparison_check.py
+++ b/tests/ci/performance_comparison_check.py
@@ -55,7 +55,7 @@ def get_run_command(
f"--volume={result_path}:/output "
f"--volume={repo_tests_path}:/usr/share/clickhouse-test "
f"--cap-add syslog --cap-add sys_admin --cap-add sys_rawio "
- f"{envs} {additional_env} "
+ f"{env_str} {additional_env} "
f"{image}"
)
From 49485a67700f80bd1bf544ef894ef62bf6ec451a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 02:43:51 +0200
Subject: [PATCH 16/51] Fix shellcheck
---
docker/test/base/setup_export_logs.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh
index fdaf22a5d59..ef510552d2f 100755
--- a/docker/test/base/setup_export_logs.sh
+++ b/docker/test/base/setup_export_logs.sh
@@ -1,4 +1,5 @@
#!/bin/bash
+# shellcheck disable=SC2086
# This script sets up export of system log tables to a remote server.
# Remote tables are created if not exist, and augmented with extra columns,
From 8dc0884099665db603f7903fe5a2fa79de8b9a2a Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 04:20:06 +0200
Subject: [PATCH 17/51] Fix error
---
docker/test/performance-comparison/compare.sh | 1 +
1 file changed, 1 insertion(+)
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 816bdef51c3..9711131f654 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -75,6 +75,7 @@ remote_servers:
secure: 1
user: ci
host: '${CLICKHOUSE_CI_LOGS_HOST}'
+ port: 9440
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > right/config/config.d/system_logs_export.yaml
fi
From 645834ffb6e39c2017ec3a89c194491b744afc0c Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 04:54:20 +0200
Subject: [PATCH 18/51] Fix errors
---
docker/test/performance-comparison/download.sh | 2 --
tests/ci/performance_comparison_check.py | 5 +++--
2 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/docker/test/performance-comparison/download.sh b/docker/test/performance-comparison/download.sh
index aee11030068..cb243b655c6 100755
--- a/docker/test/performance-comparison/download.sh
+++ b/docker/test/performance-comparison/download.sh
@@ -31,8 +31,6 @@ function download
# Test all of them.
declare -a urls_to_try=(
"$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
- "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
- "$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
)
for path in "${urls_to_try[@]}"
diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py
index 3fd66410697..70369f9881e 100644
--- a/tests/ci/performance_comparison_check.py
+++ b/tests/ci/performance_comparison_check.py
@@ -190,6 +190,7 @@ if __name__ == "__main__":
logging.info("Going to run command %s", run_command)
run_log_path = os.path.join(temp_path, "run.log")
+ compare_log_path = os.path.join(result_path, "compare.log")
popen_env = os.environ.copy()
popen_env.update(env_extra)
@@ -214,12 +215,12 @@ if __name__ == "__main__":
"CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
)
subprocess.check_call(
- f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}'",
+ f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{compare_log_path}'",
shell=True,
)
paths = {
- "compare.log": os.path.join(result_path, "compare.log"),
+ "compare.log": compare_log_path,
"output.7z": os.path.join(result_path, "output.7z"),
"report.html": os.path.join(result_path, "report.html"),
"all-queries.html": os.path.join(result_path, "all-queries.html"),
From d434e58eae7ff8ec9b7fa7e94398fe3dd84444e6 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 06:45:41 +0200
Subject: [PATCH 19/51] Fix errors
---
docker/test/performance-comparison/Dockerfile | 4 ----
docker/test/performance-comparison/compare.sh | 20 ++++++++++++-------
2 files changed, 13 insertions(+), 11 deletions(-)
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index 1cc644ba0b1..f3cab77bdbb 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -1,9 +1,5 @@
# docker build -t clickhouse/performance-comparison .
-# Using ubuntu:22.04 over 20.04 as all other images, since:
-# a) ubuntu 20.04 has too old parallel, and does not support --memsuspend
-# b) anyway for perf tests it should not be important (backward compatiblity
-# with older ubuntu had been checked lots of times in various tests)
FROM ubuntu:22.04
# ARG for quick switch to a given ubuntu mirror
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 9711131f654..fe37df9ec75 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -67,6 +67,7 @@ function configure
# Note: these variables are provided to the Docker run command by the Python script in tests/ci
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
+ set +x
echo "
remote_servers:
system_logs_export:
@@ -78,6 +79,7 @@ remote_servers:
port: 9440
password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
" > right/config/config.d/system_logs_export.yaml
+ set -x
fi
# Use the new config for both servers, so that we can change it in a PR.
@@ -113,15 +115,19 @@ remote_servers:
# Note: it is set up for the "left" server, and its database is then cloned to the "right" server.
if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
then
- export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
- export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
+ (
+ set +x
+ export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
+ export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
- ./setup_export_logs.sh "--port $LEFT_SERVER_PORT"
+ ./setup_export_logs.sh "--port $LEFT_SERVER_PORT"
- # Unset variables after use
- export CONNECTION_PARAMETERS=''
- export CLICKHOUSE_CI_LOGS_HOST=''
- export CLICKHOUSE_CI_LOGS_PASSWORD=''
+ # Unset variables after use
+ export CONNECTION_PARAMETERS=''
+ export CLICKHOUSE_CI_LOGS_HOST=''
+ export CLICKHOUSE_CI_LOGS_PASSWORD=''
+ set -x
+ )
fi
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
From d1e50b1cbf3962844b745e8cc7518c287fe13e7e Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 06:48:19 +0200
Subject: [PATCH 20/51] Simplification
---
docker/test/performance-comparison/Dockerfile | 11 ++---------
1 file changed, 2 insertions(+), 9 deletions(-)
diff --git a/docker/test/performance-comparison/Dockerfile b/docker/test/performance-comparison/Dockerfile
index f3cab77bdbb..d31663f9071 100644
--- a/docker/test/performance-comparison/Dockerfile
+++ b/docker/test/performance-comparison/Dockerfile
@@ -1,14 +1,7 @@
# docker build -t clickhouse/performance-comparison .
-FROM ubuntu:22.04
-
-# ARG for quick switch to a given ubuntu mirror
-ARG apt_archive="http://archive.ubuntu.com"
-RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
-
-ENV LANG=C.UTF-8
-ENV TZ=Europe/Amsterdam
-RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
+ARG FROM_TAG=latest
+FROM clickhouse/test-base:$FROM_TAG
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
From 957045c70bd2957d76cefefbc92bce2c778e32f8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Sun, 13 Aug 2023 20:15:30 +0200
Subject: [PATCH 21/51] Maybe fix error
---
docker/test/performance-comparison/compare.sh | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index fe37df9ec75..09d33647f55 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -120,7 +120,7 @@ remote_servers:
export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
- ./setup_export_logs.sh "--port $LEFT_SERVER_PORT"
+ /setup_export_logs.sh "--port $LEFT_SERVER_PORT"
# Unset variables after use
export CONNECTION_PARAMETERS=''
From bd0e8792886ac2a02ad45eb2a48b935aa89fb5fe Mon Sep 17 00:00:00 2001
From: Filipp Ozinov
Date: Sun, 13 Aug 2023 22:48:35 +0400
Subject: [PATCH 22/51] Add note about skip indexes
Related to #53350
---
docs/en/engines/database-engines/materialized-mysql.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docs/en/engines/database-engines/materialized-mysql.md b/docs/en/engines/database-engines/materialized-mysql.md
index f7cc52e622e..b7e567c7b6c 100644
--- a/docs/en/engines/database-engines/materialized-mysql.md
+++ b/docs/en/engines/database-engines/materialized-mysql.md
@@ -190,7 +190,7 @@ These are the schema conversion manipulations you can do with table overrides fo
* Modify [column TTL](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#mergetree-column-ttl).
* Modify [column compression codec](/docs/en/sql-reference/statements/create/table.md/#codecs).
* Add [ALIAS columns](/docs/en/sql-reference/statements/create/table.md/#alias).
- * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes)
+ * Add [skipping indexes](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-data_skipping-indexes). Note that you need to enable the `use_skip_indexes_if_final` setting to make them work (MaterializedMySQL uses `SELECT ... FINAL` by default).
* Add [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections). Note that projection optimizations are
disabled when using `SELECT ... FINAL` (which MaterializedMySQL does by default), so their utility is limited here.
`INDEX ... TYPE hypothesis` as [described in the v21.12 blog post](https://clickhouse.com/blog/en/2021/clickhouse-v21.12-released/)
From 3c6af254bc1c1548052eb3c679d444f2835f90a3 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 14 Aug 2023 01:26:04 +0200
Subject: [PATCH 23/51] Remove something
---
docker/test/performance-comparison/compare.sh | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index 09d33647f55..ce8c4903c00 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -194,9 +194,9 @@ function restart
wait_for_server $RIGHT_SERVER_PORT $right_pid
echo right ok
- clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database != 'system'"
+ clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.build_options"
- clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database != 'system'"
+ clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.tables where database NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema')"
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.build_options"
# Check again that both servers we started are running -- this is important
@@ -390,14 +390,12 @@ function get_profiles
wait
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > left-query-log.tsv ||: &
- clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > left-query-thread-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > left-trace-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > left-addresses.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > left-metric-log.tsv ||: &
clickhouse-client --port $LEFT_SERVER_PORT --query "select * from system.asynchronous_metric_log format TSVWithNamesAndTypes" > left-async-metric-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_log where type in ('QueryFinish', 'ExceptionWhileProcessing') format TSVWithNamesAndTypes" > right-query-log.tsv ||: &
- clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.query_thread_log format TSVWithNamesAndTypes" > right-query-thread-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.trace_log format TSVWithNamesAndTypes" > right-trace-log.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select arrayJoin(trace) addr, concat(splitByChar('/', addressToLine(addr))[-1], '#', demangle(addressToSymbol(addr)) ) name from system.trace_log group by addr format TSVWithNamesAndTypes" > right-addresses.tsv ||: &
clickhouse-client --port $RIGHT_SERVER_PORT --query "select * from system.metric_log format TSVWithNamesAndTypes" > right-metric-log.tsv ||: &
From e11e7c62181f39b756d0a49c25c3ce1879aa02e5 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 14 Aug 2023 01:26:15 +0200
Subject: [PATCH 24/51] Fix typos
---
.../config/config.d/zzz-perf-comparison-tweaks-config.xml | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
index 39c29bb61ca..10a5916264a 100644
--- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
+++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
@@ -19,10 +19,11 @@
-
+ And so, to avoid extra memory references, switch *_log to the Memory engine.
+ -->
ENGINE = Memory
From e357702fd05b77ed01e43a9be38e1e6dfff393a8 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Mon, 14 Aug 2023 01:26:38 +0200
Subject: [PATCH 25/51] What will happen if I remove this?
---
.../zzz-perf-comparison-tweaks-config.xml | 26 -------------------
1 file changed, 26 deletions(-)
diff --git a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
index 10a5916264a..292665c4f68 100644
--- a/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
+++ b/docker/test/performance-comparison/config/config.d/zzz-perf-comparison-tweaks-config.xml
@@ -19,32 +19,6 @@
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
- ENGINE = Memory
-
-
-
1000000000
10
From 9d5fcbf52466d4268134f0e35fbba212e9d08c87 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 14 Aug 2023 09:16:08 +0000
Subject: [PATCH 26/51] Follow-up to #52695: Move tests to a more appropriate
place
---
.../test_from_system_tables.py | 12 --------
.../01161_information_schema.reference | 2 ++
.../0_stateless/01161_information_schema.sql | 29 +++++++++++++------
3 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/tests/integration/test_select_access_rights/test_from_system_tables.py b/tests/integration/test_select_access_rights/test_from_system_tables.py
index a4dfa662601..3cd6b90109c 100644
--- a/tests/integration/test_select_access_rights/test_from_system_tables.py
+++ b/tests/integration/test_select_access_rights/test_from_system_tables.py
@@ -190,15 +190,3 @@ def test_information_schema():
)
== "1\n"
)
- assert (
- node.query(
- "SELECT count() FROM information_schema.TABLES WHERE table_name='TABLES'"
- )
- == "2\n"
- )
- assert (
- node.query(
- "SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_name='tables'"
- )
- == "3\n"
- )
diff --git a/tests/queries/0_stateless/01161_information_schema.reference b/tests/queries/0_stateless/01161_information_schema.reference
index 4ec33a70be0..32ad3f16abc 100644
--- a/tests/queries/0_stateless/01161_information_schema.reference
+++ b/tests/queries/0_stateless/01161_information_schema.reference
@@ -33,3 +33,5 @@ default default v default v f 2 0 Float64 \N \N \N \N \N \N \N \N \N \N \N \N \
tmp tmp d 1 0 Date \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N Date
tmp tmp dt 2 0 DateTime \N \N \N \N \N 0 \N \N \N \N \N \N \N \N \N DateTime
tmp tmp dtms 3 0 DateTime64(3) \N \N \N \N \N 3 \N \N \N \N \N \N \N \N \N DateTime64(3)
+1
+1
diff --git a/tests/queries/0_stateless/01161_information_schema.sql b/tests/queries/0_stateless/01161_information_schema.sql
index ed77ef1c1c2..68a3b011ced 100644
--- a/tests/queries/0_stateless/01161_information_schema.sql
+++ b/tests/queries/0_stateless/01161_information_schema.sql
@@ -1,20 +1,31 @@
-show tables from information_schema;
+SHOW TABLES FROM information_schema;
SHOW TABLES FROM INFORMATION_SCHEMA;
-create table t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) engine=Memory;
-create view v (n Nullable(Int32), f Float64) as select n, f from t;
-create materialized view mv engine=Null as select * from system.one;
-create temporary table tmp (d Date, dt DateTime, dtms DateTime64(3));
+DROP TABLE IF EXISTS t;
+DROP VIEW IF EXISTS v;
+DROP VIEW IF EXISTS mv;
+DROP TABLE IF EXISTS tmp;
+
+CREATE TABLE t (n UInt64, f Float32, s String, fs FixedString(42), d Decimal(9, 6)) ENGINE=Memory;
+CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t;
+CREATE MATERIALIZED VIEW mv ENGINE=Null AS SELECT * FROM system.one;
+CREATE TEMPORARY TABLE tmp (d Date, dt DateTime, dtms DateTime64(3));
-- FIXME #28687
-select * from information_schema.schemata where schema_name ilike 'information_schema';
+SELECT * FROM information_schema.schemata WHERE schema_name ilike 'information_schema';
-- SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%';
-select * from information_schema.views where table_schema=currentDatabase();
+SELECT * FROM information_schema.views WHERE table_schema=currentDatabase();
-- SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (TABLE_SCHEMA=currentDatabase() OR TABLE_SCHEMA='') AND TABLE_NAME NOT LIKE '%inner%';
SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%';
-drop table t;
-drop view v;
+-- mixed upper/lowercase schema and table name:
+SELECT count() FROM information_schema.TABLES WHERE table_schema=currentDatabase() AND table_name = 't';
+SELECT count() FROM INFORMATION_SCHEMA.tables WHERE table_schema=currentDatabase() AND table_name = 't';
+SELECT count() FROM INFORMATION_schema.tables WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_DATABASE }
+SELECT count() FROM information_schema.taBLES WHERE table_schema=currentDatabase() AND table_name = 't'; -- { serverError UNKNOWN_TABLE }
+
drop view mv;
+drop view v;
+drop table t;
From 364b43f19ef4b266291642e5431d9c389bd9c151 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 14 Aug 2023 14:49:40 +0000
Subject: [PATCH 27/51] Cosmetics: add "cache" section marker
---
src/Interpreters/Context.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index 07e0cac79e3..b9311acc337 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -915,6 +915,8 @@ public:
void setSystemZooKeeperLogAfterInitializationIfNeeded();
+ /// --- Caches ------------------------------------------------------------------------------------------
+
/// Create a cache of uncompressed blocks of specified size. This can be done only once.
void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes);
std::shared_ptr getUncompressedCache() const;
@@ -962,6 +964,8 @@ public:
*/
void dropCaches() const;
+ /// -----------------------------------------------------------------------------------------------------
+
/// Settings for MergeTree background tasks stored in config.xml
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
From fed995734ec446175bdb009d85bfa883d439e02e Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 14 Aug 2023 14:52:37 +0000
Subject: [PATCH 28/51] Cosmetics: Move prefetch threadpool stuff out of
"caching" section
---
src/Interpreters/Context.cpp | 48 ++++++++++++++++--------------------
src/Interpreters/Context.h | 14 +++++------
2 files changed, 28 insertions(+), 34 deletions(-)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index b8a700f5311..a12cc70a507 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2315,32 +2315,6 @@ ThreadPool & Context::getLoadMarksThreadpool() const
return *shared->load_marks_threadpool;
}
-static size_t getPrefetchThreadpoolSizeFromConfig(const Poco::Util::AbstractConfiguration & config)
-{
- return config.getUInt(".prefetch_threadpool_pool_size", 100);
-}
-
-size_t Context::getPrefetchThreadpoolSize() const
-{
- const auto & config = getConfigRef();
- return getPrefetchThreadpoolSizeFromConfig(config);
-}
-
-ThreadPool & Context::getPrefetchThreadpool() const
-{
- const auto & config = getConfigRef();
-
- auto lock = getLock();
- if (!shared->prefetch_threadpool)
- {
- auto pool_size = getPrefetchThreadpoolSize();
- auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
- shared->prefetch_threadpool = std::make_unique(
- CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, pool_size, pool_size, queue_size);
- }
- return *shared->prefetch_threadpool;
-}
-
void Context::setIndexUncompressedCache(size_t max_size_in_bytes)
{
auto lock = getLock();
@@ -2351,7 +2325,6 @@ void Context::setIndexUncompressedCache(size_t max_size_in_bytes)
shared->index_uncompressed_cache = std::make_shared(max_size_in_bytes);
}
-
UncompressedCachePtr Context::getIndexUncompressedCache() const
{
auto lock = getLock();
@@ -2468,6 +2441,27 @@ void Context::dropCaches() const
shared->mmap_cache->reset();
}
+ThreadPool & Context::getPrefetchThreadpool() const
+{
+ const auto & config = getConfigRef();
+
+ auto lock = getLock();
+ if (!shared->prefetch_threadpool)
+ {
+ auto pool_size = getPrefetchThreadpoolSize();
+ auto queue_size = config.getUInt(".prefetch_threadpool_queue_size", 1000000);
+ shared->prefetch_threadpool = std::make_unique(
+ CurrentMetrics::IOPrefetchThreads, CurrentMetrics::IOPrefetchThreadsActive, pool_size, pool_size, queue_size);
+ }
+ return *shared->prefetch_threadpool;
+}
+
+size_t Context::getPrefetchThreadpoolSize() const
+{
+ const auto & config = getConfigRef();
+ return config.getUInt(".prefetch_threadpool_pool_size", 100);
+}
+
BackgroundSchedulePool & Context::getBufferFlushSchedulePool() const
{
auto lock = getLock();
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index b9311acc337..a2f06f4fc14 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -928,13 +928,6 @@ public:
void dropMarkCache() const;
ThreadPool & getLoadMarksThreadpool() const;
- ThreadPool & getPrefetchThreadpool() const;
-
- /// Note: prefetchThreadpool is different from threadpoolReader:
- /// its tasks wait for marks to be loaded and then schedule a prefetch
- /// by putting a read task into threadpoolReader.
- size_t getPrefetchThreadpoolSize() const;
-
/// Create a cache of index uncompressed blocks of specified size. This can be done only once.
void setIndexUncompressedCache(size_t max_size_in_bytes);
std::shared_ptr getIndexUncompressedCache() const;
@@ -966,6 +959,13 @@ public:
/// -----------------------------------------------------------------------------------------------------
+ ThreadPool & getPrefetchThreadpool() const;
+
+ /// Note: prefetchThreadpool is different from threadpoolReader:
+ /// its tasks wait for marks to be loaded and then schedule a prefetch
+ /// by putting a read task into threadpoolReader.
+ size_t getPrefetchThreadpoolSize() const;
+
/// Settings for MergeTree background tasks stored in config.xml
BackgroundTaskSchedulingSettings getBackgroundProcessingTaskSchedulingSettings() const;
BackgroundTaskSchedulingSettings getBackgroundMoveTaskSchedulingSettings() const;
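getPrefetchThreadpool(), relocated above, is a create-on-first-use member: take the shared-state lock, construct the pool from config-derived sizes if it does not exist yet, and return a reference. A stripped-down sketch of that pattern with a stand-in pool type (the real code uses ClickHouse's ThreadPool plus CurrentMetrics counters):

```cpp
#include <cstddef>
#include <memory>
#include <mutex>

struct PoolSketch
{
    PoolSketch(size_t /*max_threads*/, size_t /*queue_size*/) {} // stand-in for ThreadPool
};

class ContextSketch
{
    mutable std::mutex mutex;                         // guards lazily created shared members
    mutable std::unique_ptr<PoolSketch> prefetch_threadpool;

public:
    PoolSketch & getPrefetchThreadpool(size_t pool_size, size_t queue_size) const
    {
        std::lock_guard lock(mutex);
        if (!prefetch_threadpool)                     // construct on first use, exactly once
            prefetch_threadpool = std::make_unique<PoolSketch>(pool_size, queue_size);
        return *prefetch_threadpool;
    }
};
```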
From def9bb8d92d6a7dad413d8a3a88eec7e74d5296d Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 26 Jun 2023 17:11:25 +0000
Subject: [PATCH 29/51] Exclude the query cache from dropCaches()
---
src/Interpreters/Context.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index a12cc70a507..1d2774c4d77 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2434,11 +2434,10 @@ void Context::dropCaches() const
if (shared->index_mark_cache)
shared->index_mark_cache->reset();
- if (shared->query_cache)
- shared->query_cache->reset();
-
if (shared->mmap_cache)
shared->mmap_cache->reset();
+
+ /// Intentionally not dropping the query cache which is transactionally inconsistent by design.
}
ThreadPool & Context::getPrefetchThreadpool() const
From 646c51ea40038a3d5e6f2f275732c9fa4507ddbd Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 26 Jun 2023 17:22:15 +0000
Subject: [PATCH 30/51] Cosmetics: Move methods into a more logical order.
---
src/Interpreters/Context.cpp | 46 ++++++++++++++++++------------------
1 file changed, 23 insertions(+), 23 deletions(-)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 1d2774c4d77..5a7ca4a7221 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2363,6 +2363,29 @@ void Context::dropIndexMarkCache() const
shared->index_mark_cache->reset();
}
+void Context::setMMappedFileCache(size_t cache_size_in_num_entries)
+{
+ auto lock = getLock();
+
+ if (shared->mmap_cache)
+ throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has already been created.");
+
+ shared->mmap_cache = std::make_shared(cache_size_in_num_entries);
+}
+
+MMappedFileCachePtr Context::getMMappedFileCache() const
+{
+ auto lock = getLock();
+ return shared->mmap_cache;
+}
+
+void Context::dropMMappedFileCache() const
+{
+ auto lock = getLock();
+ if (shared->mmap_cache)
+ shared->mmap_cache->reset();
+}
+
void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
@@ -2394,29 +2417,6 @@ void Context::dropQueryCache() const
shared->query_cache->reset();
}
-void Context::setMMappedFileCache(size_t cache_size_in_num_entries)
-{
- auto lock = getLock();
-
- if (shared->mmap_cache)
- throw Exception(ErrorCodes::LOGICAL_ERROR, "Mapped file cache has already been created.");
-
- shared->mmap_cache = std::make_shared(cache_size_in_num_entries);
-}
-
-MMappedFileCachePtr Context::getMMappedFileCache() const
-{
- auto lock = getLock();
- return shared->mmap_cache;
-}
-
-void Context::dropMMappedFileCache() const
-{
- auto lock = getLock();
- if (shared->mmap_cache)
- shared->mmap_cache->reset();
-}
-
void Context::dropCaches() const
{
From 3472a928d804c7fea0a3ddd6b356abf91bdbf630 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 10:02:41 +0000
Subject: [PATCH 31/51] Cosmetics: Rename dropCache() to clearCaches()
"dropCache()" is a bad name, it sounds like that the cache can no longer
be used after the call. Not true, we only "clear" the cache.
---
src/Interpreters/Context.cpp | 15 +++++++--------
src/Interpreters/Context.h | 14 +++++++-------
src/Interpreters/InterpreterDropQuery.cpp | 4 ++--
src/Interpreters/InterpreterSystemQuery.cpp | 12 ++++++------
src/Storages/MergeTree/MergeTreeData.cpp | 8 ++++----
src/Storages/StorageLog.cpp | 2 +-
src/Storages/StorageMergeTree.cpp | 2 +-
src/Storages/StorageStripeLog.cpp | 2 +-
8 files changed, 29 insertions(+), 30 deletions(-)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 5a7ca4a7221..39284b4c1b2 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2269,7 +2269,7 @@ UncompressedCachePtr Context::getUncompressedCache() const
}
-void Context::dropUncompressedCache() const
+void Context::clearUncompressedCache() const
{
auto lock = getLock();
if (shared->uncompressed_cache)
@@ -2293,7 +2293,7 @@ MarkCachePtr Context::getMarkCache() const
return shared->mark_cache;
}
-void Context::dropMarkCache() const
+void Context::clearMarkCache() const
{
auto lock = getLock();
if (shared->mark_cache)
@@ -2332,7 +2332,7 @@ UncompressedCachePtr Context::getIndexUncompressedCache() const
}
-void Context::dropIndexUncompressedCache() const
+void Context::clearIndexUncompressedCache() const
{
auto lock = getLock();
if (shared->index_uncompressed_cache)
@@ -2356,7 +2356,7 @@ MarkCachePtr Context::getIndexMarkCache() const
return shared->index_mark_cache;
}
-void Context::dropIndexMarkCache() const
+void Context::clearIndexMarkCache() const
{
auto lock = getLock();
if (shared->index_mark_cache)
@@ -2379,7 +2379,7 @@ MMappedFileCachePtr Context::getMMappedFileCache() const
return shared->mmap_cache;
}
-void Context::dropMMappedFileCache() const
+void Context::clearMMappedFileCache() const
{
auto lock = getLock();
if (shared->mmap_cache)
@@ -2410,15 +2410,14 @@ QueryCachePtr Context::getQueryCache() const
return shared->query_cache;
}
-void Context::dropQueryCache() const
+void Context::clearQueryCache() const
{
auto lock = getLock();
if (shared->query_cache)
shared->query_cache->reset();
}
-
-void Context::dropCaches() const
+void Context::clearCaches() const
{
auto lock = getLock();
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index a2f06f4fc14..c725d032bbe 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -920,34 +920,34 @@ public:
/// Create a cache of uncompressed blocks of specified size. This can be done only once.
void setUncompressedCache(const String & uncompressed_cache_policy, size_t max_size_in_bytes);
std::shared_ptr getUncompressedCache() const;
- void dropUncompressedCache() const;
+ void clearUncompressedCache() const;
/// Create a cache of marks of specified size. This can be done only once.
void setMarkCache(const String & mark_cache_policy, size_t cache_size_in_bytes);
std::shared_ptr getMarkCache() const;
- void dropMarkCache() const;
+ void clearMarkCache() const;
ThreadPool & getLoadMarksThreadpool() const;
/// Create a cache of index uncompressed blocks of specified size. This can be done only once.
void setIndexUncompressedCache(size_t max_size_in_bytes);
std::shared_ptr getIndexUncompressedCache() const;
- void dropIndexUncompressedCache() const;
+ void clearIndexUncompressedCache() const;
/// Create a cache of index marks of specified size. This can be done only once.
void setIndexMarkCache(size_t cache_size_in_bytes);
std::shared_ptr getIndexMarkCache() const;
- void dropIndexMarkCache() const;
+ void clearIndexMarkCache() const;
/// Create a cache of mapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
void setMMappedFileCache(size_t cache_size_in_num_entries);
std::shared_ptr getMMappedFileCache() const;
- void dropMMappedFileCache() const;
+ void clearMMappedFileCache() const;
/// Create a cache of query results for statements which run repeatedly.
void setQueryCache(const Poco::Util::AbstractConfiguration & config);
void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
std::shared_ptr getQueryCache() const;
- void dropQueryCache() const;
+ void clearQueryCache() const;
/** Clear the caches of the uncompressed blocks and marks.
* This is usually done when renaming tables, changing the type of columns, deleting a table.
@@ -955,7 +955,7 @@ public:
* (when deleting a table - it is necessary, since in its place another can appear)
* const - because the change in the cache is not considered significant.
*/
- void dropCaches() const;
+ void clearCaches() const;
/// -----------------------------------------------------------------------------------------------------
diff --git a/src/Interpreters/InterpreterDropQuery.cpp b/src/Interpreters/InterpreterDropQuery.cpp
index ed927d550a8..ea4dfc4df65 100644
--- a/src/Interpreters/InterpreterDropQuery.cpp
+++ b/src/Interpreters/InterpreterDropQuery.cpp
@@ -247,10 +247,10 @@ BlockIO InterpreterDropQuery::executeToTableImpl(ContextPtr context_, ASTDropQue
DatabaseCatalog::instance().removeDependencies(table_id, check_ref_deps, check_loading_deps, is_drop_or_detach_database);
database->dropTable(context_, table_id.table_name, query.sync);
- /// We have to drop mmapio cache when dropping table from Ordinary database
+ /// We have to clear mmapio cache when dropping table from Ordinary database
/// to avoid reading old data if new table with the same name is created
if (database->getUUID() == UUIDHelpers::Nil)
- context_->dropMMappedFileCache();
+ context_->clearMMappedFileCache();
}
db = database;
diff --git a/src/Interpreters/InterpreterSystemQuery.cpp b/src/Interpreters/InterpreterSystemQuery.cpp
index c0341330846..9c8bc256fa2 100644
--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@@ -319,27 +319,27 @@ BlockIO InterpreterSystemQuery::execute()
}
case Type::DROP_MARK_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE);
- system_context->dropMarkCache();
+ system_context->clearMarkCache();
break;
case Type::DROP_UNCOMPRESSED_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE);
- system_context->dropUncompressedCache();
+ system_context->clearUncompressedCache();
break;
case Type::DROP_INDEX_MARK_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_MARK_CACHE);
- system_context->dropIndexMarkCache();
+ system_context->clearIndexMarkCache();
break;
case Type::DROP_INDEX_UNCOMPRESSED_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_UNCOMPRESSED_CACHE);
- system_context->dropIndexUncompressedCache();
+ system_context->clearIndexUncompressedCache();
break;
case Type::DROP_MMAP_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_MMAP_CACHE);
- system_context->dropMMappedFileCache();
+ system_context->clearMMappedFileCache();
break;
case Type::DROP_QUERY_CACHE:
getContext()->checkAccess(AccessType::SYSTEM_DROP_QUERY_CACHE);
- getContext()->dropQueryCache();
+ getContext()->clearQueryCache();
break;
#if USE_EMBEDDED_COMPILER
case Type::DROP_COMPILED_EXPRESSION_CACHE:
diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp
index db0a7b34d7e..7b1fbd0deb0 100644
--- a/src/Storages/MergeTree/MergeTreeData.cpp
+++ b/src/Storages/MergeTree/MergeTreeData.cpp
@@ -2328,7 +2328,7 @@ size_t MergeTreeData::clearOldPartsFromFilesystem(bool force)
removePartsFinally(parts_to_remove);
/// This is needed to close files to avoid they reside on disk after being deleted.
/// NOTE: we can drop files from cache more selectively but this is good enough.
- getContext()->dropMMappedFileCache();
+ getContext()->clearMMappedFileCache();
return parts_to_remove.size();
}
@@ -2799,7 +2799,7 @@ void MergeTreeData::rename(const String & new_table_path, const StorageID & new_
}
if (!getStorageID().hasUUID())
- getContext()->dropCaches();
+ getContext()->clearCaches();
/// TODO: remove const_cast
for (const auto & part : data_parts_by_info)
@@ -2840,9 +2840,9 @@ void MergeTreeData::dropAllData()
}
/// Tables in atomic databases have UUID and stored in persistent locations.
- /// No need to drop caches (that are keyed by filesystem path) because collision is not possible.
+ /// No need to clear caches (that are keyed by filesystem path) because collision is not possible.
if (!getStorageID().hasUUID())
- getContext()->dropCaches();
+ getContext()->clearCaches();
/// Removing of each data part before recursive removal of directory is to speed-up removal, because there will be less number of syscalls.
NameSet part_names_failed;
diff --git a/src/Storages/StorageLog.cpp b/src/Storages/StorageLog.cpp
index 87aa71f3e8d..90cf55e53b2 100644
--- a/src/Storages/StorageLog.cpp
+++ b/src/Storages/StorageLog.cpp
@@ -777,7 +777,7 @@ void StorageLog::truncate(const ASTPtr &, const StorageMetadataPtr &, ContextPtr
num_marks_saved = 0;
total_rows = 0;
total_bytes = 0;
- getContext()->dropMMappedFileCache();
+ getContext()->clearMMappedFileCache();
}
diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp
index ad9013d9f13..dc23e3ea635 100644
--- a/src/Storages/StorageMergeTree.cpp
+++ b/src/Storages/StorageMergeTree.cpp
@@ -2001,7 +2001,7 @@ PartitionCommandsResultInfo StorageMergeTree::attachPartition(
}
/// New parts with other data may appear in place of deleted parts.
- local_context->dropCaches();
+ local_context->clearCaches();
return results;
}
diff --git a/src/Storages/StorageStripeLog.cpp b/src/Storages/StorageStripeLog.cpp
index 0bfef5ed5e5..83336cbd22e 100644
--- a/src/Storages/StorageStripeLog.cpp
+++ b/src/Storages/StorageStripeLog.cpp
@@ -426,7 +426,7 @@ void StorageStripeLog::truncate(const ASTPtr &, const StorageMetadataPtr &, Cont
num_indices_saved = 0;
total_rows = 0;
total_bytes = 0;
- getContext()->dropMMappedFileCache();
+ getContext()->clearMMappedFileCache();
}
From 54e54aed7489915a97314ac54c16c398a7cd0efc Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 10:19:40 +0000
Subject: [PATCH 32/51] Cosmetics: Introduce constants for default cache sizes
and policies
---
programs/local/LocalServer.cpp | 31 ++++++-------------
programs/server/Server.cpp | 18 +++--------
src/Common/CacheBase.h | 5 +--
src/Core/Defines.h | 16 ++++++++++
src/Core/ServerSettings.h | 15 ++++-----
src/Interpreters/Cache/QueryCache.cpp | 8 ++---
src/Interpreters/Context.cpp | 14 ++++-----
.../JIT/CompiledExpressionCache.h | 2 +-
8 files changed, 54 insertions(+), 55 deletions(-)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 587c88a2745..b5c7c790a83 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -655,43 +655,32 @@ void LocalServer::processConfig()
/// There is no need for concurrent queries, so override max_concurrent_queries.
global_context->getProcessList().setMaxSize(0);
- /// Size of cache for uncompressed blocks. Zero means disabled.
- String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", "");
- size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", 0);
+ String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
+ size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
if (uncompressed_cache_size)
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
- /// Size of cache for marks (index of MergeTree family of tables).
- String mark_cache_policy = config().getString("mark_cache_policy", "");
- size_t mark_cache_size = config().getUInt64("mark_cache_size", 5368709120);
+ String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
+ size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
if (mark_cache_size)
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
- /// Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.
- size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", 0);
+ size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
- /// Size of cache for index marks (index of MergeTree skip indices).
- size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", 0);
+ size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
- /// A cache for mmapped files.
- size_t mmap_cache_size = config().getUInt64("mmap_cache_size", 1000); /// The choice of default is arbitrary.
+ size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
#if USE_EMBEDDED_COMPILER
- /// 128 MB
- constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
- size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
-
- constexpr size_t compiled_expression_cache_elements_size_default = 10000;
- size_t compiled_expression_cache_elements_size
- = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
-
- CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
+ size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
+ size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
+ CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// NOTE: it is important to apply any overrides before
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index 6237186e3a6..e5604ca422d 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1488,13 +1488,11 @@ try
size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
- LOG_INFO(log, "Uncompressed cache policy name {}", uncompressed_cache_policy);
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
- LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory",
- formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
@@ -1520,8 +1518,7 @@ try
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
- LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory",
- formatReadableSizeWithBinarySuffix(mark_cache_size));
+ LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
@@ -1538,14 +1535,9 @@ try
global_context->setQueryCache(config());
#if USE_EMBEDDED_COMPILER
- /// 128 MB
- constexpr size_t compiled_expression_cache_size_default = 1024 * 1024 * 128;
- size_t compiled_expression_cache_size = config().getUInt64("compiled_expression_cache_size", compiled_expression_cache_size_default);
-
- constexpr size_t compiled_expression_cache_elements_size_default = 10000;
- size_t compiled_expression_cache_elements_size = config().getUInt64("compiled_expression_cache_elements_size", compiled_expression_cache_elements_size_default);
-
- CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
+ size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
+ size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
+ CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_max_size_in_bytes, compiled_expression_cache_max_elements);
#endif
/// Set path for format schema files
diff --git a/src/Common/CacheBase.h b/src/Common/CacheBase.h
index 84cbd5b5c6f..aa7b3ea10cf 100644
--- a/src/Common/CacheBase.h
+++ b/src/Common/CacheBase.h
@@ -51,10 +51,11 @@ public:
{
auto on_weight_loss_function = [&](size_t weight_loss) { onRemoveOverflowWeightLoss(weight_loss); };
- static constexpr std::string_view default_cache_policy = "SLRU";
-
if (cache_policy_name.empty())
+ {
+ static constexpr auto default_cache_policy = "SLRU";
cache_policy_name = default_cache_policy;
+ }
if (cache_policy_name == "LRU")
{
diff --git a/src/Core/Defines.h b/src/Core/Defines.h
index efe14b93a3d..3039f0a67cf 100644
--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@@ -1,6 +1,7 @@
#pragma once
#include <base/types.h>
+#include <base/unit.h>
#define DBMS_DEFAULT_PORT 9000
#define DBMS_DEFAULT_SECURE_PORT 9440
@@ -64,6 +65,21 @@
/// Max depth of hierarchical dictionary
#define DBMS_HIERARCHICAL_DICTIONARY_MAX_DEPTH 1000
+/// Default maximum (total and entry) sizes and policies of various caches
+static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
+static constexpr auto DEFAULT_UNCOMPRESSED_CACHE_POLICY = "SLRU";
+static constexpr auto DEFAULT_MARK_CACHE_MAX_SIZE = 5368_MiB;
+static constexpr auto DEFAULT_MARK_CACHE_POLICY = "SLRU";
+static constexpr auto DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE = 0_MiB;
+static constexpr auto DEFAULT_INDEX_MARK_CACHE_MAX_SIZE = 0_MiB;
+static constexpr auto DEFAULT_MMAP_CACHE_MAX_SIZE = 1_KiB; /// chosen by rolling dice
+static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE = 128_MiB;
+static constexpr auto DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES = 10'000;
+static constexpr auto DEFAULT_QUERY_CACHE_MAX_SIZE = 1_GiB;
+static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRIES = 1024uz;
+static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES = 1_MiB;
+static constexpr auto DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS = 30'000'000uz;
+
/// Query profiler cannot work with sanitizers.
/// Sanitizers are using quick "frame walking" stack unwinding (this implies -fno-omit-frame-pointer)
/// And they do unwinding frequently (on every malloc/free, thread/mutex operations, etc).
diff --git a/src/Core/ServerSettings.h b/src/Core/ServerSettings.h
index 7678e8c3f24..f759cd150a8 100644
--- a/src/Core/ServerSettings.h
+++ b/src/Core/ServerSettings.h
@@ -2,6 +2,7 @@
#include <Core/BaseSettings.h>
+#include <Core/Defines.h>
namespace Poco::Util
@@ -56,13 +57,13 @@ namespace DB
M(UInt64, max_concurrent_select_queries, 0, "Limit on total number of concurrently select queries. Zero means Unlimited.", 0) \
\
M(Double, cache_size_to_ram_max_ratio, 0.5, "Set cache size to RAM max ratio. Allows to lower cache size on low-memory systems.", 0) \
- M(String, uncompressed_cache_policy, "SLRU", "Uncompressed cache policy name.", 0) \
- M(UInt64, uncompressed_cache_size, 0, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
- M(UInt64, mark_cache_size, 5368709120, "Size of cache for marks (index of MergeTree family of tables).", 0) \
- M(String, mark_cache_policy, "SLRU", "Mark cache policy name.", 0) \
- M(UInt64, index_uncompressed_cache_size, 0, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \
- M(UInt64, index_mark_cache_size, 0, "Size of cache for index marks. Zero means disabled.", 0) \
- M(UInt64, mmap_cache_size, 1000, "A cache for mmapped files.", 0) /* The choice of default is arbitrary. */ \
+ M(String, uncompressed_cache_policy, DEFAULT_UNCOMPRESSED_CACHE_POLICY, "Uncompressed cache policy name.", 0) \
+ M(UInt64, uncompressed_cache_size, DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks. Zero means disabled.", 0) \
+ M(UInt64, mark_cache_size, DEFAULT_MARK_CACHE_MAX_SIZE, "Size of cache for marks (index of MergeTree family of tables).", 0) \
+ M(String, mark_cache_policy, DEFAULT_MARK_CACHE_POLICY, "Mark cache policy name.", 0) \
+ M(UInt64, index_uncompressed_cache_size, DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE, "Size of cache for uncompressed blocks of MergeTree indices. Zero means disabled.", 0) \
+ M(UInt64, index_mark_cache_size, DEFAULT_INDEX_MARK_CACHE_MAX_SIZE, "Size of cache for index marks. Zero means disabled.", 0) \
+ M(UInt64, mmap_cache_size, DEFAULT_MMAP_CACHE_MAX_SIZE, "A cache for mmapped files.", 0) \
\
M(Bool, disable_internal_dns_cache, false, "Disable internal DNS caching at all.", 0) \
M(Int32, dns_cache_update_period, 15, "Internal DNS cache update period in seconds.", 0) \
diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
index 5982a5ade50..182a186d4e1 100644
--- a/src/Interpreters/Cache/QueryCache.cpp
+++ b/src/Interpreters/Cache/QueryCache.cpp
@@ -531,14 +531,14 @@ void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & c
{
std::lock_guard lock(mutex);
- size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", 1_GiB);
+ size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
cache.setMaxSize(max_size_in_bytes);
- size_t max_entries = config.getUInt64("query_cache.max_entries", 1024);
+ size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
cache.setMaxCount(max_entries);
- max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", 1_MiB);
- max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_rows_in_rows", 30'000'000);
+ max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
+ max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_size_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
}
}
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 39284b4c1b2..b77b4b79cb9 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -245,9 +245,9 @@ struct ContextSharedPart : boost::noncopyable
std::optional backups_worker;
- String default_profile_name; /// Default profile name used for default values.
- String system_profile_name; /// Profile used by system processes
- String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
+ String default_profile_name; /// Default profile name used for default values.
+ String system_profile_name; /// Profile used by system processes
+ String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
std::unique_ptr access_control;
mutable ResourceManagerPtr resource_manager;
mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks.
@@ -261,11 +261,11 @@ struct ContextSharedPart : boost::noncopyable
ProcessList process_list; /// Executing queries at the moment.
SessionTracker session_tracker;
GlobalOvercommitTracker global_overcommit_tracker;
- MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
- MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree)
+ MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
+ MovesList moves_list; /// The list of executing moves (for (Replicated)?MergeTree)
ReplicatedFetchList replicated_fetch_list;
- ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections.
- InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
+ ConfigurationPtr users_config; /// Config with the users, profiles and quotas sections.
+ InterserverIOHandler interserver_io_handler; /// Handler for interserver communication.
mutable std::unique_ptr buffer_flush_schedule_pool; /// A thread pool that can do background flush for Buffer tables.
mutable std::unique_ptr schedule_pool; /// A thread pool that can run different jobs in background (used in replicated tables)
diff --git a/src/Interpreters/JIT/CompiledExpressionCache.h b/src/Interpreters/JIT/CompiledExpressionCache.h
index 21f7c67226c..8354b33bae9 100644
--- a/src/Interpreters/JIT/CompiledExpressionCache.h
+++ b/src/Interpreters/JIT/CompiledExpressionCache.h
@@ -19,7 +19,7 @@ public:
size_t getCompiledExpressionSize() const { return compiled_expression_size; }
- virtual ~CompiledExpressionCacheEntry() {}
+ virtual ~CompiledExpressionCacheEntry() = default;
private:
From 9e4b4541bc34a58bbc564d63fec4e0710eb45e33 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 14 Aug 2023 15:13:54 +0000
Subject: [PATCH 33/51] Cosmetics: More aesthetic code
---
src/Interpreters/Context.cpp | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index b77b4b79cb9..0cc3750dc26 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -250,15 +250,15 @@ struct ContextSharedPart : boost::noncopyable
String buffer_profile_name; /// Profile used by Buffer engine for flushing to the underlying
std::unique_ptr access_control;
mutable ResourceManagerPtr resource_manager;
- mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks.
- mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files.
- mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache.
- mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache.
- mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices.
- mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices.
- mutable QueryCachePtr query_cache; /// Cache of query results.
- mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
- ProcessList process_list; /// Executing queries at the moment.
+ mutable UncompressedCachePtr uncompressed_cache; /// The cache of decompressed blocks.
+ mutable MarkCachePtr mark_cache; /// Cache of marks in compressed files.
+ mutable std::unique_ptr load_marks_threadpool; /// Threadpool for loading marks cache.
+ mutable std::unique_ptr prefetch_threadpool; /// Threadpool for loading marks cache.
+ mutable UncompressedCachePtr index_uncompressed_cache; /// The cache of decompressed blocks for MergeTree indices.
+ mutable MarkCachePtr index_mark_cache; /// Cache of marks in compressed files of MergeTree indices.
+ mutable QueryCachePtr query_cache; /// Cache of query results.
+ mutable MMappedFileCachePtr mmap_cache; /// Cache of mmapped files to avoid frequent open/map/unmap/close and to reuse from several threads.
+ ProcessList process_list; /// Executing queries at the moment.
SessionTracker session_tracker;
GlobalOvercommitTracker global_overcommit_tracker;
MergeList merge_list; /// The list of executable merge (for (Replicated)?MergeTree)
From f126e54b0d76e21f93104d42b3620dfa36fb436a Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 18:33:48 +0000
Subject: [PATCH 34/51] Respect max_cache_size for all caches
So far, max_cache_size was only respected in Server.cpp, and in this
file only for the mark cache and the uncompressed cache. This commit
respects it also for the other cache types in Server.cpp, as well as
for all caches in LocalServer.cpp.
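
For illustration, every cache now goes through the same clamp-then-log
pattern. A minimal sketch as a hypothetical helper (the diff below
inlines this per cache; `clampCacheSize` is not a real function in the
tree):

    /// Clamp a configured cache size against the RAM-derived cap and log the adjustment.
    static size_t clampCacheSize(size_t cache_size, size_t max_cache_size, Poco::Logger * log)
    {
        if (cache_size > max_cache_size)
        {
            cache_size = max_cache_size;
            LOG_INFO(log, "Lowered cache size to {} because the system has limited RAM",
                formatReadableSizeWithBinarySuffix(cache_size));
        }
        return cache_size;
    }

    /// The cap itself is derived once from physical RAM and the configured ratio:
    ///     const size_t memory_amount = getMemoryAmount();
    ///     const size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);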
---
programs/local/LocalServer.cpp | 31 +++++++++++++++++++++++++++++
programs/server/Server.cpp | 36 +++++++++++++++++++++++++---------
2 files changed, 58 insertions(+), 9 deletions(-)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index b5c7c790a83..d97a4760f49 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -2,6 +2,8 @@
#include
#include
+#include <base/getMemoryAmount.h>
+#include <Common/formatReadable.h>
#include
#include
#include
@@ -655,25 +657,54 @@ void LocalServer::processConfig()
/// There is no need for concurrent queries, override max_concurrent_queries.
global_context->getProcessList().setMaxSize(0);
+ const size_t memory_amount = getMemoryAmount();
+ const double cache_size_to_ram_max_ratio = config().getDouble("cache_size_to_ram_max_ratio", 0.5);
+ const size_t max_cache_size = static_cast<size_t>(memory_amount * cache_size_to_ram_max_ratio);
+
String uncompressed_cache_policy = config().getString("uncompressed_cache_policy", DEFAULT_UNCOMPRESSED_CACHE_POLICY);
size_t uncompressed_cache_size = config().getUInt64("uncompressed_cache_size", DEFAULT_UNCOMPRESSED_CACHE_MAX_SIZE);
+ if (uncompressed_cache_size > max_cache_size)
+ {
+ uncompressed_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
if (uncompressed_cache_size)
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
+ if (mark_cache_size > max_cache_size)
+ {
+ mark_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
+ }
if (mark_cache_size)
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
size_t index_uncompressed_cache_size = config().getUInt64("index_uncompressed_cache_size", DEFAULT_INDEX_UNCOMPRESSED_CACHE_MAX_SIZE);
+ if (index_uncompressed_cache_size > max_cache_size)
+ {
+ index_uncompressed_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
if (index_uncompressed_cache_size)
global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
size_t index_mark_cache_size = config().getUInt64("index_mark_cache_size", DEFAULT_INDEX_MARK_CACHE_MAX_SIZE);
+ if (index_mark_cache_size > max_cache_size)
+ {
+ index_mark_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
if (index_mark_cache_size)
global_context->setIndexMarkCache(index_mark_cache_size);
size_t mmap_cache_size = config().getUInt64("mmap_cache_size", DEFAULT_MMAP_CACHE_MAX_SIZE);
+ if (mmap_cache_size > max_cache_size)
+ {
+ mmap_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index e5604ca422d..d5ae2513fa0 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -29,6 +29,7 @@
#include
#include
#include
+#include
#include
#include
#include
@@ -658,7 +659,7 @@ try
global_context->addWarningMessage("Server was built with sanitizer. It will work slowly.");
#endif
- const auto memory_amount = getMemoryAmount();
+ const size_t memory_amount = getMemoryAmount();
LOG_INFO(log, "Available RAM: {}; physical cores: {}; logical cores: {}.",
formatReadableSizeWithBinarySuffix(memory_amount),
@@ -1485,14 +1486,14 @@ try
/// Set up caches.
- size_t max_cache_size = static_cast(memory_amount * server_settings.cache_size_to_ram_max_ratio);
+ const size_t max_cache_size = static_cast<size_t>(memory_amount * server_settings.cache_size_to_ram_max_ratio);
String uncompressed_cache_policy = server_settings.uncompressed_cache_policy;
size_t uncompressed_cache_size = server_settings.uncompressed_cache_size;
if (uncompressed_cache_size > max_cache_size)
{
uncompressed_cache_size = max_cache_size;
- LOG_INFO(log, "Uncompressed cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ LOG_INFO(log, "Lowered uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
}
global_context->setUncompressedCache(uncompressed_cache_policy, uncompressed_cache_size);
@@ -1511,27 +1512,44 @@ try
server_settings.async_insert_queue_flush_on_shutdown));
}
- size_t mark_cache_size = server_settings.mark_cache_size;
String mark_cache_policy = server_settings.mark_cache_policy;
+ size_t mark_cache_size = server_settings.mark_cache_size;
if (!mark_cache_size)
LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
- LOG_INFO(log, "Mark cache size was lowered to {} because the system has low amount of memory", formatReadableSizeWithBinarySuffix(mark_cache_size));
+ LOG_INFO(log, "Lowered mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(mark_cache_size));
}
global_context->setMarkCache(mark_cache_policy, mark_cache_size);
- if (server_settings.index_uncompressed_cache_size)
+ size_t index_uncompressed_cache_size = server_settings.index_uncompressed_cache_size;
+ if (index_uncompressed_cache_size > max_cache_size)
+ {
+ index_uncompressed_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered index uncompressed cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
+ if (index_uncompressed_cache_size)
- global_context->setIndexUncompressedCache(server_settings.index_uncompressed_cache_size);
+ global_context->setIndexUncompressedCache(index_uncompressed_cache_size);
- if (server_settings.index_mark_cache_size)
+ size_t index_mark_cache_size = server_settings.index_mark_cache_size;
+ if (index_mark_cache_size > max_cache_size)
+ {
+ index_mark_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered index mark cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
+ if (index_mark_cache_size)
- global_context->setIndexMarkCache(server_settings.index_mark_cache_size);
+ global_context->setIndexMarkCache(index_mark_cache_size);
- if (server_settings.mmap_cache_size)
+ size_t mmap_cache_size = server_settings.mmap_cache_size;
+ if (mmap_cache_size > max_cache_size)
+ {
+ mmap_cache_size = max_cache_size;
+ LOG_INFO(log, "Lowered mmap file cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
+ if (mmap_cache_size)
- global_context->setMMappedFileCache(server_settings.mmap_cache_size);
+ global_context->setMMappedFileCache(mmap_cache_size);
- /// A cache for query results.
global_context->setQueryCache(config());
#if USE_EMBEDDED_COMPILER
From bb53d635dfead645fc246b8f38bef99239290014 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 18:47:03 +0000
Subject: [PATCH 35/51] More consistency: Run same mark cache size check in
ch-local
Server.cpp had a check that the configured mark cache size is not 0. Run
the same check in LocalServer.cpp.
---
programs/local/LocalServer.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index d97a4760f49..5e2c8f927bc 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -673,6 +673,8 @@ void LocalServer::processConfig()
String mark_cache_policy = config().getString("mark_cache_policy", DEFAULT_MARK_CACHE_POLICY);
size_t mark_cache_size = config().getUInt64("mark_cache_size", DEFAULT_MARK_CACHE_MAX_SIZE);
+ if (!mark_cache_size)
+ LOG_ERROR(log, "Too low mark cache size will lead to severe performance degradation.");
if (mark_cache_size > max_cache_size)
{
mark_cache_size = max_cache_size;
From ed90463888742f60939650480104b96242f19996 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 18:55:09 +0000
Subject: [PATCH 36/51] Add comment
---
programs/local/LocalServer.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 5e2c8f927bc..0dea7e8e643 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -710,6 +710,8 @@ void LocalServer::processConfig()
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
+ /// not initializing the query cache in clickhouse-local
+
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
size_t compiled_expression_cache_max_elements = config().getUInt64("compiled_expression_cache_elements_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_ENTRIES);
From 7ed5166c8aca62e7b9397b8b108afa6b26098bde Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 29 Jun 2023 10:56:32 +0000
Subject: [PATCH 37/51] Cosmetics: Make re-cfg of query cache similar to re-cfg
of other caches
This doesn't change semantics; it only changes where the configuration
is parsed. This makes the logic more consistent across caches.
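
A sketch of the resulting call shape (names as in the diff below): the
caller parses the query cache limits from the configuration and passes
plain values, so QueryCache itself no longer depends on Poco
configuration classes:

    size_t max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
    size_t max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
    size_t max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
    size_t max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_size_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
    global_context->setQueryCache(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows);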
---
programs/server/Server.cpp | 11 ++++++++++-
src/Interpreters/Cache/QueryCache.cpp | 17 ++++++-----------
src/Interpreters/Cache/QueryCache.h | 13 +++++--------
src/Interpreters/Context.cpp | 13 +++++++++----
src/Interpreters/Context.h | 2 +-
5 files changed, 31 insertions(+), 25 deletions(-)
diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp
index d5ae2513fa0..b823cbbe367 100644
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@@ -1550,7 +1550,16 @@ try
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
- global_context->setQueryCache(config());
+ size_t query_cache_max_size_in_bytes = config().getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
+ size_t query_cache_max_entries = config().getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
+ size_t query_cache_max_entry_size_in_bytes = config().getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
+ size_t query_cache_max_entry_size_in_rows = config().getUInt64("query_cache.max_entry_size_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
+ if (query_cache_max_size_in_bytes > max_cache_size)
+ {
+ query_cache_max_size_in_bytes = max_cache_size;
+ LOG_INFO(log, "Lowered query cache size to {} because the system has limited RAM", formatReadableSizeWithBinarySuffix(uncompressed_cache_size));
+ }
+ global_context->setQueryCache(query_cache_max_size_in_bytes, query_cache_max_entries, query_cache_max_entry_size_in_bytes, query_cache_max_entry_size_in_rows);
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
diff --git a/src/Interpreters/Cache/QueryCache.cpp b/src/Interpreters/Cache/QueryCache.cpp
index 182a186d4e1..134aa0956d1 100644
--- a/src/Interpreters/Cache/QueryCache.cpp
+++ b/src/Interpreters/Cache/QueryCache.cpp
@@ -493,7 +493,6 @@ void QueryCache::reset()
cache.reset();
std::lock_guard lock(mutex);
times_executed.clear();
- cache_size_in_bytes = 0;
}
size_t QueryCache::weight() const
@@ -511,7 +510,7 @@ size_t QueryCache::recordQueryRun(const Key & key)
std::lock_guard lock(mutex);
size_t times = ++times_executed[key];
// Regularly drop times_executed to avoid DOS-by-unlimited-growth.
- static constexpr size_t TIMES_EXECUTED_MAX_SIZE = 10'000;
+ static constexpr auto TIMES_EXECUTED_MAX_SIZE = 10'000uz;
if (times_executed.size() > TIMES_EXECUTED_MAX_SIZE)
times_executed.clear();
return times;
@@ -522,23 +521,19 @@ std::vector QueryCache::dump() const
return cache.dump();
}
-QueryCache::QueryCache()
+QueryCache::QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
: cache(std::make_unique>(std::make_unique()))
{
+ updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes_, max_entry_size_in_rows_);
}
-void QueryCache::updateConfiguration(const Poco::Util::AbstractConfiguration & config)
+void QueryCache::updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_)
{
std::lock_guard lock(mutex);
-
- size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
cache.setMaxSize(max_size_in_bytes);
-
- size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
cache.setMaxCount(max_entries);
-
- max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
- max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_size_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
+ max_entry_size_in_bytes = max_entry_size_in_bytes_;
+ max_entry_size_in_rows = max_entry_size_in_rows_;
}
}
diff --git a/src/Interpreters/Cache/QueryCache.h b/src/Interpreters/Cache/QueryCache.h
index eaa54c503fa..0c0674c6302 100644
--- a/src/Interpreters/Cache/QueryCache.h
+++ b/src/Interpreters/Cache/QueryCache.h
@@ -4,7 +4,6 @@
#include
#include
#include
-#include <Poco/Util/AbstractConfiguration.h>
#include
#include
@@ -110,9 +109,6 @@ private:
/// query --> query result
using Cache = CacheBase;
- /// query --> query execution count
- using TimesExecuted = std::unordered_map;
-
public:
/// Buffers multiple partial query result chunks (buffer()) and eventually stores them as cache entry (finalizeWrite()).
///
@@ -177,9 +173,9 @@ public:
friend class QueryCache; /// for createReader()
};
- QueryCache();
+ QueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_);
- void updateConfiguration(const Poco::Util::AbstractConfiguration & config);
+ void updateConfiguration(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes_, size_t max_entry_size_in_rows_);
Reader createReader(const Key & key);
Writer createWriter(const Key & key, std::chrono::milliseconds min_query_runtime, bool squash_partial_results, size_t max_block_size, size_t max_query_cache_size_in_bytes_quota, size_t max_query_cache_entries_quota);
@@ -199,14 +195,15 @@ private:
Cache cache; /// has its own locking --> not protected by mutex
mutable std::mutex mutex;
+
+ /// query --> query execution count
+ using TimesExecuted = std::unordered_map;
TimesExecuted times_executed TSA_GUARDED_BY(mutex);
/// Cache configuration
size_t max_entry_size_in_bytes TSA_GUARDED_BY(mutex) = 0;
size_t max_entry_size_in_rows TSA_GUARDED_BY(mutex) = 0;
- size_t cache_size_in_bytes TSA_GUARDED_BY(mutex) = 0; /// Updated in each cache insert/delete
-
friend class StorageSystemQueryCache;
};
diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp
index 0cc3750dc26..0886d699734 100644
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@@ -2386,22 +2386,27 @@ void Context::clearMMappedFileCache() const
shared->mmap_cache->reset();
}
-void Context::setQueryCache(const Poco::Util::AbstractConfiguration & config)
+void Context::setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows)
{
auto lock = getLock();
if (shared->query_cache)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Query cache has been already created.");
- shared->query_cache = std::make_shared();
- shared->query_cache->updateConfiguration(config);
+ shared->query_cache = std::make_shared(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows);
}
void Context::updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config)
{
auto lock = getLock();
if (shared->query_cache)
- shared->query_cache->updateConfiguration(config);
+ {
+ size_t max_size_in_bytes = config.getUInt64("query_cache.max_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_SIZE);
+ size_t max_entries = config.getUInt64("query_cache.max_entries", DEFAULT_QUERY_CACHE_MAX_ENTRIES);
+ size_t max_entry_size_in_bytes = config.getUInt64("query_cache.max_entry_size_in_bytes", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_BYTES);
+ size_t max_entry_size_in_rows = config.getUInt64("query_cache.max_entry_size_in_rows", DEFAULT_QUERY_CACHE_MAX_ENTRY_SIZE_IN_ROWS);
+ shared->query_cache->updateConfiguration(max_size_in_bytes, max_entries, max_entry_size_in_bytes, max_entry_size_in_rows);
+ }
}
QueryCachePtr Context::getQueryCache() const
diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h
index c725d032bbe..587fe402d4e 100644
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@@ -944,7 +944,7 @@ public:
void clearMMappedFileCache() const;
/// Create a cache of query results for statements which run repeatedly.
- void setQueryCache(const Poco::Util::AbstractConfiguration & config);
+ void setQueryCache(size_t max_size_in_bytes, size_t max_entries, size_t max_entry_size_in_bytes, size_t max_entry_size_in_rows);
void updateQueryCacheConfiguration(const Poco::Util::AbstractConfiguration & config);
std::shared_ptr getQueryCache() const;
void clearQueryCache() const;
From 1916aec2e24fca860a3ec0fcae172450f9210ed0 Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Mon, 14 Aug 2023 19:06:32 +0200
Subject: [PATCH 38/51] add trash for Replicated database
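
In short: when a secondary CREATE query from a Replicated database
encounters a leftover data directory, the directory is moved to a
randomly named path under trash/ instead of failing the query. A
condensed sketch of the fallback (the full guard condition is in the
diff below):

    // Move the stale directory aside instead of throwing "Directory ... already exists":
    fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng());
    fs::create_directories(trash_path.parent_path());  // ensure trash/<data_path>/ exists
    renameNoReplace(full_data_path, trash_path);       // fails if the target already exists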
---
src/Interpreters/InterpreterCreateQuery.cpp | 30 ++++++++++++++++---
.../configs/config.xml | 1 +
.../test_replicated_database/test.py | 2 +-
3 files changed, 28 insertions(+), 5 deletions(-)
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index f4aee9ad4db..f1c01d1aadc 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -9,6 +9,7 @@
#include
#include
#include
+#include
#include
#include
@@ -71,7 +72,6 @@
#include
#include
-#include
#include
#include
@@ -1329,10 +1329,32 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
}
data_path = database->getTableDataPath(create);
+ auto full_data_path = fs::path{getContext()->getPath()} / data_path;
- if (!create.attach && !data_path.empty() && fs::exists(fs::path{getContext()->getPath()} / data_path))
- throw Exception(storage_already_exists_error_code,
- "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path));
+ if (!create.attach && !data_path.empty() && fs::exists(full_data_path))
+ {
+ if (getContext()->getZooKeeperMetadataTransaction() &&
+ !getContext()->getZooKeeperMetadataTransaction()->isInitialQuery() &&
+ !DatabaseCatalog::instance().hasUUIDMapping(create.uuid) &&
+ Context::getGlobalContextInstance()->isServerCompletelyStarted() &&
+ Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_dir_to_trash", false))
+ {
+ /// This is a secondary query from a Replicated database. It cannot be retried with another UUID, we must execute it as is.
+ /// We don't have a table with this UUID (and all metadata is loaded),
+ /// so the existing directory probably contains some leftovers from previous unsuccessful attempts to create the table
+
+ fs::path trash_path = fs::path{getContext()->getPath()} / "trash" / data_path / getHexUIntLowercase(thread_local_rng());
+ LOG_WARNING(&Poco::Logger::get("InterpreterCreateQuery"), "Directory for {} data {} already exists. Will move it to {}",
+ Poco::toLower(storage_name), String(data_path), trash_path);
+ fs::create_directories(trash_path.parent_path());
+ renameNoReplace(full_data_path, trash_path);
+ }
+ else
+ {
+ throw Exception(storage_already_exists_error_code,
+ "Directory for {} data {} already exists", Poco::toLower(storage_name), String(data_path));
+ }
+ }
bool from_path = create.attach_from_path.has_value();
String actual_data_path = data_path;
diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml
index 16cd942e975..7a67d69c031 100644
--- a/tests/integration/test_replicated_database/configs/config.xml
+++ b/tests/integration/test_replicated_database/configs/config.xml
@@ -1,5 +1,6 @@
10
+ <allow_moving_table_dir_to_trash>1</allow_moving_table_dir_to_trash>
10
diff --git a/tests/integration/test_replicated_database/test.py b/tests/integration/test_replicated_database/test.py
index d0a04f40b69..f45841124d9 100644
--- a/tests/integration/test_replicated_database/test.py
+++ b/tests/integration/test_replicated_database/test.py
@@ -1262,7 +1262,7 @@ def test_recover_digest_mismatch(started_cluster):
"mv /var/lib/clickhouse/metadata/recover_digest_mismatch/t1.sql /var/lib/clickhouse/metadata/recover_digest_mismatch/m1.sql",
"sed --follow-symlinks -i 's/Int32/String/' /var/lib/clickhouse/metadata/recover_digest_mismatch/mv1.sql",
"rm -f /var/lib/clickhouse/metadata/recover_digest_mismatch/d1.sql",
- # f"rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Directory already exists
+ "rm -rf /var/lib/clickhouse/metadata/recover_digest_mismatch/", # Will trigger "Directory already exists"
"rm -rf /var/lib/clickhouse/store",
]
From 3d8f2c335feb46c7dcd16ef9ea38d1df0a81c2c2 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Mon, 14 Aug 2023 18:54:15 +0000
Subject: [PATCH 39/51] Protect against invalid
asynchronous_metrics_update_period_s settings
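
In short: a zero update period coming from the configuration is now
rejected at construction time. The added guard, condensed from the diff
below (INVALID_SETTING_VALUE is the error code it throws):

    if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0)
        throw Exception(ErrorCodes::INVALID_SETTING_VALUE,
            "Settings asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero");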
---
src/Interpreters/ServerAsynchronousMetrics.cpp | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/Interpreters/ServerAsynchronousMetrics.cpp b/src/Interpreters/ServerAsynchronousMetrics.cpp
index 68411e80755..1b78ff4d2b6 100644
--- a/src/Interpreters/ServerAsynchronousMetrics.cpp
+++ b/src/Interpreters/ServerAsynchronousMetrics.cpp
@@ -24,6 +24,11 @@
namespace DB
{
+namespace ErrorCodes
+{
+ extern const int INVALID_SETTING_VALUE;
+}
+
namespace
{
@@ -52,7 +57,11 @@ ServerAsynchronousMetrics::ServerAsynchronousMetrics(
: AsynchronousMetrics(update_period_seconds, protocol_server_metrics_func_)
, WithContext(global_context_)
, heavy_metric_update_period(heavy_metrics_update_period_seconds)
-{}
+{
+ /// sanity check
+ if (update_period_seconds == 0 || heavy_metrics_update_period_seconds == 0)
+ throw Exception(ErrorCodes::INVALID_SETTING_VALUE, "Settings asynchronous_metrics_update_period_s and asynchronous_heavy_metrics_update_period_s must not be zero");
+}
void ServerAsynchronousMetrics::updateImpl(AsynchronousMetricValues & new_values, TimePoint update_time, TimePoint current_time)
{
From 8992cc5af4653365351a98f2d00c7e4416c4965f Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Thu, 6 Jul 2023 10:48:00 +0000
Subject: [PATCH 40/51] Factorize constants
---
.../MergeTree/MergeTreeIndexAnnoy.cpp | 28 ++++++++++---------
1 file changed, 15 insertions(+), 13 deletions(-)
diff --git a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
index f77cfe4fed0..1c92645dbfa 100644
--- a/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
+++ b/src/Storages/MergeTree/MergeTreeIndexAnnoy.cpp
@@ -25,6 +25,11 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
+static constexpr auto DISTANCE_FUNCTION_L2 = "L2Distance";
+static constexpr auto DISTANCE_FUNCTION_COSINE = "cosineDistance";
+
+static constexpr auto DEFAULT_TREES = 100uz;
+static constexpr auto DEFAULT_DISTANCE_FUNCTION = DISTANCE_FUNCTION_L2;
template <typename Distance>
AnnoyIndexWithSerialization<Distance>::AnnoyIndexWithSerialization(size_t dimensions)
@@ -224,9 +229,9 @@ bool MergeTreeIndexConditionAnnoy::alwaysUnknownOrTrue() const
std::vector MergeTreeIndexConditionAnnoy::getUsefulRanges(MergeTreeIndexGranulePtr idx_granule) const
{
- if (distance_function == "L2Distance")
+ if (distance_function == DISTANCE_FUNCTION_L2)
return getUsefulRangesImpl(idx_granule);
- else if (distance_function == "cosineDistance")
+ else if (distance_function == DISTANCE_FUNCTION_COSINE)
return getUsefulRangesImpl(idx_granule);
std::unreachable();
}
@@ -289,9 +294,9 @@ MergeTreeIndexAnnoy::MergeTreeIndexAnnoy(const IndexDescription & index_, UInt64
MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const
{
- if (distance_function == "L2Distance")
+ if (distance_function == DISTANCE_FUNCTION_L2)
return std::make_shared>(index.name, index.sample_block);
- else if (distance_function == "cosineDistance")
+ else if (distance_function == DISTANCE_FUNCTION_COSINE)
return std::make_shared>(index.name, index.sample_block);
std::unreachable();
}
@@ -299,9 +304,9 @@ MergeTreeIndexGranulePtr MergeTreeIndexAnnoy::createIndexGranule() const
MergeTreeIndexAggregatorPtr MergeTreeIndexAnnoy::createIndexAggregator() const
{
/// TODO: Support more metrics. Available metrics: https://github.com/spotify/annoy/blob/master/src/annoymodule.cc#L151-L171
- if (distance_function == "L2Distance")
+ if (distance_function == DISTANCE_FUNCTION_L2)
return std::make_shared>(index.name, index.sample_block, trees);
- else if (distance_function == "cosineDistance")
+ else if (distance_function == DISTANCE_FUNCTION_COSINE)
return std::make_shared>(index.name, index.sample_block, trees);
std::unreachable();
}
@@ -313,14 +318,11 @@ MergeTreeIndexConditionPtr MergeTreeIndexAnnoy::createIndexCondition(const Selec
MergeTreeIndexPtr annoyIndexCreator(const IndexDescription & index)
{
- static constexpr auto default_trees = 100uz;
- static constexpr auto default_distance_function = "L2Distance";
-
- String distance_function = default_distance_function;
+ String distance_function = DEFAULT_DISTANCE_FUNCTION;
if (!index.arguments.empty())
distance_function = index.arguments[0].get<String>();
- UInt64 trees = default_trees;
+ UInt64 trees = DEFAULT_TREES;
if (index.arguments.size() > 1)
trees = index.arguments[1].get<UInt64>();
@@ -350,8 +352,8 @@ void annoyIndexValidator(const IndexDescription & index, bool /* attach */)
if (!index.arguments.empty())
{
String distance_name = index.arguments[0].get<String>();
- if (distance_name != "L2Distance" && distance_name != "cosineDistance")
- throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions 'L2Distance' and 'cosineDistance'");
+ if (distance_name != DISTANCE_FUNCTION_L2 && distance_name != DISTANCE_FUNCTION_COSINE)
+ throw Exception(ErrorCodes::INCORRECT_DATA, "Annoy index only supports distance functions '{}' and '{}'", DISTANCE_FUNCTION_L2, DISTANCE_FUNCTION_COSINE);
}
/// Check data type of indexed column:
From 0fd28bf3309a65e5c0204c814bef0a5f13dada9d Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:39:34 +0000
Subject: [PATCH 41/51] added remote session log test
---
.../02834_remote_session_log.reference | 13 +++++
.../0_stateless/02834_remote_session_log.sh | 56 +++++++++++++++++++
2 files changed, 69 insertions(+)
create mode 100644 tests/queries/0_stateless/02834_remote_session_log.reference
create mode 100755 tests/queries/0_stateless/02834_remote_session_log.sh
diff --git a/tests/queries/0_stateless/02834_remote_session_log.reference b/tests/queries/0_stateless/02834_remote_session_log.reference
new file mode 100644
index 00000000000..e2680982ab0
--- /dev/null
+++ b/tests/queries/0_stateless/02834_remote_session_log.reference
@@ -0,0 +1,13 @@
+0
+0
+0
+0
+client_port 0 connections:
+0
+client_address '::' connections:
+0
+login failures:
+0
+TCP Login and logout count is equal
+HTTP Login and logout count is equal
+MySQL Login and logout count is equal
diff --git a/tests/queries/0_stateless/02834_remote_session_log.sh b/tests/queries/0_stateless/02834_remote_session_log.sh
new file mode 100755
index 00000000000..3bedfb6c9ee
--- /dev/null
+++ b/tests/queries/0_stateless/02834_remote_session_log.sh
@@ -0,0 +1,56 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+readonly TEST_USER="02834_USER_${PID}"
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER} IDENTIFIED WITH plaintext_password BY 'pass'"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT CREATE TEMPORARY TABLE, MYSQL, REMOTE ON *.* TO ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"
+
+${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
+ -d "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')"
+
+${CLICKHOUSE_CURL} -sS -X POST "${CLICKHOUSE_URL}&user=${TEST_USER}&password=pass" \
+ -d "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')"
+
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM remote('127.0.0.1:${CLICKHOUSE_PORT_TCP}', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
+${CLICKHOUSE_CLIENT} -q "SELECT * FROM mysql('127.0.0.1:9004', 'system', 'one', '${TEST_USER}', 'pass')" -u "${TEST_USER}" --password "pass"
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "client_port 0 connections:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_port = 0"
+
+echo "client_address '::' connections:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and client_address = toIPv6('::')"
+
+echo "login failures:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' and type = 'LoginFailure'"
+
+# The remote(...) function sometimes reuses old cached sessions for query execution.
+# This makes the LoginSuccess/Logout entry counts unstable, but successful logins and logouts must always match.
+
+for interface in 'TCP' 'HTTP' 'MySQL'
+do
+ LOGIN_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}'")
+ CORRESPONDING_LOGOUT_RECORDS_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' AND interface = '${interface}' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}')")
+
+ if [ "$LOGIN_COUNT" == "$CORRESPONDING_LOGOUT_RECORDS_COUNT" ]; then
+ echo "${interface} Login and logout count is equal"
+ else
+ TOTAL_LOGOUT_COUNT=$(${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout' AND interface = '${interface}'")
+ echo "${interface} Login count ${LOGIN_COUNT} != corresponding logout count ${CORRESPONDING_LOGOUT_RECORDS_COUNT}. TOTAL_LOGOUT_COUNT ${TOTAL_LOGOUT_COUNT}"
+ fi
+done
+
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
From cbf9f88b90f69a08bd51377338d2a679e629cd82 Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:42:42 +0000
Subject: [PATCH 42/51] Added concurrent session session_log tests
---
.../02833_concurrrent_sessions.reference | 34 +++++
.../0_stateless/02833_concurrrent_sessions.sh | 138 ++++++++++++++++++
2 files changed, 172 insertions(+)
create mode 100644 tests/queries/0_stateless/02833_concurrrent_sessions.reference
create mode 100755 tests/queries/0_stateless/02833_concurrrent_sessions.sh
diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.reference b/tests/queries/0_stateless/02833_concurrrent_sessions.reference
new file mode 100644
index 00000000000..bfe507e8eac
--- /dev/null
+++ b/tests/queries/0_stateless/02833_concurrrent_sessions.reference
@@ -0,0 +1,34 @@
+sessions:
+150
+port_0_sessions:
+0
+address_0_sessions:
+0
+tcp_sessions
+60
+http_sessions
+30
+http_with_session_id_sessions
+30
+my_sql_sessions
+30
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
+Corresponding LoginSuccess/Logout
+10
+LoginFailure
+10
diff --git a/tests/queries/0_stateless/02833_concurrrent_sessions.sh b/tests/queries/0_stateless/02833_concurrrent_sessions.sh
new file mode 100755
index 00000000000..26b48462a76
--- /dev/null
+++ b/tests/queries/0_stateless/02833_concurrrent_sessions.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+# Tags: no-fasttest, no-debug
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+
+# Each user uses a separate thread.
+readonly TCP_USERS=( "02833_TCP_USER_${PID}"_{1,2} ) # 2 concurrent TCP users
+readonly HTTP_USERS=( "02833_HTTP_USER_${PID}" )
+readonly HTTP_WITH_SESSION_ID_SESSION_USERS=( "02833_HTTP_WITH_SESSION_ID_USER_${PID}" )
+readonly MYSQL_USERS=( "02833_MYSQL_USER_${PID}")
+readonly ALL_USERS=( "${TCP_USERS[@]}" "${HTTP_USERS[@]}" "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}" "${MYSQL_USERS[@]}" )
+
+readonly TCP_USERS_SQL_COLLECTION_STRING="$( echo "${TCP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly HTTP_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING="$( echo "${HTTP_WITH_SESSION_ID_SESSION_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly MYSQL_USERS_SQL_COLLECTION_STRING="$( echo "${MYSQL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+readonly ALL_USERS_SQL_COLLECTION_STRING="$( echo "${ALL_USERS[*]}" | sed "s/[^[:space:]]\+/'&'/g" | sed 's/[[:space:]]/,/g' )"
+
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+for user in "${ALL_USERS[@]}"; do
+ ${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${user} IDENTIFIED WITH plaintext_password BY 'pass'"
+ ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.* TO ${user}"
+ ${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON INFORMATION_SCHEMA.* TO ${user}";
+done
+
+# All _session functions execute in separate threads.
+# Each function repeatedly creates a session with a successful login and logout,
+# sleeps a small, random amount of time to make concurrency more intense,
+# and also tries to log in with an invalid password.
+function tcp_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM})" --user="${user}" --password="pass"
+ # login failure
+ ${CLICKHOUSE_CLIENT} -q "SELECT 2" --user="${user}" --password 'invalid'
+ done
+}
+
+function http_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT 3, sleep(0.01${RANDOM})"
+
+ # login failure
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=wrong" -d "SELECT 4"
+ done
+}
+
+function http_with_session_id_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=pass" -d "SELECT 5, sleep 0.01${RANDOM}"
+
+ # login failure
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&session_id=${user}&user=${user}&password=wrong" -d "SELECT 6"
+ done
+}
+
+function mysql_session()
+{
+ local user=$1
+ local i=0
+ while (( (i++) < 10 )); do
+ # login logout
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1, sleep(0.01${RANDOM}) FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'pass')"
+
+ # login failure
+ ${CLICKHOUSE_CLIENT} -q "SELECT 1 FROM mysql('127.0.0.1:9004', 'system', 'one', '${user}', 'wrong', SETTINGS connection_max_tries=1)"
+ done
+}
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
+
+export -f tcp_session;
+export -f http_session;
+export -f http_with_session_id_session;
+export -f mysql_session;
+
+for user in "${TCP_USERS[@]}"; do
+ timeout 60s bash -c "tcp_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${HTTP_USERS[@]}"; do
+ timeout 60s bash -c "http_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${HTTP_WITH_SESSION_ID_SESSION_USERS[@]}"; do
+ timeout 60s bash -c "http_with_session_id_session ${user}" >/dev/null 2>&1 &
+done
+
+for user in "${MYSQL_USERS[@]}"; do
+ timeout 60s bash -c "mysql_session ${user}" >/dev/null 2>&1 &
+done
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING})"
+
+echo "port_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_port = 0"
+
+echo "address_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${ALL_USERS_SQL_COLLECTION_STRING}) AND client_address = toIPv6('::')"
+
+echo "tcp_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${TCP_USERS_SQL_COLLECTION_STRING}) AND interface = 'TCP'"
+echo "http_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
+echo "http_with_session_id_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${HTTP_WITH_SESSION_ID_USERS_SQL_COLLECTION_STRING}) AND interface = 'HTTP'"
+echo "my_sql_sessions"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user IN (${MYSQL_USERS_SQL_COLLECTION_STRING}) AND interface = 'MySQL'"
+
+for user in "${ALL_USERS[@]}"; do
+ ${CLICKHOUSE_CLIENT} -q "DROP USER ${user}"
+ echo "Corresponding LoginSuccess/Logout"
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${user}' AND type = 'Logout')"
+ echo "LoginFailure"
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${user}' AND type = 'LoginFailure'"
+ done
From 4b5874b512802022e4c5581e17c9ed86c505129e Mon Sep 17 00:00:00 2001
From: Alexey Gerasimchuck
Date: Tue, 15 Aug 2023 13:45:06 +0000
Subject: [PATCH 43/51] added drop user during session test
---
.../02835_drop_user_during_session.reference | 8 ++
.../02835_drop_user_during_session.sh | 114 ++++++++++++++++++
2 files changed, 122 insertions(+)
create mode 100644 tests/queries/0_stateless/02835_drop_user_during_session.reference
create mode 100755 tests/queries/0_stateless/02835_drop_user_during_session.sh
diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.reference b/tests/queries/0_stateless/02835_drop_user_during_session.reference
new file mode 100644
index 00000000000..7252faab8c6
--- /dev/null
+++ b/tests/queries/0_stateless/02835_drop_user_during_session.reference
@@ -0,0 +1,8 @@
+port_0_sessions:
+0
+address_0_sessions:
+0
+Corresponding LoginSuccess/Logout
+9
+LoginFailure
+0
diff --git a/tests/queries/0_stateless/02835_drop_user_during_session.sh b/tests/queries/0_stateless/02835_drop_user_during_session.sh
new file mode 100755
index 00000000000..347ebd22f96
--- /dev/null
+++ b/tests/queries/0_stateless/02835_drop_user_during_session.sh
@@ -0,0 +1,114 @@
+#!/usr/bin/env bash
+# Tags: no-debug
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. "$CURDIR"/../shell_config.sh
+
+readonly PID=$$
+
+readonly TEST_USER="02835_USER_${PID}"
+readonly TEST_ROLE="02835_ROLE_${PID}"
+readonly TEST_PROFILE="02835_PROFILE_${PID}"
+readonly SESSION_LOG_MATCHING_FIELDS="auth_id, auth_type, client_version_major, client_version_minor, client_version_patch, interface"
+
+function tcp_session()
+{
+ local user=$1
+ ${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.numbers" --user="${user}"
+}
+
+function http_session()
+{
+ local user=$1
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers"
+}
+
+function http_with_session_id_session()
+{
+ local user=$1
+ ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&user=${user}&password=pass" -d "SELECT COUNT(*) FROM system.numbers"
+}
+
+# Busy-waits until the given number of queries ($2) for user $1 are running simultaneously.
+function wait_for_queries_start()
+{
+ local user=$1
+ local queries_count=$2
+ # wait for up to 10 seconds (100 retries * 0.1 s)
+ counter=0 retries=100
+ while [[ $counter -lt $retries ]]; do
+ result=$($CLICKHOUSE_CLIENT --query "SELECT COUNT(*) FROM system.processes WHERE user = '${user}'")
+ if [[ $result == "${queries_count}" ]]; then
+ break;
+ fi
+ sleep 0.1
+ ((++counter))
+ done
+}
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+${CLICKHOUSE_CLIENT} -q "DELETE FROM system.session_log WHERE user = '${TEST_USER}'"
+
+# DROP USER CASE
+${CLICKHOUSE_CLIENT} -q "CREATE USER IF NOT EXISTS ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+export -f tcp_session;
+export -f http_session;
+export -f http_with_session_id_session;
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+# DROP ROLE CASE
+${CLICKHOUSE_CLIENT} -q "CREATE ROLE IF NOT EXISTS ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} DEFAULT ROLE ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP ROLE ${TEST_ROLE}"
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+# DROP PROFILE CASE
+${CLICKHOUSE_CLIENT} -q "CREATE SETTINGS PROFILE IF NOT EXISTS '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "CREATE USER ${TEST_USER} SETTINGS PROFILE '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "GRANT SELECT ON system.numbers TO ${TEST_USER}"
+
+timeout 10s bash -c "tcp_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_session ${TEST_USER}" >/dev/null 2>&1 &
+timeout 10s bash -c "http_with_session_id_session ${TEST_USER}" >/dev/null 2>&1 &
+
+wait_for_queries_start $TEST_USER 3
+${CLICKHOUSE_CLIENT} -q "DROP SETTINGS PROFILE '${TEST_PROFILE}'"
+${CLICKHOUSE_CLIENT} -q "DROP USER ${TEST_USER}"
+
+${CLICKHOUSE_CLIENT} -q "KILL QUERY WHERE user = '${TEST_USER}' SYNC" >/dev/null &
+
+wait
+
+${CLICKHOUSE_CLIENT} -q "SYSTEM FLUSH LOGS"
+
+echo "port_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_port = 0"
+echo "address_0_sessions:"
+${CLICKHOUSE_CLIENT} -q "SELECT count(*) FROM system.session_log WHERE user = '${TEST_USER}' AND client_address = toIPv6('::')"
+echo "Corresponding LoginSuccess/Logout"
+${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM (SELECT ${SESSION_LOG_MATCHING_FIELDS} FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginSuccess' INTERSECT SELECT ${SESSION_LOG_MATCHING_FIELDS}, FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'Logout')"
+echo "LoginFailure"
+${CLICKHOUSE_CLIENT} -q "SELECT COUNT(*) FROM system.session_log WHERE user = '${TEST_USER}' AND type = 'LoginFailure'"
From f95c8599594c3a8e7101a02a4e376f5cb6ca7b8a Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Tue, 15 Aug 2023 15:47:11 +0200
Subject: [PATCH 44/51] rename setting
---
src/Interpreters/InterpreterCreateQuery.cpp | 2 +-
tests/integration/test_replicated_database/configs/config.xml | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp
index f1c01d1aadc..57189012317 100644
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@@ -1337,7 +1337,7 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
!getContext()->getZooKeeperMetadataTransaction()->isInitialQuery() &&
!DatabaseCatalog::instance().hasUUIDMapping(create.uuid) &&
Context::getGlobalContextInstance()->isServerCompletelyStarted() &&
- Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_dir_to_trash", false))
+ Context::getGlobalContextInstance()->getConfigRef().getBool("allow_moving_table_directory_to_trash", false))
{
/// This is a secondary query from a Replicated database. It cannot be retried with another UUID, we must execute it as is.
/// We don't have a table with this UUID (and all metadata is loaded),
diff --git a/tests/integration/test_replicated_database/configs/config.xml b/tests/integration/test_replicated_database/configs/config.xml
index 7a67d69c031..e598cc28d5d 100644
--- a/tests/integration/test_replicated_database/configs/config.xml
+++ b/tests/integration/test_replicated_database/configs/config.xml
@@ -1,6 +1,6 @@
10
- <allow_moving_table_dir_to_trash>1</allow_moving_table_dir_to_trash>
+ <allow_moving_table_directory_to_trash>1</allow_moving_table_directory_to_trash>
10
From 8343ebd4ecfc90e4fafb22d660fd0ad04aaa0e74 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Tue, 15 Aug 2023 14:44:13 +0000
Subject: [PATCH 45/51] Clarify comment
---
programs/local/LocalServer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp
index 0dea7e8e643..b38e17ecade 100644
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@@ -710,7 +710,8 @@ void LocalServer::processConfig()
if (mmap_cache_size)
global_context->setMMappedFileCache(mmap_cache_size);
- /// not initializing the query cache in clickhouse-local
+ /// In Server.cpp (./clickhouse-server), we would initialize the query cache here.
+ /// Intentionally not doing this in clickhouse-local as it doesn't make sense.
#if USE_EMBEDDED_COMPILER
size_t compiled_expression_cache_max_size_in_bytes = config().getUInt64("compiled_expression_cache_size", DEFAULT_COMPILED_EXPRESSION_CACHE_MAX_SIZE);
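For comparison, the Server.cpp block that the new comment points to looks roughly like the sketch below. This is a paraphrase, not the exact server code: the real Context::setQueryCache call takes additional limit parameters (max entries, max entry size in bytes and rows), and the default size here is only illustrative.

    // Rough sketch of the clickhouse-server counterpart (simplified; the real
    // setQueryCache takes additional limits such as max entries and max entry sizes).
    size_t query_cache_size = config().getUInt64("query_cache.max_size_in_bytes", 1ul << 30);
    if (query_cache_size)
        global_context->setQueryCache(query_cache_size);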
From cbe4c8adc2973ee8d6583f178bd44915c55f21f1 Mon Sep 17 00:00:00 2001
From: Michael Kolupaev
Date: Mon, 14 Aug 2023 23:24:41 +0000
Subject: [PATCH 46/51] Fix more functions with 'Context has expired' error
---
src/Functions/FunctionFactory.h | 4 +--
src/Functions/FunctionsExternalDictionaries.h | 9 +++---
src/Functions/FunctionsJSON.h | 2 +-
src/Interpreters/InterpreterExplainQuery.cpp | 8 ++---
.../QueryPlan/Optimizations/Optimizations.h | 4 +--
.../Optimizations/addPlansForSets.cpp | 6 ++--
.../QueryPlan/Optimizations/optimizeTree.cpp | 4 +--
src/Processors/QueryPlan/QueryPlan.cpp | 6 ++--
src/Processors/QueryPlan/QueryPlan.h | 2 +-
.../02843_context_has_expired.reference | 4 +++
.../0_stateless/02843_context_has_expired.sql | 29 ++++++++++++++-----
11 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/src/Functions/FunctionFactory.h b/src/Functions/FunctionFactory.h
index deea41e6677..588cae64e16 100644
--- a/src/Functions/FunctionFactory.h
+++ b/src/Functions/FunctionFactory.h
@@ -20,8 +20,8 @@ using FunctionCreator = std::function<FunctionOverloadResolverPtr(ContextPtr)>;
using FunctionFactoryData = std::pair<FunctionCreator, FunctionDocumentation>;
/** Creates function by name.
- * Function could use for initialization (take ownership of shared_ptr, for example)
- * some dictionaries from Context.
+ * The provided Context is guaranteed to outlive the created function. Functions may use it for
+ * things like settings, current database, permission checks, etc.
*/
class FunctionFactory : private boost::noncopyable, public IFactoryWithAliases
{
diff --git a/src/Functions/FunctionsExternalDictionaries.h b/src/Functions/FunctionsExternalDictionaries.h
index 1b2e2eb3bd6..db6529da73c 100644
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@@ -62,13 +62,14 @@ namespace ErrorCodes
*/
-class FunctionDictHelper
+class FunctionDictHelper : WithContext
{
public:
- explicit FunctionDictHelper(ContextPtr context_) : current_context(context_) {}
+ explicit FunctionDictHelper(ContextPtr context_) : WithContext(context_) {}
std::shared_ptr<const IDictionary> getDictionary(const String & dictionary_name)
{
+ auto current_context = getContext();
auto dict = current_context->getExternalDictionariesLoader().getDictionary(dictionary_name, current_context);
if (!access_checked)
@@ -131,12 +132,10 @@ public:
DictionaryStructure getDictionaryStructure(const String & dictionary_name) const
{
- return current_context->getExternalDictionariesLoader().getDictionaryStructure(dictionary_name, current_context);
+ return getContext()->getExternalDictionariesLoader().getDictionaryStructure(dictionary_name, getContext());
}
private:
- ContextPtr current_context;
-
/// If access is not granted, checkAccess() throws and access_checked is never updated,
/// so access_checked == true implies a successful access check.
std::atomic<bool> access_checked = false;
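The FunctionDictHelper change above is the core pattern of this patch: a bare ContextPtr member caches the query context at construction time and can dangle or pin it past its lifetime, while deriving from WithContext stores a weak reference that is re-resolved on every use. A minimal sketch of the pattern, with all dictionary-specific details dropped; only WithContext/getContext are real ClickHouse names, the class and method are illustrative:

    class SomeHelper : WithContext
    {
    public:
        explicit SomeHelper(ContextPtr context_) : WithContext(context_) {}

        void doWork()
        {
            // getContext() locks the weak reference on each call and throws
            // "Context has expired" if the query context is already gone,
            // instead of silently using a stale pointer.
            auto context = getContext();
            // ... use `context` for settings, access checks, loaders, etc.
        }
    };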
diff --git a/src/Functions/FunctionsJSON.h b/src/Functions/FunctionsJSON.h
index ca797eed856..094de0c27c2 100644
--- a/src/Functions/FunctionsJSON.h
+++ b/src/Functions/FunctionsJSON.h
@@ -336,7 +336,7 @@ private:
template <typename Name, template <typename> typename Impl>
-class ExecutableFunctionJSON : public IExecutableFunction, WithContext
+class ExecutableFunctionJSON : public IExecutableFunction
{
public:
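ExecutableFunctionJSON goes the opposite way: instead of re-resolving the context at execution time, it stops depending on the Context during execution entirely. The usual shape of that fix is to copy whatever the function needs (here, format settings) when it is created. A hedged sketch, assuming the existing getFormatSettings(context) helper; the struct and function names are illustrative:

    // Copy the needed settings out of the Context at creation time, so that
    // execution never touches the (possibly expired) query Context.
    struct JsonFunctionState
    {
        FormatSettings format_settings; // owned copy, independent of the Context
    };

    JsonFunctionState makeState(ContextPtr context)
    {
        return JsonFunctionState{getFormatSettings(context)};
    }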
diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp
index 3a381cd8dab..39cc4df5c2d 100644
--- a/src/Interpreters/InterpreterExplainQuery.cpp
+++ b/src/Interpreters/InterpreterExplainQuery.cpp
@@ -541,13 +541,13 @@ QueryPipeline InterpreterExplainQuery::executeImpl()
InterpreterSelectWithUnionQuery interpreter(ast.getExplainedQuery(), getContext(), SelectQueryOptions());
interpreter.buildQueryPlan(plan);
context = interpreter.getContext();
- // collect the selected marks, rows, parts during build query pipeline.
- plan.buildQueryPipeline(
+ // Collect the selected marks, rows and parts while building the query pipeline.
+ // Hold on to the returned QueryPipelineBuilderPtr because `plan` may have pointers into
+ // it (through QueryPlanResourceHolder).
+ auto builder = plan.buildQueryPipeline(
QueryPlanOptimizationSettings::fromContext(context),
BuildQueryPipelineSettings::fromContext(context));
- if (settings.optimize)
- plan.optimize(QueryPlanOptimizationSettings::fromContext(context));
plan.explainEstimate(res_columns);
insert_buf = false;
break;
diff --git a/src/Processors/QueryPlan/Optimizations/Optimizations.h b/src/Processors/QueryPlan/Optimizations/Optimizations.h
index 6ecec1359c5..2230e50425c 100644
--- a/src/Processors/QueryPlan/Optimizations/Optimizations.h
+++ b/src/Processors/QueryPlan/Optimizations/Optimizations.h
@@ -16,7 +16,7 @@ void optimizeTreeFirstPass(const QueryPlanOptimizationSettings & settings, Query
void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Third pass is used to apply filters such as key conditions and skip indexes to the storages that support them.
/// After that, it adds a CreateSetsStep for the subqueries that have not been used in the filters.
-void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes);
+void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
/// Optimization (first pass) is a function applied to QueryPlan::Node.
/// It can read and update subtree of specified node.
@@ -113,7 +113,7 @@ void optimizeReadInOrder(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
void optimizeAggregationInOrder(QueryPlan::Node & node, QueryPlan::Nodes &);
bool optimizeUseAggregateProjections(QueryPlan::Node & node, QueryPlan::Nodes & nodes, bool allow_implicit_projections);
bool optimizeUseNormalProjections(Stack & stack, QueryPlan::Nodes & nodes);
-bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes);
+bool addPlansForSets(QueryPlan & plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes);
/// Enable memory bound merging of aggregation states for remote queries
/// in case it was enabled for local plan
diff --git a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
index e9100ae9d02..47df05301c9 100644
--- a/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
+++ b/src/Processors/QueryPlan/Optimizations/addPlansForSets.cpp
@@ -6,7 +6,7 @@
namespace DB::QueryPlanOptimizations
{
-bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
+bool addPlansForSets(QueryPlan & root_plan, QueryPlan::Node & node, QueryPlan::Nodes & nodes)
{
auto * delayed = typeid_cast<DelayedCreatingSetsStep *>(node.step.get());
if (!delayed)
@@ -23,7 +23,9 @@ bool addPlansForSets(QueryPlan::Node & node, QueryPlan::Nodes & nodes)
{
input_streams.push_back(plan->getCurrentDataStream());
node.children.push_back(plan->getRootNode());
- nodes.splice(nodes.end(), QueryPlan::detachNodes(std::move(*plan)));
+ auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan));
+ nodes.splice(nodes.end(), std::move(add_nodes));
+ root_plan.addResources(std::move(add_resources));
}
auto creating_sets = std::make_unique(std::move(input_streams));
diff --git a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
index b13dda9a8f0..0caedff67a5 100644
--- a/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
+++ b/src/Processors/QueryPlan/Optimizations/optimizeTree.cpp
@@ -181,7 +181,7 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
"No projection is used when optimize_use_projections = 1 and force_optimize_projection = 1");
}
-void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
+void optimizeTreeThirdPass(QueryPlan & plan, QueryPlan::Node & root, QueryPlan::Nodes & nodes)
{
Stack stack;
stack.push_back({.node = &root});
@@ -205,7 +205,7 @@ void optimizeTreeThirdPass(QueryPlan::Node & root, QueryPlan::Nodes & nodes)
source_step_with_filter->applyFilters();
}
- addPlansForSets(*frame.node, nodes);
+ addPlansForSets(plan, *frame.node, nodes);
stack.pop_back();
}
diff --git a/src/Processors/QueryPlan/QueryPlan.cpp b/src/Processors/QueryPlan/QueryPlan.cpp
index 687260441ff..ceda9f97bab 100644
--- a/src/Processors/QueryPlan/QueryPlan.cpp
+++ b/src/Processors/QueryPlan/QueryPlan.cpp
@@ -482,7 +482,7 @@ void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_sett
QueryPlanOptimizations::optimizeTreeFirstPass(optimization_settings, *root, nodes);
QueryPlanOptimizations::optimizeTreeSecondPass(optimization_settings, *root, nodes);
- QueryPlanOptimizations::optimizeTreeThirdPass(*root, nodes);
+ QueryPlanOptimizations::optimizeTreeThirdPass(*this, *root, nodes);
updateDataStreams(*root);
}
@@ -542,9 +542,9 @@ void QueryPlan::explainEstimate(MutableColumns & columns)
}
}
-QueryPlan::Nodes QueryPlan::detachNodes(QueryPlan && plan)
+std::pair<QueryPlan::Nodes, QueryPlanResourceHolder> QueryPlan::detachNodesAndResources(QueryPlan && plan)
{
- return std::move(plan.nodes);
+ return {std::move(plan.nodes), std::move(plan.resources)};
}
}
diff --git a/src/Processors/QueryPlan/QueryPlan.h b/src/Processors/QueryPlan/QueryPlan.h
index d89bdc534be..f4a6c9097f2 100644
--- a/src/Processors/QueryPlan/QueryPlan.h
+++ b/src/Processors/QueryPlan/QueryPlan.h
@@ -108,7 +108,7 @@ public:
using Nodes = std::list<Node>;
Node * getRootNode() const { return root; }
- static Nodes detachNodes(QueryPlan && plan);
+ static std::pair<Nodes, QueryPlanResourceHolder> detachNodesAndResources(QueryPlan && plan);
private:
QueryPlanResourceHolder resources;
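detachNodesAndResources exists because detaching only the nodes (the old detachNodes) left the sub-plan's QueryPlanResourceHolder behind: it was destroyed together with the discarded sub-plan, so steps spliced into another plan could reference already-released resources. Condensed from the addPlansForSets call site above:

    // Move both the steps and the resources out of the detached sub-plan,
    // then re-attach the resources to the plan that now owns the steps.
    auto [add_nodes, add_resources] = QueryPlan::detachNodesAndResources(std::move(*plan));
    nodes.splice(nodes.end(), std::move(add_nodes));   // take ownership of the nodes
    root_plan.addResources(std::move(add_resources));  // keep their resources alive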
diff --git a/tests/queries/0_stateless/02843_context_has_expired.reference b/tests/queries/0_stateless/02843_context_has_expired.reference
index 573541ac970..229972f2924 100644
--- a/tests/queries/0_stateless/02843_context_has_expired.reference
+++ b/tests/queries/0_stateless/02843_context_has_expired.reference
@@ -1 +1,5 @@
0
+0
+0
+0
+0
diff --git a/tests/queries/0_stateless/02843_context_has_expired.sql b/tests/queries/0_stateless/02843_context_has_expired.sql
index ccef3458ed7..8355ce2c18c 100644
--- a/tests/queries/0_stateless/02843_context_has_expired.sql
+++ b/tests/queries/0_stateless/02843_context_has_expired.sql
@@ -1,23 +1,36 @@
-DROP DICTIONARY IF EXISTS dict;
-DROP TABLE IF EXISTS source;
+DROP DICTIONARY IF EXISTS 02843_dict;
+DROP TABLE IF EXISTS 02843_source;
+DROP TABLE IF EXISTS 02843_join;
-CREATE TABLE source
+CREATE TABLE 02843_source
(
id UInt64,
value String
)
ENGINE=Memory;
-CREATE DICTIONARY dict
+CREATE DICTIONARY 02843_dict
(
id UInt64,
value String
)
PRIMARY KEY id
-SOURCE(CLICKHOUSE(TABLE 'source'))
+SOURCE(CLICKHOUSE(TABLE '02843_source'))
LAYOUT(DIRECT());
-SELECT 1 IN (SELECT dictGet('dict', 'value', materialize('1')));
+SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1')));
-DROP DICTIONARY dict;
-DROP TABLE source;
+CREATE TABLE 02843_join (id UInt8, value String) ENGINE Join(ANY, LEFT, id);
+SELECT 1 IN (SELECT joinGet(02843_join, 'value', materialize(1)));
+SELECT 1 IN (SELECT joinGetOrNull(02843_join, 'value', materialize(1)));
+
+SELECT 1 IN (SELECT materialize(connectionId()));
+SELECT 1000000 IN (SELECT materialize(getSetting('max_threads')));
+SELECT 1 IN (SELECT file(materialize('a'))); -- { serverError 107 }
+
+EXPLAIN ESTIMATE SELECT 1 IN (SELECT dictGet('02843_dict', 'value', materialize('1')));
+EXPLAIN ESTIMATE SELECT 1 IN (SELECT joinGet(`02843_join`, 'value', materialize(1)));
+
+DROP DICTIONARY 02843_dict;
+DROP TABLE 02843_source;
+DROP TABLE 02843_join;
From 56888912368c2e4620fbe95322cd53e443e79626 Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 16 Aug 2023 19:00:27 +0000
Subject: [PATCH 47/51] curl: update to latest master (fixes CVE-2023-32001)
---
contrib/curl | 2 +-
contrib/curl-cmake/CMakeLists.txt | 4 +---
2 files changed, 2 insertions(+), 4 deletions(-)
diff --git a/contrib/curl b/contrib/curl
index b0edf0b7dae..eb3b049df52 160000
--- a/contrib/curl
+++ b/contrib/curl
@@ -1 +1 @@
-Subproject commit b0edf0b7dae44d9e66f270a257cf654b35d5263d
+Subproject commit eb3b049df526bf125eda23218e680ce7fa9ec46c
diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt
index 70d9c2816dc..ca4edd77b3a 100644
--- a/contrib/curl-cmake/CMakeLists.txt
+++ b/contrib/curl-cmake/CMakeLists.txt
@@ -40,7 +40,6 @@ set (SRCS
"${LIBRARY_DIR}/lib/strcase.c"
"${LIBRARY_DIR}/lib/easy.c"
"${LIBRARY_DIR}/lib/curl_fnmatch.c"
- "${LIBRARY_DIR}/lib/curl_log.c"
"${LIBRARY_DIR}/lib/fileinfo.c"
"${LIBRARY_DIR}/lib/krb5.c"
"${LIBRARY_DIR}/lib/memdebug.c"
@@ -113,6 +112,7 @@ set (SRCS
"${LIBRARY_DIR}/lib/psl.c"
"${LIBRARY_DIR}/lib/doh.c"
"${LIBRARY_DIR}/lib/urlapi.c"
+ "${LIBRARY_DIR}/lib/curl_trc.c"
"${LIBRARY_DIR}/lib/curl_get_line.c"
"${LIBRARY_DIR}/lib/altsvc.c"
"${LIBRARY_DIR}/lib/socketpair.c"
@@ -142,12 +142,10 @@ set (SRCS
"${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
"${LIBRARY_DIR}/lib/vtls/vtls.c"
- "${LIBRARY_DIR}/lib/vtls/nss.c"
"${LIBRARY_DIR}/lib/vtls/wolfssl.c"
"${LIBRARY_DIR}/lib/vtls/schannel.c"
"${LIBRARY_DIR}/lib/vtls/schannel_verify.c"
"${LIBRARY_DIR}/lib/vtls/sectransp.c"
- "${LIBRARY_DIR}/lib/vtls/gskit.c"
"${LIBRARY_DIR}/lib/vtls/mbedtls.c"
"${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/keylog.c"
From ea8a99c103342c28f20d0b07ed19318e9f2d5b9f Mon Sep 17 00:00:00 2001
From: Robert Schulze
Date: Wed, 16 Aug 2023 19:04:28 +0000
Subject: [PATCH 48/51] Sort stuff
---
contrib/curl-cmake/CMakeLists.txt | 224 +++++++++++++++---------------
1 file changed, 112 insertions(+), 112 deletions(-)
diff --git a/contrib/curl-cmake/CMakeLists.txt b/contrib/curl-cmake/CMakeLists.txt
index ca4edd77b3a..733865d5101 100644
--- a/contrib/curl-cmake/CMakeLists.txt
+++ b/contrib/curl-cmake/CMakeLists.txt
@@ -8,125 +8,122 @@ endif()
set (LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/curl")
set (SRCS
- "${LIBRARY_DIR}/lib/fopen.c"
- "${LIBRARY_DIR}/lib/noproxy.c"
- "${LIBRARY_DIR}/lib/idn.c"
- "${LIBRARY_DIR}/lib/cfilters.c"
- "${LIBRARY_DIR}/lib/cf-socket.c"
+ "${LIBRARY_DIR}/lib/altsvc.c"
+ "${LIBRARY_DIR}/lib/amigaos.c"
+ "${LIBRARY_DIR}/lib/asyn-thread.c"
+ "${LIBRARY_DIR}/lib/base64.c"
+ "${LIBRARY_DIR}/lib/bufq.c"
+ "${LIBRARY_DIR}/lib/bufref.c"
+ "${LIBRARY_DIR}/lib/cf-h1-proxy.c"
"${LIBRARY_DIR}/lib/cf-haproxy.c"
"${LIBRARY_DIR}/lib/cf-https-connect.c"
- "${LIBRARY_DIR}/lib/file.c"
- "${LIBRARY_DIR}/lib/timeval.c"
- "${LIBRARY_DIR}/lib/base64.c"
- "${LIBRARY_DIR}/lib/hostip.c"
- "${LIBRARY_DIR}/lib/progress.c"
- "${LIBRARY_DIR}/lib/formdata.c"
- "${LIBRARY_DIR}/lib/cookie.c"
- "${LIBRARY_DIR}/lib/http.c"
- "${LIBRARY_DIR}/lib/sendf.c"
- "${LIBRARY_DIR}/lib/url.c"
- "${LIBRARY_DIR}/lib/dict.c"
- "${LIBRARY_DIR}/lib/if2ip.c"
- "${LIBRARY_DIR}/lib/speedcheck.c"
- "${LIBRARY_DIR}/lib/ldap.c"
- "${LIBRARY_DIR}/lib/version.c"
- "${LIBRARY_DIR}/lib/getenv.c"
- "${LIBRARY_DIR}/lib/escape.c"
- "${LIBRARY_DIR}/lib/mprintf.c"
- "${LIBRARY_DIR}/lib/telnet.c"
- "${LIBRARY_DIR}/lib/netrc.c"
- "${LIBRARY_DIR}/lib/getinfo.c"
- "${LIBRARY_DIR}/lib/transfer.c"
- "${LIBRARY_DIR}/lib/strcase.c"
- "${LIBRARY_DIR}/lib/easy.c"
- "${LIBRARY_DIR}/lib/curl_fnmatch.c"
- "${LIBRARY_DIR}/lib/fileinfo.c"
- "${LIBRARY_DIR}/lib/krb5.c"
- "${LIBRARY_DIR}/lib/memdebug.c"
- "${LIBRARY_DIR}/lib/http_chunks.c"
- "${LIBRARY_DIR}/lib/strtok.c"
+ "${LIBRARY_DIR}/lib/cf-socket.c"
+ "${LIBRARY_DIR}/lib/cfilters.c"
+ "${LIBRARY_DIR}/lib/conncache.c"
"${LIBRARY_DIR}/lib/connect.c"
- "${LIBRARY_DIR}/lib/llist.c"
- "${LIBRARY_DIR}/lib/hash.c"
- "${LIBRARY_DIR}/lib/multi.c"
"${LIBRARY_DIR}/lib/content_encoding.c"
- "${LIBRARY_DIR}/lib/share.c"
- "${LIBRARY_DIR}/lib/http_digest.c"
- "${LIBRARY_DIR}/lib/md4.c"
- "${LIBRARY_DIR}/lib/md5.c"
- "${LIBRARY_DIR}/lib/http_negotiate.c"
- "${LIBRARY_DIR}/lib/inet_pton.c"
- "${LIBRARY_DIR}/lib/strtoofft.c"
- "${LIBRARY_DIR}/lib/strerror.c"
- "${LIBRARY_DIR}/lib/amigaos.c"
+ "${LIBRARY_DIR}/lib/cookie.c"
+ "${LIBRARY_DIR}/lib/curl_addrinfo.c"
+ "${LIBRARY_DIR}/lib/curl_des.c"
+ "${LIBRARY_DIR}/lib/curl_endian.c"
+ "${LIBRARY_DIR}/lib/curl_fnmatch.c"
+ "${LIBRARY_DIR}/lib/curl_get_line.c"
+ "${LIBRARY_DIR}/lib/curl_gethostname.c"
+ "${LIBRARY_DIR}/lib/curl_gssapi.c"
+ "${LIBRARY_DIR}/lib/curl_memrchr.c"
+ "${LIBRARY_DIR}/lib/curl_multibyte.c"
+ "${LIBRARY_DIR}/lib/curl_ntlm_core.c"
+ "${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
+ "${LIBRARY_DIR}/lib/curl_path.c"
+ "${LIBRARY_DIR}/lib/curl_range.c"
+ "${LIBRARY_DIR}/lib/curl_rtmp.c"
+ "${LIBRARY_DIR}/lib/curl_sasl.c"
+ "${LIBRARY_DIR}/lib/curl_sspi.c"
+ "${LIBRARY_DIR}/lib/curl_threads.c"
+ "${LIBRARY_DIR}/lib/curl_trc.c"
+ "${LIBRARY_DIR}/lib/dict.c"
+ "${LIBRARY_DIR}/lib/doh.c"
+ "${LIBRARY_DIR}/lib/dynbuf.c"
+ "${LIBRARY_DIR}/lib/dynhds.c"
+ "${LIBRARY_DIR}/lib/easy.c"
+ "${LIBRARY_DIR}/lib/escape.c"
+ "${LIBRARY_DIR}/lib/file.c"
+ "${LIBRARY_DIR}/lib/fileinfo.c"
+ "${LIBRARY_DIR}/lib/fopen.c"
+ "${LIBRARY_DIR}/lib/formdata.c"
+ "${LIBRARY_DIR}/lib/getenv.c"
+ "${LIBRARY_DIR}/lib/getinfo.c"
+ "${LIBRARY_DIR}/lib/gopher.c"
+ "${LIBRARY_DIR}/lib/hash.c"
+ "${LIBRARY_DIR}/lib/headers.c"
+ "${LIBRARY_DIR}/lib/hmac.c"
"${LIBRARY_DIR}/lib/hostasyn.c"
+ "${LIBRARY_DIR}/lib/hostip.c"
"${LIBRARY_DIR}/lib/hostip4.c"
"${LIBRARY_DIR}/lib/hostip6.c"
"${LIBRARY_DIR}/lib/hostsyn.c"
+ "${LIBRARY_DIR}/lib/hsts.c"
+ "${LIBRARY_DIR}/lib/http.c"
+ "${LIBRARY_DIR}/lib/http2.c"
+ "${LIBRARY_DIR}/lib/http_aws_sigv4.c"
+ "${LIBRARY_DIR}/lib/http_chunks.c"
+ "${LIBRARY_DIR}/lib/http_digest.c"
+ "${LIBRARY_DIR}/lib/http_negotiate.c"
+ "${LIBRARY_DIR}/lib/http_ntlm.c"
+ "${LIBRARY_DIR}/lib/http_proxy.c"
+ "${LIBRARY_DIR}/lib/idn.c"
+ "${LIBRARY_DIR}/lib/if2ip.c"
+ "${LIBRARY_DIR}/lib/imap.c"
"${LIBRARY_DIR}/lib/inet_ntop.c"
+ "${LIBRARY_DIR}/lib/inet_pton.c"
+ "${LIBRARY_DIR}/lib/krb5.c"
+ "${LIBRARY_DIR}/lib/ldap.c"
+ "${LIBRARY_DIR}/lib/llist.c"
+ "${LIBRARY_DIR}/lib/md4.c"
+ "${LIBRARY_DIR}/lib/md5.c"
+ "${LIBRARY_DIR}/lib/memdebug.c"
+ "${LIBRARY_DIR}/lib/mime.c"
+ "${LIBRARY_DIR}/lib/mprintf.c"
+ "${LIBRARY_DIR}/lib/mqtt.c"
+ "${LIBRARY_DIR}/lib/multi.c"
+ "${LIBRARY_DIR}/lib/netrc.c"
+ "${LIBRARY_DIR}/lib/nonblock.c"
+ "${LIBRARY_DIR}/lib/noproxy.c"
+ "${LIBRARY_DIR}/lib/openldap.c"
"${LIBRARY_DIR}/lib/parsedate.c"
+ "${LIBRARY_DIR}/lib/pingpong.c"
+ "${LIBRARY_DIR}/lib/pop3.c"
+ "${LIBRARY_DIR}/lib/progress.c"
+ "${LIBRARY_DIR}/lib/psl.c"
+ "${LIBRARY_DIR}/lib/rand.c"
+ "${LIBRARY_DIR}/lib/rename.c"
+ "${LIBRARY_DIR}/lib/rtsp.c"
"${LIBRARY_DIR}/lib/select.c"
- "${LIBRARY_DIR}/lib/splay.c"
- "${LIBRARY_DIR}/lib/strdup.c"
+ "${LIBRARY_DIR}/lib/sendf.c"
+ "${LIBRARY_DIR}/lib/setopt.c"
+ "${LIBRARY_DIR}/lib/sha256.c"
+ "${LIBRARY_DIR}/lib/share.c"
+ "${LIBRARY_DIR}/lib/slist.c"
+ "${LIBRARY_DIR}/lib/smb.c"
+ "${LIBRARY_DIR}/lib/smtp.c"
+ "${LIBRARY_DIR}/lib/socketpair.c"
"${LIBRARY_DIR}/lib/socks.c"
- "${LIBRARY_DIR}/lib/curl_addrinfo.c"
"${LIBRARY_DIR}/lib/socks_gssapi.c"
"${LIBRARY_DIR}/lib/socks_sspi.c"
- "${LIBRARY_DIR}/lib/curl_sspi.c"
- "${LIBRARY_DIR}/lib/slist.c"
- "${LIBRARY_DIR}/lib/nonblock.c"
- "${LIBRARY_DIR}/lib/curl_memrchr.c"
- "${LIBRARY_DIR}/lib/imap.c"
- "${LIBRARY_DIR}/lib/pop3.c"
- "${LIBRARY_DIR}/lib/smtp.c"
- "${LIBRARY_DIR}/lib/pingpong.c"
- "${LIBRARY_DIR}/lib/rtsp.c"
- "${LIBRARY_DIR}/lib/curl_threads.c"
- "${LIBRARY_DIR}/lib/warnless.c"
- "${LIBRARY_DIR}/lib/hmac.c"
- "${LIBRARY_DIR}/lib/curl_rtmp.c"
- "${LIBRARY_DIR}/lib/openldap.c"
- "${LIBRARY_DIR}/lib/curl_gethostname.c"
- "${LIBRARY_DIR}/lib/gopher.c"
- "${LIBRARY_DIR}/lib/http_proxy.c"
- "${LIBRARY_DIR}/lib/asyn-thread.c"
- "${LIBRARY_DIR}/lib/curl_gssapi.c"
- "${LIBRARY_DIR}/lib/http_ntlm.c"
- "${LIBRARY_DIR}/lib/curl_ntlm_wb.c"
- "${LIBRARY_DIR}/lib/curl_ntlm_core.c"
- "${LIBRARY_DIR}/lib/curl_sasl.c"
- "${LIBRARY_DIR}/lib/rand.c"
- "${LIBRARY_DIR}/lib/curl_multibyte.c"
- "${LIBRARY_DIR}/lib/conncache.c"
- "${LIBRARY_DIR}/lib/cf-h1-proxy.c"
- "${LIBRARY_DIR}/lib/http2.c"
- "${LIBRARY_DIR}/lib/smb.c"
- "${LIBRARY_DIR}/lib/curl_endian.c"
- "${LIBRARY_DIR}/lib/curl_des.c"
+ "${LIBRARY_DIR}/lib/speedcheck.c"
+ "${LIBRARY_DIR}/lib/splay.c"
+ "${LIBRARY_DIR}/lib/strcase.c"
+ "${LIBRARY_DIR}/lib/strdup.c"
+ "${LIBRARY_DIR}/lib/strerror.c"
+ "${LIBRARY_DIR}/lib/strtok.c"
+ "${LIBRARY_DIR}/lib/strtoofft.c"
"${LIBRARY_DIR}/lib/system_win32.c"
- "${LIBRARY_DIR}/lib/mime.c"
- "${LIBRARY_DIR}/lib/sha256.c"
- "${LIBRARY_DIR}/lib/setopt.c"
- "${LIBRARY_DIR}/lib/curl_path.c"
- "${LIBRARY_DIR}/lib/curl_range.c"
- "${LIBRARY_DIR}/lib/psl.c"
- "${LIBRARY_DIR}/lib/doh.c"
- "${LIBRARY_DIR}/lib/urlapi.c"
- "${LIBRARY_DIR}/lib/curl_trc.c"
- "${LIBRARY_DIR}/lib/curl_get_line.c"
- "${LIBRARY_DIR}/lib/altsvc.c"
- "${LIBRARY_DIR}/lib/socketpair.c"
- "${LIBRARY_DIR}/lib/bufref.c"
- "${LIBRARY_DIR}/lib/bufq.c"
- "${LIBRARY_DIR}/lib/dynbuf.c"
- "${LIBRARY_DIR}/lib/dynhds.c"
- "${LIBRARY_DIR}/lib/hsts.c"
- "${LIBRARY_DIR}/lib/http_aws_sigv4.c"
- "${LIBRARY_DIR}/lib/mqtt.c"
- "${LIBRARY_DIR}/lib/rename.c"
- "${LIBRARY_DIR}/lib/headers.c"
+ "${LIBRARY_DIR}/lib/telnet.c"
"${LIBRARY_DIR}/lib/timediff.c"
- "${LIBRARY_DIR}/lib/vauth/vauth.c"
+ "${LIBRARY_DIR}/lib/timeval.c"
+ "${LIBRARY_DIR}/lib/transfer.c"
+ "${LIBRARY_DIR}/lib/url.c"
+ "${LIBRARY_DIR}/lib/urlapi.c"
"${LIBRARY_DIR}/lib/vauth/cleartext.c"
"${LIBRARY_DIR}/lib/vauth/cram.c"
"${LIBRARY_DIR}/lib/vauth/digest.c"
@@ -138,21 +135,24 @@ set (SRCS
"${LIBRARY_DIR}/lib/vauth/oauth2.c"
"${LIBRARY_DIR}/lib/vauth/spnego_gssapi.c"
"${LIBRARY_DIR}/lib/vauth/spnego_sspi.c"
+ "${LIBRARY_DIR}/lib/vauth/vauth.c"
+ "${LIBRARY_DIR}/lib/version.c"
"${LIBRARY_DIR}/lib/vquic/vquic.c"
- "${LIBRARY_DIR}/lib/vtls/openssl.c"
+ "${LIBRARY_DIR}/lib/vssh/libssh.c"
+ "${LIBRARY_DIR}/lib/vssh/libssh2.c"
+ "${LIBRARY_DIR}/lib/vtls/bearssl.c"
"${LIBRARY_DIR}/lib/vtls/gtls.c"
- "${LIBRARY_DIR}/lib/vtls/vtls.c"
- "${LIBRARY_DIR}/lib/vtls/wolfssl.c"
+ "${LIBRARY_DIR}/lib/vtls/hostcheck.c"
+ "${LIBRARY_DIR}/lib/vtls/keylog.c"
+ "${LIBRARY_DIR}/lib/vtls/mbedtls.c"
+ "${LIBRARY_DIR}/lib/vtls/openssl.c"
"${LIBRARY_DIR}/lib/vtls/schannel.c"
"${LIBRARY_DIR}/lib/vtls/schannel_verify.c"
"${LIBRARY_DIR}/lib/vtls/sectransp.c"
- "${LIBRARY_DIR}/lib/vtls/mbedtls.c"
- "${LIBRARY_DIR}/lib/vtls/bearssl.c"
- "${LIBRARY_DIR}/lib/vtls/keylog.c"
+ "${LIBRARY_DIR}/lib/vtls/vtls.c"
+ "${LIBRARY_DIR}/lib/vtls/wolfssl.c"
"${LIBRARY_DIR}/lib/vtls/x509asn1.c"
- "${LIBRARY_DIR}/lib/vtls/hostcheck.c"
- "${LIBRARY_DIR}/lib/vssh/libssh2.c"
- "${LIBRARY_DIR}/lib/vssh/libssh.c"
+ "${LIBRARY_DIR}/lib/warnless.c"
)
add_library (_curl ${SRCS})
From d5ed014ec4e4a2a0c49ac95a193aa0c15a511f4c Mon Sep 17 00:00:00 2001
From: Alexander Tokmakov
Date: Wed, 16 Aug 2023 22:56:32 +0300
Subject: [PATCH 49/51] Fix flaky test `02443_detach_attach_partition` (#53478)
* fix flaky test
* empty commit
---
.../02443_detach_attach_partition.reference | 4 ++--
.../02443_detach_attach_partition.sh | 18 ++++++++++++++----
2 files changed, 16 insertions(+), 6 deletions(-)
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.reference b/tests/queries/0_stateless/02443_detach_attach_partition.reference
index 70930ea6d9a..77cfb77479d 100644
--- a/tests/queries/0_stateless/02443_detach_attach_partition.reference
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.reference
@@ -1,4 +1,4 @@
default begin inserts
default end inserts
-20 210
-20 210
+30 465
+30 465
diff --git a/tests/queries/0_stateless/02443_detach_attach_partition.sh b/tests/queries/0_stateless/02443_detach_attach_partition.sh
index 5a3f1b64065..ae104b833e3 100755
--- a/tests/queries/0_stateless/02443_detach_attach_partition.sh
+++ b/tests/queries/0_stateless/02443_detach_attach_partition.sh
@@ -1,5 +1,5 @@
#!/usr/bin/env bash
-# Tags: race, zookeeper, no-parallel
+# Tags: race, zookeeper, long
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
@@ -29,9 +29,19 @@ function thread_attach()
done
}
+insert_type=$(($RANDOM % 3))
+$CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'insert_type $insert_type' FORMAT Null"
+
function insert()
{
- $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $i" 2>/dev/null
+ # Fault injection may lead to duplicates; each branch below avoids them in a different way
+ if [[ "$insert_type" -eq 0 ]]; then
+ $CLICKHOUSE_CLIENT --insert_deduplication_token=$1 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null
+ elif [[ "$insert_type" -eq 1 ]]; then
+ $CLICKHOUSE_CLIENT -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $1, $1" 2>/dev/null
+ else
+ $CLICKHOUSE_CLIENT --insert_keeper_fault_injection_probability=0 -q "INSERT INTO alter_table$(($RANDOM % 2)) SELECT $RANDOM, $1" 2>/dev/null
+ fi
}
thread_detach & PID_1=$!
@@ -41,8 +51,8 @@ thread_attach & PID_4=$!
function do_inserts()
{
- for i in {1..20}; do
- while ! insert; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
+ for i in {1..30}; do
+ while ! insert $i; do $CLICKHOUSE_CLIENT -q "SELECT '$CLICKHOUSE_DATABASE', 'retrying insert $i' FORMAT Null"; done
done
}
From 857856b8b674c46e4c768780efdc9631a1fdcc87 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 17 Aug 2023 03:58:32 +0200
Subject: [PATCH 50/51] Leave only simplifications
---
docker/test/performance-comparison/compare.sh | 38 -------------------
tests/ci/performance_comparison_check.py | 18 ---------
2 files changed, 56 deletions(-)
diff --git a/docker/test/performance-comparison/compare.sh b/docker/test/performance-comparison/compare.sh
index ce8c4903c00..4b1b5c13b9b 100755
--- a/docker/test/performance-comparison/compare.sh
+++ b/docker/test/performance-comparison/compare.sh
@@ -63,25 +63,6 @@ function left_or_right()
function configure
{
- # Setup a cluster for logs export to ClickHouse Cloud
- # Note: these variables are provided to the Docker run command by the Python script in tests/ci
- if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
- then
- set +x
- echo "
-remote_servers:
- system_logs_export:
- shard:
- replica:
- secure: 1
- user: ci
- host: '${CLICKHOUSE_CI_LOGS_HOST}'
- port: 9440
- password: '${CLICKHOUSE_CI_LOGS_PASSWORD}'
-" > right/config/config.d/system_logs_export.yaml
- set -x
- fi
-
# Use the new config for both servers, so that we can change it in a PR.
rm right/config/config.d/text_log.xml ||:
cp -rv right/config left ||:
@@ -111,25 +92,6 @@ remote_servers:
wait_for_server $LEFT_SERVER_PORT $left_pid
echo "Server for setup started"
- # Initialize export of system logs to ClickHouse Cloud
- # Note: it is set up for the "left" server, and its database is then cloned to the "right" server.
- if [ -n "${CLICKHOUSE_CI_LOGS_HOST}" ]
- then
- (
- set +x
- export EXTRA_COLUMNS_EXPRESSION="$PR_TO_TEST AS pull_request_number, '$SHA_TO_TEST' AS commit_sha, '$CHECK_START_TIME' AS check_start_time, '$CHECK_NAME' AS check_name, '$INSTANCE_TYPE' AS instance_type"
- export CONNECTION_PARAMETERS="--secure --user ci --host ${CLICKHOUSE_CI_LOGS_HOST} --password ${CLICKHOUSE_CI_LOGS_PASSWORD}"
-
- /setup_export_logs.sh "--port $LEFT_SERVER_PORT"
-
- # Unset variables after use
- export CONNECTION_PARAMETERS=''
- export CLICKHOUSE_CI_LOGS_HOST=''
- export CLICKHOUSE_CI_LOGS_PASSWORD=''
- set -x
- )
- fi
-
clickhouse-client --port $LEFT_SERVER_PORT --query "create database test" ||:
clickhouse-client --port $LEFT_SERVER_PORT --query "rename table datasets.hits_v1 to test.hits" ||:
diff --git a/tests/ci/performance_comparison_check.py b/tests/ci/performance_comparison_check.py
index 70369f9881e..27a67e2ae0e 100644
--- a/tests/ci/performance_comparison_check.py
+++ b/tests/ci/performance_comparison_check.py
@@ -39,8 +39,6 @@ def get_run_command(
instance_type = get_instance_type()
envs = [
- "-e CLICKHOUSE_CI_LOGS_HOST",
- "-e CLICKHOUSE_CI_LOGS_PASSWORD",
f"-e CHECK_START_TIME='{check_start_time}'",
f"-e CHECK_NAME='{check_name}'",
f"-e INSTANCE_TYPE='{instance_type}'",
@@ -203,22 +201,6 @@ if __name__ == "__main__":
subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True)
- # Cleanup run log from the credentials of CI logs database.
- # Note: a malicious user can still print them by splitting the value into parts.
- # But we will be warned when a malicious user modifies CI script.
- # Although they can also print them from inside tests.
- # Nevertheless, the credentials of the CI logs have limited scope
- # and does not provide access to sensitive info.
-
- ci_logs_host = os.getenv("CLICKHOUSE_CI_LOGS_HOST", "CLICKHOUSE_CI_LOGS_HOST")
- ci_logs_password = os.getenv(
- "CLICKHOUSE_CI_LOGS_PASSWORD", "CLICKHOUSE_CI_LOGS_PASSWORD"
- )
- subprocess.check_call(
- f"sed -i -r -e 's!{ci_logs_host}!CLICKHOUSE_CI_LOGS_HOST!g; s!{ci_logs_password}!CLICKHOUSE_CI_LOGS_PASSWORD!g;' '{run_log_path}' '{compare_log_path}'",
- shell=True,
- )
-
paths = {
"compare.log": compare_log_path,
"output.7z": os.path.join(result_path, "output.7z"),
From c3e6f7e9ae792b54ef713beb8a5513307af119f4 Mon Sep 17 00:00:00 2001
From: Alexey Milovidov
Date: Thu, 17 Aug 2023 03:59:15 +0200
Subject: [PATCH 51/51] Leave only simplifications
---
docker/test/base/setup_export_logs.sh | 14 ++++++--------
1 file changed, 6 insertions(+), 8 deletions(-)
diff --git a/docker/test/base/setup_export_logs.sh b/docker/test/base/setup_export_logs.sh
index ef510552d2f..12fae855b03 100755
--- a/docker/test/base/setup_export_logs.sh
+++ b/docker/test/base/setup_export_logs.sh
@@ -1,5 +1,4 @@
#!/bin/bash
-# shellcheck disable=SC2086
# This script sets up export of system log tables to a remote server.
# Remote tables are created if they do not exist, and augmented with extra columns,
@@ -8,7 +7,6 @@
# Pre-configured destination cluster, where to export the data
CLUSTER=${CLUSTER:=system_logs_export}
-LOCAL_PARAMETERS=$1
EXTRA_COLUMNS=${EXTRA_COLUMNS:="pull_request_number UInt32, commit_sha String, check_start_time DateTime, check_name LowCardinality(String), instance_type LowCardinality(String), "}
EXTRA_COLUMNS_EXPRESSION=${EXTRA_COLUMNS_EXPRESSION:="0 AS pull_request_number, '' AS commit_sha, now() AS check_start_time, '' AS check_name, '' AS instance_type"}
@@ -17,13 +15,13 @@ EXTRA_ORDER_BY_COLUMNS=${EXTRA_ORDER_BY_COLUMNS:="check_name, "}
CONNECTION_PARAMETERS=${CONNECTION_PARAMETERS:=""}
# Create all configured system logs:
-clickhouse-client $LOCAL_PARAMETERS --query "SYSTEM FLUSH LOGS"
+clickhouse-client --query "SYSTEM FLUSH LOGS"
# For each system log table:
-clickhouse-client $LOCAL_PARAMETERS --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
+clickhouse-client --query "SHOW TABLES FROM system LIKE '%\\_log'" | while read -r table
do
# Calculate hash of its structure:
- hash=$(clickhouse-client $LOCAL_PARAMETERS --query "
+ hash=$(clickhouse-client --query "
SELECT sipHash64(groupArray((name, type)))
FROM (SELECT name, type FROM system.columns
WHERE database = 'system' AND table = '$table'
@@ -31,7 +29,7 @@ do
")
# Create the destination table with adapted name and structure:
- statement=$(clickhouse-client $LOCAL_PARAMETERS --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
+ statement=$(clickhouse-client --format TSVRaw --query "SHOW CREATE TABLE system.${table}" | sed -r -e '
s/^\($/('"$EXTRA_COLUMNS"'/;
s/ORDER BY \(/ORDER BY ('"$EXTRA_ORDER_BY_COLUMNS"'/;
s/^CREATE TABLE system\.\w+_log$/CREATE TABLE IF NOT EXISTS '"$table"'_'"$hash"'/;
@@ -45,7 +43,7 @@ do
echo "Creating table system.${table}_sender" >&2
# Create Distributed table and materialized view to watch on the original table:
- clickhouse-client $LOCAL_PARAMETERS --query "
+ clickhouse-client --query "
CREATE TABLE system.${table}_sender
ENGINE = Distributed(${CLUSTER}, default, ${table}_${hash})
EMPTY AS
@@ -55,7 +53,7 @@ do
echo "Creating materialized view system.${table}_watcher" >&2
- clickhouse-client $LOCAL_PARAMETERS --query "
+ clickhouse-client --query "
CREATE MATERIALIZED VIEW system.${table}_watcher TO system.${table}_sender AS
SELECT ${EXTRA_COLUMNS_EXPRESSION}, *
FROM system.${table}