Merge branch 'master' of github.com:ClickHouse/ClickHouse into divanik/fix_clickhouse_disks_2

This commit is contained in:
divanik 2024-06-20 14:49:23 +00:00
commit 2f8b61d3ad
38 changed files with 536 additions and 519 deletions

View File

@ -23,9 +23,6 @@
#include <openssl/conf.h>
#endif
#if __has_feature(address_sanitizer)
#include <sanitizer/lsan_interface.h>
#endif
using Poco::RandomInputStream;
using Poco::Thread;
@ -70,18 +67,12 @@ void OpenSSLInitializer::initialize()
SSL_library_init();
SSL_load_error_strings();
OpenSSL_add_all_algorithms();
char seed[SEEDSIZE];
RandomInputStream rnd;
rnd.read(seed, sizeof(seed));
{
# if __has_feature(address_sanitizer)
/// Leak sanitizer (part of address sanitizer) thinks that a few bytes of memory in OpenSSL are allocated during initialization but never released.
__lsan::ScopedDisabler lsan_disabler;
#endif
RAND_seed(seed, SEEDSIZE);
}
RAND_seed(seed, SEEDSIZE);
int nMutexes = CRYPTO_num_locks();
_mutexes = new Poco::FastMutex[nMutexes];
CRYPTO_set_locking_callback(&OpenSSLInitializer::lock);
@ -89,8 +80,8 @@ void OpenSSLInitializer::initialize()
// https://sourceforge.net/p/poco/bugs/110/
//
// From http://www.openssl.org/docs/crypto/threads.html :
// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
// then a default implementation is used - on Windows and BeOS this uses the system's
// "If the application does not register such a callback using CRYPTO_THREADID_set_callback(),
// then a default implementation is used - on Windows and BeOS this uses the system's
// default thread identifying APIs"
CRYPTO_set_id_callback(&OpenSSLInitializer::id);
CRYPTO_set_dynlock_create_callback(&OpenSSLInitializer::dynlockCreate);
@ -109,7 +100,7 @@ void OpenSSLInitializer::uninitialize()
CRYPTO_set_locking_callback(0);
CRYPTO_set_id_callback(0);
delete [] _mutexes;
CONF_modules_free();
}
}

View File

@ -1,12 +1,12 @@
# These variables are autochanged by tests/ci/version_helper.py:
# NOTE: has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# NOTE: VERSION_REVISION has nothing in common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54487)
SET(VERSION_REVISION 54488)
SET(VERSION_MAJOR 24)
SET(VERSION_MINOR 6)
SET(VERSION_MINOR 7)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 70a1d3a63d47f0be077d67b8deb907230fc7cfb0)
SET(VERSION_DESCRIBE v24.6.1.1-testing)
SET(VERSION_STRING 24.6.1.1)
SET(VERSION_GITHASH aa023477a9265e403982fca5ee29a714db5133d9)
SET(VERSION_DESCRIBE v24.7.1.1-testing)
SET(VERSION_STRING 24.7.1.1)
# end of autochange

contrib/openssl vendored

@ -1 +1 @@
Subproject commit e0d6ae2bf93cf6dc26bb86aa39992bc6a410869a
Subproject commit 277de2ba202af4eb2291b363456d32ff0960e559

View File

@ -31,6 +31,56 @@ Alternatively, in order to enable the MySQL interface for an existing service:
3. After entering the password, you will be shown the MySQL connection string for this service
![Connection screen - MySQL Enabled](./images/mysql5.png)
## Creating multiple MySQL users in ClickHouse Cloud
By default, there is a built-in `mysql4<subdomain>` user, which uses the same password as the `default` one. The `<subdomain>` part is the first segment of your ClickHouse Cloud hostname. This format is necessary to work with tools that implement secure connections but don't provide [SNI information in their TLS handshake](https://www.cloudflare.com/learning/ssl/what-is-sni), which makes it impossible to do the internal routing without an extra hint in the username (the MySQL console client is one such tool).
Because of this, we _highly recommend_ following the `mysql4<subdomain>_<username>` format when creating a new user intended to be used with the MySQL interface, where `<subdomain>` is a hint to identify your Cloud service, and `<username>` is an arbitrary suffix of your choice.
:::tip
For a ClickHouse Cloud hostname like `foobar.us-east1.aws.clickhouse.cloud`, the `<subdomain>` part equals `foobar`, and a custom MySQL username could look like `mysql4foobar_team1`.
:::
You can create extra users to use with the MySQL interface if, for example, you need to apply extra settings.
1. Optional - create a [settings profile](https://clickhouse.com/docs/en/sql-reference/statements/create/settings-profile) to apply to your custom user. For example, `my_custom_profile` with an extra setting that will be applied by default when we connect with the user we create later:
```sql
CREATE SETTINGS PROFILE my_custom_profile SETTINGS prefer_column_name_to_alias=1;
```
`prefer_column_name_to_alias` is used just as an example; you can use other settings there.
2. [Create a user](https://clickhouse.com/docs/en/sql-reference/statements/create/user) using the following format: `mysql4<subdomain>_<username>` ([see above](#creating-multiple-mysql-users-in-clickhouse-cloud)). The password must be in double SHA1 format. For example:
```sql
CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$';
```
or if you want to use a custom profile for this user:
```sql
CREATE USER mysql4foobar_team1 IDENTIFIED WITH double_sha1_password BY 'YourPassword42$' SETTINGS PROFILE 'my_custom_profile';
```
where `my_custom_profile` is the name of the profile you created earlier.
3. [Grant](https://clickhouse.com/docs/en/sql-reference/statements/grant) the new user the necessary permissions to interact with the desired tables or databases. For example, if you want to grant access to `system.query_log` only:
```sql
GRANT SELECT ON system.query_log TO mysql4foobar_team1;
```
4. Use the created user to connect to your ClickHouse Cloud service with the MySQL interface.
### Troubleshooting multiple MySQL users in ClickHouse Cloud
If you created a new MySQL user and see the following error while connecting via the MySQL CLI client:
```
ERROR 2013 (HY000): Lost connection to MySQL server at 'reading authorization packet', system error: 54
```
In this case, ensure that the username follows the `mysql4<subdomain>_<username>` format, as described [above](#creating-multiple-mysql-users-in-clickhouse-cloud).
## Enabling the MySQL Interface On Self-managed ClickHouse
Add the [mysql_port](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-mysql_port) setting to your server's configuration file. For example, you could define the port in a new XML file in your `config.d/` [folder](../operations/configuration-files):

View File

@ -1592,19 +1592,19 @@ Default value: `default`.
## parallel_replicas_custom_key_range_lower {#parallel_replicas_custom_key_range_lower}
Allows the filter type `range` to split the work evenly between replicas based on the custom range `[parallel_replicas_custom_key_range_lower, INT_MAX]`.
When used in conjunction with [parallel_replicas_custom_key_range_upper](#parallel_replicas_custom_key_range_upper), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
Note: This setting will not cause any additional data to be filtered during query processing; rather, it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
## parallel_replicas_custom_key_range_upper {#parallel_replicas_custom_key_range_upper}
Allows the filter type `range` to split the work evenly between replicas based on the custom range `[0, parallel_replicas_custom_key_range_upper]`. A value of 0 disables the upper bound, setting it to the max value of the custom key expression.
When used in conjunction with [parallel_replicas_custom_key_range_lower](#parallel_replicas_custom_key_range_lower), it lets the filter evenly split the work over replicas for the range `[parallel_replicas_custom_key_range_lower, parallel_replicas_custom_key_range_upper]`.
Note: This setting will not cause any additional data to be filtered during query processing; rather, it changes the points at which the range filter breaks up the range `[0, INT_MAX]` for parallel processing.
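A minimal usage sketch of the two settings together (the table name `events`, the `user_id` column, the replica count, and the bounds are illustrative assumptions, not part of this commit):
```sql
SELECT count()
FROM events
SETTINGS
    max_parallel_replicas = 3,
    parallel_replicas_custom_key = 'user_id',
    parallel_replicas_custom_key_filter_type = 'range',
    parallel_replicas_custom_key_range_lower = 0,
    parallel_replicas_custom_key_range_upper = 100000;
```
Each replica then reads only the slice of `[0, 100000]` that the range filter assigns to it.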
## allow_experimental_parallel_reading_from_replicas
@ -3188,7 +3188,7 @@ Default value: `0`.
## lightweight_deletes_sync {#lightweight_deletes_sync}
The same as 'mutation_sync', but controls only execution of lightweight deletes.
Possible values:
@ -5150,7 +5150,7 @@ Allows using statistic to optimize the order of [prewhere conditions](../../sql-
## analyze_index_with_space_filling_curves
If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.
If a table has a space-filling curve in its index, e.g. `ORDER BY mortonEncode(x, y)` or `ORDER BY hilbertEncode(x, y)`, and the query has conditions on its arguments, e.g. `x >= 10 AND x <= 20 AND y >= 20 AND y <= 30`, use the space-filling curve for index analysis.
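A minimal sketch of the kind of table and query this setting targets (the table, data, and names are illustrative, mirroring the test added later in this commit):
```sql
CREATE TABLE points (x UInt32, y UInt32)
ENGINE = MergeTree ORDER BY mortonEncode(x, y);

INSERT INTO points SELECT number DIV 1024, number % 1024 FROM numbers(1048576);

-- The conditions on x and y are mapped back onto intervals of the curve,
-- so the primary index can prune granules instead of scanning the full table.
SELECT count() FROM points
WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30
SETTINGS analyze_index_with_space_filling_curves = 1;
```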
## query_plan_enable_optimizations {#query_plan_enable_optimizations}

View File

@ -1,90 +0,0 @@
---
slug: /en/sql-reference/aggregate-functions/reference/groupconcat
sidebar_position: 363
sidebar_label: groupConcat
title: groupConcat
---
Calculates a concatenated string from a group of strings, optionally separated by a delimiter, and optionally limited by a maximum number of elements.
**Syntax**
``` sql
groupConcat(expression [, delimiter] [, limit]);
```
**Arguments**
- `expression` — The expression or column name that outputs strings to be concatenated.
- `delimiter` — A [string](../../../sql-reference/data-types/string.md) that will be used to separate concatenated values. This parameter is optional and defaults to an empty string if not specified.
- `limit` — A positive [integer](../../../sql-reference/data-types/int-uint.md) specifying the maximum number of elements to concatenate. If more elements are present, excess elements are ignored. This parameter is optional.
:::note
If delimiter is specified without limit, it must be the first parameter following the expression. If both delimiter and limit are specified, delimiter must precede limit.
:::
**Returned value**
- Returns a [string](../../../sql-reference/data-types/string.md) consisting of the concatenated values of the column or expression. If the group has no elements, or only `NULL` elements, the result is a nullable string containing `NULL`.
**Examples**
Input table:
``` text
┌─id─┬─name─┐
│  1 │ John │
│  2 │ Jane │
│  3 │ Bob  │
└────┴──────┘
```
1. Basic usage without a delimiter:
Query:
``` sql
SELECT groupConcat(Name) FROM Employees;
```
Result:
``` text
JohnJaneBob
```
This concatenates all names into one continuous string without any separator.
2. Using comma as a delimiter:
Query:
``` sql
SELECT groupConcat(Name, ', ') FROM Employees;
```
Result:
``` text
John, Jane, Bob
```
This output shows the names separated by a comma followed by a space.
3. Limiting the number of concatenated elements:
Query:
``` sql
SELECT groupConcat(Name, ', ', 2) FROM Employees;
```
Result:
``` text
John, Jane
```
This query limits the output to the first two names, even though there are more names in the table.

View File

@ -1,265 +0,0 @@
#include <AggregateFunctions/IAggregateFunction.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/FactoryHelpers.h>
#include <Columns/IColumn.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Core/ServerSettings.h>
#include <Core/ColumnWithTypeAndName.h>
#include <Common/ArenaAllocator.h>
#include <Common/assert_cast.h>
#include <Interpreters/castColumn.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int BAD_ARGUMENTS;
}
namespace
{
struct GroupConcatDataBase
{
UInt64 data_size = 0;
UInt64 allocated_size = 0;
char * data = nullptr;
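/// Ensure the buffer can hold `add` more bytes; grow geometrically to amortize arena reallocations.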
void checkAndUpdateSize(UInt64 add, Arena * arena)
{
if (data_size + add >= allocated_size)
{
auto old_size = allocated_size;
allocated_size = std::max(2 * allocated_size, data_size + add);
data = arena->realloc(data, old_size, allocated_size);
}
}
void insertChar(const char * str, UInt64 str_size, Arena * arena)
{
checkAndUpdateSize(str_size, arena);
memcpy(data + data_size, str, str_size);
data_size += str_size;
}
};
struct GroupConcatData : public GroupConcatDataBase
{
using Offset = UInt64;
using Allocator = MixedAlignedArenaAllocator<alignof(Offset), 4096>;
using Offsets = PODArray<Offset, 32, Allocator>;
/// offset[i * 2] - beginning of the i-th row, offset[i * 2 + 1] - end of the i-th row
Offsets offsets;
UInt64 num_rows = 0;
UInt64 getSize(size_t i) const { return offsets[i * 2 + 1] - offsets[i * 2]; }
UInt64 getString(size_t i) const { return offsets[i * 2]; }
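/// Serialize the value as text into the arena buffer and record its [begin, end) offsets.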
void insert(const IColumn * column, const SerializationPtr & serialization, size_t row_num, Arena * arena)
{
WriteBufferFromOwnString buff;
serialization->serializeText(*column, row_num, buff, {});
auto string = buff.stringView();
checkAndUpdateSize(string.size(), arena);
memcpy(data + data_size, string.data(), string.size());
offsets.push_back(data_size, arena);
data_size += string.size();
offsets.push_back(data_size, arena);
num_rows++;
}
};
template <bool has_limit>
class GroupConcatImpl final
: public IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>
{
static constexpr auto name = "groupConcat";
SerializationPtr serialization;
UInt64 limit;
const String delimiter;
public:
GroupConcatImpl(const DataTypePtr & data_type_, const Array & parameters_, UInt64 limit_, const String & delimiter_)
: IAggregateFunctionDataHelper<GroupConcatData, GroupConcatImpl<has_limit>>(
{data_type_}, parameters_, std::make_shared<DataTypeString>())
, serialization(this->argument_types[0]->getDefaultSerialization())
, limit(limit_)
, delimiter(delimiter_)
{
}
String getName() const override { return name; }
void add(AggregateDataPtr __restrict place, const IColumn ** columns, size_t row_num, Arena * arena) const override
{
auto & cur_data = this->data(place);
if constexpr (has_limit)
if (cur_data.num_rows >= limit)
return;
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insert(columns[0], serialization, row_num, arena);
}
void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr rhs, Arena * arena) const override
{
auto & cur_data = this->data(place);
auto & rhs_data = this->data(rhs);
if (rhs_data.data_size == 0)
return;
if constexpr (has_limit)
{
UInt64 new_elems_count = std::min(rhs_data.num_rows, limit - cur_data.num_rows);
for (UInt64 i = 0; i < new_elems_count; ++i)
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.offsets.push_back(cur_data.data_size, arena);
cur_data.insertChar(rhs_data.data + rhs_data.getString(i), rhs_data.getSize(i), arena);
cur_data.num_rows++;
cur_data.offsets.push_back(cur_data.data_size, arena);
}
}
else
{
if (cur_data.data_size != 0)
cur_data.insertChar(delimiter.c_str(), delimiter.size(), arena);
cur_data.insertChar(rhs_data.data, rhs_data.data_size, arena);
}
}
void serialize(ConstAggregateDataPtr __restrict place, WriteBuffer & buf, std::optional<size_t> /* version */) const override
{
auto & cur_data = this->data(place);
writeVarUInt(cur_data.data_size, buf);
writeVarUInt(cur_data.allocated_size, buf);
buf.write(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
writeVarUInt(cur_data.num_rows, buf);
for (const auto & offset : cur_data.offsets)
writeVarUInt(offset, buf);
}
}
void deserialize(AggregateDataPtr __restrict place, ReadBuffer & buf, std::optional<size_t> /* version */, Arena * arena) const override
{
auto & cur_data = this->data(place);
readVarUInt(cur_data.data_size, buf);
readVarUInt(cur_data.allocated_size, buf);
buf.readStrict(cur_data.data, cur_data.data_size);
if constexpr (has_limit)
{
readVarUInt(cur_data.num_rows, buf);
cur_data.offsets.resize_exact(cur_data.num_rows * 2, arena);
for (auto & offset : cur_data.offsets)
readVarUInt(offset, buf);
}
}
void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
{
auto & cur_data = this->data(place);
if (cur_data.data_size == 0)
{
auto column_nullable = IColumn::mutate(makeNullable(to.getPtr()));
column_nullable->insertDefault();
return;
}
auto & column_string = assert_cast<ColumnString &>(to);
column_string.insertData(cur_data.data, cur_data.data_size);
}
bool allocatesMemoryInArena() const override { return true; }
};
AggregateFunctionPtr createAggregateFunctionGroupConcat(
const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
{
assertUnary(name, argument_types);
bool has_limit = false;
UInt64 limit = 0;
String delimiter;
if (parameters.size() > 2)
throw Exception(ErrorCodes::TOO_MANY_ARGUMENTS_FOR_FUNCTION,
"Incorrect number of parameters for aggregate function {}, should be 0, 1 or 2, got: {}", name, parameters.size());
if (!parameters.empty())
{
auto type = parameters[0].getType();
if (type != Field::Types::String)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "First parameter for aggregate function {} should be string", name);
delimiter = parameters[0].get<String>();
}
if (parameters.size() == 2)
{
auto type = parameters[1].getType();
if (type != Field::Types::Int64 && type != Field::Types::UInt64)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number", name);
if ((type == Field::Types::Int64 && parameters[1].get<Int64>() <= 0) ||
(type == Field::Types::UInt64 && parameters[1].get<UInt64>() == 0))
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Second parameter for aggregate function {} should be a positive number, got: {}", name, parameters[1].get<Int64>());
has_limit = true;
limit = parameters[1].get<UInt64>();
}
if (has_limit)
return std::make_shared<GroupConcatImpl</* has_limit= */ true>>(argument_types[0], parameters, limit, delimiter);
else
return std::make_shared<GroupConcatImpl</* has_limit= */ false>>(argument_types[0], parameters, limit, delimiter);
}
}
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory & factory)
{
AggregateFunctionProperties properties = { .returns_default_when_only_null = false, .is_order_dependent = true };
factory.registerFunction("groupConcat", { createAggregateFunctionGroupConcat, properties });
factory.registerAlias("group_concat", "groupConcat", AggregateFunctionFactory::CaseInsensitive);
}
}

View File

@ -19,7 +19,6 @@ void registerAggregateFunctionGroupArraySorted(AggregateFunctionFactory & factor
void registerAggregateFunctionGroupUniqArray(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayInsertAt(AggregateFunctionFactory &);
void registerAggregateFunctionGroupArrayIntersect(AggregateFunctionFactory &);
void registerAggregateFunctionGroupConcat(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantile(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDeterministic(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileExact(AggregateFunctionFactory &);
@ -121,7 +120,6 @@ void registerAggregateFunctions()
registerAggregateFunctionGroupUniqArray(factory);
registerAggregateFunctionGroupArrayInsertAt(factory);
registerAggregateFunctionGroupArrayIntersect(factory);
registerAggregateFunctionGroupConcat(factory);
registerAggregateFunctionsQuantile(factory);
registerAggregateFunctionsQuantileDeterministic(factory);
registerAggregateFunctionsQuantileExact(factory);

src/Common/HilbertUtils.h Normal file
View File

@ -0,0 +1,161 @@
#pragma once
#include <Core/Types.h>
#include <Common/BitHelpers.h>
#include "base/types.h"
#include <Functions/hilbertDecode2DLUT.h>
#include <base/defines.h>
#include <array>
#include <set>
namespace HilbertDetails
{
struct Segment // represents [begin; end], all bounds are included
{
UInt64 begin;
UInt64 end;
};
}
/*
Given a range of Hilbert code values, this function returns segments of the Hilbert curve
such that each of them lies in a whole domain (aka square)
0 1
0 00xxx 11xxx
| |
| |
_______________________________
| |
| |
| |
1 01xxx___________10xxx
Imagine a square, one side of which is the x-axis and the other the y-axis.
The first approximation of the Hilbert curve is on the picture - the U curve.
So we divide the Hilbert code interval into 4 parts, each of which is represented by a square,
and look where the given interval [start, finish] is located:
[00xxxxxx | 01xxxxxx | 10xxxxxx | 11xxxxxx ]
1: [ ]
start = 0010111 end = 10111110
2: [ ] [ ]
If it contains a whole sector (that represents a domain = square),
then we take this range. In the example above it is the sector [01000000, 01111111].
Then we recurse and check the remaining ranges.
Note that after the first call, all other ranges in the recursion will have either start or finish on a range boundary,
so the complexity of the algorithm is O(log N), where N is the maximum Hilbert code.
*/
template <typename F>
void segmentBinaryPartition(UInt64 start, UInt64 finish, UInt8 current_bits, F && callback)
{
if (current_bits == 0)
return;
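/// Look at the two most significant remaining bits: they select one of the four quadrants (sub-squares).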
const auto next_bits = current_bits - 2;
const auto history = current_bits == 64 ? 0 : (start >> current_bits) << current_bits;
const auto chunk_mask = 0b11;
const auto start_chunk = (start >> next_bits) & chunk_mask;
const auto finish_chunk = (finish >> next_bits) & chunk_mask;
auto construct_range = [next_bits, history](UInt64 chunk)
{
return HilbertDetails::Segment{
.begin = history + (chunk << next_bits),
.end = history + ((chunk + 1) << next_bits) - 1
};
};
if (start_chunk == finish_chunk)
{
if ((finish - start + 1) == (1 << next_bits)) // [start, finish] exactly covers a whole quadrant
{
callback(HilbertDetails::Segment{.begin = start, .end = finish});
return;
}
segmentBinaryPartition(start, finish, next_bits, callback);
return;
}
for (auto range_chunk = start_chunk + 1; range_chunk < finish_chunk; ++range_chunk)
{
callback(construct_range(range_chunk));
}
const auto start_range = construct_range(start_chunk);
if (start == start_range.begin)
{
callback(start_range);
}
else
{
segmentBinaryPartition(start, start_range.end, next_bits, callback);
}
const auto finish_range = construct_range(finish_chunk);
if (finish == finish_range.end)
{
callback(finish_range);
}
else
{
segmentBinaryPartition(finish_range.begin, finish, next_bits, callback);
}
}
// Given 2 points representing the ends of a range of the Hilbert curve that lies in a whole domain.
// They are neighbouring corners of some square - and the function returns the ranges of both sides of this square
inline std::array<std::pair<UInt64, UInt64>, 2> createRangeFromCorners(UInt64 x1, UInt64 y1, UInt64 x2, UInt64 y2)
{
UInt64 dist_x = x1 > x2 ? x1 - x2 : x2 - x1;
UInt64 dist_y = y1 > y2 ? y1 - y2 : y2 - y1;
UInt64 range_size = std::max(dist_x, dist_y);
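/// Squares are aligned to their side length (range_size + 1), so a coordinate divisible by the side length marks the square's minimum vertex.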
bool contains_minimum_vertice = x1 % (range_size + 1) == 0;
if (contains_minimum_vertice)
{
UInt64 x_min = std::min(x1, x2);
UInt64 y_min = std::min(y1, y2);
return {
std::pair<UInt64, UInt64>{x_min, x_min + range_size},
std::pair<UInt64, UInt64>{y_min, y_min + range_size}
};
}
else
{
UInt64 x_max = std::max(x1, x2);
UInt64 y_max = std::max(y1, y2);
chassert(x_max >= range_size);
chassert(y_max >= range_size);
return {
std::pair<UInt64, UInt64>{x_max - range_size, x_max},
std::pair<UInt64, UInt64>{y_max - range_size, y_max}
};
}
}
/** Unpack an interval of the Hilbert curve into hyperrectangles covered by it across N dimensions.
*/
template <typename F>
void hilbertIntervalToHyperrectangles2D(UInt64 first, UInt64 last, F && callback)
{
const auto equal_bits_count = getLeadingZeroBits(last | first);
const auto even_equal_bits_count = equal_bits_count - equal_bits_count % 2;
segmentBinaryPartition(first, last, 64 - even_equal_bits_count, [&](HilbertDetails::Segment range)
{
auto interval1 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.begin);
auto interval2 = DB::FunctionHilbertDecode2DWIthLookupTableImpl<3>::decode(range.end);
std::array<std::pair<UInt64, UInt64>, 2> unpacked_range = createRangeFromCorners(
std::get<0>(interval1), std::get<1>(interval1),
std::get<0>(interval2), std::get<1>(interval2));
callback(unpacked_range);
});
}

View File

@ -73,9 +73,10 @@ zkutil::ZooKeeperPtr DatabaseReplicated::getZooKeeper() const
return getContext()->getZooKeeper();
}
static inline String getHostID(ContextPtr global_context, const UUID & db_uuid)
static inline String getHostID(ContextPtr global_context, const UUID & db_uuid, bool secure)
{
return Cluster::Address::toString(getFQDNOrHostName(), global_context->getTCPPort()) + ':' + toString(db_uuid);
UInt16 port = secure ? global_context->getTCPPortSecure().value_or(DBMS_DEFAULT_SECURE_PORT) : global_context->getTCPPort();
return Cluster::Address::toString(getFQDNOrHostName(), port) + ':' + toString(db_uuid);
}
static inline UInt64 getMetadataHash(const String & table_name, const String & metadata)
@ -415,8 +416,10 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL
return;
}
String host_id = getHostID(getContext(), db_uuid);
if (is_create_query || replica_host_id != host_id)
String host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection);
String host_id_default = getHostID(getContext(), db_uuid, false);
if (is_create_query || (replica_host_id != host_id && replica_host_id != host_id_default))
{
throw Exception(
ErrorCodes::REPLICA_ALREADY_EXISTS,
@ -424,6 +427,14 @@ void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(LoadingStrictnessL
replica_name, shard_name, zookeeper_path, replica_host_id, host_id);
}
/// Before 24.6 we always created host_id with the insecure port, even if cluster_auth_info.cluster_secure_connection was true.
/// So as not to break compatibility, we need to update host_id to the secure one if cluster_auth_info.cluster_secure_connection is true.
if (host_id != host_id_default && replica_host_id == host_id_default)
{
current_zookeeper->set(replica_path, host_id, -1);
createEmptyLogEntry(current_zookeeper);
}
/// Check that replica_group_name in ZooKeeper matches the local one and change it if necessary.
String zk_replica_group_name;
if (!current_zookeeper->tryGet(replica_path + "/replica_group", zk_replica_group_name))
@ -550,7 +561,7 @@ void DatabaseReplicated::createReplicaNodesInZooKeeper(const zkutil::ZooKeeperPt
"already contains some data and it does not look like Replicated database path.", zookeeper_path);
/// Write host name to replica_path, it will protect from multiple replicas with the same name
auto host_id = getHostID(getContext(), db_uuid);
auto host_id = getHostID(getContext(), db_uuid, cluster_auth_info.cluster_secure_connection);
for (int attempts = 10; attempts > 0; --attempts)
{

View File

@ -30,10 +30,6 @@
#include <base/sleep.h>
#ifdef ADDRESS_SANITIZER
#include <sanitizer/lsan_interface.h>
#endif
namespace ProfileEvents
{
extern const Event S3WriteRequestsErrors;
@ -880,14 +876,7 @@ void ClientCacheRegistry::clearCacheForAll()
ClientFactory::ClientFactory()
{
aws_options = Aws::SDKOptions{};
{
#ifdef ADDRESS_SANITIZER
/// Leak sanitizer (part of address sanitizer) thinks that memory in OpenSSL (called by AWS SDK) is allocated but not
/// released. Actually, the memory is released at the end of the program (ClientFactory is a singleton, see the dtor).
__lsan::ScopedDisabler lsan_disabler;
#endif
Aws::InitAPI(aws_options);
}
Aws::InitAPI(aws_options);
Aws::Utils::Logging::InitializeAWSLogging(std::make_shared<AWSLogger>(false));
Aws::Http::SetHttpClientFactory(std::make_shared<PocoHTTPClientFactory>());
}

View File

@ -2,10 +2,25 @@
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Interpreters/ActionsDAG.h>
#include <Functions/FunctionsLogical.h>
#include <Functions/IFunctionAdaptors.h>
namespace DB::QueryPlanOptimizations
{
static void removeFromOutputs(ActionsDAG & dag, const ActionsDAG::Node & node)
{
auto & outputs = dag.getOutputs();
for (size_t i = 0; i < outputs.size(); ++i)
{
if (&node == outputs[i])
{
outputs.erase(outputs.begin() + i);
return;
}
}
}
size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
if (parent_node->children.size() != 1)
@ -19,6 +34,7 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
auto * parent_expr = typeid_cast<ExpressionStep *>(parent.get());
auto * parent_filter = typeid_cast<FilterStep *>(parent.get());
auto * child_expr = typeid_cast<ExpressionStep *>(child.get());
auto * child_filter = typeid_cast<FilterStep *>(child.get());
if (parent_expr && child_expr)
{
@ -60,6 +76,42 @@ size_t tryMergeExpressions(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
parent_node->children.swap(child_node->children);
return 1;
}
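/// Merge two adjacent filter steps into one: combine both expression DAGs and AND the two filter columns.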
else if (parent_filter && child_filter)
{
const auto & child_actions = child_filter->getExpression();
const auto & parent_actions = parent_filter->getExpression();
if (child_actions->hasArrayJoin())
return 0;
auto actions = child_actions->clone();
const auto & child_filter_node = actions->findInOutputs(child_filter->getFilterColumnName());
if (child_filter->removesFilterColumn())
removeFromOutputs(*actions, child_filter_node);
actions->mergeInplace(std::move(*parent_actions->clone()));
const auto & parent_filter_node = actions->findInOutputs(parent_filter->getFilterColumnName());
if (parent_filter->removesFilterColumn())
removeFromOutputs(*actions, parent_filter_node);
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
const auto & condition = actions->addFunction(func_builder_and, {&child_filter_node, &parent_filter_node}, {});
auto & outputs = actions->getOutputs();
outputs.insert(outputs.begin(), &condition);
actions->removeUnusedActions(false);
auto filter = std::make_unique<FilterStep>(child_filter->getInputStreams().front(),
actions,
condition.result_name,
true);
filter->setStepDescription("(" + parent_filter->getStepDescription() + " + " + child_filter->getStepDescription() + ")");
parent_node->step = std::move(filter);
parent_node->children.swap(child_node->children);
return 1;
}
return 0;
}

View File

@ -1046,12 +1046,21 @@ void HTTPHandler::formatExceptionForClient(int exception_code, HTTPServerRequest
/// FIXME: make sure that no one else is reading from the same stream at the moment.
/// If HTTP method is POST and Keep-Alive is turned on, we should read the whole request body
/// If HTTP method is POST and Keep-Alive is turned on, we should try to read the whole request body
/// to avoid reading part of the current request body in the next request.
if (request.getMethod() == Poco::Net::HTTPRequest::HTTP_POST && response.getKeepAlive()
&& exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED && !request.getStream().eof())
&& exception_code != ErrorCodes::HTTP_LENGTH_REQUIRED)
{
request.getStream().ignoreAll();
try
{
if (!request.getStream().eof())
request.getStream().ignoreAll();
}
catch (...)
{
tryLogCurrentException(log, "Cannot read remaining request body during exception handling");
response.setKeepAlive(false);
}
}
if (exception_code == ErrorCodes::REQUIRED_PASSWORD)

View File

@ -18,6 +18,7 @@
#include <Functions/CastOverloadResolver.h>
#include <Functions/IFunction.h>
#include <Common/FieldVisitorToString.h>
#include <Common/HilbertUtils.h>
#include <Common/MortonUtils.h>
#include <Common/typeid_cast.h>
#include <Columns/ColumnSet.h>
@ -689,6 +690,11 @@ static const ActionsDAG::Node & cloneASTWithInversionPushDown(
return *res;
}
const std::unordered_map<String, KeyCondition::SpaceFillingCurveType> KeyCondition::space_filling_curve_name_to_type {
{"mortonEncode", SpaceFillingCurveType::Morton},
{"hilbertEncode", SpaceFillingCurveType::Hilbert}
};
ActionsDAGPtr KeyCondition::cloneASTWithInversionPushDown(ActionsDAG::NodeRawConstPtrs nodes, const ContextPtr & context)
{
auto res = std::make_shared<ActionsDAG>();
@ -744,16 +750,17 @@ static NameSet getAllSubexpressionNames(const ExpressionActions & key_expr)
void KeyCondition::getAllSpaceFillingCurves()
{
/// So far the only supported function is mortonEncode (Morton curve).
/// So far the only supported function is mortonEncode and hilbertEncode (Morton and Hilbert curves).
for (const auto & action : key_expr->getActions())
{
if (action.node->type == ActionsDAG::ActionType::FUNCTION
&& action.node->children.size() >= 2
&& action.node->function_base->getName() == "mortonEncode")
&& space_filling_curve_name_to_type.contains(action.node->function_base->getName()))
{
SpaceFillingCurveDescription curve;
curve.function_name = action.node->function_base->getName();
curve.type = space_filling_curve_name_to_type.at(curve.function_name);
curve.key_column_pos = key_columns.at(action.node->result_name);
for (const auto & child : action.node->children)
{
@ -2665,6 +2672,15 @@ BoolMask KeyCondition::checkInHyperrectangle(
const DataTypes & data_types) const
{
std::vector<BoolMask> rpn_stack;
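/// Determine which space-filling curve (Morton or Hilbert) is defined on the given key column.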
auto curve_type = [&](size_t key_column_pos)
{
for (const auto & curve : key_space_filling_curves)
if (curve.key_column_pos == key_column_pos)
return curve.type;
return SpaceFillingCurveType::Unknown;
};
for (const auto & element : rpn)
{
if (element.argument_num_of_space_filling_curve.has_value())
@ -2764,26 +2780,43 @@ BoolMask KeyCondition::checkInHyperrectangle(
UInt64 right = key_range.right.get<UInt64>();
BoolMask mask(false, true);
mortonIntervalToHyperrectangles<2>(left, right,
[&](std::array<std::pair<UInt64, UInt64>, 2> morton_hyperrectangle)
auto hyperrectangle_intersection_callback = [&](std::array<std::pair<UInt64, UInt64>, 2> curve_hyperrectangle)
{
BoolMask current_intersection(true, false);
for (size_t dim = 0; dim < num_dimensions; ++dim)
{
BoolMask current_intersection(true, false);
for (size_t dim = 0; dim < num_dimensions; ++dim)
{
const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim];
const Range & condition_arg_range = element.space_filling_curve_args_hyperrectangle[dim];
const Range morton_arg_range(
morton_hyperrectangle[dim].first, true,
morton_hyperrectangle[dim].second, true);
const Range curve_arg_range(
curve_hyperrectangle[dim].first, true,
curve_hyperrectangle[dim].second, true);
bool intersects = condition_arg_range.intersectsRange(morton_arg_range);
bool contains = condition_arg_range.containsRange(morton_arg_range);
bool intersects = condition_arg_range.intersectsRange(curve_arg_range);
bool contains = condition_arg_range.containsRange(curve_arg_range);
current_intersection = current_intersection & BoolMask(intersects, !contains);
}
current_intersection = current_intersection & BoolMask(intersects, !contains);
}
mask = mask | current_intersection;
});
mask = mask | current_intersection;
};
switch (curve_type(element.key_column))
{
case SpaceFillingCurveType::Hilbert:
{
hilbertIntervalToHyperrectangles2D(left, right, hyperrectangle_intersection_callback);
break;
}
case SpaceFillingCurveType::Morton:
{
mortonIntervalToHyperrectangles<2>(left, right, hyperrectangle_intersection_callback);
break;
}
case SpaceFillingCurveType::Unknown:
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "curve_type is `Unknown`. It is a bug.");
}
}
rpn_stack.emplace_back(mask);
}

View File

@ -328,11 +328,20 @@ private:
const NameSet key_subexpr_names;
/// Space-filling curves in the key
enum class SpaceFillingCurveType
{
Unknown = 0,
Morton,
Hilbert
};
static const std::unordered_map<String, SpaceFillingCurveType> space_filling_curve_name_to_type;
struct SpaceFillingCurveDescription
{
size_t key_column_pos;
String function_name;
std::vector<String> arguments;
SpaceFillingCurveType type;
};
using SpaceFillingCurveDescriptions = std::vector<SpaceFillingCurveDescription>;
SpaceFillingCurveDescriptions key_space_filling_curves;

View File

@ -140,6 +140,9 @@ class IMergeTreeDataPart;
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
struct StorageSnapshot;
using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
/** Query along with some additional data,
* that can be used during query processing
* inside storage engines.
@ -173,6 +176,13 @@ struct SelectQueryInfo
/// Local storage limits
StorageLimits local_storage_limits;
/// This is a leak of abstraction.
/// StorageMerge substitutes the storage into the query_tree. However, column types may be changed for the inner table,
/// so the resolved query tree might have incompatible types.
/// StorageDistributed uses this query tree to calculate a header and throws if we use the storage snapshot.
/// To avoid this, we use the initial merge_storage_snapshot.
StorageSnapshotPtr merge_storage_snapshot;
/// Cluster for the query.
ClusterPtr cluster;
/// Optimized cluster for the query.

View File

@ -846,7 +846,7 @@ void StorageDistributed::read(
remote_storage_id = StorageID{remote_database, remote_table};
auto query_tree_distributed = buildQueryTreeDistributed(modified_query_info,
storage_snapshot,
query_info.merge_storage_snapshot ? query_info.merge_storage_snapshot : storage_snapshot,
remote_storage_id,
remote_table_function_ptr);
header = InterpreterSelectQueryAnalyzer::getSampleBlock(query_tree_distributed, local_context, SelectQueryOptions(processed_stage).analyze());

View File

@ -889,6 +889,8 @@ SelectQueryInfo ReadFromMerge::getModifiedQueryInfo(const ContextMutablePtr & mo
SelectQueryInfo modified_query_info = query_info;
modified_query_info.merge_storage_snapshot = merge_storage_snapshot;
if (modified_query_info.planner_context)
modified_query_info.planner_context = std::make_shared<PlannerContext>(modified_context, modified_query_info.planner_context);
@ -1198,7 +1200,10 @@ ReadFromMerge::ChildPlan ReadFromMerge::createPlanForTable(
if (allow_experimental_analyzer)
{
InterpreterSelectQueryAnalyzer interpreter(modified_query_info.query_tree,
/// Converting query to AST because types might be different in the source table.
/// Need to resolve types again.
auto ast = modified_query_info.query_tree->toAST();
InterpreterSelectQueryAnalyzer interpreter(ast,
modified_context,
SelectQueryOptions(processed_stage));

View File

@ -194,6 +194,7 @@ const char * auto_contributors[] {
"Artem Gavrilov",
"Artem Hnilov",
"Artem Konovalov",
"Artem Mustafin",
"Artem Pershin",
"Artem Streltsov",
"Artem Zuikov",
@ -307,6 +308,7 @@ const char * auto_contributors[] {
"Daniil Ivanik",
"Daniil Rubin",
"Danila Kutenin",
"Danila Puzov",
"Daniël van Eeden",
"Dao",
"Dao Minh Thuc",
@ -417,6 +419,7 @@ const char * auto_contributors[] {
"Filippov Denis",
"Fille",
"Flowyi",
"Francesco Ciocchetti",
"Francisco Barón",
"Francisco Javier Jurado Moreno",
"Frank Chen",
@ -449,6 +452,7 @@ const char * auto_contributors[] {
"Gleb-Tretyakov",
"GoGoWen2021",
"Gregory",
"Grigorii Sokolik",
"Grigory",
"Grigory Buteyko",
"Grigory Pervakov",
@ -464,6 +468,7 @@ const char * auto_contributors[] {
"Hamoon",
"Han Fei",
"Han Shukai",
"HappenLee",
"Harry Lee",
"Harry-Lee",
"HarryLeeIBM",
@ -627,6 +632,7 @@ const char * auto_contributors[] {
"Kostiantyn Storozhuk",
"Kozlov Ivan",
"KrJin",
"Kris Buytaert",
"Krisztián Szűcs",
"Kruglov Pavel",
"Krzysztof Góralski",
@ -644,6 +650,7 @@ const char * auto_contributors[] {
"Latysheva Alexandra",
"Laurie Li",
"LaurieLY",
"Lee sungju",
"Lemore",
"Leonardo Cecchi",
"Leonardo Maciel",
@ -770,6 +777,7 @@ const char * auto_contributors[] {
"Mikhail Filimonov",
"Mikhail Fursov",
"Mikhail Gaidamaka",
"Mikhail Gorshkov",
"Mikhail Guzov",
"Mikhail Korotov",
"Mikhail Koviazin",
@ -904,11 +912,13 @@ const char * auto_contributors[] {
"Petr Vasilev",
"Pham Anh Tuan",
"Philip Hallstrom",
"Philipp Schreiber",
"Philippe Ombredanne",
"PigInCloud",
"Potya",
"Pradeep Chhetri",
"Prashant Shahi",
"Pratima Patel",
"Priyansh Agrawal",
"Pxl",
"Pysaoke",
@ -978,6 +988,7 @@ const char * auto_contributors[] {
"Samuel Colvin",
"San",
"Sanjam Panda",
"Sariel",
"Saulius Valatka",
"Sean Haynes",
"Sean Lafferty",
@ -1067,6 +1078,7 @@ const char * auto_contributors[] {
"TABLUM.IO",
"TAC",
"TCeason",
"TTPO100AJIEX",
"Tagir Kuskarov",
"Tai White",
"Taleh Zaliyev",
@ -1089,6 +1101,7 @@ const char * auto_contributors[] {
"Tiaonmmn",
"Tigran Khudaverdyan",
"Tim Liou",
"Tim MacDonald",
"Tim Windelschmidt",
"Timur Magomedov",
"Timur Solodovnikov",
@ -1201,6 +1214,7 @@ const char * auto_contributors[] {
"Xiaofei Hu",
"Xin Wang",
"Xoel Lopez Barata",
"Xu Jia",
"Xudong Zhang",
"Y Lu",
"Yakko Majuri",
@ -1237,6 +1251,7 @@ const char * auto_contributors[] {
"Yusuke Tanaka",
"Zach Naimon",
"Zheng Miao",
"ZhiHong Zhang",
"ZhiYong Wang",
"Zhichang Yu",
"Zhichun Wu",
@ -1276,6 +1291,7 @@ const char * auto_contributors[] {
"alexeyerm",
"alexeypavlenko",
"alfredlu",
"allegrinisante",
"amesaru",
"amoschen",
"amudong",
@ -1287,6 +1303,7 @@ const char * auto_contributors[] {
"anneji",
"anneji-dev",
"annvsh",
"anonymous",
"anrodigina",
"antikvist",
"anton",
@ -1346,6 +1363,7 @@ const char * auto_contributors[] {
"chenxing-xc",
"chenxing.xc",
"chertus",
"chloro",
"chou.fan",
"christophe.kalenzaga",
"clarkcaoliu",
@ -1458,6 +1476,7 @@ const char * auto_contributors[] {
"gyuton",
"hanqf-git",
"hao.he",
"haohang",
"hardstep33",
"hchen9",
"hcz",
@ -1479,6 +1498,7 @@ const char * auto_contributors[] {
"iammagicc",
"ianton-ru",
"ice1x",
"iceFireser",
"idfer",
"ifinik",
"igomac",
@ -1642,6 +1662,7 @@ const char * auto_contributors[] {
"mo-avatar",
"mochi",
"monchickey",
"morning-color",
"morty",
"moscas",
"mosinnik",
@ -1695,6 +1716,7 @@ const char * auto_contributors[] {
"philip.han",
"pingyu",
"pkubaj",
"pn",
"potya",
"pppeace",
"presto53",
@ -1742,6 +1764,7 @@ const char * auto_contributors[] {
"sanjam",
"santaux",
"santrancisco",
"sarielwxm",
"satanson",
"save-my-heart",
"sdk2",
@ -1846,6 +1869,7 @@ const char * auto_contributors[] {
"whysage",
"wineternity",
"woodlzm",
"wudidapaopao",
"wuxiaobai24",
"wxybear",
"wzl",
@ -1860,6 +1884,7 @@ const char * auto_contributors[] {
"xleoken",
"xlwh",
"xmy",
"xogoodnow",
"xuelei",
"xuzifu666",
"yakkomajuri",

View File

@ -208,7 +208,7 @@ class StatusNames(metaclass=WithIter):
# mergeable status
MERGEABLE = "Mergeable Check"
# status of a sync pr
SYNC = "A Sync"
SYNC = "Cloud fork sync (only for ClickHouse Inc. employees)"
# PR formatting check status
PR_CHECK = "PR Check"

View File

@ -877,7 +877,7 @@ def test_max_set_age(started_cluster):
assert "Cannot parse input" in node.query(
"SELECT exception FROM system.s3queue WHERE file_name ilike '%fff.csv' ORDER BY processing_end_time DESC LIMIT 1"
)
assert 2 == int(
assert 1 < int(
node.query(
"SELECT count() FROM system.s3queue_log WHERE file_name ilike '%fff.csv' AND notEmpty(exception)"
)

View File

@ -163,7 +163,6 @@ Filter column: notEquals(__table1.y, 2_UInt8)
> filter is pushed down before CreatingSets
CreatingSets
Filter
Filter
1
3
> one condition of filter is pushed down before LEFT JOIN

View File

@ -0,0 +1,10 @@
Filter (((WHERE + (Change column names to column identifiers + (Project names + Projection))) + HAVING))
Filter column: and(notEquals(sum(__table2.number), 0_UInt8), equals(__table1.key, 7_UInt8)) (removed)
Aggregating
Filter (( + (Before GROUP BY + Change column names to column identifiers)))
Filter column: equals(__table1.key, 7_UInt8) (removed)
Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING))
Filter column: and(notEquals(sum(number), 0), equals(key, 7)) (removed)
Aggregating
Filter ((( + Before GROUP BY) + WHERE))
Filter column: and(equals(bitAnd(number, 15), 7), equals(key, 7)) (removed)

View File

@ -0,0 +1,5 @@
set allow_experimental_analyzer=1;
select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%';
set allow_experimental_analyzer=0;
select explain from (explain actions = 1 select * from (select sum(number) as v, bitAnd(number, 15) as key from numbers(1e8) group by key having v != 0) where key = 7) where explain like '%Filter%' or explain like '%Aggregating%';

View File

@ -12,10 +12,10 @@ INSERT INTO order_by_desc SELECT number, repeat('a', 1024) FROM numbers(1024 * 3
OPTIMIZE TABLE order_by_desc FINAL;
SELECT s FROM order_by_desc ORDER BY u DESC LIMIT 10 FORMAT Null
SETTINGS max_memory_usage = '400M';
SETTINGS max_memory_usage = '600M';
SELECT s FROM order_by_desc ORDER BY u LIMIT 10 FORMAT Null
SETTINGS max_memory_usage = '400M';
SETTINGS max_memory_usage = '600M';
SYSTEM FLUSH LOGS;

View File

@ -4,6 +4,12 @@
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
Prewhere info
Prewhere filter
Prewhere filter column: and(notEmpty(v), equals(k, 3)) (removed)
2
Filter column: and(equals(k, 3), notEmpty(v)) (removed)
Prewhere info

View File

@ -24,7 +24,8 @@ INSERT INTO t_02156_mt1 SELECT number, toString(number) FROM numbers(10000);
INSERT INTO t_02156_mt2 SELECT number, toString(number) FROM numbers(10000);
INSERT INTO t_02156_log SELECT number, toString(number) FROM numbers(10000);
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=1;
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%' settings allow_experimental_analyzer=0;
SELECT count() FROM t_02156_merge1 WHERE k = 3 AND notEmpty(v);
SELECT replaceRegexpAll(explain, '__table1\.|_UInt8', '') FROM (EXPLAIN actions=1 SELECT count() FROM t_02156_merge2 WHERE k = 3 AND notEmpty(v)) WHERE explain LIKE '%Prewhere%' OR explain LIKE '%Filter column%';

View File

@ -0,0 +1,12 @@
1 a
1 a
2 b
2 b
1 a
1 a
2 b
2 b
1 a
2 b
1 a
2 b

View File

@ -0,0 +1,17 @@
DROP TABLE IF EXISTS t_02156_ololo_1;
DROP TABLE IF EXISTS t_02156_ololo_2;
DROP TABLE IF EXISTS t_02156_ololo_dist;
CREATE TABLE t_02156_ololo_1 (k UInt32, v Nullable(String)) ENGINE = MergeTree order by k;
CREATE TABLE t_02156_ololo_2 (k UInt32, v String) ENGINE = MergeTree order by k;
CREATE TABLE t_02156_ololo_dist (k UInt32, v String) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_ololo_2);
CREATE TABLE t_02156_ololo_dist2 (k UInt32, v Nullable(String)) ENGINE = Distributed(test_shard_localhost, currentDatabase(), t_02156_ololo_1);
insert into t_02156_ololo_1 values (1, 'a');
insert into t_02156_ololo_2 values (2, 'b');
select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0;
select * from merge('t_02156_ololo') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1;
select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=0;
select * from merge('t_02156_ololo_dist') where k != 0 and notEmpty(v) order by k settings optimize_move_to_prewhere=1;

View File

@ -16,7 +16,7 @@ def main():
sock.settimeout(60)
s = "POST / HTTP/1.1\r\n"
s += "Host: %s\r\n" % host
s += "Content-type: multipart/form-data\r\n"
s += "Content-type: multipart/form-data; boundary=--b3f1zid8kqwy\r\n"
s += "Transfer-encoding: chunked\r\n"
s += "\r\n"
s += "ffffffffffffffff"

View File

@ -1,3 +1,3 @@
HTTP/1.1 200 OK
HTTP/1.1 500 Internal Server Error
encoding type chunked
error code 1000
error code 69

View File

@ -332,13 +332,12 @@ SETTINGS optimize_aggregators_of_group_by_keys=0 -- avoid removing any() as it d
Expression (Projection)
Sorting (Sorting for ORDER BY)
Expression (Before ORDER BY)
Filter ((WHERE + (Projection + Before ORDER BY)))
Filter (HAVING)
Aggregating
Expression ((Before GROUP BY + Projection))
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
ReadFromSystemNumbers
Filter (((WHERE + (Projection + Before ORDER BY)) + HAVING))
Aggregating
Expression ((Before GROUP BY + Projection))
Sorting (Sorting for ORDER BY)
Expression ((Before ORDER BY + (Projection + Before ORDER BY)))
ReadFromSystemNumbers
-- execute
1
2

View File

@ -29,20 +29,16 @@ WHERE type_1 = \'all\'
ExpressionTransform × 2
(Filter)
FilterTransform × 2
(Filter)
FilterTransform × 2
(Filter)
FilterTransform × 2
(Aggregating)
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Expression)
ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Aggregating)
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Expression)
ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform × 2
(Filter)
@ -68,14 +64,10 @@ ExpressionTransform × 2
ExpressionTransform × 2
AggregatingTransform × 2
Copy 1 → 2
(Filter)
FilterTransform
(Filter)
FilterTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform
(ReadFromMergeTree)
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
(Expression)
ExpressionTransform × 2
(Aggregating)

View File

@ -1,14 +0,0 @@
0 95 abc [1,2,3]
1 \N a [993,986,979,972]
2 123 makson95 []
95123
abcamakson95
[1,2,3][993,986,979,972][]
95,123
abc,a,makson95
[1,2,3],[993,986,979,972]
\N
951239512395123
abc,a,makson95,abc,a,makson95,abc,a,makson95
[1,2,3][993,986,979,972][][1,2,3][993,986,979,972][][1,2,3][993,986,979,972][]
488890

View File

@ -1,40 +0,0 @@
DROP TABLE IF EXISTS test_groupConcat;
CREATE TABLE test_groupConcat
(
id UInt64,
p_int Int32 NULL,
p_string String,
p_array Array(Int32)
) ENGINE = MergeTree ORDER BY id;
SET max_insert_threads = 1, max_threads = 1, min_insert_block_size_rows = 0, min_insert_block_size_bytes = 0;
INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []);
SELECT * FROM test_groupConcat;
SELECT groupConcat(p_int) FROM test_groupConcat;
SELECT groupConcat(p_string) FROM test_groupConcat;
SELECT groupConcat(p_array) FROM test_groupConcat;
SELECT groupConcat(',')(p_int) FROM test_groupConcat;
SELECT groupConcat(',')(p_string) FROM test_groupConcat;
SELECT groupConcat(',', 2)(p_array) FROM test_groupConcat;
SELECT groupConcat(p_int) FROM test_groupConcat WHERE id = 1;
INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []);
INSERT INTO test_groupConcat VALUES (0, 95, 'abc', [1, 2, 3]), (1, NULL, 'a', [993, 986, 979, 972]), (2, 123, 'makson95', []);
SELECT groupConcat(p_int) FROM test_groupConcat;
SELECT groupConcat(',')(p_string) FROM test_groupConcat;
SELECT groupConcat(p_array) FROM test_groupConcat;
SELECT groupConcat(123)(number) FROM numbers(10); -- { serverError ILLEGAL_TYPE_OF_ARGUMENT }
SELECT groupConcat(',', '3')(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS }
SELECT groupConcat(',', 0)(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS }
SELECT groupConcat(',', -1)(number) FROM numbers(10); -- { serverError BAD_ARGUMENTS }
SELECT groupConcat(',', 3, 3)(number) FROM numbers(10); -- { serverError TOO_MANY_ARGUMENTS_FOR_FUNCTION }
SELECT length(groupConcat(number)) FROM numbers(100000);
DROP TABLE IF EXISTS test_groupConcat;

View File

@ -0,0 +1,9 @@
121
121
32
21
10
32
22
11
1

View File

@ -0,0 +1,35 @@
DROP TABLE IF EXISTS test_hilbert_encode;
CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 8192, index_granularity_bytes = '1Mi';
INSERT INTO test_hilbert_encode SELECT number DIV 1024, number % 1024 FROM numbers(1048576);
SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 1;
SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30;
SET max_rows_to_read = 8192, force_primary_key = 1, analyze_index_with_space_filling_curves = 0;
SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30; -- { serverError 277 }
DROP TABLE test_hilbert_encode;
-- The same, but with more precise index
CREATE TABLE test_hilbert_encode (x UInt32, y UInt32) ENGINE = MergeTree ORDER BY hilbertEncode(x, y) SETTINGS index_granularity = 1;
SET max_rows_to_read = 0;
INSERT INTO test_hilbert_encode SELECT number DIV 32, number % 32 FROM numbers(1024);
SET max_rows_to_read = 200, force_primary_key = 1, analyze_index_with_space_filling_curves = 1;
SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND x <= 20 AND y >= 20 AND y <= 30;
-- Various other conditions
SELECT count() FROM test_hilbert_encode WHERE x = 10 SETTINGS max_rows_to_read = 49;
SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y > 10 SETTINGS max_rows_to_read = 33;
SELECT count() FROM test_hilbert_encode WHERE x = 10 AND y < 10 SETTINGS max_rows_to_read = 15;
SELECT count() FROM test_hilbert_encode WHERE y = 10 SETTINGS max_rows_to_read = 50;
SELECT count() FROM test_hilbert_encode WHERE x >= 10 AND y = 10 SETTINGS max_rows_to_read = 35;
SELECT count() FROM test_hilbert_encode WHERE y = 10 AND x <= 10 SETTINGS max_rows_to_read = 17;
SELECT count() FROM test_hilbert_encode PREWHERE x >= 10 WHERE x < 11 AND y = 10 SETTINGS max_rows_to_read = 2;
DROP TABLE test_hilbert_encode;

View File

@ -1709,7 +1709,6 @@ groupBitmap
groupBitmapAnd
groupBitmapOr
groupBitmapXor
groupConcat
groupUniqArray
grouparray
grouparrayinsertat
@ -1726,7 +1725,6 @@ groupbitmapor
groupbitmapxor
groupbitor
groupbitxor
groupconcat
groupuniqarray
grpc
grpcio