Merge branch 'master' into perf_experiment

This commit is contained in:
Raúl Marín 2022-11-24 10:32:12 +01:00 committed by GitHub
commit 4d3217cb18
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
72 changed files with 1011 additions and 351 deletions

View File

@ -442,8 +442,9 @@ elseif (OS_DARWIN)
include(cmake/darwin/default_libs.cmake)
elseif (OS_FREEBSD)
include(cmake/freebsd/default_libs.cmake)
else()
link_libraries(global-group)
endif ()
link_libraries(global-group)
if (NOT (OS_LINUX OR OS_DARWIN))
# Using system libs can cause a lot of warnings in includes (on macro expansion).
@ -592,7 +593,7 @@ add_subdirectory (programs)
add_subdirectory (tests)
add_subdirectory (utils)
include (cmake/sanitize_target_link_libraries.cmake)
include (cmake/sanitize_targets.cmake)
# Build native targets if necessary
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)

View File

@ -220,13 +220,13 @@ struct statx {
uint32_t stx_dev_minor;
uint64_t spare[14];
};
#endif
int statx(int fd, const char *restrict path, int flag,
unsigned int mask, struct statx *restrict statxbuf)
{
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
}
#endif
#include <syscall.h>

View File

@ -23,6 +23,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include (cmake/cxx.cmake)
link_libraries(global-group)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>

View File

@ -24,6 +24,7 @@ find_package(Threads REQUIRED)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
link_libraries(global-group)
target_link_libraries(global-group INTERFACE
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>

View File

@ -34,6 +34,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
set(THREADS_PREFER_PTHREAD_FLAG ON)
find_package(Threads REQUIRED)
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies
# which are ok with static libraries but not with dynamic ones
link_libraries(global-group)
if (NOT OS_ANDROID)
if (NOT USE_MUSL)
# Our compatibility layer doesn't build under Android, many errors in musl.
@ -42,9 +49,6 @@ if (NOT OS_ANDROID)
add_subdirectory(base/harmful)
endif ()
include (cmake/unwind.cmake)
include (cmake/cxx.cmake)
target_link_libraries(global-group INTERFACE
-Wl,--start-group
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>

View File

@ -1,3 +1,13 @@
# https://stackoverflow.com/a/62311397/328260
macro (get_all_targets_recursive targets dir)
get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
foreach (subdir ${subdirectories})
get_all_targets_recursive (${targets} ${subdir})
endforeach ()
get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
list (APPEND ${targets} ${current_targets})
endmacro ()
# When you will try to link target with the directory (that exists), cmake will
# skip this without an error, only the following warning will be reported:
#
@ -18,23 +28,12 @@
# -- but cannot be used with link_libraries()
# - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize
# -- this will work.
# https://stackoverflow.com/a/62311397/328260
function (get_all_targets var)
set (targets)
get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR})
set (${var} ${targets} PARENT_SCOPE)
endfunction()
macro (get_all_targets_recursive targets dir)
get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
foreach (subdir ${subdirectories})
get_all_targets_recursive (${targets} ${subdir})
endforeach ()
get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
list (APPEND ${targets} ${current_targets})
endmacro ()
macro (sanitize_link_libraries target)
function (sanitize_link_libraries target)
get_target_property(target_type ${target} TYPE)
if (${target_type} STREQUAL "INTERFACE_LIBRARY")
get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES)
@ -48,9 +47,35 @@ macro (sanitize_link_libraries target)
message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}")
endif()
endforeach()
endmacro()
endfunction()
get_all_targets (all_targets)
foreach (target ${all_targets})
sanitize_link_libraries(${target})
endforeach()
#
# Do not allow to define -W* from contrib publically (INTERFACE/PUBLIC).
#
function (get_contrib_targets var)
set (targets)
get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}/contrib)
set (${var} ${targets} PARENT_SCOPE)
endfunction()
function (sanitize_interface_flags target)
get_target_property(target_type ${target} TYPE)
get_property(compile_definitions TARGET ${target} PROPERTY INTERFACE_COMPILE_DEFINITIONS)
get_property(compile_options TARGET ${target} PROPERTY INTERFACE_COMPILE_OPTIONS)
if (NOT "${compile_options}" STREQUAL "")
message(FATAL_ERROR "${target} set INTERFACE_COMPILE_OPTIONS to ${compile_options}. This is forbidden.")
endif()
if ("${compile_definitions}" MATCHES "-Wl,")
# linker option - OK
elseif ("${compile_definitions}" MATCHES "-W")
message(FATAL_ERROR "${target} contains ${compile_definitions} flags in INTERFACE_COMPILE_DEFINITIONS. This is forbidden.")
endif()
endfunction()
get_contrib_targets (contrib_targets)
foreach (contrib_target ${contrib_targets})
sanitize_interface_flags(${contrib_target})
endforeach()

View File

@ -57,7 +57,7 @@ add_library(cxx ${SRCS})
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
target_include_directories(cxx SYSTEM BEFORE PRIVATE $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$<COMPILE_LANGUAGE:CXX>:$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>>)
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
# Enable capturing stack traces for all exceptions.

View File

@ -11,6 +11,14 @@ Projections store data in a format that optimizes query execution, this feature
You can define one or more projections for a table, and during the query analysis the projection with the least data to scan will be selected by ClickHouse without modifying the query provided by the user.
:::note Disk usage
Projections will create internally a new hidden table, this means that more IO and space on disk will be required.
Example, If the projection has defined a different primary key, all the data from the original table will be duplicated.
:::
You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections).
## Example filtering without using primary keys
Creating the table:

View File

@ -60,7 +60,7 @@ If you specify `POPULATE`, the existing table data is inserted into the view whe
A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data wont be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`.
The execution of [ALTER](../../../sql-reference/statements/alter/view.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you can not update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view.

View File

@ -47,6 +47,7 @@ Union
- `AST` — Abstract syntax tree.
- `SYNTAX` — Query text after AST-level optimizations.
- `QUERY TREE` — Query tree after Query Tree level optimizations.
- `PLAN` — Query execution plan.
- `PIPELINE` — Query execution pipeline.
@ -110,6 +111,32 @@ FROM
CROSS JOIN system.numbers AS c
```
### EXPLAIN QUERY TREE
Settings:
- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`.
- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
Example:
```sql
EXPLAIN QUERY TREE SELECT id, value FROM test_table;
```
```
QUERY id: 0
PROJECTION COLUMNS
id UInt64
value String
PROJECTION
LIST id: 1, nodes: 2
COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3
COLUMN id: 4, column_name: value, result_type: String, source_id: 3
JOIN TREE
TABLE id: 3, table_name: default.test_table
```
### EXPLAIN PLAN
Dump query plan steps.

View File

@ -243,6 +243,54 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all
You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined).
## GROUP BY ALL
`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are not aggregate functions.
For example:
``` sql
SELECT
a * 2,
b,
count(c),
FROM t
GROUP BY ALL
```
is the same as
``` sql
SELECT
a * 2,
b,
count(c),
FROM t
GROUP BY a * 2, b
```
For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
For example:
``` sql
SELECT
substring(a, 4, 2),
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY ALL
```
is the same as
``` sql
SELECT
substring(a, 4, 2),
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
```
## Examples
Example:

View File

@ -98,7 +98,7 @@ ClickHouse предоставляет возможность аутентифи
:::danger "Важно"
Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
:::info ""
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.

View File

@ -74,7 +74,7 @@ Kafka 特性:
消费的消息会被自动追踪,因此每个消息在不同的消费组里只会记录一次。如果希望获得两次数据,则使用另一个组名创建副本。
消费组可以灵活配置并且在集群之间同步。例如如果群集中有10个主题和5个表副本则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 http://kafka.apache.org/intro。
消费组可以灵活配置并且在集群之间同步。例如如果群集中有10个主题和5个表副本则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 [http://kafka.apache.org/intro](http://kafka.apache.org/intro)
`SELECT` 查询对于读取消息并不是很有用(调试除外),因为每条消息只能被读取一次。使用物化视图创建实时线程更实用。您可以这样做:

View File

@ -77,6 +77,54 @@ sidebar_label: GROUP BY
您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。
## GROUP BY ALL {#group-by-all}
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行`GROUP BY`。
例如
``` sql
SELECT
a * 2,
b,
count(c),
FROM t
GROUP BY ALL
```
效果等同于
``` sql
SELECT
a * 2,
b,
count(c),
FROM t
GROUP BY a * 2, b
```
对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`。
例如:
``` sql
SELECT
substring(a, 4, 2),
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY ALL
```
效果等同于
``` sql
SELECT
substring(a, 4, 2),
substring(substring(a, 1, 2), 1, count(b))
FROM t
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
```
## 例子 {#examples}
示例:

View File

@ -80,8 +80,8 @@ require (
go.opentelemetry.io/otel v1.4.1 // indirect
go.opentelemetry.io/otel/trace v1.4.1 // indirect
golang.org/x/net v0.0.0-20211108170745-6635138e15ea // indirect
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
golang.org/x/text v0.3.8 // indirect
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect
google.golang.org/grpc v1.43.0 // indirect
google.golang.org/protobuf v1.27.1 // indirect

View File

@ -1078,8 +1078,8 @@ golang.org/x/sys v0.0.0-20211109184856-51b60fd695b3/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0=
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@ -1089,8 +1089,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=

View File

@ -2,6 +2,8 @@
#include <Access/LDAPClient.h>
#include <Common/Exception.h>
#include <Common/quoteString.h>
#include <Common/SipHash.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <boost/algorithm/string/case_conv.hpp>
@ -73,6 +75,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
const bool has_tls_ca_cert_file = config.has(ldap_server_config + ".tls_ca_cert_file");
const bool has_tls_ca_cert_dir = config.has(ldap_server_config + ".tls_ca_cert_dir");
const bool has_tls_cipher_suite = config.has(ldap_server_config + ".tls_cipher_suite");
const bool has_search_limit = config.has(ldap_server_config + ".search_limit");
if (!has_host)
throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS);
@ -91,8 +94,8 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
}
else if (has_auth_dn_prefix || has_auth_dn_suffix)
{
const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix");
const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix");
std::string auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix");
std::string auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix");
params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix;
}
@ -176,14 +179,17 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
if (has_port)
{
const auto port = config.getInt64(ldap_server_config + ".port");
if (port < 0 || port > 65535)
UInt32 port = config.getUInt(ldap_server_config + ".port");
if (port > 65535)
throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS);
params.port = port;
}
else
params.port = (params.enable_tls == LDAPClient::Params::TLSEnable::YES ? 636 : 389);
if (has_search_limit)
params.search_limit = static_cast<UInt32>(config.getUInt64(ldap_server_config + ".search_limit"));
}
void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::AbstractConfiguration & config)
@ -313,11 +319,26 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
}
}
UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params)
{
SipHash hash;
params.updateHash(hash);
if (role_search_params)
{
for (const auto & params_instance : *role_search_params)
{
params_instance.updateHash(hash);
}
}
return hash.get128();
}
bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const BasicCredentials & credentials,
const LDAPClient::RoleSearchParamsList * role_search_params, LDAPClient::SearchResultsList * role_search_results) const
{
std::optional<LDAPClient::Params> params;
std::size_t params_hash = 0;
UInt128 params_hash = 0;
{
std::scoped_lock lock(mutex);
@ -331,14 +352,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B
params->user = credentials.getUserName();
params->password = credentials.getPassword();
params->combineCoreHash(params_hash);
if (role_search_params)
{
for (const auto & params_instance : *role_search_params)
{
params_instance.combineHash(params_hash);
}
}
params_hash = computeParamsHash(*params, role_search_params);
// Check the cache, but only if the caching is enabled at all.
if (params->verification_cooldown > std::chrono::seconds{0})
@ -408,15 +422,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B
new_params.user = credentials.getUserName();
new_params.password = credentials.getPassword();
std::size_t new_params_hash = 0;
new_params.combineCoreHash(new_params_hash);
if (role_search_params)
{
for (const auto & params_instance : *role_search_params)
{
params_instance.combineHash(new_params_hash);
}
}
const UInt128 new_params_hash = computeParamsHash(new_params, role_search_params);
// If the critical server params have changed while we were checking the password, we discard the current result.
if (params_hash != new_params_hash)

View File

@ -2,10 +2,10 @@
#include <Common/Exception.h>
#include <base/scope_guard.h>
#include <Common/logger_useful.h>
#include <Common/SipHash.h>
#include <Poco/Logger.h>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/container_hash/hash.hpp>
#include <mutex>
#include <utility>
@ -15,6 +15,22 @@
#include <sys/time.h>
namespace
{
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
void updateHash(SipHash & hash, const T & value)
{
hash.update(value);
}
void updateHash(SipHash & hash, const std::string & value)
{
hash.update(value.size());
hash.update(value);
}
}
namespace DB
{
@ -26,30 +42,30 @@ namespace ErrorCodes
extern const int LDAP_ERROR;
}
void LDAPClient::SearchParams::combineHash(std::size_t & seed) const
void LDAPClient::SearchParams::updateHash(SipHash & hash) const
{
boost::hash_combine(seed, base_dn);
boost::hash_combine(seed, static_cast<int>(scope));
boost::hash_combine(seed, search_filter);
boost::hash_combine(seed, attribute);
::updateHash(hash, base_dn);
::updateHash(hash, static_cast<int>(scope));
::updateHash(hash, search_filter);
::updateHash(hash, attribute);
}
void LDAPClient::RoleSearchParams::combineHash(std::size_t & seed) const
void LDAPClient::RoleSearchParams::updateHash(SipHash & hash) const
{
SearchParams::combineHash(seed);
boost::hash_combine(seed, prefix);
SearchParams::updateHash(hash);
::updateHash(hash, prefix);
}
void LDAPClient::Params::combineCoreHash(std::size_t & seed) const
void LDAPClient::Params::updateHash(SipHash & hash) const
{
boost::hash_combine(seed, host);
boost::hash_combine(seed, port);
boost::hash_combine(seed, bind_dn);
boost::hash_combine(seed, user);
boost::hash_combine(seed, password);
::updateHash(hash, host);
::updateHash(hash, port);
::updateHash(hash, bind_dn);
::updateHash(hash, user);
::updateHash(hash, password);
if (user_dn_detection)
user_dn_detection->combineHash(seed);
user_dn_detection->updateHash(hash);
}
LDAPClient::LDAPClient(const Params & params_)
@ -153,13 +169,13 @@ namespace
}
void LDAPClient::diag(int rc, String text)
void LDAPClient::handleError(int result_code, String text)
{
std::scoped_lock lock(ldap_global_mutex);
if (rc != LDAP_SUCCESS)
if (result_code != LDAP_SUCCESS)
{
const char * raw_err_str = ldap_err2string(rc);
const char * raw_err_str = ldap_err2string(result_code);
if (raw_err_str && *raw_err_str != '\0')
{
if (!text.empty())
@ -214,7 +230,7 @@ bool LDAPClient::openConnection()
SCOPE_EXIT({ ldap_memfree(uri); });
diag(ldap_initialize(&handle, uri));
handleError(ldap_initialize(&handle, uri));
if (!handle)
throw Exception("ldap_initialize() failed", ErrorCodes::LDAP_ERROR);
}
@ -226,13 +242,13 @@ bool LDAPClient::openConnection()
case LDAPClient::Params::ProtocolVersion::V2: value = LDAP_VERSION2; break;
case LDAPClient::Params::ProtocolVersion::V3: value = LDAP_VERSION3; break;
}
diag(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value));
handleError(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value));
}
diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON));
handleError(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON));
#ifdef LDAP_OPT_KEEPCONN
diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON));
handleError(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON));
#endif
#ifdef LDAP_OPT_TIMEOUT
@ -240,7 +256,7 @@ bool LDAPClient::openConnection()
::timeval operation_timeout;
operation_timeout.tv_sec = params.operation_timeout.count();
operation_timeout.tv_usec = 0;
diag(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout));
handleError(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout));
}
#endif
@ -249,18 +265,18 @@ bool LDAPClient::openConnection()
::timeval network_timeout;
network_timeout.tv_sec = params.network_timeout.count();
network_timeout.tv_usec = 0;
diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout));
handleError(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout));
}
#endif
{
const int search_timeout = static_cast<int>(params.search_timeout.count());
diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout));
handleError(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout));
}
{
const int size_limit = params.search_limit;
diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit));
const int size_limit = static_cast<int>(params.search_limit);
handleError(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit));
}
#ifdef LDAP_OPT_X_TLS_PROTOCOL_MIN
@ -274,7 +290,7 @@ bool LDAPClient::openConnection()
case LDAPClient::Params::TLSProtocolVersion::TLS1_1: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_1; break;
case LDAPClient::Params::TLSProtocolVersion::TLS1_2: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_2; break;
}
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value));
}
#endif
@ -288,44 +304,44 @@ bool LDAPClient::openConnection()
case LDAPClient::Params::TLSRequireCert::TRY: value = LDAP_OPT_X_TLS_TRY; break;
case LDAPClient::Params::TLSRequireCert::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break;
}
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value));
}
#endif
#ifdef LDAP_OPT_X_TLS_CERTFILE
if (!params.tls_cert_file.empty())
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str()));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str()));
#endif
#ifdef LDAP_OPT_X_TLS_KEYFILE
if (!params.tls_key_file.empty())
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str()));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str()));
#endif
#ifdef LDAP_OPT_X_TLS_CACERTFILE
if (!params.tls_ca_cert_file.empty())
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str()));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str()));
#endif
#ifdef LDAP_OPT_X_TLS_CACERTDIR
if (!params.tls_ca_cert_dir.empty())
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str()));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str()));
#endif
#ifdef LDAP_OPT_X_TLS_CIPHER_SUITE
if (!params.tls_cipher_suite.empty())
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str()));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str()));
#endif
#ifdef LDAP_OPT_X_TLS_NEWCTX
{
const int i_am_a_server = 0;
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server));
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server));
}
#endif
if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS)
diag(ldap_start_tls_s(handle, nullptr, nullptr));
handleError(ldap_start_tls_s(handle, nullptr, nullptr));
final_user_name = escapeForDN(params.user);
final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} });
@ -346,7 +362,7 @@ bool LDAPClient::openConnection()
if (rc == LDAP_INVALID_CREDENTIALS)
return false;
diag(rc);
handleError(rc);
}
// Once bound, run the user DN search query and update the default value, if asked.
@ -425,7 +441,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
}
});
diag(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs));
handleError(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs));
for (
auto * msg = ldap_first_message(handle, msgs);
@ -452,7 +468,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
::berval bv;
diag(ldap_get_dn_ber(handle, msg, &ber, &bv));
handleError(ldap_get_dn_ber(handle, msg, &ber, &bv));
if (bv.bv_val && bv.bv_len > 0)
result.emplace(bv.bv_val, bv.bv_len);
@ -504,7 +520,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
case LDAP_RES_SEARCH_REFERENCE:
{
char ** referrals = nullptr;
diag(ldap_parse_reference(handle, msg, &referrals, nullptr, 0));
handleError(ldap_parse_reference(handle, msg, &referrals, nullptr, 0));
if (referrals)
{
@ -528,7 +544,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
char * matched_msg = nullptr;
char * error_msg = nullptr;
diag(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0));
handleError(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0));
if (rc != LDAP_SUCCESS)
{
@ -610,7 +626,7 @@ bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search
#else // USE_LDAP
void LDAPClient::diag(const int, String)
void LDAPClient::handleError(const int, String)
{
throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
}

View File

@ -16,6 +16,7 @@
#include <set>
#include <vector>
class SipHash;
namespace DB
{
@ -38,7 +39,7 @@ public:
String search_filter;
String attribute = "cn";
void combineHash(std::size_t & seed) const;
void updateHash(SipHash & hash) const;
};
struct RoleSearchParams
@ -46,7 +47,7 @@ public:
{
String prefix;
void combineHash(std::size_t & seed) const;
void updateHash(SipHash & hash) const;
};
using RoleSearchParamsList = std::vector<RoleSearchParams>;
@ -95,7 +96,7 @@ public:
ProtocolVersion protocol_version = ProtocolVersion::V3;
String host;
std::uint16_t port = 636;
UInt16 port = 636;
TLSEnable enable_tls = TLSEnable::YES;
TLSProtocolVersion tls_minimum_protocol_version = TLSProtocolVersion::TLS1_2;
@ -119,9 +120,9 @@ public:
std::chrono::seconds operation_timeout{40};
std::chrono::seconds network_timeout{30};
std::chrono::seconds search_timeout{20};
std::uint32_t search_limit = 100;
UInt32 search_limit = 256; /// An arbitrary number, no particular motivation for this value.
void combineCoreHash(std::size_t & seed) const;
void updateHash(SipHash & hash) const;
};
explicit LDAPClient(const Params & params_);
@ -133,7 +134,7 @@ public:
LDAPClient & operator= (LDAPClient &&) = delete;
protected:
MAYBE_NORETURN void diag(int rc, String text = "");
MAYBE_NORETURN void handleError(int result_code, String text = "");
MAYBE_NORETURN bool openConnection();
void closeConnection() noexcept;
SearchResults search(const SearchParams & search_params);

View File

@ -30,6 +30,7 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
extern const int NOT_IMPLEMENTED;
extern const int TOO_LARGE_STRING_SIZE;
extern const int LOGICAL_ERROR;
}
/** Aggregate functions that store one of passed values.
@ -485,13 +486,15 @@ struct SingleValueDataString //-V730
private:
using Self = SingleValueDataString;
Int32 size = -1; /// -1 indicates that there is no value.
Int32 capacity = 0; /// power of two or zero
/// 0 size indicates that there is no value. Empty string must has terminating '\0' and, therefore, size of empty string is 1
UInt32 size = 0;
UInt32 capacity = 0; /// power of two or zero
char * large_data;
public:
static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64;
static constexpr Int32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data);
static constexpr UInt32 AUTOMATIC_STORAGE_SIZE = 64;
static constexpr UInt32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data);
static constexpr UInt32 MAX_STRING_SIZE = std::numeric_limits<Int32>::max();
private:
char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero.
@ -502,7 +505,7 @@ public:
bool has() const
{
return size >= 0;
return size;
}
private:
@ -536,20 +539,27 @@ public:
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
{
writeBinary(size, buf);
if (unlikely(MAX_STRING_SIZE < size))
throw Exception(ErrorCodes::LOGICAL_ERROR, "String size is too big ({}), it's a bug", size);
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
Int32 size_to_write = size ? size : -1;
writeBinary(size_to_write, buf);
if (has())
buf.write(getData(), size);
}
void allocateLargeDataIfNeeded(Int64 size_to_reserve, Arena * arena)
void allocateLargeDataIfNeeded(UInt32 size_to_reserve, Arena * arena)
{
if (capacity < size_to_reserve)
{
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(size_to_reserve));
/// It might happen if the size was too big and the rounded value does not fit a size_t
if (unlikely(capacity < size_to_reserve))
if (unlikely(MAX_STRING_SIZE < size_to_reserve))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve);
size_t rounded_capacity = roundUpToPowerOfTwoOrZero(size_to_reserve);
chassert(rounded_capacity <= MAX_STRING_SIZE + 1); /// rounded_capacity <= 2^31
capacity = static_cast<UInt32>(rounded_capacity);
/// Don't free large_data here.
large_data = arena->alloc(capacity);
}
@ -557,31 +567,28 @@ public:
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
{
Int32 rhs_size;
readBinary(rhs_size, buf);
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
Int32 rhs_size_signed;
readBinary(rhs_size_signed, buf);
if (rhs_size < 0)
if (rhs_size_signed < 0)
{
/// Don't free large_data here.
size = rhs_size;
size = 0;
return;
}
UInt32 rhs_size = rhs_size_signed;
if (rhs_size <= MAX_SMALL_STRING_SIZE)
{
/// Don't free large_data here.
size = rhs_size;
if (size > 0)
buf.readStrict(small_data, size);
buf.readStrict(small_data, size);
}
else
{
/// Reserve one byte more for null-character
Int64 rhs_size_to_reserve = rhs_size;
rhs_size_to_reserve += 1; /// Avoid overflow
allocateLargeDataIfNeeded(rhs_size_to_reserve, arena);
allocateLargeDataIfNeeded(rhs_size + 1, arena);
size = rhs_size;
buf.readStrict(large_data, size);
}
@ -616,7 +623,10 @@ public:
/// Assuming to.has()
void changeImpl(StringRef value, Arena * arena)
{
Int32 value_size = static_cast<Int32>(value.size);
if (unlikely(MAX_STRING_SIZE < value.size))
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", value.size);
UInt32 value_size = static_cast<UInt32>(value.size);
if (value_size <= MAX_SMALL_STRING_SIZE)
{

60
src/Analyzer/HashUtils.h Normal file
View File

@ -0,0 +1,60 @@
#pragma once
#include <Analyzer/IQueryTreeNode.h>
namespace DB
{
/** This structure holds query tree node ptr and its hash. It can be used as hash map key to avoid unnecessary hash
* recalculations.
*
* Example of usage:
* std::unordered_map<QueryTreeNodeConstRawPtrWithHash, std::string> map;
*/
template <typename QueryTreeNodePtrType>
struct QueryTreeNodeWithHash
{
QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT
: node(std::move(node_))
, hash(node->getTreeHash().first)
{}
QueryTreeNodePtrType node = nullptr;
size_t hash = 0;
};
template <typename T>
inline bool operator==(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
{
return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node);
}
template <typename T>
inline bool operator!=(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
{
return !(lhs == rhs);
}
using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash<QueryTreeNodePtr>;
using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash<IQueryTreeNode *>;
using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash<const IQueryTreeNode *>;
using QueryTreeNodePtrWithHashSet = std::unordered_set<QueryTreeNodePtrWithHash>;
using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set<QueryTreeNodeConstRawPtrWithHash>;
template <typename Value>
using QueryTreeNodePtrWithHashMap = std::unordered_map<QueryTreeNodePtrWithHash, Value>;
template <typename Value>
using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map<QueryTreeNodeConstRawPtrWithHash, Value>;
}
template <typename T>
struct std::hash<DB::QueryTreeNodeWithHash<T>>
{
size_t operator()(const DB::QueryTreeNodeWithHash<T> & node_with_hash) const
{
return node_with_hash.hash;
}
};

View File

@ -8,6 +8,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/FunctionNode.h>
#include <Analyzer/ConstantNode.h>
#include <Analyzer/HashUtils.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
@ -48,43 +49,24 @@ public:
/// Do not apply for `count()` with without arguments or `count(*)`, only `count(x)` is supported.
return;
mapping[QueryTreeNodeWithHash(argument_nodes[0])].push_back(&node);
argument_to_functions_mapping[argument_nodes[0]].push_back(&node);
}
struct QueryTreeNodeWithHash
{
const QueryTreeNodePtr & node;
IQueryTreeNode::Hash hash;
explicit QueryTreeNodeWithHash(const QueryTreeNodePtr & node_)
: node(node_)
, hash(node->getTreeHash())
{}
bool operator==(const QueryTreeNodeWithHash & rhs) const
{
return hash == rhs.hash && node->isEqual(*rhs.node);
}
struct Hash
{
size_t operator() (const QueryTreeNodeWithHash & key) const { return key.hash.first ^ key.hash.second; }
};
};
/// argument -> list of sum/count/avg functions with this argument
std::unordered_map<QueryTreeNodeWithHash, std::vector<QueryTreeNodePtr *>, QueryTreeNodeWithHash::Hash> mapping;
QueryTreeNodePtrWithHashMap<std::vector<QueryTreeNodePtr *>> argument_to_functions_mapping;
private:
std::unordered_set<String> names_to_collect;
};
QueryTreeNodePtr createResolvedFunction(ContextPtr context, const String & name, DataTypePtr result_type, QueryTreeNodes arguments)
QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, const DataTypePtr & result_type, QueryTreeNodes arguments)
{
auto function_node = std::make_shared<FunctionNode>(name);
auto function = FunctionFactory::instance().get(name, context);
function_node->resolveAsFunction(std::move(function), result_type);
function_node->getArguments().getNodes() = std::move(arguments);
return function_node;
}
@ -94,21 +76,20 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
AggregateFunctionProperties properties;
auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties);
function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType());
function_node->getArguments().getNodes() = { argument };
function_node->getArgumentsNode() = std::make_shared<ListNode>(QueryTreeNodes{argument});
return function_node;
}
QueryTreeNodePtr createTupleElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index)
{
return createResolvedFunction(context, "tupleElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
return createResolvedFunction(context, "tupleElement", result_type, {std::move(argument), std::make_shared<ConstantNode>(index)});
}
QueryTreeNodePtr createArrayElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index)
{
return createResolvedFunction(context, "arrayElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
return createResolvedFunction(context, "arrayElement", result_type, {std::move(argument), std::make_shared<ConstantNode>(index)});
}
void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context)
@ -151,6 +132,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector<QueryTreeNodePtr *> n
{
Array parameters;
parameters.reserve(nodes.size());
for (const auto * node : nodes)
{
const FunctionNode & function_node = (*node)->as<const FunctionNode &>();
@ -172,6 +154,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector<QueryTreeNodePtr *> n
parameters.push_back(constant_value->getValue());
}
return createResolvedAggregateFunction("quantiles", argument, parameters);
}
@ -181,7 +164,7 @@ void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context)
FuseFunctionsVisitor visitor({"sum", "count", "avg"});
visitor.visit(query_tree_node);
for (auto & [argument, nodes] : visitor.mapping)
for (auto & [argument, nodes] : visitor.argument_to_functions_mapping)
{
if (nodes.size() < 2)
continue;
@ -199,24 +182,22 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context)
{
FuseFunctionsVisitor visitor_quantile({"quantile"});
visitor_quantile.visit(query_tree_node);
for (auto & [argument, nodes] : visitor_quantile.mapping)
for (auto & [argument, nodes] : visitor_quantile.argument_to_functions_mapping)
{
if (nodes.size() < 2)
size_t nodes_size = nodes.size();
if (nodes_size < 2)
continue;
auto quantiles_node = createFusedQuantilesNode(nodes, argument.node);
auto result_array_type = std::dynamic_pointer_cast<const DataTypeArray>(quantiles_node->getResultType());
if (!result_array_type)
{
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Unexpected return type '{}' of function '{}', should be array",
quantiles_node->getResultType(), quantiles_node->getFunctionName());
}
for (size_t i = 0; i < nodes.size(); ++i)
{
for (size_t i = 0; i < nodes_size; ++i)
*nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, i + 1);
}
}
}

View File

@ -3,6 +3,7 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryNode.h>
#include <Analyzer/SortNode.h>
#include <Analyzer/HashUtils.h>
namespace DB
{
@ -10,35 +11,6 @@ namespace DB
namespace
{
struct QueryTreeNodeWithHash
{
explicit QueryTreeNodeWithHash(const IQueryTreeNode * node_)
: node(node_)
, hash(node->getTreeHash().first)
{}
const IQueryTreeNode * node = nullptr;
size_t hash = 0;
};
struct QueryTreeNodeWithHashHash
{
size_t operator()(const QueryTreeNodeWithHash & node_with_hash) const
{
return node_with_hash.hash;
}
};
struct QueryTreeNodeWithHashEqualTo
{
bool operator()(const QueryTreeNodeWithHash & lhs_node, const QueryTreeNodeWithHash & rhs_node) const
{
return lhs_node.hash == rhs_node.hash && lhs_node.node->isEqual(*rhs_node.node);
}
};
using QueryTreeNodeWithHashSet = std::unordered_set<QueryTreeNodeWithHash, QueryTreeNodeWithHashHash, QueryTreeNodeWithHashEqualTo>;
class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor<OrderByLimitByDuplicateEliminationVisitor>
{
public:
@ -93,7 +65,7 @@ public:
}
private:
QueryTreeNodeWithHashSet unique_expressions_nodes_set;
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
};
}

View File

@ -67,6 +67,8 @@
#include <Analyzer/InDepthQueryTreeVisitor.h>
#include <Analyzer/QueryTreeBuilder.h>
#include <Common/checkStackSize.h>
namespace DB
{
@ -517,7 +519,7 @@ public:
private:
QueryTreeNodes expressions;
std::unordered_map<std::string, std::vector<QueryTreeNodePtr>> alias_name_to_expressions;
std::unordered_map<std::string, QueryTreeNodes> alias_name_to_expressions;
};
/** Projection names is name of query tree node that is used in projection part of query node.
@ -1100,6 +1102,10 @@ private:
static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
static void expandGroupByAll(QueryNode & query_tree_node_typed);
static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);
/// Resolve identifier functions
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
@ -1929,6 +1935,68 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP
scope.scope_node->formatASTForErrorMessage());
}
std::pair<bool, UInt64> QueryAnalyzer::recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into)
{
checkStackSize();
if (node->as<ColumnNode>())
{
into.push_back(node);
return {false, 1};
}
auto * function = node->as<FunctionNode>();
if (!function)
return {false, 0};
if (function->isAggregateFunction())
return {true, 0};
UInt64 pushed_children = 0;
bool has_aggregate = false;
for (auto & child : function->getArguments().getNodes())
{
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into);
has_aggregate |= child_has_aggregate;
pushed_children += child_pushed_children;
}
/// The current function is not aggregate function and there is no aggregate function in its arguments,
/// so use the current function to replace its arguments
if (!has_aggregate)
{
for (UInt64 i = 0; i < pushed_children; i++)
into.pop_back();
into.push_back(node);
pushed_children = 1;
}
return {has_aggregate, pushed_children};
}
/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions.
*
* For a special case that if there is a function having both aggregate functions and other fields as its arguments,
* the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
*
* Example:
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL
* will expand as
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2)
*/
void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
{
auto & group_by_nodes = query_tree_node_typed.getGroupBy().getNodes();
auto & projection_list = query_tree_node_typed.getProjection();
for (auto & node : projection_list.getNodes())
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
}
/// Resolve identifier functions implementation
@ -2171,18 +2239,19 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
auto & alias_identifier_node = it->second->as<IdentifierNode &>();
auto identifier = alias_identifier_node.getIdentifier();
auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings);
if (!lookup_result.isResolved())
if (!lookup_result.resolved_identifier)
{
std::unordered_set<Identifier> valid_identifiers;
collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers);
auto hints = collectIdentifierTypoHints(identifier, valid_identifiers);
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}' in scope {}{}",
toStringLowercase(IdentifierLookupContext::EXPRESSION),
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}'. In scope {}{}",
toStringLowercase(identifier_lookup.lookup_context),
identifier.getFullName(),
scope.scope_node->formatASTForErrorMessage(),
getHintsErrorMessageSuffix(hints));
}
it->second = lookup_result.resolved_identifier;
/** During collection of aliases if node is identifier and has alias, we cannot say if it is
@ -2193,9 +2262,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
* If we resolved identifier node as function, we must remove identifier node alias from
* expression alias map.
*/
if (identifier_lookup.isExpressionLookup() && it->second)
if (identifier_lookup.isExpressionLookup())
scope.alias_name_to_lambda_node.erase(identifier_bind_part);
else if (identifier_lookup.isFunctionLookup() && it->second)
else if (identifier_lookup.isFunctionLookup())
scope.alias_name_to_expression_node.erase(identifier_bind_part);
scope.expressions_in_resolve_process_stack.popNode();
@ -3203,11 +3272,9 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
if (auto * array_join_node = table_expression->as<ArrayJoinNode>())
{
size_t table_expressions_column_nodes_with_names_stack_size = table_expressions_column_nodes_with_names_stack.size();
if (table_expressions_column_nodes_with_names_stack_size < 1)
if (table_expressions_column_nodes_with_names_stack.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Expected at least 1 table expressions on stack before ARRAY JOIN processing. Actual {}",
table_expressions_column_nodes_with_names_stack_size);
"Expected at least 1 table expressions on stack before ARRAY JOIN processing");
auto & table_expression_column_nodes_with_names = table_expressions_column_nodes_with_names_stack.back();
@ -5388,25 +5455,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
}
}
/// TODO: Special functions that can take query
/// TODO: Support qualified matchers for table function
for (auto & argument_node : table_function_node.getArguments().getNodes())
{
if (argument_node->getNodeType() == QueryTreeNodeType::MATCHER)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Matcher as table function argument is not supported {}. In scope {}",
join_tree_node->formatASTForErrorMessage(),
scope.scope_node->formatASTForErrorMessage());
}
auto * function_node = argument_node->as<FunctionNode>();
if (function_node && table_function_factory.hasNameOrAlias(function_node->getFunctionName()))
continue;
resolveExpressionNode(argument_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
}
resolveExpressionNodeList(table_function_node.getArgumentsNode(), scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
auto table_function_ast = table_function_node.toAST();
table_function_ptr->parseArguments(table_function_ast, scope_context);
@ -6006,6 +6055,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
node->removeAlias();
}
if (query_node_typed.isGroupByAll())
expandGroupByAll(query_node_typed);
/** Validate aggregates
*
* 1. Check that there are no aggregate functions and GROUPING function in JOIN TREE, WHERE, PREWHERE, in another aggregate functions.

View File

@ -54,6 +54,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
if (is_group_by_with_totals)
buffer << ", is_group_by_with_totals: " << is_group_by_with_totals;
if (is_group_by_all)
buffer << ", is_group_by_all: " << is_group_by_all;
std::string group_by_type;
if (is_group_by_with_rollup)
group_by_type = "rollup";
@ -117,7 +120,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
getWhere()->dumpTreeImpl(buffer, format_state, indent + 4);
}
if (hasGroupBy())
if (!is_group_by_all && hasGroupBy())
{
buffer << '\n' << std::string(indent + 2, ' ') << "GROUP BY\n";
getGroupBy().dumpTreeImpl(buffer, format_state, indent + 4);
@ -198,7 +201,8 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
is_group_by_with_totals == rhs_typed.is_group_by_with_totals &&
is_group_by_with_rollup == rhs_typed.is_group_by_with_rollup &&
is_group_by_with_cube == rhs_typed.is_group_by_with_cube &&
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets;
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets &&
is_group_by_all == rhs_typed.is_group_by_all;
}
void QueryNode::updateTreeHashImpl(HashState & state) const
@ -226,6 +230,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
state.update(is_group_by_with_rollup);
state.update(is_group_by_with_cube);
state.update(is_group_by_with_grouping_sets);
state.update(is_group_by_all);
if (constant_value)
{
@ -251,6 +256,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
result_query_node->is_group_by_all = is_group_by_all;
result_query_node->cte_name = cte_name;
result_query_node->projection_columns = projection_columns;
result_query_node->constant_value = constant_value;
@ -267,6 +273,7 @@ ASTPtr QueryNode::toASTImpl() const
select_query->group_by_with_rollup = is_group_by_with_rollup;
select_query->group_by_with_cube = is_group_by_with_cube;
select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets;
select_query->group_by_all = is_group_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
@ -283,7 +290,7 @@ ASTPtr QueryNode::toASTImpl() const
if (getWhere())
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST());
if (hasGroupBy())
if (!is_group_by_all && hasGroupBy())
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST());
if (hasHaving())

View File

@ -176,6 +176,18 @@ public:
is_group_by_with_grouping_sets = is_group_by_with_grouping_sets_value;
}
/// Returns true, if query node has GROUP BY ALL modifier, false otherwise
bool isGroupByAll() const
{
return is_group_by_all;
}
/// Set query node GROUP BY ALL modifier value
void setIsGroupByAll(bool is_group_by_all_value)
{
is_group_by_all = is_group_by_all_value;
}
/// Returns true if query node WITH section is not empty, false otherwise
bool hasWith() const
{
@ -580,6 +592,7 @@ private:
bool is_group_by_with_rollup = false;
bool is_group_by_with_cube = false;
bool is_group_by_with_grouping_sets = false;
bool is_group_by_all = false;
std::string cte_name;
NamesAndTypes projection_columns;

View File

@ -215,6 +215,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
current_query_tree->setIsGroupByWithCube(select_query_typed.group_by_with_cube);
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
current_query_tree->setOriginalAST(select_query);
auto select_settings = select_query_typed.settings();

View File

@ -1401,6 +1401,11 @@ try
QueryPipeline pipeline(std::move(pipe));
PullingAsyncPipelineExecutor executor(pipeline);
if (need_render_progress)
{
pipeline.setProgressCallback([this](const Progress & progress){ onProgress(progress); });
}
Block block;
while (executor.pull(block))
{
@ -1445,12 +1450,6 @@ catch (...)
void ClientBase::sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query)
{
if (need_render_progress)
{
/// Add callback to track reading from fd.
std_in.setProgressCallback(global_context);
}
/// Send data read from stdin.
try
{

View File

@ -171,6 +171,11 @@ protected:
void initTtyBuffer(ProgressOption progress);
/// Should be one of the first, to be destroyed the last,
/// since other members can use them.
SharedContextHolder shared_context;
ContextMutablePtr global_context;
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
bool is_multiquery = false;
bool delayed_interactive = false;
@ -208,9 +213,6 @@ protected:
/// Settings specified via command line args
Settings cmd_settings;
SharedContextHolder shared_context;
ContextMutablePtr global_context;
/// thread status should be destructed before shared context because it relies on process list.
std::optional<ThreadStatus> thread_status;

View File

@ -524,11 +524,13 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
size_t nested_offset = src_concrete.offsetAt(start);
size_t nested_length = src_concrete.getOffsets()[start + length - 1] - nested_offset;
Offsets & cur_offsets = getOffsets();
/// Reserve offsets before to make it more exception safe (in case of MEMORY_LIMIT_EXCEEDED)
cur_offsets.reserve(cur_offsets.size() + length);
getData().insertRangeFrom(src_concrete.getData(), nested_offset, nested_length);
Offsets & cur_offsets = getOffsets();
const Offsets & src_offsets = src_concrete.getOffsets();
if (start == 0 && cur_offsets.empty())
{
cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);

View File

@ -124,6 +124,9 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len
size_t nested_offset = src_concrete.offsetAt(start);
size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset;
/// Reserve offsets before to make it more exception safe (in case of MEMORY_LIMIT_EXCEEDED)
offsets.reserve(offsets.size() + length);
size_t old_chars_size = chars.size();
chars.resize(old_chars_size + nested_length);
memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length);

View File

@ -189,6 +189,13 @@ public:
finalize();
return v0 ^ v1 ^ v2 ^ v3;
}
UInt128 get128()
{
UInt128 res;
get128(res);
return res;
}
};
@ -208,9 +215,7 @@ inline UInt128 sipHash128(const char * data, const size_t size)
{
SipHash hash;
hash.update(data, size);
UInt128 res;
hash.get128(res);
return res;
return hash.get128();
}
inline UInt64 sipHash64(const char * data, const size_t size)

View File

@ -1,6 +1,7 @@
#pragma once
#include <base/types.h>
#include <base/getThreadId.h>
#include <Common/ProfileEvents.h>
#include <sys/time.h>
#include <sys/resource.h>
@ -47,6 +48,8 @@ struct RUsageCounters
UInt64 soft_page_faults = 0;
UInt64 hard_page_faults = 0;
UInt64 thread_id = 0;
RUsageCounters() = default;
RUsageCounters(const ::rusage & rusage_, UInt64 real_time_)
{
@ -61,6 +64,8 @@ struct RUsageCounters
soft_page_faults = static_cast<UInt64>(rusage.ru_minflt);
hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
thread_id = getThreadId();
}
static RUsageCounters current()
@ -78,6 +83,12 @@ struct RUsageCounters
static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
{
chassert(prev.thread_id == curr.thread_id);
/// LONG_MAX is ~106751 days
chassert(curr.real_time - prev.real_time < LONG_MAX);
chassert(curr.user_time - prev.user_time < LONG_MAX);
chassert(curr.sys_time - prev.sys_time < LONG_MAX);
profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U);
profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U);
profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U);

View File

@ -175,8 +175,8 @@ protected:
/// Is used to send logs from logs_queue to client in case of fatal errors.
std::function<void()> fatal_error_callback;
/// It is used to avoid enabling the query profiler when you have multiple ThreadStatus in the same thread
bool query_profiler_enabled = true;
/// See setInternalThread()
bool internal_thread = false;
/// Requires access to query_id.
friend class MemoryTrackerThreadSwitcher;
@ -221,11 +221,21 @@ public:
return global_context.lock();
}
void disableProfiling()
{
assert(!query_profiler_real && !query_profiler_cpu);
query_profiler_enabled = false;
}
/// "Internal" ThreadStatus is used for materialized views for separate
/// tracking into system.query_views_log
///
/// You can have multiple internal threads, but only one non-internal with
/// the same thread_id.
///
/// "Internal" thread:
/// - cannot have query profiler
/// since the running (main query) thread should already have one
/// - should not try to obtain latest counter on detach
/// because detaching of such threads will be done from a different
/// thread_id, and some counters are not available (i.e. getrusage()),
/// but anyway they are accounted correctly in the main ThreadStatus of a
/// query.
void setInternalThread();
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
void initializeQuery();

View File

@ -1015,7 +1015,7 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table)
for (const auto & [disk_name, disk] : getContext()->getDisksMap())
{
String data_path = "store/" + getPathForUUID(table.table_id.uuid);
if (!disk->exists(data_path) || disk->isReadOnly())
if (disk->isReadOnly() || !disk->exists(data_path))
continue;
LOG_INFO(log, "Removing data directory {} of dropped table {} from disk {}", data_path, table.table_id.getNameForLogs(), disk_name);

View File

@ -165,7 +165,7 @@ struct QueryASTSettings
struct QueryTreeSettings
{
bool run_passes = false;
bool run_passes = true;
bool dump_passes = false;
bool dump_ast = false;
Int64 passes = -1;

View File

@ -2728,13 +2728,18 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before
{
const Settings & settings = context->getSettingsRef();
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
UInt64 limit_for_distinct = 0;
/// If after this stage of DISTINCT ORDER BY is not executed,
/// If after this stage of DISTINCT,
/// (1) ORDER BY is not executed
/// (2) there is no LIMIT BY (todo: we can check if DISTINCT and LIMIT BY expressions are match)
/// then you can get no more than limit_length + limit_offset of different rows.
if ((!query.orderBy() || !before_order) && limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
limit_for_distinct = limit_length + limit_offset;
if ((!query.orderBy() || !before_order) && !query.limitBy())
{
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
if (limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
limit_for_distinct = limit_length + limit_offset;
}
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);

View File

@ -6,7 +6,6 @@
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeEnum.h>
#include <DataTypes/DataTypeUUID.h>
#include <Storages/MergeTree/IMergeTreeDataPart.h>
#include <Storages/MergeTree/MergeTreeData.h>
#include <Interpreters/PartLog.h>
@ -101,7 +100,6 @@ NamesAndTypesList PartLogElement::getNamesAndTypes()
{"database", std::make_shared<DataTypeString>()},
{"table", std::make_shared<DataTypeString>()},
{"table_uuid", std::make_shared<DataTypeUUID>()},
{"part_name", std::make_shared<DataTypeString>()},
{"partition_id", std::make_shared<DataTypeString>()},
{"part_type", std::make_shared<DataTypeString>()},
@ -139,7 +137,6 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insert(database_name);
columns[i++]->insert(table_name);
columns[i++]->insert(table_uuid);
columns[i++]->insert(part_name);
columns[i++]->insert(partition_id);
columns[i++]->insert(part_type.toString());
@ -208,7 +205,6 @@ bool PartLog::addNewParts(
elem.database_name = table_id.database_name;
elem.table_name = table_id.table_name;
elem.table_uuid = table_id.uuid;
elem.partition_id = part->info.partition_id;
elem.part_name = part->name;
elem.disk_name = part->getDataPartStorage().getDiskName();

View File

@ -4,7 +4,6 @@
#include <Interpreters/SystemLog.h>
#include <Core/NamesAndTypes.h>
#include <Core/NamesAndAliases.h>
#include <Core/UUID.h>
#include <Storages/MergeTree/MergeType.h>
#include <Storages/MergeTree/MergeAlgorithm.h>
@ -56,7 +55,6 @@ struct PartLogElement
String database_name;
String table_name;
UUID table_uuid{UUIDHelpers::Nil};
String part_name;
String partition_id;
String disk_name;

View File

@ -131,6 +131,12 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
thread_state = ThreadState::AttachedToQuery;
}
void ThreadStatus::setInternalThread()
{
chassert(!query_profiler_real && !query_profiler_cpu);
internal_thread = true;
}
void ThreadStatus::initializeQuery()
{
setupState(std::make_shared<ThreadGroupStatus>());
@ -177,41 +183,44 @@ void ThreadStatus::initPerformanceCounters()
// query_start_time_nanoseconds cannot be used here since RUsageCounters expect CLOCK_MONOTONIC
*last_rusage = RUsageCounters::current();
if (auto query_context_ptr = query_context.lock())
if (!internal_thread)
{
const Settings & settings = query_context_ptr->getSettingsRef();
if (settings.metrics_perf_events_enabled)
if (auto query_context_ptr = query_context.lock())
{
const Settings & settings = query_context_ptr->getSettingsRef();
if (settings.metrics_perf_events_enabled)
{
try
{
current_thread_counters.initializeProfileEvents(
settings.metrics_perf_events_list);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
}
if (!taskstats)
{
try
{
current_thread_counters.initializeProfileEvents(
settings.metrics_perf_events_list);
taskstats = TasksStatsCounters::create(thread_id);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
tryLogCurrentException(log);
}
}
if (taskstats)
taskstats->reset();
}
if (!taskstats)
{
try
{
taskstats = TasksStatsCounters::create(thread_id);
}
catch (...)
{
tryLogCurrentException(log);
}
}
if (taskstats)
taskstats->reset();
}
void ThreadStatus::finalizePerformanceCounters()
{
if (performance_counters_finalized)
if (performance_counters_finalized || internal_thread)
return;
performance_counters_finalized = true;
@ -270,7 +279,7 @@ void ThreadStatus::resetPerformanceCountersLastUsage()
void ThreadStatus::initQueryProfiler()
{
if (!query_profiler_enabled)
if (internal_thread)
return;
/// query profilers are useless without trace collector

View File

@ -1,8 +1,8 @@
#include <algorithm>
#include <memory>
#include <Core/Settings.h>
#include <Core/NamesAndTypes.h>
#include <Core/SettingsEnums.h>
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
@ -45,10 +45,10 @@
#include <DataTypes/NestedUtils.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/WriteHelpers.h>
#include <Storages/IStorage.h>
#include <Common/checkStackSize.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
@ -784,6 +784,67 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
}
}
std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(const ASTPtr & expr, ASTExpressionList & into)
{
checkStackSize();
if (expr->as<ASTIdentifier>())
{
into.children.push_back(expr);
return {false, 1};
}
auto * function = expr->as<ASTFunction>();
if (!function)
return {false, 0};
if (AggregateUtils::isAggregateFunction(*function))
return {true, 0};
UInt64 pushed_children = 0;
bool has_aggregate = false;
for (const auto & child : function->arguments->children)
{
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into);
has_aggregate |= child_has_aggregate;
pushed_children += child_pushed_children;
}
/// The current function is not aggregate function and there is no aggregate function in its arguments,
/// so use the current function to replace its arguments
if (!has_aggregate)
{
for (UInt64 i = 0; i < pushed_children; i++)
into.children.pop_back();
into.children.push_back(expr);
pushed_children = 1;
}
return {has_aggregate, pushed_children};
}
/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions.
*
* For a special case that if there is a function having both aggregate functions and other fields as its arguments,
* the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
*
* Example:
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL
* will expand as
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2)
*/
void expandGroupByAll(ASTSelectQuery * select_query)
{
auto group_expression_list = std::make_shared<ASTExpressionList>();
for (const auto & expr : select_query->select()->children)
recursivelyCollectMaxOrdinaryExpressions(expr, *group_expression_list);
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}
std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
{
@ -1276,6 +1337,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext());
// expand GROUP BY ALL
if (select_query->group_by_all)
expandGroupByAll(select_query);
/// Remove unneeded columns according to 'required_result_columns'.
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)

View File

@ -93,7 +93,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
where()->formatImpl(s, state, frame);
}
if (groupBy())
if (!group_by_all && groupBy())
{
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY" << (s.hilite ? hilite_none : "");
if (!group_by_with_grouping_sets)
@ -104,6 +104,9 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
}
}
if (group_by_all)
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY ALL" << (s.hilite ? hilite_none : "");
if (group_by_with_rollup)
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH ROLLUP" << (s.hilite ? hilite_none : "");

View File

@ -82,6 +82,7 @@ public:
ASTPtr clone() const override;
bool distinct = false;
bool group_by_all = false;
bool group_by_with_totals = false;
bool group_by_with_rollup = false;
bool group_by_with_cube = false;

View File

@ -195,6 +195,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
select_query->group_by_with_cube = true;
else if (s_grouping_sets.ignore(pos, expected))
select_query->group_by_with_grouping_sets = true;
else if (s_all.ignore(pos, expected))
select_query->group_by_all = true;
if ((select_query->group_by_with_rollup || select_query->group_by_with_cube || select_query->group_by_with_grouping_sets) &&
!open_bracket.ignore(pos, expected))
@ -205,7 +207,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
if (!grouping_sets_list.parse(pos, group_expression_list, expected))
return false;
}
else
else if (!select_query->group_by_all)
{
if (!exp_list.parse(pos, group_expression_list, expected))
return false;

View File

@ -15,6 +15,12 @@ try
DB::ParserCreateQuery parser;
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000);
const UInt64 max_ast_depth = 1000;
ast->checkDepth(max_ast_depth);
const UInt64 max_ast_elements = 50000;
ast->checkSize(max_ast_elements);
DB::WriteBufferFromOwnString wb;
DB::formatAST(*ast, wb);

View File

@ -87,8 +87,8 @@ void JoinClause::dump(WriteBuffer & buffer) const
{
const auto & asof_condition = asof_conditions[i];
buffer << "key_index: " << asof_condition.key_index;
buffer << "inequality: " << toString(asof_condition.asof_inequality);
buffer << " key_index: " << asof_condition.key_index;
buffer << " inequality: " << toString(asof_condition.asof_inequality);
if (i + 1 != asof_conditions_size)
buffer << ',';

View File

@ -183,19 +183,19 @@ public:
}
private:
/// Valid for table, table function, query, union, array join table expression nodes
/// Valid for table, table function, array join, query, union nodes
NamesAndTypesList columns;
/// Valid for table, table function, query, union, array join table expression nodes
/// Valid for table, table function, array join, query, union nodes
NameSet columns_names;
/// Valid only for table table expression node
/// Valid only for table node
NameSet alias_columns_names;
/// Valid for table, table function, query, union table, array join expression nodes
/// Valid for table, table function, array join, query, union nodes
ColumnNameToColumnIdentifier column_name_to_column_identifier;
/// Valid for table, table function, query, union table, array join expression nodes
/// Valid for table, table function, array join, query, union nodes
ColumnIdentifierToColumnName column_identifier_to_column_name;
/// Is storage remote

View File

@ -275,10 +275,8 @@ Chain buildPushingToViewsChain(
SCOPE_EXIT({ current_thread = original_thread; });
std::unique_ptr<ThreadStatus> view_thread_status_ptr = std::make_unique<ThreadStatus>();
/// Disable query profiler for this ThreadStatus since the running (main query) thread should already have one
/// If we didn't disable it, then we could end up with N + 1 (N = number of dependencies) profilers which means
/// N times more interruptions
view_thread_status_ptr->disableProfiling();
/// Copy of a ThreadStatus should be internal.
view_thread_status_ptr->setInternalThread();
/// view_thread_status_ptr will be moved later (on and on), so need to capture raw pointer.
view_thread_status_ptr->deleter = [thread_status = view_thread_status_ptr.get(), running_group]
{

View File

@ -1848,7 +1848,6 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa
part_log_elem.database_name = table_id.database_name;
part_log_elem.table_name = table_id.table_name;
part_log_elem.table_uuid = table_id.uuid;
for (const auto & part : parts)
{
@ -6599,7 +6598,6 @@ try
part_log_elem.database_name = table_id.database_name;
part_log_elem.table_name = table_id.table_name;
part_log_elem.table_uuid = table_id.uuid;
part_log_elem.partition_id = MergeTreePartInfo::fromPartName(new_part_name, format_version).partition_id;
part_log_elem.part_name = new_part_name;

View File

@ -434,7 +434,7 @@ void StorageBuffer::read(
}
static void appendBlock(const Block & from, Block & to)
static void appendBlock(Poco::Logger * log, const Block & from, Block & to)
{
size_t rows = from.rows();
size_t old_rows = to.rows();
@ -456,7 +456,24 @@ static void appendBlock(const Block & from, Block & to)
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
{
const IColumn & col_from = *from.getByPosition(column_no).column.get();
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
{
/// Usually IColumn::mutate() here will simply move pointers,
/// however in case of parallel reading from it via SELECT, it
/// is possible for the full IColumn::clone() here, and in this
/// case it may fail due to MEMORY_LIMIT_EXCEEDED, and this
/// breaks the rollback, since the column got lost, it is
/// neither in last_col nor in "to" block.
///
/// The safest option here, is to do a full clone every time,
/// however, it is overhead. And it looks like the only
/// exception that is possible here is MEMORY_LIMIT_EXCEEDED,
/// and it is better to simply suppress it, to avoid overhead
/// for every INSERT into Buffer (Anyway we have a
/// LOGICAL_ERROR in rollback that will bail if something else
/// will happens here).
LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global);
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
}
/// In case of ColumnAggregateFunction aggregate states will
/// be allocated from the query context but can be destroyed from the
@ -468,7 +485,10 @@ static void appendBlock(const Block & from, Block & to)
last_col->ensureOwnership();
last_col->insertRangeFrom(col_from, 0, rows);
to.getByPosition(column_no).column = std::move(last_col);
{
DENY_ALLOCATIONS_IN_SCOPE;
to.getByPosition(column_no).column = std::move(last_col);
}
}
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows);
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, to.bytes() - old_bytes);
@ -481,6 +501,9 @@ static void appendBlock(const Block & from, Block & to)
/// So ignore any memory limits, even global (since memory tracking has drift).
LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global);
/// But first log exception to get more details in case of LOGICAL_ERROR
tryLogCurrentException(log, "Caught exception while adding data to buffer, rolling back...");
try
{
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
@ -625,7 +648,7 @@ private:
size_t old_rows = buffer.data.rows();
size_t old_bytes = buffer.data.allocatedBytes();
appendBlock(sorted_block, buffer.data);
appendBlock(storage.log, sorted_block, buffer.data);
storage.total_writes.rows += (buffer.data.rows() - old_rows);
storage.total_writes.bytes += (buffer.data.allocatedBytes() - old_bytes);

View File

@ -1,8 +1,14 @@
import pytest
# FIXME This test is too flaky
# https://github.com/ClickHouse/ClickHouse/issues/43541
pytestmark = pytest.mark.skip
import logging
import os
import time
import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

View File

@ -0,0 +1,27 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Regression test for 'Logical error: No column to rollback' in case of
# exception while commiting batch into the Buffer, see [1].
#
# [1]: https://github.com/ClickHouse/ClickHouse/issues/42740
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_string"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_string(key String) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)"
# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors
# --concurrency -- we need have SELECT and INSERT in parallel to have refcount
# of the column in the Buffer block > 1, that way we will do
# full clone and moving a column may throw.
#
# It reproduces the problem 100% with MemoryTrackerFaultInjectorInThread in the appendBlock()
$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null <<EOL
INSERT INTO t_buffer_string SELECT number::String from numbers(10000)
SELECT * FROM t_buffer_string
EOL
$CLICKHOUSE_CLIENT -q "DROP TABLE t_buffer_string"

View File

@ -1,6 +1,10 @@
#!/usr/bin/env bash
# Tags: no-fasttest
# Regression test for incorrect mutation of Map() column, see [1].
#
# [1]: https://github.com/ClickHouse/ClickHouse/issues/30546
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
@ -8,29 +12,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_map"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_map(m1 Map(String, UInt64), m2 Map(String, String)) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)"
function insert1
{
while true; do
$CLICKHOUSE_CLIENT -q "INSERT INTO t_buffer_map SELECT (range(10), range(10)), (range(10), range(10)) from numbers(100)"
done
}
function select1
{
while true; do
$CLICKHOUSE_CLIENT -q "SELECT * FROM t_buffer_map" 2> /dev/null > /dev/null
done
}
TIMEOUT=10
export -f insert1
export -f select1
timeout $TIMEOUT bash -c insert1 &
timeout $TIMEOUT bash -c select1 &
wait
# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors
$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null <<EOL
INSERT INTO t_buffer_map SELECT (range(10), range(10)), (range(10), range(10)) from numbers(100)
SELECT * FROM t_buffer_map
EOL
echo "OK"
$CLICKHOUSE_CLIENT -q "DROP TABLE t_buffer_map"

View File

@ -1,6 +1,6 @@
SET allow_experimental_analyzer = 1;
EXPLAIN QUERY TREE SELECT 1;
EXPLAIN QUERY TREE run_passes = 0 SELECT 1;
SELECT '--';
@ -13,7 +13,7 @@ CREATE TABLE test_table
INSERT INTO test_table VALUES (0, 'Value');
EXPLAIN QUERY TREE SELECT id, value FROM test_table;
EXPLAIN QUERY TREE run_passes = 0 SELECT id, value FROM test_table;
SELECT '--';
@ -21,7 +21,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table;
SELECT '--';
EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table;
EXPLAIN QUERY TREE run_passes = 0 SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table;
SELECT '--';
@ -29,7 +29,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM te
SELECT '--';
EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table;
EXPLAIN QUERY TREE run_passes = 0 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table;
SELECT '--';

View File

@ -0,0 +1,44 @@
abc1 1
abc2 1
abc3 1
abc4 1
abc 4
abc ab
abc ab
abc ab
abc bc
abc bc
abc a
abc a
abc a
abc a
abc a
abc a
abc a
abc a
1 abc a
1 abc a
1 abc a
1 abc a
abc1 1
abc2 1
abc3 1
abc4 1
abc 4
abc ab
abc ab
abc ab
abc bc
abc bc
abc a
abc a
abc a
abc a
abc a
abc a
abc a
abc a
1 abc a
1 abc a
1 abc a
1 abc a

View File

@ -0,0 +1,35 @@
DROP TABLE IF EXISTS group_by_all;
CREATE TABLE group_by_all
(
a String,
b int,
c int
)
engine = Memory;
insert into group_by_all values ('abc1', 1, 1), ('abc2', 1, 1), ('abc3', 1, 1), ('abc4', 1, 1);
select a, count(b) from group_by_all group by all order by a;
select substring(a, 1, 3), count(b) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;
select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all;
SET allow_experimental_analyzer = 1;
select a, count(b) from group_by_all group by all order by a;
select substring(a, 1, 3), count(b) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all;
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;
select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all;

View File

@ -27,7 +27,7 @@ with client(name="client>", log=log) as client1:
)
client1.expect(prompt)
client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'")
client1.expect("Progress: 0.00 rows, 10.00 B.*\)")
client1.expect("Progress: 5.00 rows, 30.00 B.*\)")
client1.expect(prompt)
# send Ctrl-C

View File

@ -23,3 +23,8 @@
1M without 0 1048576
1M with 0 1048575
fuzz2 0123 4
1 0
2 \0 1
3 \0\0\0\0 4
4 abrac\0dabra\0 12
abrac\0dabra\0 12

View File

@ -90,11 +90,12 @@ SELECT '-1', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('ffffffff'
SELECT '-2', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('fffffffe') || randomString(100500), 'AggregateFunction(max, String)') as x);
SELECT '-2^31', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('00000080') || randomString(100500), 'AggregateFunction(max, String)') as x);
SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
SELECT '2^31-1', maxMerge(x) from (select CAST(unhex('ffffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
-- The following query works, but it's too long and consumes to much memory
@ -107,3 +108,14 @@ SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01'
SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA }
SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA }
SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError CORRUPTED_DATA }
drop table if exists aggr;
create table aggr (n int, s AggregateFunction(max, String)) engine=MergeTree order by n;
insert into aggr select 1, maxState('');
insert into aggr select 2, maxState('\0');
insert into aggr select 3, maxState('\0\0\0\0');
insert into aggr select 4, maxState('abrac\0dabra\0');
select n, maxMerge(s) as x, length(x) from aggr group by n order by n;
select maxMerge(s) as x, length(x) from aggr;
drop table aggr;

View File

@ -0,0 +1,36 @@
SET allow_experimental_analyzer = 1;
DROP TABLE IF EXISTS test_table_join_1;
CREATE TABLE test_table_join_1
(
id UInt8,
value String
)
ENGINE = TinyLog;
INSERT INTO test_table_join_1 VALUES (0, 'Value_0');
DROP TABLE IF EXISTS test_table_join_2;
CREATE TABLE test_table_join_2
(
id UInt16,
value String
)
ENGINE = TinyLog;
INSERT INTO test_table_join_2 VALUES (0, 'Value_1');
SELECT
toTypeName(t2_value),
t2.value AS t2_value
FROM test_table_join_1 AS t1
INNER JOIN test_table_join_2 USING (id); -- { serverError 47 };
SELECT
toTypeName(t2_value),
t2.value AS t2_value
FROM test_table_join_1 AS t1
INNER JOIN test_table_join_2 AS t2 USING (id);
DROP TABLE test_table_join_1;
DROP TABLE test_table_join_2;

View File

@ -1,6 +1,6 @@
-- https://github.com/ClickHouse/ClickHouse/issues/43247
SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)',
'AggregateFunction(min, String)')); -- { serverError 131 }
'AggregateFunction(min, String)')); -- { serverError CANNOT_READ_ALL_DATA }
-- Value from hex(minState('0123456789012345678901234567890123456789012345678901234567890123')). Size 63 + 1 (64)
SELECT finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233'),

View File

@ -1,3 +0,0 @@
NewPart NotAMerge
MergeParts RegularMerge
RemovePart NotAMerge

View File

@ -1,16 +0,0 @@
create table data_02491 (key Int) engine=MergeTree() order by tuple();
insert into data_02491 values (1);
optimize table data_02491 final;
truncate table data_02491;
system flush logs;
with (select uuid from system.tables where database = currentDatabase() and table = 'data_02491') as table_uuid_
select event_type, merge_reason from system.part_log
where
database = currentDatabase() and
table = 'data_02491' and
table_uuid = table_uuid_ and
table_uuid != toUUIDOrDefault(Null)
order by event_time_microseconds;
drop table data_02491;

View File

@ -0,0 +1,19 @@
#!/usr/bin/env bash
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh
# This is the regression test for clickhouse-local, that may use already freed
# context from the suggestion thread on error.
grep_options=(
-e ^$
-e 'Cannot create file: /no/such/directory'
-e 'Cannot load data for command line suggestions:'
-e 'ClickHouse local version'
)
ASAN_OPTIONS=$ASAN_OPTIONS:exitcode=3 $CLICKHOUSE_LOCAL --history_file /no/such/directory |& grep -v "${grep_options[@]}"
# on sanitizer error the code will be not 1, but 3
echo $?

View File

@ -0,0 +1,28 @@
0
--
0
1
--
1
2
--
(1) 0
--
(0,1) 0
--
(1,2) 1
(1,2) 2
--
(1) 0
--
(0,1) 0
--
(1,2) 1
(1,2) 2
--
('1') 0
--
('0','1') 0
--
('1','2') 1
('1','2') 2

View File

@ -0,0 +1,47 @@
SET allow_experimental_analyzer = 1;
SELECT number FROM numbers(untuple(tuple(1)));
SELECT '--';
SELECT number FROM numbers(untuple(tuple(0, 2)));
SELECT '--';
SELECT number FROM numbers(untuple(tuple(1, 2)));
SELECT '--';
SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(untuple(value));
SELECT '--';
SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value));
SELECT '--';
SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value));
SELECT '--';
SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(value.*);
SELECT '--';
SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*);
SELECT '--';
SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*);
SELECT '--';
SELECT cast(tuple('1'), 'Tuple(value String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));
SELECT '--';
SELECT cast(tuple('0', '1'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));
SELECT '--';
SELECT cast(tuple('1', '2'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));

View File

@ -1,4 +1,5 @@
#!/usr/bin/env bash
# Tags: disabled
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh

View File

@ -0,0 +1,11 @@
SELECT count()
FROM
(
SELECT DISTINCT
Title,
SearchPhrase
FROM test.hits
WHERE (SearchPhrase != '') AND (NOT match(Title, '[а-яА-ЯёЁ]')) AND (NOT match(SearchPhrase, '[а-яА-ЯёЁ]'))
LIMIT 1 BY Title
LIMIT 10
);