mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Merge branch 'master' into perf_experiment
This commit is contained in:
commit
4d3217cb18
@ -442,8 +442,9 @@ elseif (OS_DARWIN)
|
||||
include(cmake/darwin/default_libs.cmake)
|
||||
elseif (OS_FREEBSD)
|
||||
include(cmake/freebsd/default_libs.cmake)
|
||||
else()
|
||||
link_libraries(global-group)
|
||||
endif ()
|
||||
link_libraries(global-group)
|
||||
|
||||
if (NOT (OS_LINUX OR OS_DARWIN))
|
||||
# Using system libs can cause a lot of warnings in includes (on macro expansion).
|
||||
@ -592,7 +593,7 @@ add_subdirectory (programs)
|
||||
add_subdirectory (tests)
|
||||
add_subdirectory (utils)
|
||||
|
||||
include (cmake/sanitize_target_link_libraries.cmake)
|
||||
include (cmake/sanitize_targets.cmake)
|
||||
|
||||
# Build native targets if necessary
|
||||
get_property(NATIVE_BUILD_TARGETS GLOBAL PROPERTY NATIVE_BUILD_TARGETS)
|
||||
|
@ -220,13 +220,13 @@ struct statx {
|
||||
uint32_t stx_dev_minor;
|
||||
uint64_t spare[14];
|
||||
};
|
||||
#endif
|
||||
|
||||
int statx(int fd, const char *restrict path, int flag,
|
||||
unsigned int mask, struct statx *restrict statxbuf)
|
||||
{
|
||||
return syscall(SYS_statx, fd, path, flag, mask, statxbuf);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#include <syscall.h>
|
||||
|
@ -23,6 +23,7 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
include (cmake/cxx.cmake)
|
||||
link_libraries(global-group)
|
||||
|
||||
target_link_libraries(global-group INTERFACE
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
|
@ -24,6 +24,7 @@ find_package(Threads REQUIRED)
|
||||
|
||||
include (cmake/unwind.cmake)
|
||||
include (cmake/cxx.cmake)
|
||||
link_libraries(global-group)
|
||||
|
||||
target_link_libraries(global-group INTERFACE
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
|
@ -34,6 +34,13 @@ set(CMAKE_C_STANDARD_LIBRARIES ${DEFAULT_LIBS})
|
||||
set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
|
||||
include (cmake/unwind.cmake)
|
||||
include (cmake/cxx.cmake)
|
||||
|
||||
# Delay the call to link the global interface after the libc++ libraries are included to avoid circular dependencies
|
||||
# which are ok with static libraries but not with dynamic ones
|
||||
link_libraries(global-group)
|
||||
|
||||
if (NOT OS_ANDROID)
|
||||
if (NOT USE_MUSL)
|
||||
# Our compatibility layer doesn't build under Android, many errors in musl.
|
||||
@ -42,9 +49,6 @@ if (NOT OS_ANDROID)
|
||||
add_subdirectory(base/harmful)
|
||||
endif ()
|
||||
|
||||
include (cmake/unwind.cmake)
|
||||
include (cmake/cxx.cmake)
|
||||
|
||||
target_link_libraries(global-group INTERFACE
|
||||
-Wl,--start-group
|
||||
$<TARGET_PROPERTY:global-libs,INTERFACE_LINK_LIBRARIES>
|
||||
|
@ -1,3 +1,13 @@
|
||||
# https://stackoverflow.com/a/62311397/328260
|
||||
macro (get_all_targets_recursive targets dir)
|
||||
get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
|
||||
foreach (subdir ${subdirectories})
|
||||
get_all_targets_recursive (${targets} ${subdir})
|
||||
endforeach ()
|
||||
get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
|
||||
list (APPEND ${targets} ${current_targets})
|
||||
endmacro ()
|
||||
|
||||
# When you will try to link target with the directory (that exists), cmake will
|
||||
# skip this without an error, only the following warning will be reported:
|
||||
#
|
||||
@ -18,23 +28,12 @@
|
||||
# -- but cannot be used with link_libraries()
|
||||
# - use BUILDSYSTEM_TARGETS property to get list of all targets and sanitize
|
||||
# -- this will work.
|
||||
|
||||
# https://stackoverflow.com/a/62311397/328260
|
||||
function (get_all_targets var)
|
||||
set (targets)
|
||||
get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
set (${var} ${targets} PARENT_SCOPE)
|
||||
endfunction()
|
||||
macro (get_all_targets_recursive targets dir)
|
||||
get_property (subdirectories DIRECTORY ${dir} PROPERTY SUBDIRECTORIES)
|
||||
foreach (subdir ${subdirectories})
|
||||
get_all_targets_recursive (${targets} ${subdir})
|
||||
endforeach ()
|
||||
get_property (current_targets DIRECTORY ${dir} PROPERTY BUILDSYSTEM_TARGETS)
|
||||
list (APPEND ${targets} ${current_targets})
|
||||
endmacro ()
|
||||
|
||||
macro (sanitize_link_libraries target)
|
||||
function (sanitize_link_libraries target)
|
||||
get_target_property(target_type ${target} TYPE)
|
||||
if (${target_type} STREQUAL "INTERFACE_LIBRARY")
|
||||
get_property(linked_libraries TARGET ${target} PROPERTY INTERFACE_LINK_LIBRARIES)
|
||||
@ -48,9 +47,35 @@ macro (sanitize_link_libraries target)
|
||||
message(FATAL_ERROR "${target} requested to link with directory: ${linked_library}")
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
endfunction()
|
||||
get_all_targets (all_targets)
|
||||
foreach (target ${all_targets})
|
||||
sanitize_link_libraries(${target})
|
||||
endforeach()
|
||||
|
||||
#
|
||||
# Do not allow to define -W* from contrib publically (INTERFACE/PUBLIC).
|
||||
#
|
||||
function (get_contrib_targets var)
|
||||
set (targets)
|
||||
get_all_targets_recursive (targets ${CMAKE_CURRENT_SOURCE_DIR}/contrib)
|
||||
set (${var} ${targets} PARENT_SCOPE)
|
||||
endfunction()
|
||||
function (sanitize_interface_flags target)
|
||||
get_target_property(target_type ${target} TYPE)
|
||||
get_property(compile_definitions TARGET ${target} PROPERTY INTERFACE_COMPILE_DEFINITIONS)
|
||||
get_property(compile_options TARGET ${target} PROPERTY INTERFACE_COMPILE_OPTIONS)
|
||||
if (NOT "${compile_options}" STREQUAL "")
|
||||
message(FATAL_ERROR "${target} set INTERFACE_COMPILE_OPTIONS to ${compile_options}. This is forbidden.")
|
||||
endif()
|
||||
if ("${compile_definitions}" MATCHES "-Wl,")
|
||||
# linker option - OK
|
||||
elseif ("${compile_definitions}" MATCHES "-W")
|
||||
message(FATAL_ERROR "${target} contains ${compile_definitions} flags in INTERFACE_COMPILE_DEFINITIONS. This is forbidden.")
|
||||
endif()
|
||||
endfunction()
|
||||
get_contrib_targets (contrib_targets)
|
||||
foreach (contrib_target ${contrib_targets})
|
||||
sanitize_interface_flags(${contrib_target})
|
||||
endforeach()
|
||||
|
@ -57,7 +57,7 @@ add_library(cxx ${SRCS})
|
||||
set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")
|
||||
|
||||
target_include_directories(cxx SYSTEM BEFORE PRIVATE $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
|
||||
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
|
||||
target_include_directories(cxx SYSTEM BEFORE PUBLIC $<$<COMPILE_LANGUAGE:CXX>:$<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>>)
|
||||
target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)
|
||||
|
||||
# Enable capturing stack traces for all exceptions.
|
||||
|
@ -11,6 +11,14 @@ Projections store data in a format that optimizes query execution, this feature
|
||||
|
||||
You can define one or more projections for a table, and during the query analysis the projection with the least data to scan will be selected by ClickHouse without modifying the query provided by the user.
|
||||
|
||||
:::note Disk usage
|
||||
|
||||
Projections will create internally a new hidden table, this means that more IO and space on disk will be required.
|
||||
Example, If the projection has defined a different primary key, all the data from the original table will be duplicated.
|
||||
:::
|
||||
|
||||
You can see more technical details about how projections work internally on this [page](/docs/en/guides/improving-query-performance/sparse-primary-indexes/sparse-primary-indexes-multiple.md/#option-3-projections).
|
||||
|
||||
## Example filtering without using primary keys
|
||||
|
||||
Creating the table:
|
||||
|
@ -60,7 +60,7 @@ If you specify `POPULATE`, the existing table data is inserted into the view whe
|
||||
|
||||
A `SELECT` query can contain `DISTINCT`, `GROUP BY`, `ORDER BY`, `LIMIT`. Note that the corresponding conversions are performed independently on each block of inserted data. For example, if `GROUP BY` is set, data is aggregated during insertion, but only within a single packet of inserted data. The data won’t be further aggregated. The exception is when using an `ENGINE` that independently performs data aggregation, such as `SummingMergeTree`.
|
||||
|
||||
The execution of [ALTER](../../../sql-reference/statements/alter/view.md) queries on materialized views has limitations, so they might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
|
||||
The execution of [ALTER](/docs/en/sql-reference/statements/alter/view.md) queries on materialized views has limitations, for example, you can not update the `SELECT` query, so this might be inconvenient. If the materialized view uses the construction `TO [db.]name`, you can `DETACH` the view, run `ALTER` for the target table, and then `ATTACH` the previously detached (`DETACH`) view.
|
||||
|
||||
Note that materialized view is influenced by [optimize_on_insert](../../../operations/settings/settings.md#optimize-on-insert) setting. The data is merged before the insertion into a view.
|
||||
|
||||
|
@ -47,6 +47,7 @@ Union
|
||||
|
||||
- `AST` — Abstract syntax tree.
|
||||
- `SYNTAX` — Query text after AST-level optimizations.
|
||||
- `QUERY TREE` — Query tree after Query Tree level optimizations.
|
||||
- `PLAN` — Query execution plan.
|
||||
- `PIPELINE` — Query execution pipeline.
|
||||
|
||||
@ -110,6 +111,32 @@ FROM
|
||||
CROSS JOIN system.numbers AS c
|
||||
```
|
||||
|
||||
### EXPLAIN QUERY TREE
|
||||
|
||||
Settings:
|
||||
|
||||
- `run_passes` — Run all query tree passes before dumping the query tree. Defaul: `1`.
|
||||
- `dump_passes` — Dump information about used passes before dumping the query tree. Default: `0`.
|
||||
- `passes` — Specifies how many passes to run. If set to `-1`, runs all the passes. Default: `-1`.
|
||||
|
||||
Example:
|
||||
```sql
|
||||
EXPLAIN QUERY TREE SELECT id, value FROM test_table;
|
||||
```
|
||||
|
||||
```
|
||||
QUERY id: 0
|
||||
PROJECTION COLUMNS
|
||||
id UInt64
|
||||
value String
|
||||
PROJECTION
|
||||
LIST id: 1, nodes: 2
|
||||
COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3
|
||||
COLUMN id: 4, column_name: value, result_type: String, source_id: 3
|
||||
JOIN TREE
|
||||
TABLE id: 3, table_name: default.test_table
|
||||
```
|
||||
|
||||
### EXPLAIN PLAN
|
||||
|
||||
Dump query plan steps.
|
||||
|
@ -243,6 +243,54 @@ If `max_rows_to_group_by` and `group_by_overflow_mode = 'any'` are not used, all
|
||||
|
||||
You can use `WITH TOTALS` in subqueries, including subqueries in the [JOIN](../../../sql-reference/statements/select/join.md) clause (in this case, the respective total values are combined).
|
||||
|
||||
## GROUP BY ALL
|
||||
|
||||
`GROUP BY ALL` is equivalent to listing all the SELECT-ed expressions that are not aggregate functions.
|
||||
|
||||
For example:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
is the same as
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY a * 2, b
|
||||
```
|
||||
|
||||
For a special case that if there is a function having both aggregate functions and other fields as its arguments, the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
|
||||
|
||||
For example:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2),
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
is the same as
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2),
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
|
||||
```
|
||||
|
||||
## Examples
|
||||
|
||||
Example:
|
||||
|
@ -98,7 +98,7 @@ ClickHouse предоставляет возможность аутентифи
|
||||
|
||||
|
||||
:::danger "Важно"
|
||||
Если пользователь настроен для Kerberos-аутентификации, другие виды уатентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
|
||||
Если пользователь настроен для Kerberos-аутентификации, другие виды аутентификации будут для него недоступны. Если наряду с `kerberos` в определении пользователя будет указан какой-либо другой способ аутентификации, ClickHouse завершит работу.
|
||||
|
||||
:::info ""
|
||||
Ещё раз отметим, что кроме `users.xml`, необходимо также включить Kerberos в `config.xml`.
|
||||
|
@ -74,7 +74,7 @@ Kafka 特性:
|
||||
|
||||
消费的消息会被自动追踪,因此每个消息在不同的消费组里只会记录一次。如果希望获得两次数据,则使用另一个组名创建副本。
|
||||
|
||||
消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 http://kafka.apache.org/intro。
|
||||
消费组可以灵活配置并且在集群之间同步。例如,如果群集中有10个主题和5个表副本,则每个副本将获得2个主题。 如果副本数量发生变化,主题将自动在副本中重新分配。了解更多信息请访问 [http://kafka.apache.org/intro](http://kafka.apache.org/intro)。
|
||||
|
||||
`SELECT` 查询对于读取消息并不是很有用(调试除外),因为每条消息只能被读取一次。使用物化视图创建实时线程更实用。您可以这样做:
|
||||
|
||||
|
@ -77,6 +77,54 @@ sidebar_label: GROUP BY
|
||||
|
||||
您可以使用 `WITH TOTALS` 在子查询中,包括在子查询 [JOIN](../../../sql-reference/statements/select/join.md) 子句(在这种情况下,将各自的总值合并)。
|
||||
|
||||
## GROUP BY ALL {#group-by-all}
|
||||
|
||||
`GROUP BY ALL` 相当于对所有被查询的并且不被聚合函数使用的字段进行`GROUP BY`。
|
||||
|
||||
例如
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
效果等同于
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
a * 2,
|
||||
b,
|
||||
count(c),
|
||||
FROM t
|
||||
GROUP BY a * 2, b
|
||||
```
|
||||
|
||||
对于一种特殊情况,如果一个 function 的参数中同时有聚合函数和其他字段,会对参数中能提取的最大非聚合字段进行`GROUP BY`。
|
||||
|
||||
例如:
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2),
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY ALL
|
||||
```
|
||||
|
||||
效果等同于
|
||||
|
||||
``` sql
|
||||
SELECT
|
||||
substring(a, 4, 2),
|
||||
substring(substring(a, 1, 2), 1, count(b))
|
||||
FROM t
|
||||
GROUP BY substring(a, 4, 2), substring(a, 1, 2)
|
||||
```
|
||||
|
||||
## 例子 {#examples}
|
||||
|
||||
示例:
|
||||
|
@ -80,8 +80,8 @@ require (
|
||||
go.opentelemetry.io/otel v1.4.1 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.4.1 // indirect
|
||||
golang.org/x/net v0.0.0-20211108170745-6635138e15ea // indirect
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
|
||||
golang.org/x/text v0.3.8 // indirect
|
||||
google.golang.org/genproto v0.0.0-20211208223120-3a66f561d7aa // indirect
|
||||
google.golang.org/grpc v1.43.0 // indirect
|
||||
google.golang.org/protobuf v1.27.1 // indirect
|
||||
|
@ -1078,8 +1078,8 @@ golang.org/x/sys v0.0.0-20211109184856-51b60fd695b3/go.mod h1:oPkhp1MJrh7nUepCBc
|
||||
golang.org/x/sys v0.0.0-20211110154304-99a53858aa08/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211205182925-97ca703d548d/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9 h1:XfKQ4OlFl8okEOr5UvAqFRVj8pY/4yfcXrddB8qAbU0=
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
|
||||
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
@ -1089,8 +1089,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.3.8 h1:nAL+RVCQ9uMn3vJZbV+MRnydTJFPf8qqY42YiA6MrqY=
|
||||
golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ=
|
||||
golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <Access/LDAPClient.h>
|
||||
#include <Common/Exception.h>
|
||||
#include <Common/quoteString.h>
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
#include <Poco/Util/AbstractConfiguration.h>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
|
||||
@ -73,6 +75,7 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
|
||||
const bool has_tls_ca_cert_file = config.has(ldap_server_config + ".tls_ca_cert_file");
|
||||
const bool has_tls_ca_cert_dir = config.has(ldap_server_config + ".tls_ca_cert_dir");
|
||||
const bool has_tls_cipher_suite = config.has(ldap_server_config + ".tls_cipher_suite");
|
||||
const bool has_search_limit = config.has(ldap_server_config + ".search_limit");
|
||||
|
||||
if (!has_host)
|
||||
throw Exception("Missing 'host' entry", ErrorCodes::BAD_ARGUMENTS);
|
||||
@ -91,8 +94,8 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
|
||||
}
|
||||
else if (has_auth_dn_prefix || has_auth_dn_suffix)
|
||||
{
|
||||
const auto auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix");
|
||||
const auto auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix");
|
||||
std::string auth_dn_prefix = config.getString(ldap_server_config + ".auth_dn_prefix");
|
||||
std::string auth_dn_suffix = config.getString(ldap_server_config + ".auth_dn_suffix");
|
||||
params.bind_dn = auth_dn_prefix + "{user_name}" + auth_dn_suffix;
|
||||
}
|
||||
|
||||
@ -176,14 +179,17 @@ void parseLDAPServer(LDAPClient::Params & params, const Poco::Util::AbstractConf
|
||||
|
||||
if (has_port)
|
||||
{
|
||||
const auto port = config.getInt64(ldap_server_config + ".port");
|
||||
if (port < 0 || port > 65535)
|
||||
UInt32 port = config.getUInt(ldap_server_config + ".port");
|
||||
if (port > 65535)
|
||||
throw Exception("Bad value for 'port' entry", ErrorCodes::BAD_ARGUMENTS);
|
||||
|
||||
params.port = port;
|
||||
}
|
||||
else
|
||||
params.port = (params.enable_tls == LDAPClient::Params::TLSEnable::YES ? 636 : 389);
|
||||
|
||||
if (has_search_limit)
|
||||
params.search_limit = static_cast<UInt32>(config.getUInt64(ldap_server_config + ".search_limit"));
|
||||
}
|
||||
|
||||
void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util::AbstractConfiguration & config)
|
||||
@ -313,11 +319,26 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur
|
||||
}
|
||||
}
|
||||
|
||||
UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params)
|
||||
{
|
||||
SipHash hash;
|
||||
params.updateHash(hash);
|
||||
if (role_search_params)
|
||||
{
|
||||
for (const auto & params_instance : *role_search_params)
|
||||
{
|
||||
params_instance.updateHash(hash);
|
||||
}
|
||||
}
|
||||
|
||||
return hash.get128();
|
||||
}
|
||||
|
||||
bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const BasicCredentials & credentials,
|
||||
const LDAPClient::RoleSearchParamsList * role_search_params, LDAPClient::SearchResultsList * role_search_results) const
|
||||
{
|
||||
std::optional<LDAPClient::Params> params;
|
||||
std::size_t params_hash = 0;
|
||||
UInt128 params_hash = 0;
|
||||
|
||||
{
|
||||
std::scoped_lock lock(mutex);
|
||||
@ -331,14 +352,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B
|
||||
params->user = credentials.getUserName();
|
||||
params->password = credentials.getPassword();
|
||||
|
||||
params->combineCoreHash(params_hash);
|
||||
if (role_search_params)
|
||||
{
|
||||
for (const auto & params_instance : *role_search_params)
|
||||
{
|
||||
params_instance.combineHash(params_hash);
|
||||
}
|
||||
}
|
||||
params_hash = computeParamsHash(*params, role_search_params);
|
||||
|
||||
// Check the cache, but only if the caching is enabled at all.
|
||||
if (params->verification_cooldown > std::chrono::seconds{0})
|
||||
@ -408,15 +422,7 @@ bool ExternalAuthenticators::checkLDAPCredentials(const String & server, const B
|
||||
new_params.user = credentials.getUserName();
|
||||
new_params.password = credentials.getPassword();
|
||||
|
||||
std::size_t new_params_hash = 0;
|
||||
new_params.combineCoreHash(new_params_hash);
|
||||
if (role_search_params)
|
||||
{
|
||||
for (const auto & params_instance : *role_search_params)
|
||||
{
|
||||
params_instance.combineHash(new_params_hash);
|
||||
}
|
||||
}
|
||||
const UInt128 new_params_hash = computeParamsHash(new_params, role_search_params);
|
||||
|
||||
// If the critical server params have changed while we were checking the password, we discard the current result.
|
||||
if (params_hash != new_params_hash)
|
||||
|
@ -2,10 +2,10 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <base/scope_guard.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/SipHash.h>
|
||||
|
||||
#include <Poco/Logger.h>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/container_hash/hash.hpp>
|
||||
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
@ -15,6 +15,22 @@
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
template <typename T, typename = std::enable_if_t<std::is_fundamental_v<std::decay_t<T>>>>
|
||||
void updateHash(SipHash & hash, const T & value)
|
||||
{
|
||||
hash.update(value);
|
||||
}
|
||||
|
||||
void updateHash(SipHash & hash, const std::string & value)
|
||||
{
|
||||
hash.update(value.size());
|
||||
hash.update(value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -26,30 +42,30 @@ namespace ErrorCodes
|
||||
extern const int LDAP_ERROR;
|
||||
}
|
||||
|
||||
void LDAPClient::SearchParams::combineHash(std::size_t & seed) const
|
||||
void LDAPClient::SearchParams::updateHash(SipHash & hash) const
|
||||
{
|
||||
boost::hash_combine(seed, base_dn);
|
||||
boost::hash_combine(seed, static_cast<int>(scope));
|
||||
boost::hash_combine(seed, search_filter);
|
||||
boost::hash_combine(seed, attribute);
|
||||
::updateHash(hash, base_dn);
|
||||
::updateHash(hash, static_cast<int>(scope));
|
||||
::updateHash(hash, search_filter);
|
||||
::updateHash(hash, attribute);
|
||||
}
|
||||
|
||||
void LDAPClient::RoleSearchParams::combineHash(std::size_t & seed) const
|
||||
void LDAPClient::RoleSearchParams::updateHash(SipHash & hash) const
|
||||
{
|
||||
SearchParams::combineHash(seed);
|
||||
boost::hash_combine(seed, prefix);
|
||||
SearchParams::updateHash(hash);
|
||||
::updateHash(hash, prefix);
|
||||
}
|
||||
|
||||
void LDAPClient::Params::combineCoreHash(std::size_t & seed) const
|
||||
void LDAPClient::Params::updateHash(SipHash & hash) const
|
||||
{
|
||||
boost::hash_combine(seed, host);
|
||||
boost::hash_combine(seed, port);
|
||||
boost::hash_combine(seed, bind_dn);
|
||||
boost::hash_combine(seed, user);
|
||||
boost::hash_combine(seed, password);
|
||||
::updateHash(hash, host);
|
||||
::updateHash(hash, port);
|
||||
::updateHash(hash, bind_dn);
|
||||
::updateHash(hash, user);
|
||||
::updateHash(hash, password);
|
||||
|
||||
if (user_dn_detection)
|
||||
user_dn_detection->combineHash(seed);
|
||||
user_dn_detection->updateHash(hash);
|
||||
}
|
||||
|
||||
LDAPClient::LDAPClient(const Params & params_)
|
||||
@ -153,13 +169,13 @@ namespace
|
||||
|
||||
}
|
||||
|
||||
void LDAPClient::diag(int rc, String text)
|
||||
void LDAPClient::handleError(int result_code, String text)
|
||||
{
|
||||
std::scoped_lock lock(ldap_global_mutex);
|
||||
|
||||
if (rc != LDAP_SUCCESS)
|
||||
if (result_code != LDAP_SUCCESS)
|
||||
{
|
||||
const char * raw_err_str = ldap_err2string(rc);
|
||||
const char * raw_err_str = ldap_err2string(result_code);
|
||||
if (raw_err_str && *raw_err_str != '\0')
|
||||
{
|
||||
if (!text.empty())
|
||||
@ -214,7 +230,7 @@ bool LDAPClient::openConnection()
|
||||
|
||||
SCOPE_EXIT({ ldap_memfree(uri); });
|
||||
|
||||
diag(ldap_initialize(&handle, uri));
|
||||
handleError(ldap_initialize(&handle, uri));
|
||||
if (!handle)
|
||||
throw Exception("ldap_initialize() failed", ErrorCodes::LDAP_ERROR);
|
||||
}
|
||||
@ -226,13 +242,13 @@ bool LDAPClient::openConnection()
|
||||
case LDAPClient::Params::ProtocolVersion::V2: value = LDAP_VERSION2; break;
|
||||
case LDAPClient::Params::ProtocolVersion::V3: value = LDAP_VERSION3; break;
|
||||
}
|
||||
diag(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_PROTOCOL_VERSION, &value));
|
||||
}
|
||||
|
||||
diag(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_RESTART, LDAP_OPT_ON));
|
||||
|
||||
#ifdef LDAP_OPT_KEEPCONN
|
||||
diag(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_KEEPCONN, LDAP_OPT_ON));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_TIMEOUT
|
||||
@ -240,7 +256,7 @@ bool LDAPClient::openConnection()
|
||||
::timeval operation_timeout;
|
||||
operation_timeout.tv_sec = params.operation_timeout.count();
|
||||
operation_timeout.tv_usec = 0;
|
||||
diag(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_TIMEOUT, &operation_timeout));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -249,18 +265,18 @@ bool LDAPClient::openConnection()
|
||||
::timeval network_timeout;
|
||||
network_timeout.tv_sec = params.network_timeout.count();
|
||||
network_timeout.tv_usec = 0;
|
||||
diag(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_NETWORK_TIMEOUT, &network_timeout));
|
||||
}
|
||||
#endif
|
||||
|
||||
{
|
||||
const int search_timeout = static_cast<int>(params.search_timeout.count());
|
||||
diag(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_TIMELIMIT, &search_timeout));
|
||||
}
|
||||
|
||||
{
|
||||
const int size_limit = params.search_limit;
|
||||
diag(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit));
|
||||
const int size_limit = static_cast<int>(params.search_limit);
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_SIZELIMIT, &size_limit));
|
||||
}
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_PROTOCOL_MIN
|
||||
@ -274,7 +290,7 @@ bool LDAPClient::openConnection()
|
||||
case LDAPClient::Params::TLSProtocolVersion::TLS1_1: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_1; break;
|
||||
case LDAPClient::Params::TLSProtocolVersion::TLS1_2: value = LDAP_OPT_X_TLS_PROTOCOL_TLS1_2; break;
|
||||
}
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_PROTOCOL_MIN, &value));
|
||||
}
|
||||
#endif
|
||||
|
||||
@ -288,44 +304,44 @@ bool LDAPClient::openConnection()
|
||||
case LDAPClient::Params::TLSRequireCert::TRY: value = LDAP_OPT_X_TLS_TRY; break;
|
||||
case LDAPClient::Params::TLSRequireCert::DEMAND: value = LDAP_OPT_X_TLS_DEMAND; break;
|
||||
}
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_REQUIRE_CERT, &value));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_CERTFILE
|
||||
if (!params.tls_cert_file.empty())
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str()));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CERTFILE, params.tls_cert_file.c_str()));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_KEYFILE
|
||||
if (!params.tls_key_file.empty())
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str()));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_KEYFILE, params.tls_key_file.c_str()));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_CACERTFILE
|
||||
if (!params.tls_ca_cert_file.empty())
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str()));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTFILE, params.tls_ca_cert_file.c_str()));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_CACERTDIR
|
||||
if (!params.tls_ca_cert_dir.empty())
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str()));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CACERTDIR, params.tls_ca_cert_dir.c_str()));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_CIPHER_SUITE
|
||||
if (!params.tls_cipher_suite.empty())
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str()));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_CIPHER_SUITE, params.tls_cipher_suite.c_str()));
|
||||
#endif
|
||||
|
||||
#ifdef LDAP_OPT_X_TLS_NEWCTX
|
||||
{
|
||||
const int i_am_a_server = 0;
|
||||
diag(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server));
|
||||
handleError(ldap_set_option(handle, LDAP_OPT_X_TLS_NEWCTX, &i_am_a_server));
|
||||
}
|
||||
#endif
|
||||
|
||||
if (params.enable_tls == LDAPClient::Params::TLSEnable::YES_STARTTLS)
|
||||
diag(ldap_start_tls_s(handle, nullptr, nullptr));
|
||||
handleError(ldap_start_tls_s(handle, nullptr, nullptr));
|
||||
|
||||
final_user_name = escapeForDN(params.user);
|
||||
final_bind_dn = replacePlaceholders(params.bind_dn, { {"{user_name}", final_user_name} });
|
||||
@ -346,7 +362,7 @@ bool LDAPClient::openConnection()
|
||||
if (rc == LDAP_INVALID_CREDENTIALS)
|
||||
return false;
|
||||
|
||||
diag(rc);
|
||||
handleError(rc);
|
||||
}
|
||||
|
||||
// Once bound, run the user DN search query and update the default value, if asked.
|
||||
@ -425,7 +441,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
|
||||
}
|
||||
});
|
||||
|
||||
diag(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs));
|
||||
handleError(ldap_search_ext_s(handle, final_base_dn.c_str(), scope, final_search_filter.c_str(), attrs, 0, nullptr, nullptr, &timeout, params.search_limit, &msgs));
|
||||
|
||||
for (
|
||||
auto * msg = ldap_first_message(handle, msgs);
|
||||
@ -452,7 +468,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
|
||||
|
||||
::berval bv;
|
||||
|
||||
diag(ldap_get_dn_ber(handle, msg, &ber, &bv));
|
||||
handleError(ldap_get_dn_ber(handle, msg, &ber, &bv));
|
||||
|
||||
if (bv.bv_val && bv.bv_len > 0)
|
||||
result.emplace(bv.bv_val, bv.bv_len);
|
||||
@ -504,7 +520,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
|
||||
case LDAP_RES_SEARCH_REFERENCE:
|
||||
{
|
||||
char ** referrals = nullptr;
|
||||
diag(ldap_parse_reference(handle, msg, &referrals, nullptr, 0));
|
||||
handleError(ldap_parse_reference(handle, msg, &referrals, nullptr, 0));
|
||||
|
||||
if (referrals)
|
||||
{
|
||||
@ -528,7 +544,7 @@ LDAPClient::SearchResults LDAPClient::search(const SearchParams & search_params)
|
||||
char * matched_msg = nullptr;
|
||||
char * error_msg = nullptr;
|
||||
|
||||
diag(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0));
|
||||
handleError(ldap_parse_result(handle, msg, &rc, &matched_msg, &error_msg, nullptr, nullptr, 0));
|
||||
|
||||
if (rc != LDAP_SUCCESS)
|
||||
{
|
||||
@ -610,7 +626,7 @@ bool LDAPSimpleAuthClient::authenticate(const RoleSearchParamsList * role_search
|
||||
|
||||
#else // USE_LDAP
|
||||
|
||||
void LDAPClient::diag(const int, String)
|
||||
void LDAPClient::handleError(const int, String)
|
||||
{
|
||||
throw Exception("ClickHouse was built without LDAP support", ErrorCodes::FEATURE_IS_NOT_ENABLED_AT_BUILD_TIME);
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
class SipHash;
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -38,7 +39,7 @@ public:
|
||||
String search_filter;
|
||||
String attribute = "cn";
|
||||
|
||||
void combineHash(std::size_t & seed) const;
|
||||
void updateHash(SipHash & hash) const;
|
||||
};
|
||||
|
||||
struct RoleSearchParams
|
||||
@ -46,7 +47,7 @@ public:
|
||||
{
|
||||
String prefix;
|
||||
|
||||
void combineHash(std::size_t & seed) const;
|
||||
void updateHash(SipHash & hash) const;
|
||||
};
|
||||
|
||||
using RoleSearchParamsList = std::vector<RoleSearchParams>;
|
||||
@ -95,7 +96,7 @@ public:
|
||||
ProtocolVersion protocol_version = ProtocolVersion::V3;
|
||||
|
||||
String host;
|
||||
std::uint16_t port = 636;
|
||||
UInt16 port = 636;
|
||||
|
||||
TLSEnable enable_tls = TLSEnable::YES;
|
||||
TLSProtocolVersion tls_minimum_protocol_version = TLSProtocolVersion::TLS1_2;
|
||||
@ -119,9 +120,9 @@ public:
|
||||
std::chrono::seconds operation_timeout{40};
|
||||
std::chrono::seconds network_timeout{30};
|
||||
std::chrono::seconds search_timeout{20};
|
||||
std::uint32_t search_limit = 100;
|
||||
UInt32 search_limit = 256; /// An arbitrary number, no particular motivation for this value.
|
||||
|
||||
void combineCoreHash(std::size_t & seed) const;
|
||||
void updateHash(SipHash & hash) const;
|
||||
};
|
||||
|
||||
explicit LDAPClient(const Params & params_);
|
||||
@ -133,7 +134,7 @@ public:
|
||||
LDAPClient & operator= (LDAPClient &&) = delete;
|
||||
|
||||
protected:
|
||||
MAYBE_NORETURN void diag(int rc, String text = "");
|
||||
MAYBE_NORETURN void handleError(int result_code, String text = "");
|
||||
MAYBE_NORETURN bool openConnection();
|
||||
void closeConnection() noexcept;
|
||||
SearchResults search(const SearchParams & search_params);
|
||||
|
@ -30,6 +30,7 @@ namespace ErrorCodes
|
||||
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int TOO_LARGE_STRING_SIZE;
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/** Aggregate functions that store one of passed values.
|
||||
@ -485,13 +486,15 @@ struct SingleValueDataString //-V730
|
||||
private:
|
||||
using Self = SingleValueDataString;
|
||||
|
||||
Int32 size = -1; /// -1 indicates that there is no value.
|
||||
Int32 capacity = 0; /// power of two or zero
|
||||
/// 0 size indicates that there is no value. Empty string must has terminating '\0' and, therefore, size of empty string is 1
|
||||
UInt32 size = 0;
|
||||
UInt32 capacity = 0; /// power of two or zero
|
||||
char * large_data;
|
||||
|
||||
public:
|
||||
static constexpr Int32 AUTOMATIC_STORAGE_SIZE = 64;
|
||||
static constexpr Int32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data);
|
||||
static constexpr UInt32 AUTOMATIC_STORAGE_SIZE = 64;
|
||||
static constexpr UInt32 MAX_SMALL_STRING_SIZE = AUTOMATIC_STORAGE_SIZE - sizeof(size) - sizeof(capacity) - sizeof(large_data);
|
||||
static constexpr UInt32 MAX_STRING_SIZE = std::numeric_limits<Int32>::max();
|
||||
|
||||
private:
|
||||
char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero.
|
||||
@ -502,7 +505,7 @@ public:
|
||||
|
||||
bool has() const
|
||||
{
|
||||
return size >= 0;
|
||||
return size;
|
||||
}
|
||||
|
||||
private:
|
||||
@ -536,20 +539,27 @@ public:
|
||||
|
||||
void write(WriteBuffer & buf, const ISerialization & /*serialization*/) const
|
||||
{
|
||||
writeBinary(size, buf);
|
||||
if (unlikely(MAX_STRING_SIZE < size))
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "String size is too big ({}), it's a bug", size);
|
||||
|
||||
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
|
||||
Int32 size_to_write = size ? size : -1;
|
||||
writeBinary(size_to_write, buf);
|
||||
if (has())
|
||||
buf.write(getData(), size);
|
||||
}
|
||||
|
||||
void allocateLargeDataIfNeeded(Int64 size_to_reserve, Arena * arena)
|
||||
void allocateLargeDataIfNeeded(UInt32 size_to_reserve, Arena * arena)
|
||||
{
|
||||
if (capacity < size_to_reserve)
|
||||
{
|
||||
capacity = static_cast<Int32>(roundUpToPowerOfTwoOrZero(size_to_reserve));
|
||||
/// It might happen if the size was too big and the rounded value does not fit a size_t
|
||||
if (unlikely(capacity < size_to_reserve))
|
||||
if (unlikely(MAX_STRING_SIZE < size_to_reserve))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", size_to_reserve);
|
||||
|
||||
size_t rounded_capacity = roundUpToPowerOfTwoOrZero(size_to_reserve);
|
||||
chassert(rounded_capacity <= MAX_STRING_SIZE + 1); /// rounded_capacity <= 2^31
|
||||
capacity = static_cast<UInt32>(rounded_capacity);
|
||||
|
||||
/// Don't free large_data here.
|
||||
large_data = arena->alloc(capacity);
|
||||
}
|
||||
@ -557,31 +567,28 @@ public:
|
||||
|
||||
void read(ReadBuffer & buf, const ISerialization & /*serialization*/, Arena * arena)
|
||||
{
|
||||
Int32 rhs_size;
|
||||
readBinary(rhs_size, buf);
|
||||
/// For serialization we use signed Int32 (for historical reasons), -1 means "no value"
|
||||
Int32 rhs_size_signed;
|
||||
readBinary(rhs_size_signed, buf);
|
||||
|
||||
if (rhs_size < 0)
|
||||
if (rhs_size_signed < 0)
|
||||
{
|
||||
/// Don't free large_data here.
|
||||
size = rhs_size;
|
||||
size = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
UInt32 rhs_size = rhs_size_signed;
|
||||
if (rhs_size <= MAX_SMALL_STRING_SIZE)
|
||||
{
|
||||
/// Don't free large_data here.
|
||||
|
||||
size = rhs_size;
|
||||
|
||||
if (size > 0)
|
||||
buf.readStrict(small_data, size);
|
||||
buf.readStrict(small_data, size);
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Reserve one byte more for null-character
|
||||
Int64 rhs_size_to_reserve = rhs_size;
|
||||
rhs_size_to_reserve += 1; /// Avoid overflow
|
||||
allocateLargeDataIfNeeded(rhs_size_to_reserve, arena);
|
||||
allocateLargeDataIfNeeded(rhs_size + 1, arena);
|
||||
size = rhs_size;
|
||||
buf.readStrict(large_data, size);
|
||||
}
|
||||
@ -616,7 +623,10 @@ public:
|
||||
/// Assuming to.has()
|
||||
void changeImpl(StringRef value, Arena * arena)
|
||||
{
|
||||
Int32 value_size = static_cast<Int32>(value.size);
|
||||
if (unlikely(MAX_STRING_SIZE < value.size))
|
||||
throw Exception(ErrorCodes::TOO_LARGE_STRING_SIZE, "String size is too big ({})", value.size);
|
||||
|
||||
UInt32 value_size = static_cast<UInt32>(value.size);
|
||||
|
||||
if (value_size <= MAX_SMALL_STRING_SIZE)
|
||||
{
|
||||
|
60
src/Analyzer/HashUtils.h
Normal file
60
src/Analyzer/HashUtils.h
Normal file
@ -0,0 +1,60 @@
|
||||
#pragma once
|
||||
|
||||
#include <Analyzer/IQueryTreeNode.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/** This structure holds query tree node ptr and its hash. It can be used as hash map key to avoid unnecessary hash
|
||||
* recalculations.
|
||||
*
|
||||
* Example of usage:
|
||||
* std::unordered_map<QueryTreeNodeConstRawPtrWithHash, std::string> map;
|
||||
*/
|
||||
template <typename QueryTreeNodePtrType>
|
||||
struct QueryTreeNodeWithHash
|
||||
{
|
||||
QueryTreeNodeWithHash(QueryTreeNodePtrType node_) /// NOLINT
|
||||
: node(std::move(node_))
|
||||
, hash(node->getTreeHash().first)
|
||||
{}
|
||||
|
||||
QueryTreeNodePtrType node = nullptr;
|
||||
size_t hash = 0;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline bool operator==(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
|
||||
{
|
||||
return lhs.hash == rhs.hash && lhs.node->isEqual(*rhs.node);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline bool operator!=(const QueryTreeNodeWithHash<T> & lhs, const QueryTreeNodeWithHash<T> & rhs)
|
||||
{
|
||||
return !(lhs == rhs);
|
||||
}
|
||||
|
||||
using QueryTreeNodePtrWithHash = QueryTreeNodeWithHash<QueryTreeNodePtr>;
|
||||
using QueryTreeNodeRawPtrWithHash = QueryTreeNodeWithHash<IQueryTreeNode *>;
|
||||
using QueryTreeNodeConstRawPtrWithHash = QueryTreeNodeWithHash<const IQueryTreeNode *>;
|
||||
|
||||
using QueryTreeNodePtrWithHashSet = std::unordered_set<QueryTreeNodePtrWithHash>;
|
||||
using QueryTreeNodeConstRawPtrWithHashSet = std::unordered_set<QueryTreeNodeConstRawPtrWithHash>;
|
||||
|
||||
template <typename Value>
|
||||
using QueryTreeNodePtrWithHashMap = std::unordered_map<QueryTreeNodePtrWithHash, Value>;
|
||||
|
||||
template <typename Value>
|
||||
using QueryTreeNodeConstRawPtrWithHashMap = std::unordered_map<QueryTreeNodeConstRawPtrWithHash, Value>;
|
||||
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
struct std::hash<DB::QueryTreeNodeWithHash<T>>
|
||||
{
|
||||
size_t operator()(const DB::QueryTreeNodeWithHash<T> & node_with_hash) const
|
||||
{
|
||||
return node_with_hash.hash;
|
||||
}
|
||||
};
|
@ -8,6 +8,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/FunctionNode.h>
|
||||
#include <Analyzer/ConstantNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeArray.h>
|
||||
@ -48,43 +49,24 @@ public:
|
||||
/// Do not apply for `count()` with without arguments or `count(*)`, only `count(x)` is supported.
|
||||
return;
|
||||
|
||||
mapping[QueryTreeNodeWithHash(argument_nodes[0])].push_back(&node);
|
||||
argument_to_functions_mapping[argument_nodes[0]].push_back(&node);
|
||||
}
|
||||
|
||||
struct QueryTreeNodeWithHash
|
||||
{
|
||||
const QueryTreeNodePtr & node;
|
||||
IQueryTreeNode::Hash hash;
|
||||
|
||||
explicit QueryTreeNodeWithHash(const QueryTreeNodePtr & node_)
|
||||
: node(node_)
|
||||
, hash(node->getTreeHash())
|
||||
{}
|
||||
|
||||
bool operator==(const QueryTreeNodeWithHash & rhs) const
|
||||
{
|
||||
return hash == rhs.hash && node->isEqual(*rhs.node);
|
||||
}
|
||||
|
||||
struct Hash
|
||||
{
|
||||
size_t operator() (const QueryTreeNodeWithHash & key) const { return key.hash.first ^ key.hash.second; }
|
||||
};
|
||||
};
|
||||
|
||||
/// argument -> list of sum/count/avg functions with this argument
|
||||
std::unordered_map<QueryTreeNodeWithHash, std::vector<QueryTreeNodePtr *>, QueryTreeNodeWithHash::Hash> mapping;
|
||||
QueryTreeNodePtrWithHashMap<std::vector<QueryTreeNodePtr *>> argument_to_functions_mapping;
|
||||
|
||||
private:
|
||||
std::unordered_set<String> names_to_collect;
|
||||
};
|
||||
|
||||
QueryTreeNodePtr createResolvedFunction(ContextPtr context, const String & name, DataTypePtr result_type, QueryTreeNodes arguments)
|
||||
QueryTreeNodePtr createResolvedFunction(const ContextPtr & context, const String & name, const DataTypePtr & result_type, QueryTreeNodes arguments)
|
||||
{
|
||||
auto function_node = std::make_shared<FunctionNode>(name);
|
||||
|
||||
auto function = FunctionFactory::instance().get(name, context);
|
||||
function_node->resolveAsFunction(std::move(function), result_type);
|
||||
function_node->getArguments().getNodes() = std::move(arguments);
|
||||
|
||||
return function_node;
|
||||
}
|
||||
|
||||
@ -94,21 +76,20 @@ FunctionNodePtr createResolvedAggregateFunction(const String & name, const Query
|
||||
|
||||
AggregateFunctionProperties properties;
|
||||
auto aggregate_function = AggregateFunctionFactory::instance().get(name, {argument->getResultType()}, parameters, properties);
|
||||
|
||||
function_node->resolveAsAggregateFunction(aggregate_function, aggregate_function->getReturnType());
|
||||
function_node->getArguments().getNodes() = { argument };
|
||||
|
||||
function_node->getArgumentsNode() = std::make_shared<ListNode>(QueryTreeNodes{argument});
|
||||
return function_node;
|
||||
}
|
||||
|
||||
QueryTreeNodePtr createTupleElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
|
||||
QueryTreeNodePtr createTupleElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index)
|
||||
{
|
||||
return createResolvedFunction(context, "tupleElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
|
||||
return createResolvedFunction(context, "tupleElement", result_type, {std::move(argument), std::make_shared<ConstantNode>(index)});
|
||||
}
|
||||
|
||||
QueryTreeNodePtr createArrayElementFunction(ContextPtr context, DataTypePtr result_type, QueryTreeNodePtr argument, UInt64 index)
|
||||
QueryTreeNodePtr createArrayElementFunction(const ContextPtr & context, const DataTypePtr & result_type, QueryTreeNodePtr argument, UInt64 index)
|
||||
{
|
||||
return createResolvedFunction(context, "arrayElement", result_type, {argument, std::make_shared<ConstantNode>(index)});
|
||||
return createResolvedFunction(context, "arrayElement", result_type, {std::move(argument), std::make_shared<ConstantNode>(index)});
|
||||
}
|
||||
|
||||
void replaceWithSumCount(QueryTreeNodePtr & node, const FunctionNodePtr & sum_count_node, ContextPtr context)
|
||||
@ -151,6 +132,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector<QueryTreeNodePtr *> n
|
||||
{
|
||||
Array parameters;
|
||||
parameters.reserve(nodes.size());
|
||||
|
||||
for (const auto * node : nodes)
|
||||
{
|
||||
const FunctionNode & function_node = (*node)->as<const FunctionNode &>();
|
||||
@ -172,6 +154,7 @@ FunctionNodePtr createFusedQuantilesNode(const std::vector<QueryTreeNodePtr *> n
|
||||
|
||||
parameters.push_back(constant_value->getValue());
|
||||
}
|
||||
|
||||
return createResolvedAggregateFunction("quantiles", argument, parameters);
|
||||
}
|
||||
|
||||
@ -181,7 +164,7 @@ void tryFuseSumCountAvg(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
FuseFunctionsVisitor visitor({"sum", "count", "avg"});
|
||||
visitor.visit(query_tree_node);
|
||||
|
||||
for (auto & [argument, nodes] : visitor.mapping)
|
||||
for (auto & [argument, nodes] : visitor.argument_to_functions_mapping)
|
||||
{
|
||||
if (nodes.size() < 2)
|
||||
continue;
|
||||
@ -199,24 +182,22 @@ void tryFuseQuantiles(QueryTreeNodePtr query_tree_node, ContextPtr context)
|
||||
{
|
||||
FuseFunctionsVisitor visitor_quantile({"quantile"});
|
||||
visitor_quantile.visit(query_tree_node);
|
||||
for (auto & [argument, nodes] : visitor_quantile.mapping)
|
||||
|
||||
for (auto & [argument, nodes] : visitor_quantile.argument_to_functions_mapping)
|
||||
{
|
||||
if (nodes.size() < 2)
|
||||
size_t nodes_size = nodes.size();
|
||||
if (nodes_size < 2)
|
||||
continue;
|
||||
|
||||
auto quantiles_node = createFusedQuantilesNode(nodes, argument.node);
|
||||
auto result_array_type = std::dynamic_pointer_cast<const DataTypeArray>(quantiles_node->getResultType());
|
||||
if (!result_array_type)
|
||||
{
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Unexpected return type '{}' of function '{}', should be array",
|
||||
quantiles_node->getResultType(), quantiles_node->getFunctionName());
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < nodes.size(); ++i)
|
||||
{
|
||||
for (size_t i = 0; i < nodes_size; ++i)
|
||||
*nodes[i] = createArrayElementFunction(context, result_array_type->getNestedType(), quantiles_node, i + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3,6 +3,7 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryNode.h>
|
||||
#include <Analyzer/SortNode.h>
|
||||
#include <Analyzer/HashUtils.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
@ -10,35 +11,6 @@ namespace DB
|
||||
namespace
|
||||
{
|
||||
|
||||
struct QueryTreeNodeWithHash
|
||||
{
|
||||
explicit QueryTreeNodeWithHash(const IQueryTreeNode * node_)
|
||||
: node(node_)
|
||||
, hash(node->getTreeHash().first)
|
||||
{}
|
||||
|
||||
const IQueryTreeNode * node = nullptr;
|
||||
size_t hash = 0;
|
||||
};
|
||||
|
||||
struct QueryTreeNodeWithHashHash
|
||||
{
|
||||
size_t operator()(const QueryTreeNodeWithHash & node_with_hash) const
|
||||
{
|
||||
return node_with_hash.hash;
|
||||
}
|
||||
};
|
||||
|
||||
struct QueryTreeNodeWithHashEqualTo
|
||||
{
|
||||
bool operator()(const QueryTreeNodeWithHash & lhs_node, const QueryTreeNodeWithHash & rhs_node) const
|
||||
{
|
||||
return lhs_node.hash == rhs_node.hash && lhs_node.node->isEqual(*rhs_node.node);
|
||||
}
|
||||
};
|
||||
|
||||
using QueryTreeNodeWithHashSet = std::unordered_set<QueryTreeNodeWithHash, QueryTreeNodeWithHashHash, QueryTreeNodeWithHashEqualTo>;
|
||||
|
||||
class OrderByLimitByDuplicateEliminationVisitor : public InDepthQueryTreeVisitor<OrderByLimitByDuplicateEliminationVisitor>
|
||||
{
|
||||
public:
|
||||
@ -93,7 +65,7 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
QueryTreeNodeWithHashSet unique_expressions_nodes_set;
|
||||
QueryTreeNodeConstRawPtrWithHashSet unique_expressions_nodes_set;
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -67,6 +67,8 @@
|
||||
#include <Analyzer/InDepthQueryTreeVisitor.h>
|
||||
#include <Analyzer/QueryTreeBuilder.h>
|
||||
|
||||
#include <Common/checkStackSize.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
@ -517,7 +519,7 @@ public:
|
||||
|
||||
private:
|
||||
QueryTreeNodes expressions;
|
||||
std::unordered_map<std::string, std::vector<QueryTreeNodePtr>> alias_name_to_expressions;
|
||||
std::unordered_map<std::string, QueryTreeNodes> alias_name_to_expressions;
|
||||
};
|
||||
|
||||
/** Projection names is name of query tree node that is used in projection part of query node.
|
||||
@ -1100,6 +1102,10 @@ private:
|
||||
|
||||
static void validateJoinTableExpressionWithoutAlias(const QueryTreeNodePtr & join_node, const QueryTreeNodePtr & table_expression_node, IdentifierResolveScope & scope);
|
||||
|
||||
static void expandGroupByAll(QueryNode & query_tree_node_typed);
|
||||
|
||||
static std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into);
|
||||
|
||||
/// Resolve identifier functions
|
||||
|
||||
static QueryTreeNodePtr tryResolveTableIdentifierFromDatabaseCatalog(const Identifier & table_identifier, ContextPtr context);
|
||||
@ -1929,6 +1935,68 @@ void QueryAnalyzer::validateJoinTableExpressionWithoutAlias(const QueryTreeNodeP
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
std::pair<bool, UInt64> QueryAnalyzer::recursivelyCollectMaxOrdinaryExpressions(QueryTreeNodePtr & node, QueryTreeNodes & into)
|
||||
{
|
||||
checkStackSize();
|
||||
|
||||
if (node->as<ColumnNode>())
|
||||
{
|
||||
into.push_back(node);
|
||||
return {false, 1};
|
||||
}
|
||||
|
||||
auto * function = node->as<FunctionNode>();
|
||||
|
||||
if (!function)
|
||||
return {false, 0};
|
||||
|
||||
if (function->isAggregateFunction())
|
||||
return {true, 0};
|
||||
|
||||
UInt64 pushed_children = 0;
|
||||
bool has_aggregate = false;
|
||||
|
||||
for (auto & child : function->getArguments().getNodes())
|
||||
{
|
||||
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into);
|
||||
has_aggregate |= child_has_aggregate;
|
||||
pushed_children += child_pushed_children;
|
||||
}
|
||||
|
||||
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
||||
/// so use the current function to replace its arguments
|
||||
if (!has_aggregate)
|
||||
{
|
||||
for (UInt64 i = 0; i < pushed_children; i++)
|
||||
into.pop_back();
|
||||
|
||||
into.push_back(node);
|
||||
pushed_children = 1;
|
||||
}
|
||||
|
||||
return {has_aggregate, pushed_children};
|
||||
}
|
||||
|
||||
/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions.
|
||||
*
|
||||
* For a special case that if there is a function having both aggregate functions and other fields as its arguments,
|
||||
* the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
|
||||
*
|
||||
* Example:
|
||||
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL
|
||||
* will expand as
|
||||
* SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2)
|
||||
*/
|
||||
void QueryAnalyzer::expandGroupByAll(QueryNode & query_tree_node_typed)
|
||||
{
|
||||
auto & group_by_nodes = query_tree_node_typed.getGroupBy().getNodes();
|
||||
auto & projection_list = query_tree_node_typed.getProjection();
|
||||
|
||||
for (auto & node : projection_list.getNodes())
|
||||
recursivelyCollectMaxOrdinaryExpressions(node, group_by_nodes);
|
||||
|
||||
}
|
||||
|
||||
|
||||
/// Resolve identifier functions implementation
|
||||
|
||||
@ -2171,18 +2239,19 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
|
||||
auto & alias_identifier_node = it->second->as<IdentifierNode &>();
|
||||
auto identifier = alias_identifier_node.getIdentifier();
|
||||
auto lookup_result = tryResolveIdentifier(IdentifierLookup{identifier, identifier_lookup.lookup_context}, scope, identifier_resolve_settings);
|
||||
if (!lookup_result.isResolved())
|
||||
if (!lookup_result.resolved_identifier)
|
||||
{
|
||||
std::unordered_set<Identifier> valid_identifiers;
|
||||
collectScopeWithParentScopesValidIdentifiersForTypoCorrection(identifier, scope, true, false, false, valid_identifiers);
|
||||
|
||||
auto hints = collectIdentifierTypoHints(identifier, valid_identifiers);
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}' in scope {}{}",
|
||||
toStringLowercase(IdentifierLookupContext::EXPRESSION),
|
||||
|
||||
throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown {} identifier '{}'. In scope {}{}",
|
||||
toStringLowercase(identifier_lookup.lookup_context),
|
||||
identifier.getFullName(),
|
||||
scope.scope_node->formatASTForErrorMessage(),
|
||||
getHintsErrorMessageSuffix(hints));
|
||||
}
|
||||
|
||||
it->second = lookup_result.resolved_identifier;
|
||||
|
||||
/** During collection of aliases if node is identifier and has alias, we cannot say if it is
|
||||
@ -2193,9 +2262,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromAliases(const Identifier
|
||||
* If we resolved identifier node as function, we must remove identifier node alias from
|
||||
* expression alias map.
|
||||
*/
|
||||
if (identifier_lookup.isExpressionLookup() && it->second)
|
||||
if (identifier_lookup.isExpressionLookup())
|
||||
scope.alias_name_to_lambda_node.erase(identifier_bind_part);
|
||||
else if (identifier_lookup.isFunctionLookup() && it->second)
|
||||
else if (identifier_lookup.isFunctionLookup())
|
||||
scope.alias_name_to_expression_node.erase(identifier_bind_part);
|
||||
|
||||
scope.expressions_in_resolve_process_stack.popNode();
|
||||
@ -3203,11 +3272,9 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
|
||||
|
||||
if (auto * array_join_node = table_expression->as<ArrayJoinNode>())
|
||||
{
|
||||
size_t table_expressions_column_nodes_with_names_stack_size = table_expressions_column_nodes_with_names_stack.size();
|
||||
if (table_expressions_column_nodes_with_names_stack_size < 1)
|
||||
if (table_expressions_column_nodes_with_names_stack.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR,
|
||||
"Expected at least 1 table expressions on stack before ARRAY JOIN processing. Actual {}",
|
||||
table_expressions_column_nodes_with_names_stack_size);
|
||||
"Expected at least 1 table expressions on stack before ARRAY JOIN processing");
|
||||
|
||||
auto & table_expression_column_nodes_with_names = table_expressions_column_nodes_with_names_stack.back();
|
||||
|
||||
@ -5388,25 +5455,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
|
||||
}
|
||||
}
|
||||
|
||||
/// TODO: Special functions that can take query
|
||||
/// TODO: Support qualified matchers for table function
|
||||
|
||||
for (auto & argument_node : table_function_node.getArguments().getNodes())
|
||||
{
|
||||
if (argument_node->getNodeType() == QueryTreeNodeType::MATCHER)
|
||||
{
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS,
|
||||
"Matcher as table function argument is not supported {}. In scope {}",
|
||||
join_tree_node->formatASTForErrorMessage(),
|
||||
scope.scope_node->formatASTForErrorMessage());
|
||||
}
|
||||
|
||||
auto * function_node = argument_node->as<FunctionNode>();
|
||||
if (function_node && table_function_factory.hasNameOrAlias(function_node->getFunctionName()))
|
||||
continue;
|
||||
|
||||
resolveExpressionNode(argument_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
}
|
||||
resolveExpressionNodeList(table_function_node.getArgumentsNode(), scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/);
|
||||
|
||||
auto table_function_ast = table_function_node.toAST();
|
||||
table_function_ptr->parseArguments(table_function_ast, scope_context);
|
||||
@ -6006,6 +6055,9 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
|
||||
node->removeAlias();
|
||||
}
|
||||
|
||||
if (query_node_typed.isGroupByAll())
|
||||
expandGroupByAll(query_node_typed);
|
||||
|
||||
/** Validate aggregates
|
||||
*
|
||||
* 1. Check that there are no aggregate functions and GROUPING function in JOIN TREE, WHERE, PREWHERE, in another aggregate functions.
|
||||
|
@ -54,6 +54,9 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
|
||||
if (is_group_by_with_totals)
|
||||
buffer << ", is_group_by_with_totals: " << is_group_by_with_totals;
|
||||
|
||||
if (is_group_by_all)
|
||||
buffer << ", is_group_by_all: " << is_group_by_all;
|
||||
|
||||
std::string group_by_type;
|
||||
if (is_group_by_with_rollup)
|
||||
group_by_type = "rollup";
|
||||
@ -117,7 +120,7 @@ void QueryNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_state, s
|
||||
getWhere()->dumpTreeImpl(buffer, format_state, indent + 4);
|
||||
}
|
||||
|
||||
if (hasGroupBy())
|
||||
if (!is_group_by_all && hasGroupBy())
|
||||
{
|
||||
buffer << '\n' << std::string(indent + 2, ' ') << "GROUP BY\n";
|
||||
getGroupBy().dumpTreeImpl(buffer, format_state, indent + 4);
|
||||
@ -198,7 +201,8 @@ bool QueryNode::isEqualImpl(const IQueryTreeNode & rhs) const
|
||||
is_group_by_with_totals == rhs_typed.is_group_by_with_totals &&
|
||||
is_group_by_with_rollup == rhs_typed.is_group_by_with_rollup &&
|
||||
is_group_by_with_cube == rhs_typed.is_group_by_with_cube &&
|
||||
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets;
|
||||
is_group_by_with_grouping_sets == rhs_typed.is_group_by_with_grouping_sets &&
|
||||
is_group_by_all == rhs_typed.is_group_by_all;
|
||||
}
|
||||
|
||||
void QueryNode::updateTreeHashImpl(HashState & state) const
|
||||
@ -226,6 +230,7 @@ void QueryNode::updateTreeHashImpl(HashState & state) const
|
||||
state.update(is_group_by_with_rollup);
|
||||
state.update(is_group_by_with_cube);
|
||||
state.update(is_group_by_with_grouping_sets);
|
||||
state.update(is_group_by_all);
|
||||
|
||||
if (constant_value)
|
||||
{
|
||||
@ -251,6 +256,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
|
||||
result_query_node->is_group_by_with_rollup = is_group_by_with_rollup;
|
||||
result_query_node->is_group_by_with_cube = is_group_by_with_cube;
|
||||
result_query_node->is_group_by_with_grouping_sets = is_group_by_with_grouping_sets;
|
||||
result_query_node->is_group_by_all = is_group_by_all;
|
||||
result_query_node->cte_name = cte_name;
|
||||
result_query_node->projection_columns = projection_columns;
|
||||
result_query_node->constant_value = constant_value;
|
||||
@ -267,6 +273,7 @@ ASTPtr QueryNode::toASTImpl() const
|
||||
select_query->group_by_with_rollup = is_group_by_with_rollup;
|
||||
select_query->group_by_with_cube = is_group_by_with_cube;
|
||||
select_query->group_by_with_grouping_sets = is_group_by_with_grouping_sets;
|
||||
select_query->group_by_all = is_group_by_all;
|
||||
|
||||
if (hasWith())
|
||||
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
|
||||
@ -283,7 +290,7 @@ ASTPtr QueryNode::toASTImpl() const
|
||||
if (getWhere())
|
||||
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST());
|
||||
|
||||
if (hasGroupBy())
|
||||
if (!is_group_by_all && hasGroupBy())
|
||||
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST());
|
||||
|
||||
if (hasHaving())
|
||||
|
@ -176,6 +176,18 @@ public:
|
||||
is_group_by_with_grouping_sets = is_group_by_with_grouping_sets_value;
|
||||
}
|
||||
|
||||
/// Returns true, if query node has GROUP BY ALL modifier, false otherwise
|
||||
bool isGroupByAll() const
|
||||
{
|
||||
return is_group_by_all;
|
||||
}
|
||||
|
||||
/// Set query node GROUP BY ALL modifier value
|
||||
void setIsGroupByAll(bool is_group_by_all_value)
|
||||
{
|
||||
is_group_by_all = is_group_by_all_value;
|
||||
}
|
||||
|
||||
/// Returns true if query node WITH section is not empty, false otherwise
|
||||
bool hasWith() const
|
||||
{
|
||||
@ -580,6 +592,7 @@ private:
|
||||
bool is_group_by_with_rollup = false;
|
||||
bool is_group_by_with_cube = false;
|
||||
bool is_group_by_with_grouping_sets = false;
|
||||
bool is_group_by_all = false;
|
||||
|
||||
std::string cte_name;
|
||||
NamesAndTypes projection_columns;
|
||||
|
@ -215,6 +215,7 @@ QueryTreeNodePtr QueryTreeBuilder::buildSelectExpression(const ASTPtr & select_q
|
||||
current_query_tree->setIsGroupByWithCube(select_query_typed.group_by_with_cube);
|
||||
current_query_tree->setIsGroupByWithRollup(select_query_typed.group_by_with_rollup);
|
||||
current_query_tree->setIsGroupByWithGroupingSets(select_query_typed.group_by_with_grouping_sets);
|
||||
current_query_tree->setIsGroupByAll(select_query_typed.group_by_all);
|
||||
current_query_tree->setOriginalAST(select_query);
|
||||
|
||||
auto select_settings = select_query_typed.settings();
|
||||
|
@ -1401,6 +1401,11 @@ try
|
||||
QueryPipeline pipeline(std::move(pipe));
|
||||
PullingAsyncPipelineExecutor executor(pipeline);
|
||||
|
||||
if (need_render_progress)
|
||||
{
|
||||
pipeline.setProgressCallback([this](const Progress & progress){ onProgress(progress); });
|
||||
}
|
||||
|
||||
Block block;
|
||||
while (executor.pull(block))
|
||||
{
|
||||
@ -1445,12 +1450,6 @@ catch (...)
|
||||
|
||||
void ClientBase::sendDataFromStdin(Block & sample, const ColumnsDescription & columns_description, ASTPtr parsed_query)
|
||||
{
|
||||
if (need_render_progress)
|
||||
{
|
||||
/// Add callback to track reading from fd.
|
||||
std_in.setProgressCallback(global_context);
|
||||
}
|
||||
|
||||
/// Send data read from stdin.
|
||||
try
|
||||
{
|
||||
|
@ -171,6 +171,11 @@ protected:
|
||||
|
||||
void initTtyBuffer(ProgressOption progress);
|
||||
|
||||
/// Should be one of the first, to be destroyed the last,
|
||||
/// since other members can use them.
|
||||
SharedContextHolder shared_context;
|
||||
ContextMutablePtr global_context;
|
||||
|
||||
bool is_interactive = false; /// Use either interactive line editing interface or batch mode.
|
||||
bool is_multiquery = false;
|
||||
bool delayed_interactive = false;
|
||||
@ -208,9 +213,6 @@ protected:
|
||||
/// Settings specified via command line args
|
||||
Settings cmd_settings;
|
||||
|
||||
SharedContextHolder shared_context;
|
||||
ContextMutablePtr global_context;
|
||||
|
||||
/// thread status should be destructed before shared context because it relies on process list.
|
||||
std::optional<ThreadStatus> thread_status;
|
||||
|
||||
|
@ -524,11 +524,13 @@ void ColumnArray::insertRangeFrom(const IColumn & src, size_t start, size_t leng
|
||||
size_t nested_offset = src_concrete.offsetAt(start);
|
||||
size_t nested_length = src_concrete.getOffsets()[start + length - 1] - nested_offset;
|
||||
|
||||
Offsets & cur_offsets = getOffsets();
|
||||
/// Reserve offsets before to make it more exception safe (in case of MEMORY_LIMIT_EXCEEDED)
|
||||
cur_offsets.reserve(cur_offsets.size() + length);
|
||||
|
||||
getData().insertRangeFrom(src_concrete.getData(), nested_offset, nested_length);
|
||||
|
||||
Offsets & cur_offsets = getOffsets();
|
||||
const Offsets & src_offsets = src_concrete.getOffsets();
|
||||
|
||||
if (start == 0 && cur_offsets.empty())
|
||||
{
|
||||
cur_offsets.assign(src_offsets.begin(), src_offsets.begin() + length);
|
||||
|
@ -124,6 +124,9 @@ void ColumnString::insertRangeFrom(const IColumn & src, size_t start, size_t len
|
||||
size_t nested_offset = src_concrete.offsetAt(start);
|
||||
size_t nested_length = src_concrete.offsets[start + length - 1] - nested_offset;
|
||||
|
||||
/// Reserve offsets before to make it more exception safe (in case of MEMORY_LIMIT_EXCEEDED)
|
||||
offsets.reserve(offsets.size() + length);
|
||||
|
||||
size_t old_chars_size = chars.size();
|
||||
chars.resize(old_chars_size + nested_length);
|
||||
memcpy(&chars[old_chars_size], &src_concrete.chars[nested_offset], nested_length);
|
||||
|
@ -189,6 +189,13 @@ public:
|
||||
finalize();
|
||||
return v0 ^ v1 ^ v2 ^ v3;
|
||||
}
|
||||
|
||||
UInt128 get128()
|
||||
{
|
||||
UInt128 res;
|
||||
get128(res);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -208,9 +215,7 @@ inline UInt128 sipHash128(const char * data, const size_t size)
|
||||
{
|
||||
SipHash hash;
|
||||
hash.update(data, size);
|
||||
UInt128 res;
|
||||
hash.get128(res);
|
||||
return res;
|
||||
return hash.get128();
|
||||
}
|
||||
|
||||
inline UInt64 sipHash64(const char * data, const size_t size)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#pragma once
|
||||
|
||||
#include <base/types.h>
|
||||
#include <base/getThreadId.h>
|
||||
#include <Common/ProfileEvents.h>
|
||||
#include <sys/time.h>
|
||||
#include <sys/resource.h>
|
||||
@ -47,6 +48,8 @@ struct RUsageCounters
|
||||
UInt64 soft_page_faults = 0;
|
||||
UInt64 hard_page_faults = 0;
|
||||
|
||||
UInt64 thread_id = 0;
|
||||
|
||||
RUsageCounters() = default;
|
||||
RUsageCounters(const ::rusage & rusage_, UInt64 real_time_)
|
||||
{
|
||||
@ -61,6 +64,8 @@ struct RUsageCounters
|
||||
|
||||
soft_page_faults = static_cast<UInt64>(rusage.ru_minflt);
|
||||
hard_page_faults = static_cast<UInt64>(rusage.ru_majflt);
|
||||
|
||||
thread_id = getThreadId();
|
||||
}
|
||||
|
||||
static RUsageCounters current()
|
||||
@ -78,6 +83,12 @@ struct RUsageCounters
|
||||
|
||||
static void incrementProfileEvents(const RUsageCounters & prev, const RUsageCounters & curr, ProfileEvents::Counters & profile_events)
|
||||
{
|
||||
chassert(prev.thread_id == curr.thread_id);
|
||||
/// LONG_MAX is ~106751 days
|
||||
chassert(curr.real_time - prev.real_time < LONG_MAX);
|
||||
chassert(curr.user_time - prev.user_time < LONG_MAX);
|
||||
chassert(curr.sys_time - prev.sys_time < LONG_MAX);
|
||||
|
||||
profile_events.increment(ProfileEvents::RealTimeMicroseconds, (curr.real_time - prev.real_time) / 1000U);
|
||||
profile_events.increment(ProfileEvents::UserTimeMicroseconds, (curr.user_time - prev.user_time) / 1000U);
|
||||
profile_events.increment(ProfileEvents::SystemTimeMicroseconds, (curr.sys_time - prev.sys_time) / 1000U);
|
||||
|
@ -175,8 +175,8 @@ protected:
|
||||
/// Is used to send logs from logs_queue to client in case of fatal errors.
|
||||
std::function<void()> fatal_error_callback;
|
||||
|
||||
/// It is used to avoid enabling the query profiler when you have multiple ThreadStatus in the same thread
|
||||
bool query_profiler_enabled = true;
|
||||
/// See setInternalThread()
|
||||
bool internal_thread = false;
|
||||
|
||||
/// Requires access to query_id.
|
||||
friend class MemoryTrackerThreadSwitcher;
|
||||
@ -221,11 +221,21 @@ public:
|
||||
return global_context.lock();
|
||||
}
|
||||
|
||||
void disableProfiling()
|
||||
{
|
||||
assert(!query_profiler_real && !query_profiler_cpu);
|
||||
query_profiler_enabled = false;
|
||||
}
|
||||
/// "Internal" ThreadStatus is used for materialized views for separate
|
||||
/// tracking into system.query_views_log
|
||||
///
|
||||
/// You can have multiple internal threads, but only one non-internal with
|
||||
/// the same thread_id.
|
||||
///
|
||||
/// "Internal" thread:
|
||||
/// - cannot have query profiler
|
||||
/// since the running (main query) thread should already have one
|
||||
/// - should not try to obtain latest counter on detach
|
||||
/// because detaching of such threads will be done from a different
|
||||
/// thread_id, and some counters are not available (i.e. getrusage()),
|
||||
/// but anyway they are accounted correctly in the main ThreadStatus of a
|
||||
/// query.
|
||||
void setInternalThread();
|
||||
|
||||
/// Starts new query and create new thread group for it, current thread becomes master thread of the query
|
||||
void initializeQuery();
|
||||
|
@ -1015,7 +1015,7 @@ void DatabaseCatalog::dropTableFinally(const TableMarkedAsDropped & table)
|
||||
for (const auto & [disk_name, disk] : getContext()->getDisksMap())
|
||||
{
|
||||
String data_path = "store/" + getPathForUUID(table.table_id.uuid);
|
||||
if (!disk->exists(data_path) || disk->isReadOnly())
|
||||
if (disk->isReadOnly() || !disk->exists(data_path))
|
||||
continue;
|
||||
|
||||
LOG_INFO(log, "Removing data directory {} of dropped table {} from disk {}", data_path, table.table_id.getNameForLogs(), disk_name);
|
||||
|
@ -165,7 +165,7 @@ struct QueryASTSettings
|
||||
|
||||
struct QueryTreeSettings
|
||||
{
|
||||
bool run_passes = false;
|
||||
bool run_passes = true;
|
||||
bool dump_passes = false;
|
||||
bool dump_ast = false;
|
||||
Int64 passes = -1;
|
||||
|
@ -2728,13 +2728,18 @@ void InterpreterSelectQuery::executeDistinct(QueryPlan & query_plan, bool before
|
||||
{
|
||||
const Settings & settings = context->getSettingsRef();
|
||||
|
||||
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
|
||||
UInt64 limit_for_distinct = 0;
|
||||
|
||||
/// If after this stage of DISTINCT ORDER BY is not executed,
|
||||
/// If after this stage of DISTINCT,
|
||||
/// (1) ORDER BY is not executed
|
||||
/// (2) there is no LIMIT BY (todo: we can check if DISTINCT and LIMIT BY expressions are match)
|
||||
/// then you can get no more than limit_length + limit_offset of different rows.
|
||||
if ((!query.orderBy() || !before_order) && limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
|
||||
limit_for_distinct = limit_length + limit_offset;
|
||||
if ((!query.orderBy() || !before_order) && !query.limitBy())
|
||||
{
|
||||
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
|
||||
if (limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
|
||||
limit_for_distinct = limit_length + limit_offset;
|
||||
}
|
||||
|
||||
SizeLimits limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);
|
||||
|
||||
|
@ -6,7 +6,6 @@
|
||||
#include <DataTypes/DataTypeDate.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <DataTypes/DataTypeEnum.h>
|
||||
#include <DataTypes/DataTypeUUID.h>
|
||||
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||
#include <Storages/MergeTree/MergeTreeData.h>
|
||||
#include <Interpreters/PartLog.h>
|
||||
@ -101,7 +100,6 @@ NamesAndTypesList PartLogElement::getNamesAndTypes()
|
||||
|
||||
{"database", std::make_shared<DataTypeString>()},
|
||||
{"table", std::make_shared<DataTypeString>()},
|
||||
{"table_uuid", std::make_shared<DataTypeUUID>()},
|
||||
{"part_name", std::make_shared<DataTypeString>()},
|
||||
{"partition_id", std::make_shared<DataTypeString>()},
|
||||
{"part_type", std::make_shared<DataTypeString>()},
|
||||
@ -139,7 +137,6 @@ void PartLogElement::appendToBlock(MutableColumns & columns) const
|
||||
|
||||
columns[i++]->insert(database_name);
|
||||
columns[i++]->insert(table_name);
|
||||
columns[i++]->insert(table_uuid);
|
||||
columns[i++]->insert(part_name);
|
||||
columns[i++]->insert(partition_id);
|
||||
columns[i++]->insert(part_type.toString());
|
||||
@ -208,7 +205,6 @@ bool PartLog::addNewParts(
|
||||
|
||||
elem.database_name = table_id.database_name;
|
||||
elem.table_name = table_id.table_name;
|
||||
elem.table_uuid = table_id.uuid;
|
||||
elem.partition_id = part->info.partition_id;
|
||||
elem.part_name = part->name;
|
||||
elem.disk_name = part->getDataPartStorage().getDiskName();
|
||||
|
@ -4,7 +4,6 @@
|
||||
#include <Interpreters/SystemLog.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
#include <Core/NamesAndAliases.h>
|
||||
#include <Core/UUID.h>
|
||||
#include <Storages/MergeTree/MergeType.h>
|
||||
#include <Storages/MergeTree/MergeAlgorithm.h>
|
||||
|
||||
@ -56,7 +55,6 @@ struct PartLogElement
|
||||
|
||||
String database_name;
|
||||
String table_name;
|
||||
UUID table_uuid{UUIDHelpers::Nil};
|
||||
String part_name;
|
||||
String partition_id;
|
||||
String disk_name;
|
||||
|
@ -131,6 +131,12 @@ void ThreadStatus::setupState(const ThreadGroupStatusPtr & thread_group_)
|
||||
thread_state = ThreadState::AttachedToQuery;
|
||||
}
|
||||
|
||||
void ThreadStatus::setInternalThread()
|
||||
{
|
||||
chassert(!query_profiler_real && !query_profiler_cpu);
|
||||
internal_thread = true;
|
||||
}
|
||||
|
||||
void ThreadStatus::initializeQuery()
|
||||
{
|
||||
setupState(std::make_shared<ThreadGroupStatus>());
|
||||
@ -177,41 +183,44 @@ void ThreadStatus::initPerformanceCounters()
|
||||
// query_start_time_nanoseconds cannot be used here since RUsageCounters expect CLOCK_MONOTONIC
|
||||
*last_rusage = RUsageCounters::current();
|
||||
|
||||
if (auto query_context_ptr = query_context.lock())
|
||||
if (!internal_thread)
|
||||
{
|
||||
const Settings & settings = query_context_ptr->getSettingsRef();
|
||||
if (settings.metrics_perf_events_enabled)
|
||||
if (auto query_context_ptr = query_context.lock())
|
||||
{
|
||||
const Settings & settings = query_context_ptr->getSettingsRef();
|
||||
if (settings.metrics_perf_events_enabled)
|
||||
{
|
||||
try
|
||||
{
|
||||
current_thread_counters.initializeProfileEvents(
|
||||
settings.metrics_perf_events_list);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!taskstats)
|
||||
{
|
||||
try
|
||||
{
|
||||
current_thread_counters.initializeProfileEvents(
|
||||
settings.metrics_perf_events_list);
|
||||
taskstats = TasksStatsCounters::create(thread_id);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
if (taskstats)
|
||||
taskstats->reset();
|
||||
}
|
||||
|
||||
if (!taskstats)
|
||||
{
|
||||
try
|
||||
{
|
||||
taskstats = TasksStatsCounters::create(thread_id);
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
tryLogCurrentException(log);
|
||||
}
|
||||
}
|
||||
if (taskstats)
|
||||
taskstats->reset();
|
||||
}
|
||||
|
||||
void ThreadStatus::finalizePerformanceCounters()
|
||||
{
|
||||
if (performance_counters_finalized)
|
||||
if (performance_counters_finalized || internal_thread)
|
||||
return;
|
||||
|
||||
performance_counters_finalized = true;
|
||||
@ -270,7 +279,7 @@ void ThreadStatus::resetPerformanceCountersLastUsage()
|
||||
|
||||
void ThreadStatus::initQueryProfiler()
|
||||
{
|
||||
if (!query_profiler_enabled)
|
||||
if (internal_thread)
|
||||
return;
|
||||
|
||||
/// query profilers are useless without trace collector
|
||||
|
@ -1,8 +1,8 @@
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include <Core/Settings.h>
|
||||
#include <Core/NamesAndTypes.h>
|
||||
|
||||
#include <Core/SettingsEnums.h>
|
||||
|
||||
#include <Interpreters/ArrayJoinedColumnsVisitor.h>
|
||||
@ -45,10 +45,10 @@
|
||||
#include <DataTypes/NestedUtils.h>
|
||||
#include <DataTypes/DataTypeNullable.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Common/checkStackSize.h>
|
||||
|
||||
#include <AggregateFunctions/AggregateFunctionFactory.h>
|
||||
|
||||
@ -784,6 +784,67 @@ void collectJoinedColumns(TableJoin & analyzed_join, ASTTableJoin & table_join,
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<bool, UInt64> recursivelyCollectMaxOrdinaryExpressions(const ASTPtr & expr, ASTExpressionList & into)
|
||||
{
|
||||
checkStackSize();
|
||||
|
||||
if (expr->as<ASTIdentifier>())
|
||||
{
|
||||
into.children.push_back(expr);
|
||||
return {false, 1};
|
||||
}
|
||||
|
||||
auto * function = expr->as<ASTFunction>();
|
||||
|
||||
if (!function)
|
||||
return {false, 0};
|
||||
|
||||
if (AggregateUtils::isAggregateFunction(*function))
|
||||
return {true, 0};
|
||||
|
||||
UInt64 pushed_children = 0;
|
||||
bool has_aggregate = false;
|
||||
|
||||
for (const auto & child : function->arguments->children)
|
||||
{
|
||||
auto [child_has_aggregate, child_pushed_children] = recursivelyCollectMaxOrdinaryExpressions(child, into);
|
||||
has_aggregate |= child_has_aggregate;
|
||||
pushed_children += child_pushed_children;
|
||||
}
|
||||
|
||||
/// The current function is not aggregate function and there is no aggregate function in its arguments,
|
||||
/// so use the current function to replace its arguments
|
||||
if (!has_aggregate)
|
||||
{
|
||||
for (UInt64 i = 0; i < pushed_children; i++)
|
||||
into.children.pop_back();
|
||||
|
||||
into.children.push_back(expr);
|
||||
pushed_children = 1;
|
||||
}
|
||||
|
||||
return {has_aggregate, pushed_children};
|
||||
}
|
||||
|
||||
/** Expand GROUP BY ALL by extracting all the SELECT-ed expressions that are not aggregate functions.
  *
  * As a special case, if a function has both aggregate functions and other fields as its arguments,
  * the `GROUP BY` keys will contain the maximum non-aggregate fields we can extract from it.
  *
  * Example:
  * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY ALL
  * will expand as
  * SELECT substring(a, 4, 2), substring(substring(a, 1, 2), 1, count(b)) FROM t GROUP BY substring(a, 4, 2), substring(a, 1, 2)
  */
void expandGroupByAll(ASTSelectQuery * select_query)
{
    auto group_expression_list = std::make_shared<ASTExpressionList>();

    for (const auto & expr : select_query->select()->children)
        recursivelyCollectMaxOrdinaryExpressions(expr, *group_expression_list);

    select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, group_expression_list);
}

std::vector<const ASTFunction *> getAggregates(ASTPtr & query, const ASTSelectQuery & select_query)
|
||||
{
|
||||
@ -1276,6 +1337,10 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
|
||||
|
||||
normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext());
|
||||
|
||||
// expand GROUP BY ALL
|
||||
if (select_query->group_by_all)
|
||||
expandGroupByAll(select_query);
|
||||
|
||||
/// Remove unneeded columns according to 'required_result_columns'.
|
||||
/// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside.
|
||||
/// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
|
||||
|
@ -93,7 +93,7 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
|
||||
where()->formatImpl(s, state, frame);
|
||||
}
|
||||
|
||||
if (groupBy())
|
||||
if (!group_by_all && groupBy())
|
||||
{
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY" << (s.hilite ? hilite_none : "");
|
||||
if (!group_by_with_grouping_sets)
|
||||
@ -104,6 +104,9 @@ void ASTSelectQuery::formatImpl(const FormatSettings & s, FormatState & state, F
|
||||
}
|
||||
}
|
||||
|
||||
if (group_by_all)
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << "GROUP BY ALL" << (s.hilite ? hilite_none : "");
|
||||
|
||||
if (group_by_with_rollup)
|
||||
s.ostr << (s.hilite ? hilite_keyword : "") << s.nl_or_ws << indent_str << (s.one_line ? "" : " ") << "WITH ROLLUP" << (s.hilite ? hilite_none : "");
|
||||
|
||||
|
@ -82,6 +82,7 @@ public:
|
||||
ASTPtr clone() const override;
|
||||
|
||||
bool distinct = false;
|
||||
bool group_by_all = false;
|
||||
bool group_by_with_totals = false;
|
||||
bool group_by_with_rollup = false;
|
||||
bool group_by_with_cube = false;
|
||||
|
@ -195,6 +195,8 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
select_query->group_by_with_cube = true;
|
||||
else if (s_grouping_sets.ignore(pos, expected))
|
||||
select_query->group_by_with_grouping_sets = true;
|
||||
else if (s_all.ignore(pos, expected))
|
||||
select_query->group_by_all = true;
|
||||
|
||||
if ((select_query->group_by_with_rollup || select_query->group_by_with_cube || select_query->group_by_with_grouping_sets) &&
|
||||
!open_bracket.ignore(pos, expected))
|
||||
@ -205,7 +207,7 @@ bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
|
||||
if (!grouping_sets_list.parse(pos, group_expression_list, expected))
|
||||
return false;
|
||||
}
|
||||
else
|
||||
else if (!select_query->group_by_all)
|
||||
{
|
||||
if (!exp_list.parse(pos, group_expression_list, expected))
|
||||
return false;
|
||||
|
@ -15,6 +15,12 @@ try
|
||||
DB::ParserCreateQuery parser;
|
||||
DB::ASTPtr ast = parseQuery(parser, input.data(), input.data() + input.size(), "", 0, 1000);
|
||||
|
||||
const UInt64 max_ast_depth = 1000;
|
||||
ast->checkDepth(max_ast_depth);
|
||||
|
||||
const UInt64 max_ast_elements = 50000;
|
||||
ast->checkSize(max_ast_elements);
|
||||
|
||||
DB::WriteBufferFromOwnString wb;
|
||||
DB::formatAST(*ast, wb);
|
||||
|
||||
|
@ -87,8 +87,8 @@ void JoinClause::dump(WriteBuffer & buffer) const
|
||||
{
|
||||
const auto & asof_condition = asof_conditions[i];
|
||||
|
||||
buffer << "key_index: " << asof_condition.key_index;
|
||||
buffer << "inequality: " << toString(asof_condition.asof_inequality);
|
||||
buffer << " key_index: " << asof_condition.key_index;
|
||||
buffer << " inequality: " << toString(asof_condition.asof_inequality);
|
||||
|
||||
if (i + 1 != asof_conditions_size)
|
||||
buffer << ',';
|
||||
|
@ -183,19 +183,19 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
/// Valid for table, table function, query, union, array join table expression nodes
|
||||
/// Valid for table, table function, array join, query, union nodes
|
||||
NamesAndTypesList columns;
|
||||
|
||||
/// Valid for table, table function, query, union, array join table expression nodes
|
||||
/// Valid for table, table function, array join, query, union nodes
|
||||
NameSet columns_names;
|
||||
|
||||
/// Valid only for table table expression node
|
||||
/// Valid only for table node
|
||||
NameSet alias_columns_names;
|
||||
|
||||
/// Valid for table, table function, query, union table, array join expression nodes
|
||||
/// Valid for table, table function, array join, query, union nodes
|
||||
ColumnNameToColumnIdentifier column_name_to_column_identifier;
|
||||
|
||||
/// Valid for table, table function, query, union table, array join expression nodes
|
||||
/// Valid for table, table function, array join, query, union nodes
|
||||
ColumnIdentifierToColumnName column_identifier_to_column_name;
|
||||
|
||||
/// Is storage remote
|
||||
|
@ -275,10 +275,8 @@ Chain buildPushingToViewsChain(
|
||||
SCOPE_EXIT({ current_thread = original_thread; });
|
||||
|
||||
std::unique_ptr<ThreadStatus> view_thread_status_ptr = std::make_unique<ThreadStatus>();
|
||||
/// Disable query profiler for this ThreadStatus since the running (main query) thread should already have one
|
||||
/// If we didn't disable it, then we could end up with N + 1 (N = number of dependencies) profilers which means
|
||||
/// N times more interruptions
|
||||
view_thread_status_ptr->disableProfiling();
|
||||
/// Copy of a ThreadStatus should be internal.
|
||||
view_thread_status_ptr->setInternalThread();
|
||||
/// view_thread_status_ptr will be moved later (on and on), so need to capture raw pointer.
|
||||
view_thread_status_ptr->deleter = [thread_status = view_thread_status_ptr.get(), running_group]
|
||||
{
|
||||
|
@ -1848,7 +1848,6 @@ void MergeTreeData::removePartsFinally(const MergeTreeData::DataPartsVector & pa
|
||||
|
||||
part_log_elem.database_name = table_id.database_name;
|
||||
part_log_elem.table_name = table_id.table_name;
|
||||
part_log_elem.table_uuid = table_id.uuid;
|
||||
|
||||
for (const auto & part : parts)
|
||||
{
|
||||
@ -6599,7 +6598,6 @@ try
|
||||
|
||||
part_log_elem.database_name = table_id.database_name;
|
||||
part_log_elem.table_name = table_id.table_name;
|
||||
part_log_elem.table_uuid = table_id.uuid;
|
||||
part_log_elem.partition_id = MergeTreePartInfo::fromPartName(new_part_name, format_version).partition_id;
|
||||
part_log_elem.part_name = new_part_name;
|
||||
|
||||
|
@ -434,7 +434,7 @@ void StorageBuffer::read(
|
||||
}
|
||||
|
||||
|
||||
static void appendBlock(const Block & from, Block & to)
|
||||
static void appendBlock(Poco::Logger * log, const Block & from, Block & to)
|
||||
{
|
||||
size_t rows = from.rows();
|
||||
size_t old_rows = to.rows();
|
||||
@ -456,7 +456,24 @@ static void appendBlock(const Block & from, Block & to)
|
||||
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
|
||||
{
|
||||
const IColumn & col_from = *from.getByPosition(column_no).column.get();
|
||||
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
|
||||
{
|
||||
/// Usually IColumn::mutate() here will simply move pointers,
/// however in case of parallel reading from it via SELECT, it
/// is possible for the full IColumn::clone() here, and in this
/// case it may fail due to MEMORY_LIMIT_EXCEEDED, and this
/// breaks the rollback, since the column got lost, it is
/// neither in last_col nor in "to" block.
///
/// The safest option here is to do a full clone every time,
/// however, that is an overhead. And it looks like the only
/// exception that is possible here is MEMORY_LIMIT_EXCEEDED,
/// and it is better to simply suppress it, to avoid overhead
/// for every INSERT into Buffer (anyway we have a
/// LOGICAL_ERROR in rollback that will bail if something else
/// happens here).
LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global);
|
||||
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
|
||||
}
|
||||
|
||||
/// In case of ColumnAggregateFunction aggregate states will
|
||||
/// be allocated from the query context but can be destroyed from the
|
||||
@ -468,7 +485,10 @@ static void appendBlock(const Block & from, Block & to)
|
||||
last_col->ensureOwnership();
|
||||
last_col->insertRangeFrom(col_from, 0, rows);
|
||||
|
||||
to.getByPosition(column_no).column = std::move(last_col);
|
||||
{
|
||||
DENY_ALLOCATIONS_IN_SCOPE;
|
||||
to.getByPosition(column_no).column = std::move(last_col);
|
||||
}
|
||||
}
|
||||
CurrentMetrics::add(CurrentMetrics::StorageBufferRows, rows);
|
||||
CurrentMetrics::add(CurrentMetrics::StorageBufferBytes, to.bytes() - old_bytes);
|
||||
@ -481,6 +501,9 @@ static void appendBlock(const Block & from, Block & to)
|
||||
/// So ignore any memory limits, even global (since memory tracking has drift).
|
||||
LockMemoryExceptionInThread temporarily_ignore_any_memory_limits(VariableContext::Global);
|
||||
|
||||
/// But first log exception to get more details in case of LOGICAL_ERROR
|
||||
tryLogCurrentException(log, "Caught exception while adding data to buffer, rolling back...");
|
||||
|
||||
try
|
||||
{
|
||||
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
|
||||
@ -625,7 +648,7 @@ private:
|
||||
size_t old_rows = buffer.data.rows();
|
||||
size_t old_bytes = buffer.data.allocatedBytes();
|
||||
|
||||
appendBlock(sorted_block, buffer.data);
|
||||
appendBlock(storage.log, sorted_block, buffer.data);
|
||||
|
||||
storage.total_writes.rows += (buffer.data.rows() - old_rows);
|
||||
storage.total_writes.bytes += (buffer.data.allocatedBytes() - old_bytes);
|
||||
|
@ -1,8 +1,14 @@
|
||||
import pytest
|
||||
|
||||
# FIXME This test is too flaky
|
||||
# https://github.com/ClickHouse/ClickHouse/issues/43541
|
||||
|
||||
pytestmark = pytest.mark.skip
|
||||
|
||||
import logging
|
||||
import os
|
||||
|
||||
import time
|
||||
import pytest
|
||||
from helpers.cluster import ClickHouseCluster
|
||||
from helpers.test_tools import TSV
|
||||
|
||||
|
tests/queries/0_stateless/02124_buffer_insert_select_race.sh (new executable file, 27 lines)
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/env bash
# Tags: no-fasttest

# Regression test for 'Logical error: No column to rollback' in case of
# an exception while committing a batch into the Buffer, see [1].
#
# [1]: https://github.com/ClickHouse/ClickHouse/issues/42740

CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh

$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_string"
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_string(key String) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)"

# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors
# --concurrency -- we need to have SELECT and INSERT in parallel to have refcount
#                  of the column in the Buffer block > 1, that way we will do
#                  a full clone and moving a column may throw.
#
# It reproduces the problem 100% with MemoryTrackerFaultInjectorInThread in appendBlock()
$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null <<EOL
INSERT INTO t_buffer_string SELECT number::String from numbers(10000)
SELECT * FROM t_buffer_string
EOL

$CLICKHOUSE_CLIENT -q "DROP TABLE t_buffer_string"
@ -1,6 +1,10 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: no-fasttest
|
||||
|
||||
# Regression test for incorrect mutation of Map() column, see [1].
|
||||
#
|
||||
# [1]: https://github.com/ClickHouse/ClickHouse/issues/30546
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
. "$CURDIR"/../shell_config.sh
|
||||
@ -8,29 +12,11 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS t_buffer_map"
|
||||
$CLICKHOUSE_CLIENT -q "CREATE TABLE t_buffer_map(m1 Map(String, UInt64), m2 Map(String, String)) ENGINE = Buffer('', '', 1, 1, 1, 1000000000000, 1000000000000, 1000000000000, 1000000000000)"
|
||||
|
||||
function insert1
|
||||
{
|
||||
while true; do
|
||||
$CLICKHOUSE_CLIENT -q "INSERT INTO t_buffer_map SELECT (range(10), range(10)), (range(10), range(10)) from numbers(100)"
|
||||
done
|
||||
}
|
||||
|
||||
function select1
|
||||
{
|
||||
while true; do
|
||||
$CLICKHOUSE_CLIENT -q "SELECT * FROM t_buffer_map" 2> /dev/null > /dev/null
|
||||
done
|
||||
}
|
||||
|
||||
TIMEOUT=10
|
||||
|
||||
export -f insert1
|
||||
export -f select1
|
||||
|
||||
timeout $TIMEOUT bash -c insert1 &
|
||||
timeout $TIMEOUT bash -c select1 &
|
||||
|
||||
wait
|
||||
# --continue_on_errors -- to ignore possible MEMORY_LIMIT_EXCEEDED errors
|
||||
$CLICKHOUSE_BENCHMARK --randomize --timelimit 10 --continue_on_errors --concurrency 10 >& /dev/null <<EOL
|
||||
INSERT INTO t_buffer_map SELECT (range(10), range(10)), (range(10), range(10)) from numbers(100)
|
||||
SELECT * FROM t_buffer_map
|
||||
EOL
|
||||
|
||||
echo "OK"
|
||||
$CLICKHOUSE_CLIENT -q "DROP TABLE t_buffer_map"
|
||||
|
@ -1,6 +1,6 @@
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
EXPLAIN QUERY TREE SELECT 1;
|
||||
EXPLAIN QUERY TREE run_passes = 0 SELECT 1;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
@ -13,7 +13,7 @@ CREATE TABLE test_table
|
||||
|
||||
INSERT INTO test_table VALUES (0, 'Value');
|
||||
|
||||
EXPLAIN QUERY TREE SELECT id, value FROM test_table;
|
||||
EXPLAIN QUERY TREE run_passes = 0 SELECT id, value FROM test_table;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
@ -21,7 +21,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT id, value FROM test_table;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
EXPLAIN QUERY TREE SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table;
|
||||
EXPLAIN QUERY TREE run_passes = 0 SELECT arrayMap(x -> x + id, [1, 2, 3]) FROM test_table;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
@ -29,7 +29,7 @@ EXPLAIN QUERY TREE run_passes = 1 SELECT arrayMap(x -> x + 1, [1, 2, 3]) FROM te
|
||||
|
||||
SELECT '--';
|
||||
|
||||
EXPLAIN QUERY TREE WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table;
|
||||
EXPLAIN QUERY TREE run_passes = 0 WITH x -> x + 1 AS lambda SELECT lambda(id) FROM test_table;
|
||||
|
||||
SELECT '--';
|
||||
|
||||
|
tests/queries/0_stateless/02459_group_by_all.reference (new file, 44 lines)
@ -0,0 +1,44 @@
|
||||
abc1 1
|
||||
abc2 1
|
||||
abc3 1
|
||||
abc4 1
|
||||
abc 4
|
||||
abc ab
|
||||
abc ab
|
||||
abc ab
|
||||
abc bc
|
||||
abc bc
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
1 abc a
|
||||
1 abc a
|
||||
1 abc a
|
||||
1 abc a
|
||||
abc1 1
|
||||
abc2 1
|
||||
abc3 1
|
||||
abc4 1
|
||||
abc 4
|
||||
abc ab
|
||||
abc ab
|
||||
abc ab
|
||||
abc bc
|
||||
abc bc
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
abc a
|
||||
1 abc a
|
||||
1 abc a
|
||||
1 abc a
|
||||
1 abc a
|
tests/queries/0_stateless/02459_group_by_all.sql (new file, 35 lines)
@ -0,0 +1,35 @@
|
||||
DROP TABLE IF EXISTS group_by_all;
|
||||
|
||||
CREATE TABLE group_by_all
|
||||
(
|
||||
a String,
|
||||
b int,
|
||||
c int
|
||||
)
|
||||
engine = Memory;
|
||||
|
||||
insert into group_by_all values ('abc1', 1, 1), ('abc2', 1, 1), ('abc3', 1, 1), ('abc4', 1, 1);
|
||||
|
||||
select a, count(b) from group_by_all group by all order by a;
|
||||
select substring(a, 1, 3), count(b) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;
|
||||
select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all;
|
||||
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
select a, count(b) from group_by_all group by all order by a;
|
||||
select substring(a, 1, 3), count(b) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, 1, 2), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, 1, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c + 1, 2), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(a, c + 1, 2), c, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(substring(substring(a, c, count(b)), 1, count(b)), 1, count(b)) from group_by_all group by all;
|
||||
select substring(a, 1, 3), substring(a, 1, count(b)) from group_by_all group by all;
|
||||
select count(b) AS len, substring(a, 1, 3), substring(a, 1, len) from group_by_all group by all;
|
@ -27,7 +27,7 @@ with client(name="client>", log=log) as client1:
|
||||
)
|
||||
client1.expect(prompt)
|
||||
client1.send(f"INSERT INTO test.infile_progress FROM INFILE '{filename}'")
|
||||
client1.expect("Progress: 0.00 rows, 10.00 B.*\)")
|
||||
client1.expect("Progress: 5.00 rows, 30.00 B.*\)")
|
||||
client1.expect(prompt)
|
||||
|
||||
# send Ctrl-C
|
||||
|
@ -23,3 +23,8 @@
|
||||
1M without 0 1048576
|
||||
1M with 0 1048575
|
||||
fuzz2 0123 4
|
||||
1 0
|
||||
2 \0 1
|
||||
3 \0\0\0\0 4
|
||||
4 abrac\0dabra\0 12
|
||||
abrac\0dabra\0 12
|
||||
|
@ -90,11 +90,12 @@ SELECT '-1', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('ffffffff'
|
||||
SELECT '-2', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('fffffffe') || randomString(100500), 'AggregateFunction(max, String)') as x);
|
||||
SELECT '-2^31', maxMerge(x), length(maxMerge(x)) from (select CAST(unhex('00000080') || randomString(100500), 'AggregateFunction(max, String)') as x);
|
||||
|
||||
SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
|
||||
SELECT '2^31-1', maxMerge(x) from (select CAST(unhex('ffffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
|
||||
|
||||
SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
|
||||
SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError TOO_LARGE_STRING_SIZE }
|
||||
SELECT '2^31-2', maxMerge(x) from (select CAST(unhex('feffff7f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
|
||||
|
||||
SELECT '2^30', maxMerge(x) from (select CAST(unhex('00000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
|
||||
SELECT '2^30+1', maxMerge(x) from (select CAST(unhex('01000040') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
|
||||
|
||||
SELECT '2^30-1', maxMerge(x) from (select CAST(unhex('ffffff3f') || randomString(100500), 'AggregateFunction(max, String)') as x); -- { serverError CANNOT_READ_ALL_DATA }
|
||||
-- The following query works, but it's too long and consumes too much memory
|
||||
@ -107,3 +108,14 @@ SELECT 'fuzz2', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '01'
|
||||
SELECT 'fuzz3', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00' || 'ffffffffffffffff'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA }
|
||||
SELECT 'fuzz4', finalizeAggregation(CAST(unhex('04000000' || '30313233' || '00'), 'AggregateFunction(argMax, String, UInt64)')) as x, length(x); -- { serverError CORRUPTED_DATA }
|
||||
SELECT 'fuzz5', finalizeAggregation(CAST(unhex('0100000000000000000FFFFFFFF0'), 'AggregateFunction(argMax, UInt64, String)')); -- { serverError CORRUPTED_DATA }
|
||||
|
||||
|
||||
drop table if exists aggr;
|
||||
create table aggr (n int, s AggregateFunction(max, String)) engine=MergeTree order by n;
|
||||
insert into aggr select 1, maxState('');
|
||||
insert into aggr select 2, maxState('\0');
|
||||
insert into aggr select 3, maxState('\0\0\0\0');
|
||||
insert into aggr select 4, maxState('abrac\0dabra\0');
|
||||
select n, maxMerge(s) as x, length(x) from aggr group by n order by n;
|
||||
select maxMerge(s) as x, length(x) from aggr;
|
||||
drop table aggr;
|
||||
|
@ -0,0 +1 @@
|
||||
String Value_1
|
@ -0,0 +1,36 @@
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
DROP TABLE IF EXISTS test_table_join_1;
|
||||
CREATE TABLE test_table_join_1
|
||||
(
|
||||
id UInt8,
|
||||
value String
|
||||
)
|
||||
ENGINE = TinyLog;
|
||||
|
||||
INSERT INTO test_table_join_1 VALUES (0, 'Value_0');
|
||||
|
||||
DROP TABLE IF EXISTS test_table_join_2;
|
||||
CREATE TABLE test_table_join_2
|
||||
(
|
||||
id UInt16,
|
||||
value String
|
||||
)
|
||||
ENGINE = TinyLog;
|
||||
|
||||
INSERT INTO test_table_join_2 VALUES (0, 'Value_1');
|
||||
|
||||
SELECT
|
||||
toTypeName(t2_value),
|
||||
t2.value AS t2_value
|
||||
FROM test_table_join_1 AS t1
|
||||
INNER JOIN test_table_join_2 USING (id); -- { serverError 47 };
|
||||
|
||||
SELECT
|
||||
toTypeName(t2_value),
|
||||
t2.value AS t2_value
|
||||
FROM test_table_join_1 AS t1
|
||||
INNER JOIN test_table_join_2 AS t2 USING (id);
|
||||
|
||||
DROP TABLE test_table_join_1;
|
||||
DROP TABLE test_table_join_2;
|
@ -1,6 +1,6 @@
|
||||
-- https://github.com/ClickHouse/ClickHouse/issues/43247
|
||||
SELECT finalizeAggregation(CAST('AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)AggregateFunction(categoricalInformationValue, Nullable(UInt8), UInt8)',
|
||||
'AggregateFunction(min, String)')); -- { serverError 131 }
|
||||
'AggregateFunction(min, String)')); -- { serverError CANNOT_READ_ALL_DATA }
|
||||
|
||||
-- Value from hex(minState('0123456789012345678901234567890123456789012345678901234567890123')). Size 63 + 1 (64)
|
||||
SELECT finalizeAggregation(CAST(unhex('4000000030313233343536373839303132333435363738393031323334353637383930313233343536373839303132333435363738393031323334353637383930313233'),
|
||||
|
@ -1,3 +0,0 @@
|
||||
NewPart NotAMerge
|
||||
MergeParts RegularMerge
|
||||
RemovePart NotAMerge
|
@ -1,16 +0,0 @@
|
||||
create table data_02491 (key Int) engine=MergeTree() order by tuple();
|
||||
insert into data_02491 values (1);
|
||||
optimize table data_02491 final;
|
||||
truncate table data_02491;
|
||||
|
||||
system flush logs;
|
||||
with (select uuid from system.tables where database = currentDatabase() and table = 'data_02491') as table_uuid_
|
||||
select event_type, merge_reason from system.part_log
|
||||
where
|
||||
database = currentDatabase() and
|
||||
table = 'data_02491' and
|
||||
table_uuid = table_uuid_ and
|
||||
table_uuid != toUUIDOrDefault(Null)
|
||||
order by event_time_microseconds;
|
||||
|
||||
drop table data_02491;
|
@ -0,0 +1 @@
|
||||
1
|
tests/queries/0_stateless/02492_clickhouse_local_context_uaf.sh (new executable file, 19 lines)
@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

# This is the regression test for clickhouse-local, which may use an already
# freed context from the suggestion thread on error.

grep_options=(
    -e ^$
    -e 'Cannot create file: /no/such/directory'
    -e 'Cannot load data for command line suggestions:'
    -e 'ClickHouse local version'
)

ASAN_OPTIONS=$ASAN_OPTIONS:exitcode=3 $CLICKHOUSE_LOCAL --history_file /no/such/directory |& grep -v "${grep_options[@]}"
# On a sanitizer error the exit code will be 3 instead of 1.
echo $?
@ -0,0 +1,28 @@
|
||||
0
|
||||
--
|
||||
0
|
||||
1
|
||||
--
|
||||
1
|
||||
2
|
||||
--
|
||||
(1) 0
|
||||
--
|
||||
(0,1) 0
|
||||
--
|
||||
(1,2) 1
|
||||
(1,2) 2
|
||||
--
|
||||
(1) 0
|
||||
--
|
||||
(0,1) 0
|
||||
--
|
||||
(1,2) 1
|
||||
(1,2) 2
|
||||
--
|
||||
('1') 0
|
||||
--
|
||||
('0','1') 0
|
||||
--
|
||||
('1','2') 1
|
||||
('1','2') 2
|
@ -0,0 +1,47 @@
|
||||
SET allow_experimental_analyzer = 1;
|
||||
|
||||
SELECT number FROM numbers(untuple(tuple(1)));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT number FROM numbers(untuple(tuple(0, 2)));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT number FROM numbers(untuple(tuple(1, 2)));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(untuple(value));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(untuple(value));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(1), 'Tuple(value UInt64)') AS value, number FROM numbers(value.*);
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(0, 1), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*);
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple(1, 2), 'Tuple(value_1 UInt64, value_2 UInt64)') AS value, number FROM numbers(value.*);
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple('1'), 'Tuple(value String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple('0', '1'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));
|
||||
|
||||
SELECT '--';
|
||||
|
||||
SELECT cast(tuple('1', '2'), 'Tuple(value_1 String, value_2 String)') AS value, number FROM numbers(value.* APPLY x -> toUInt64(x));
|
@ -1,4 +1,5 @@
|
||||
#!/usr/bin/env bash
|
||||
# Tags: disabled
|
||||
|
||||
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
|
||||
# shellcheck source=../shell_config.sh
|
||||
|
@ -0,0 +1 @@
|
||||
10
|
@ -0,0 +1,11 @@
|
||||
SELECT count()
|
||||
FROM
|
||||
(
|
||||
SELECT DISTINCT
|
||||
Title,
|
||||
SearchPhrase
|
||||
FROM test.hits
|
||||
WHERE (SearchPhrase != '') AND (NOT match(Title, '[а-яА-ЯёЁ]')) AND (NOT match(SearchPhrase, '[а-яА-ЯёЁ]'))
|
||||
LIMIT 1 BY Title
|
||||
LIMIT 10
|
||||
);
|