Merge remote-tracking branch 'origin/master' into HEAD

This commit is contained in:
Alexander Kuzmenkov 2021-04-15 18:19:53 +03:00
commit 3b95b637a5
257 changed files with 3380 additions and 1780 deletions

3
.gitmodules vendored
View File

@ -221,6 +221,9 @@
[submodule "contrib/NuRaft"]
path = contrib/NuRaft
url = https://github.com/ClickHouse-Extras/NuRaft.git
[submodule "contrib/nanodbc"]
path = contrib/nanodbc
url = https://github.com/ClickHouse-Extras/nanodbc.git
[submodule "contrib/datasketches-cpp"]
path = contrib/datasketches-cpp
url = https://github.com/ClickHouse-Extras/datasketches-cpp.git

View File

@ -11,10 +11,13 @@
* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**:
* `ATTACH PART[ITION]` queries may not work during cluster upgrade.
* It's not possible to rollback to older ClickHouse version after executing `ALTER ... ATTACH` query in new version as the old servers would fail to pass the `ATTACH_PART` entry in the replicated log.
* In this version, empty `<remote_url_allow_hosts></remote_url_allow_hosts>` will block all access to remote hosts while in previous versions it did nothing. If you want to keep old behaviour and you have empty `remote_url_allow_hosts` element in configuration file, remove it. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).
#### New Feature
* Extended range of `DateTime64` to support dates from year 1925 to 2283. Improved support of `DateTime` around zero date (`1970-01-01`). [#9404](https://github.com/ClickHouse/ClickHouse/pull/9404) ([alexey-milovidov](https://github.com/alexey-milovidov), [Vasily Nemkov](https://github.com/Enmk)). Not every time and date functions are working for extended range of dates.
* Added support of Kerberos authentication for preconfigured users and HTTP requests (GSS-SPNEGO). [#14995](https://github.com/ClickHouse/ClickHouse/pull/14995) ([Denis Glazachev](https://github.com/traceon)).
* Add `prefer_column_name_to_alias` setting to use original column names instead of aliases. it is needed to be more compatible with common databases' aliasing rules. This is for [#9715](https://github.com/ClickHouse/ClickHouse/issues/9715) and [#9887](https://github.com/ClickHouse/ClickHouse/issues/9887). [#22044](https://github.com/ClickHouse/ClickHouse/pull/22044) ([Amos Bird](https://github.com/amosbird)).
* Added functions `dictGetChildren(dictionary, key)`, `dictGetDescendants(dictionary, key, level)`. Function `dictGetChildren` return all children as an array if indexes. It is a inverse transformation for `dictGetHierarchy`. Function `dictGetDescendants` return all descendants as if `dictGetChildren` was applied `level` times recursively. Zero `level` value is equivalent to infinity. Closes [#14656](https://github.com/ClickHouse/ClickHouse/issues/14656). [#22096](https://github.com/ClickHouse/ClickHouse/pull/22096) ([Maksim Kita](https://github.com/kitaisreal)).
* Added `executable_pool` dictionary source. Close [#14528](https://github.com/ClickHouse/ClickHouse/issues/14528). [#21321](https://github.com/ClickHouse/ClickHouse/pull/21321) ([Maksim Kita](https://github.com/kitaisreal)).
@ -131,7 +134,6 @@
* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)).
* `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)).
* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).
#### Build/Testing/Packaging Improvement

View File

@ -290,6 +290,12 @@ if (COMPILER_GCC OR COMPILER_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsized-deallocation")
endif ()
# falign-functions=32 prevents from random performance regressions with the code change. Thus, providing more stable
# benchmarks.
if (COMPILER_GCC OR COMPILER_CLANG)
set(COMPILER_FLAGS "${COMPILER_FLAGS} -falign-functions=32")
endif ()
# Compiler-specific coverage flags e.g. -fcoverage-mapping for gcc
option(WITH_COVERAGE "Profile the resulting binary/binaries" OFF)
@ -512,6 +518,7 @@ include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nanodbc.cmake)
include (cmake/find/nuraft.cmake)

View File

@ -7,8 +7,7 @@
#include <condition_variable>
#include <common/defines.h>
#include <Common/MoveOrCopyIfThrow.h>
#include <common/MoveOrCopyIfThrow.h>
/** Pool for limited size objects that cannot be used from different threads simultaneously.
* The main use case is to have fixed size of objects that can be reused in difference threads during their lifetime

View File

@ -12,7 +12,8 @@
///
/// NOTE: it should be used with caution.
#define SCOPE_EXIT_MEMORY(...) SCOPE_EXIT( \
MemoryTracker::LockExceptionInThread lock_memory_tracker; \
MemoryTracker::LockExceptionInThread \
lock_memory_tracker(VariableContext::Global); \
__VA_ARGS__; \
)
@ -56,7 +57,8 @@
#define SCOPE_EXIT_MEMORY_SAFE(...) SCOPE_EXIT( \
try \
{ \
MemoryTracker::LockExceptionInThread lock_memory_tracker; \
MemoryTracker::LockExceptionInThread \
lock_memory_tracker(VariableContext::Global); \
__VA_ARGS__; \
} \
catch (...) \

35
cmake/find/nanodbc.cmake Normal file
View File

@ -0,0 +1,35 @@
option(ENABLE_NANODBC "Enalbe nanodbc" ${ENABLE_LIBRARIES})
if (NOT ENABLE_NANODBC)
set (USE_ODBC 0)
return()
endif()
if (NOT ENABLE_ODBC)
set (USE_NANODBC 0)
message (STATUS "Using nanodbc=${USE_NANODBC}")
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/CMakeLists.txt")
message (WARNING "submodule contrib/nanodbc is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal nanodbc library")
set (USE_NANODBC 0)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/unixodbc/include")
message (ERROR "submodule contrib/unixodbc is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal unixodbc needed for nanodbc")
set (USE_NANODBC 0)
return()
endif()
set (USE_NANODBC 1)
set (NANODBC_LIBRARY nanodbc)
set (NANODBC_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/nanodbc/nanodbce")
message (STATUS "Using nanodbc=${USE_NANODBC}: ${NANODBC_INCLUDE_DIR} : ${NANODBC_LIBRARY}")
message (STATUS "Using unixodbc")

View File

@ -96,14 +96,8 @@ if (USE_INTERNAL_ZLIB_LIBRARY)
add_subdirectory (${INTERNAL_ZLIB_NAME})
# We should use same defines when including zlib.h as used when zlib compiled
target_compile_definitions (zlib PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
if (TARGET zlibstatic)
target_compile_definitions (zlibstatic PUBLIC ZLIB_COMPAT WITH_GZFILEOP)
endif ()
if (ARCH_AMD64 OR ARCH_AARCH64)
target_compile_definitions (zlib PUBLIC X86_64 UNALIGNED_OK)
if (TARGET zlibstatic)
target_compile_definitions (zlibstatic PUBLIC X86_64 UNALIGNED_OK)
endif ()
endif ()
endif ()
@ -326,6 +320,10 @@ if (USE_LIBPQXX)
add_subdirectory (libpqxx-cmake)
endif()
if (USE_NANODBC)
add_subdirectory (nanodbc-cmake)
endif()
if (USE_NURAFT)
add_subdirectory(nuraft-cmake)
endif()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit c35819f2c8a378d4ba88cc930c17bc20aeb875eb
Subproject commit d2feb5978b979729a07c3ca76eaa4ab94cef4ceb

1
contrib/nanodbc vendored Submodule

@ -0,0 +1 @@
Subproject commit 9fc459675515d491401727ec67fca38db721f28c

View File

@ -0,0 +1,14 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/nanodbc)
if (NOT TARGET unixodbc)
message(FATAL_ERROR "Configuration error: unixodbc is not a target")
endif()
set (SRCS
${LIBRARY_DIR}/nanodbc/nanodbc.cpp
)
add_library(nanodbc ${SRCS})
target_link_libraries (nanodbc PUBLIC unixodbc)
target_include_directories (nanodbc SYSTEM PUBLIC ${LIBRARY_DIR}/)

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit 6fd1846c8b8f59436fe2dd752d0f316ddbb64df6
Subproject commit b82d3497a5afc46dec3c5d07e4b163b169f251d7

View File

@ -3,4 +3,3 @@ usr/bin/clickhouse-odbc-bridge
usr/bin/clickhouse-library-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions
etc/security/limits.d/clickhouse.conf

View File

@ -1,16 +0,0 @@
#!/bin/sh -e
test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule
db_fget clickhouse-server/default-password seen || true
password_seen="$RET"
if [ "$1" = "reconfigure" ]; then
password_seen=false
fi
if [ "$password_seen" != "true" ]; then
db_input high clickhouse-server/default-password || true
db_go || true
fi
db_go || true

View File

@ -23,11 +23,13 @@ if [ ! -f "/etc/debian_version" ]; then
fi
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
# if old rc.d service present - remove it
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server remove
echo "ClickHouse init script has migrated to systemd. Please manually stop old server and restart the service: sudo killall clickhouse-server && sleep 5 && sudo service clickhouse-server restart"
fi
/bin/systemctl daemon-reload
@ -38,10 +40,8 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
if [ -x "/usr/sbin/update-rc.d" ]; then
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
else
echo # TODO [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]
echo # Other OS
fi
fi
fi
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
fi

View File

@ -1,8 +0,0 @@
#!/bin/sh
if [ "$1" = "upgrade" ]; then
# Return etc/cron.d/clickhouse-server to original state
service clickhouse-server disable_cron ||:
fi
#DEBHELPER#

View File

@ -1,6 +0,0 @@
#!/bin/sh
if [ "$1" = "upgrade" ] || [ "$1" = "remove" ]; then
# Return etc/cron.d/clickhouse-server to original state
service clickhouse-server disable_cron ||:
fi

View File

@ -1,3 +0,0 @@
Template: clickhouse-server/default-password
Type: password
Description: Enter password for default user:

View File

@ -1,2 +0,0 @@
clickhouse soft nofile 262144
clickhouse hard nofile 262144

3
debian/rules vendored
View File

@ -113,9 +113,6 @@ override_dh_install:
ln -sf clickhouse-server.docs debian/clickhouse-client.docs
ln -sf clickhouse-server.docs debian/clickhouse-common-static.docs
mkdir -p $(DESTDIR)/etc/security/limits.d
cp debian/clickhouse.limits $(DESTDIR)/etc/security/limits.d/clickhouse.conf
# systemd compatibility
mkdir -p $(DESTDIR)/etc/systemd/system/
cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/

2
debian/watch vendored
View File

@ -1,6 +1,6 @@
version=4
opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)-stable\.tar\.gz%clickhouse-$1.tar.gz%" \
https://github.com/yandex/clickhouse/tags \
https://github.com/ClickHouse/ClickHouse/tags \
(?:.*?/)?v?(\d[\d.]*)-stable\.tar\.gz debian uupdate

View File

@ -300,6 +300,7 @@ function run_tests
01663_aes_msan # Depends on OpenSSL
01667_aes_args_check # Depends on OpenSSL
01776_decrypt_aead_size_check # Depends on OpenSSL
01811_filter_by_null # Depends on OpenSSL
01281_unsucceeded_insert_select_queries_counter
01292_create_user
01294_lazy_database_concurrent
@ -365,6 +366,9 @@ function run_tests
# JSON functions
01666_blns
# Depends on AWS
01801_s3_cluster
)
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -112,10 +112,13 @@ if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
fi
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:
tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
mv /var/log/clickhouse-server/stderr2.log /test_output/ ||:
tar -chf /test_output/coordination1.tar /var/lib/clickhouse1/coordination ||:
tar -chf /test_output/coordination2.tar /var/lib/clickhouse2/coordination ||:
fi

View File

@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from multiprocessing import cpu_count
from subprocess import Popen, call, STDOUT
from subprocess import Popen, call, check_output, STDOUT
import os
import sys
import shutil
@ -85,10 +85,27 @@ def prepare_for_hung_check():
# Issue #21004, live views are experimental, so let's just suppress it
call("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """, shell=True, stderr=STDOUT)
# Wait for last queries to finish if any, not longer than 120 seconds
# Kill other queries which known to be slow
# It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """, shell=True, stderr=STDOUT)
# Long query from 00084_external_agregation
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """, shell=True, stderr=STDOUT)
# Wait for last queries to finish if any, not longer than 300 seconds
call("""clickhouse client -q "select sleepEachRow((
select maxOrDefault(120 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 120
) / 120) from numbers(120) format Null" """, shell=True, stderr=STDOUT)
select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300
) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT)
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
# which still run some new queries. Let's ignore them.
try:
query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """
output = check_output(query, shell=True, stderr=STDOUT).decode('utf-8').strip()
if int(output) == 0:
return False
except:
pass
return True
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
@ -119,12 +136,12 @@ if __name__ == "__main__":
logging.info("All processes finished")
if args.hung_check:
prepare_for_hung_check()
have_long_running_queries = prepare_for_hung_check()
logging.info("Checking if some queries hung")
cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1")
res = call(cmd, shell=True, stderr=STDOUT)
hung_check_status = "No queries hung\tOK\n"
if res != 0:
if res != 0 and have_long_running_queries:
logging.info("Hung check failed with exit code {}".format(res))
hung_check_status = "Hung check failed\tFAIL\n"
open(os.path.join(args.output_folder, "test_results.tsv"), 'w+').write(hung_check_status)

View File

@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
Table in ClickHouse, retrieving data from the PostgreSQL table created above:

View File

@ -33,7 +33,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res);
``` text
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
 │ AggregateFunction(groupBitmap, UInt8) │
│ │ AggregateFunction(groupBitmap, UInt8)
└─────┴──────────────────────────────────────────────┘
```

View File

@ -437,13 +437,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) data type has
**Example**
``` sql
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type;
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
```
``` text
┌─MurmurHash3──────┬─type────────────┐
6<EFBFBD>1<1C>4"S5KT<4B>~~q │ FixedString(16)
└──────────────────┴─────────────────┘
┌─MurmurHash3──────────────────────┬─type───┐
368A1A311CB7342253354B548E7E7E71 │ String
└──────────────────────────────────┴────────┘
```
## xxHash32, xxHash64 {#hash-functions-xxhash32}

View File

@ -65,9 +65,9 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```

View File

@ -35,7 +35,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res)
``` text
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
 │ AggregateFunction(groupBitmap, UInt8) │
│ │ AggregateFunction(groupBitmap, UInt8)
└─────┴──────────────────────────────────────────────┘
```

View File

@ -434,13 +434,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) データ型
**例**
``` sql
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
```
``` text
┌─MurmurHash3──────┬─type────────────┐
6<EFBFBD>1<1C>4"S5KT<4B>~~q │ FixedString(16)
└──────────────────┴─────────────────┘
┌─MurmurHash3──────────────────────┬─type───┐
368A1A311CB7342253354B548E7E7E71 │ String
└──────────────────────────────────┴────────┘
```
## xxHash32,xxHash64 {#hash-functions-xxhash32}

View File

@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
Таблица в ClickHouse, получение данных из PostgreSQL таблицы, созданной выше:

View File

@ -15,10 +15,12 @@
- [`groupBitOr`](../../sql-reference/aggregate-functions/reference/groupbitor.md#groupbitor)
- [`groupBitXor`](../../sql-reference/aggregate-functions/reference/groupbitxor.md#groupbitxor)
- [`groupArrayArray`](../../sql-reference/aggregate-functions/reference/grouparray.md#agg_function-grouparray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md#groupuniqarray)
- [`groupUniqArrayArray`](../../sql-reference/aggregate-functions/reference/groupuniqarray.md)
- [`sumMap`](../../sql-reference/aggregate-functions/reference/summap.md#agg_functions-summap)
- [`minMap`](../../sql-reference/aggregate-functions/reference/minmap.md#agg_functions-minmap)
- [`maxMap`](../../sql-reference/aggregate-functions/reference/maxmap.md#agg_functions-maxmap)
- [`argMin`](../../sql-reference/aggregate-functions/reference/argmin.md)
- [`argMax`](../../sql-reference/aggregate-functions/reference/argmax.md)
!!! note "Примечание"
Значения `SimpleAggregateFunction(func, Type)` отображаются и хранятся так же, как и `Type`, поэтому комбинаторы [-Merge](../../sql-reference/aggregate-functions/combinators.md#aggregate_functions_combinators-merge) и [-State](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-state) не требуются.

View File

@ -25,7 +25,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res);
``` text
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
 │ AggregateFunction(groupBitmap, UInt8) │
│ │ AggregateFunction(groupBitmap, UInt8)
└─────┴──────────────────────────────────────────────┘
```

View File

@ -430,7 +430,7 @@ murmurHash3_128( expr )
**Аргументы**
- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа[String](../../sql-reference/functions/hash-functions.md).
- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql-reference/functions/hash-functions.md).
**Возвращаемое значение**
@ -439,13 +439,13 @@ murmurHash3_128( expr )
**Пример**
``` sql
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type;
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
```
``` text
┌─MurmurHash3──────┬─type────────────┐
6<EFBFBD>1<1C>4"S5KT<4B>~~q │ FixedString(16)
└──────────────────┴─────────────────┘
┌─MurmurHash3──────────────────────┬─type───┐
368A1A311CB7342253354B548E7E7E71 │ String
└──────────────────────────────────┴────────┘
```
## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64}

View File

@ -65,10 +65,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
INSERT 0 1
postgresql> SELECT * FROM test;
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
int_id | int_nullable | float | str | float_nullable
--------+--------------+-------+------+----------------
1 | | 2 | test |
(1 row)
```
Получение данных в ClickHouse:

View File

@ -109,7 +109,8 @@ def build_single_page_version(lang, args, nav, cfg):
extra['single_page'] = True
extra['is_amp'] = False
with open(os.path.join(args.docs_dir, lang, 'single.md'), 'w') as single_md:
single_md_path = os.path.join(args.docs_dir, lang, 'single.md')
with open(single_md_path, 'w') as single_md:
concatenate(lang, args.docs_dir, single_md, nav)
with util.temp_dir() as site_temp:
@ -221,3 +222,7 @@ def build_single_page_version(lang, args, nav, cfg):
subprocess.check_call(' '.join(create_pdf_command), shell=True)
logging.info(f'Finished building single page version for {lang}')
if os.path.exists(single_md_path):
os.unlink(single_md_path)

View File

@ -108,14 +108,6 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
" UNION ALL "
"SELECT cluster FROM system.clusters"
" UNION ALL "
"SELECT name FROM system.errors"
" UNION ALL "
"SELECT event FROM system.events"
" UNION ALL "
"SELECT metric FROM system.asynchronous_metrics"
" UNION ALL "
"SELECT metric FROM system.metrics"
" UNION ALL "
"SELECT macro FROM system.macros"
" UNION ALL "
"SELECT policy_name FROM system.storage_policies"
@ -139,17 +131,12 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
query << ") WHERE notEmpty(res)";
Settings settings;
/// To show all rows from:
/// - system.errors
/// - system.events
settings.system_events_show_zero_values = true;
fetch(connection, timeouts, query.str(), settings);
fetch(connection, timeouts, query.str());
}
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings)
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query)
{
connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings);
connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete);
while (true)
{

View File

@ -33,7 +33,7 @@ public:
private:
void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit);
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings);
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query);
void fillWordsFromBlock(const Block & block);
/// Words are fetched asynchronously.

View File

@ -71,6 +71,9 @@ namespace ErrorCodes
}
/// ANSI escape sequence for intense color in terminal.
#define HILITE "\033[1m"
#define END_HILITE "\033[0m"
using namespace DB;
namespace po = boost::program_options;
@ -559,20 +562,32 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
bool stdin_is_a_tty = isatty(STDIN_FILENO);
bool stdout_is_a_tty = isatty(STDOUT_FILENO);
bool is_interactive = stdin_is_a_tty && stdout_is_a_tty;
/// dpkg or apt installers can ask for non-interactive work explicitly.
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND");
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;
/// We can ask password even if stdin is closed/redirected but /dev/tty is available.
bool can_ask_password = !noninteractive && stdout_is_a_tty;
if (has_password_for_default_user)
{
fmt::print("Password for default user is already specified. To remind or reset, see {} and {}.\n",
fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE "\n",
users_config_file.string(), users_d.string());
}
else if (!is_interactive)
else if (!can_ask_password)
{
fmt::print("Password for default user is empty string. See {} and {} to change it.\n",
fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE "\n",
users_config_file.string(), users_d.string());
}
else
{
/// NOTE: When installing debian package with dpkg -i, stdin is not a terminal but we are still being able to enter password.
/// More sophisticated method with /dev/tty is used inside the `readpassphrase` function.
char buf[1000] = {};
std::string password;
if (auto * result = readpassphrase("Enter password for default user: ", buf, sizeof(buf), 0))
@ -600,7 +615,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</yandex>\n";
out.sync();
out.finalize();
fmt::print("Password for default user is saved in file {}.\n", password_file);
fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file);
#else
out << "<yandex>\n"
" <users>\n"
@ -611,12 +626,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
"</yandex>\n";
out.sync();
out.finalize();
fmt::print("Password for default user is saved in plaintext in file {}.\n", password_file);
fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file);
#endif
has_password_for_default_user = true;
}
else
fmt::print("Password for default user is empty string. See {} and {} to change it.\n",
fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE "\n",
users_config_file.string(), users_d.string());
}
@ -641,7 +656,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
" This is optional. Taskstats accounting will be disabled."
" To enable taskstats accounting you may add the required capability later manually.\"",
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
fmt::print(" {}\n", command);
executeScript(command);
#endif

View File

@ -1,6 +1,6 @@
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
library-bridge.cpp
library-log.cpp
LibraryInterface.cpp
LibraryBridge.cpp
Handlers.cpp
HandlerFactory.cpp
@ -17,7 +17,6 @@ add_executable(clickhouse-library-bridge ${CLICKHOUSE_LIBRARY_BRIDGE_SOURCES})
target_link_libraries(clickhouse-library-bridge PRIVATE
daemon
dbms
clickhouse_parsers
bridge
)

View File

@ -1,4 +1,5 @@
#include "LibraryDictionarySourceExternal.h"
#include "LibraryInterface.h"
#include <common/logger_useful.h>
namespace

View File

@ -1,11 +1,12 @@
#pragma once
#include <Common/StringUtils/StringUtils.h>
#include <Dictionaries/LibraryDictionarySourceExternal.h>
#include <Core/Block.h>
#include <ext/bit_cast.h>
#include <ext/range.h>
#include "LibraryInterface.h"
namespace DB
{

View File

@ -1,66 +0,0 @@
#include <Dictionaries/LibraryDictionarySourceExternal.h>
#include <common/logger_useful.h>
namespace
{
const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal";
}
namespace ClickHouseLibrary
{
std::string_view LIBRARY_CREATE_NEW_FUNC_NAME = "ClickHouseDictionary_v3_libNew";
std::string_view LIBRARY_CLONE_FUNC_NAME = "ClickHouseDictionary_v3_libClone";
std::string_view LIBRARY_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_libDelete";
std::string_view LIBRARY_DATA_NEW_FUNC_NAME = "ClickHouseDictionary_v3_dataNew";
std::string_view LIBRARY_DATA_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_dataDelete";
std::string_view LIBRARY_LOAD_ALL_FUNC_NAME = "ClickHouseDictionary_v3_loadAll";
std::string_view LIBRARY_LOAD_IDS_FUNC_NAME = "ClickHouseDictionary_v3_loadIds";
std::string_view LIBRARY_LOAD_KEYS_FUNC_NAME = "ClickHouseDictionary_v3_loadKeys";
std::string_view LIBRARY_IS_MODIFIED_FUNC_NAME = "ClickHouseDictionary_v3_isModified";
std::string_view LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME = "ClickHouseDictionary_v3_supportsSelectiveLoad";
void log(LogLevel level, CString msg)
{
auto & logger = Poco::Logger::get(DICT_LOGGER_NAME);
switch (level)
{
case LogLevel::TRACE:
if (logger.trace())
logger.trace(msg);
break;
case LogLevel::DEBUG:
if (logger.debug())
logger.debug(msg);
break;
case LogLevel::INFORMATION:
if (logger.information())
logger.information(msg);
break;
case LogLevel::NOTICE:
if (logger.notice())
logger.notice(msg);
break;
case LogLevel::WARNING:
if (logger.warning())
logger.warning(msg);
break;
case LogLevel::ERROR:
if (logger.error())
logger.error(msg);
break;
case LogLevel::CRITICAL:
if (logger.critical())
logger.critical(msg);
break;
case LogLevel::FATAL:
if (logger.fatal())
logger.fatal(msg);
break;
}
}
}

View File

@ -26,11 +26,12 @@ target_link_libraries(clickhouse-odbc-bridge PRIVATE
dbms
bridge
clickhouse_parsers
Poco::Data
Poco::Data::ODBC
nanodbc
unixodbc
)
set_target_properties(clickhouse-odbc-bridge PROPERTIES RUNTIME_OUTPUT_DIRECTORY ..)
target_compile_options (clickhouse-odbc-bridge PRIVATE -Wno-reserved-id-macro -Wno-keyword-macro)
if (USE_GDB_ADD_INDEX)
add_custom_command(TARGET clickhouse-odbc-bridge POST_BUILD COMMAND ${GDB_ADD_INDEX_EXE} ../clickhouse-odbc-bridge COMMENT "Adding .gdb-index to clickhouse-odbc-bridge" VERBATIM)

View File

@ -2,29 +2,36 @@
#if USE_ODBC
# include <DataTypes/DataTypeFactory.h>
# include <DataTypes/DataTypeNullable.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Parsers/ParserQueryWithOutput.h>
# include <Parsers/parseQuery.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Server/HTTP/HTMLForm.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <Poco/NumberParser.h>
# include <common/logger_useful.h>
# include <Common/quoteString.h>
# include <ext/scope_guard.h>
# include "getIdentifierQuote.h"
# include "validateODBCConnectionString.h"
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeNullable.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Server/HTTP/HTMLForm.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <Poco/NumberParser.h>
#include <common/logger_useful.h>
#include <Common/quoteString.h>
#include <ext/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
#include <sql.h>
#include <sqlext.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int BAD_ARGUMENTS;
}
namespace
{
DataTypePtr getDataType(SQLSMALLINT type)
@ -59,6 +66,7 @@ namespace
}
}
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request, request.getStream());
@ -77,88 +85,79 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
process_error("No 'table' param in request URL");
return;
}
if (!params.has("connection_string"))
{
process_error("No 'connection_string' in request URL");
return;
}
std::string schema_name;
std::string table_name = params.get("table");
std::string connection_string = params.get("connection_string");
if (params.has("schema"))
{
schema_name = params.get("schema");
LOG_TRACE(log, "Will fetch info for table '{}'", schema_name + "." + table_name);
}
else
LOG_TRACE(log, "Will fetch info for table '{}'", table_name);
LOG_TRACE(log, "Got connection str '{}'", connection_string);
try
{
const bool external_table_functions_use_nulls = Poco::NumberParser::parseBool(params.get("external_table_functions_use_nulls", "false"));
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
SQLHSTMT hstmt = nullptr;
nanodbc::catalog catalog(*connection);
std::string catalog_name;
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLAllocStmt(hdbc, &hstmt)))
throw POCO_SQL_ODBC_CLASS::ODBCException("Could not allocate connection handle.");
SCOPE_EXIT(SQLFreeStmt(hstmt, SQL_DROP));
const auto & context_settings = getContext()->getSettingsRef();
/// TODO Why not do SQLColumns instead?
std::string name = schema_name.empty() ? backQuoteIfNeed(table_name) : backQuoteIfNeed(schema_name) + "." + backQuoteIfNeed(table_name);
WriteBufferFromOwnString buf;
std::string input = "SELECT * FROM " + name + " WHERE 1 = 0";
ParserQueryWithOutput parser(input.data() + input.size());
ASTPtr select = parseQuery(parser, input.data(), input.data() + input.size(), "", context_settings.max_query_size, context_settings.max_parser_depth);
IAST::FormatSettings settings(buf, true);
settings.always_quote_identifiers = true;
settings.identifier_quoting_style = getQuotingStyle(hdbc);
select->format(settings);
std::string query = buf.str();
LOG_TRACE(log, "Inferring structure with query '{}'", query);
if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLPrepare(hstmt, reinterpret_cast<SQLCHAR *>(query.data()), query.size())))
throw POCO_SQL_ODBC_CLASS::DescriptorException(session.dbc());
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLExecute(hstmt)))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
SQLSMALLINT cols = 0;
if (POCO_SQL_ODBC_CLASS::Utility::isError(SQLNumResultCols(hstmt, &cols)))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
/// TODO cols not checked
NamesAndTypesList columns;
for (SQLSMALLINT ncol = 1; ncol <= cols; ++ncol)
/// In XDBC tables it is allowed to pass either database_name or schema_name in table definion, but not both of them.
/// They both are passed as 'schema' parameter in request URL, so it is not clear whether it is database_name or schema_name passed.
/// If it is schema_name then we know that database is added in odbc.ini. But if we have database_name as 'schema',
/// it is not guaranteed. For nanodbc database_name must be either in odbc.ini or passed as catalog_name.
auto get_columns = [&]()
{
SQLSMALLINT type = 0;
/// TODO Why 301?
SQLCHAR column_name[301];
SQLSMALLINT is_nullable;
const auto result = POCO_SQL_ODBC_CLASS::SQLDescribeCol(hstmt, ncol, column_name, sizeof(column_name), nullptr, &type, nullptr, nullptr, &is_nullable);
if (POCO_SQL_ODBC_CLASS::Utility::isError(result))
throw POCO_SQL_ODBC_CLASS::StatementException(hstmt);
auto column_type = getDataType(type);
if (external_table_functions_use_nulls && is_nullable == SQL_NULLABLE)
nanodbc::catalog::tables tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ "", /* catalog = */ schema_name);
if (tables.next())
{
column_type = std::make_shared<DataTypeNullable>(column_type);
catalog_name = tables.table_catalog();
LOG_TRACE(log, "Will fetch info for table '{}.{}'", catalog_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, /* schema = */ "", catalog_name);
}
columns.emplace_back(reinterpret_cast<char *>(column_name), std::move(column_type));
tables = catalog.find_tables(table_name, /* type = */ "", /* schema = */ schema_name);
if (tables.next())
{
catalog_name = tables.table_catalog();
LOG_TRACE(log, "Will fetch info for table '{}.{}.{}'", catalog_name, schema_name, table_name);
return catalog.find_columns(/* column = */ "", table_name, schema_name, catalog_name);
}
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Table {} not found", schema_name.empty() ? table_name : schema_name + '.' + table_name);
};
nanodbc::catalog::columns columns_definition = get_columns();
NamesAndTypesList columns;
while (columns_definition.next())
{
SQLSMALLINT type = columns_definition.sql_data_type();
std::string column_name = columns_definition.column_name();
bool is_nullable = columns_definition.nullable() == SQL_NULLABLE;
auto column_type = getDataType(type);
if (external_table_functions_use_nulls && is_nullable == SQL_NULLABLE)
column_type = std::make_shared<DataTypeNullable>(column_type);
columns.emplace_back(column_name, std::move(column_type));
}
if (columns.empty())
throw Exception("Columns definition was not returned", ErrorCodes::LOGICAL_ERROR);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try
{

View File

@ -2,16 +2,13 @@
#if USE_ODBC
# include <Interpreters/Context_fwd.h>
# include <Server/HTTP/HTTPRequestHandler.h>
# include <Common/config.h>
#include <Interpreters/Context_fwd.h>
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Common/config.h>
#include <Poco/Logger.h>
# include <Poco/Logger.h>
/** The structure of the table is taken from the query "SELECT * FROM table WHERE 1=0".
* TODO: It would be much better to utilize ODBC methods dedicated for columns description.
* If there is no such table, an exception is thrown.
*/
namespace DB
{
@ -19,7 +16,9 @@ class ODBCColumnsInfoHandler : public HTTPRequestHandler, WithContext
{
public:
ODBCColumnsInfoHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_), log(&Poco::Logger::get("ODBCColumnsInfoHandler")), keep_alive_timeout(keep_alive_timeout_)
: WithContext(context_)
, log(&Poco::Logger::get("ODBCColumnsInfoHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -38,9 +38,9 @@ std::unique_ptr<HTTPRequestHandler> ODBCBridgeHandlerFactory::createRequestHandl
return nullptr;
#endif
else if (uri.getPath() == "/write")
return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, getContext(), "write");
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "write");
else
return std::make_unique<ODBCHandler>(pool_map, keep_alive_timeout, getContext(), "read");
return std::make_unique<ODBCHandler>(keep_alive_timeout, getContext(), "read");
}
return nullptr;
}

View File

@ -6,14 +6,8 @@
#include "IdentifierQuoteHandler.h"
#include "MainHandler.h"
#include "SchemaAllowedHandler.h"
#include <Poco/Logger.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <Poco/Data/SessionPool.h>
#pragma GCC diagnostic pop
namespace DB
{
@ -24,9 +18,11 @@ class ODBCBridgeHandlerFactory : public HTTPRequestHandlerFactory, WithContext
{
public:
ODBCBridgeHandlerFactory(const std::string & name_, size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_), log(&Poco::Logger::get(name_)), name(name_), keep_alive_timeout(keep_alive_timeout_)
: WithContext(context_)
, log(&Poco::Logger::get(name_))
, name(name_)
, keep_alive_timeout(keep_alive_timeout_)
{
pool_map = std::make_shared<ODBCHandler::PoolMap>();
}
std::unique_ptr<HTTPRequestHandler> createRequestHandler(const HTTPServerRequest & request) override;
@ -35,7 +31,6 @@ private:
Poco::Logger * log;
std::string name;
size_t keep_alive_timeout;
std::shared_ptr<ODBCHandler::PoolMap> pool_map;
};
}

View File

@ -2,23 +2,20 @@
#if USE_ODBC
# include <DataTypes/DataTypeFactory.h>
# include <Server/HTTP/HTMLForm.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Parsers/ParserQueryWithOutput.h>
# include <Parsers/parseQuery.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <common/logger_useful.h>
# include <ext/scope_guard.h>
# include "getIdentifierQuote.h"
# include "validateODBCConnectionString.h"
#include <DataTypes/DataTypeFactory.h>
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <common/logger_useful.h>
#include <ext/scope_guard.h>
#include "getIdentifierQuote.h"
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
@ -44,10 +41,12 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
try
{
std::string connection_string = params.get("connection_string");
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
auto identifier = getIdentifierQuote(hdbc);
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
auto identifier = getIdentifierQuote(*connection);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try

View File

@ -11,11 +11,13 @@
namespace DB
{
class IdentifierQuoteHandler : public HTTPRequestHandler
class IdentifierQuoteHandler : public HTTPRequestHandler, WithContext
{
public:
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr)
: log(&Poco::Logger::get("IdentifierQuoteHandler")), keep_alive_timeout(keep_alive_timeout_)
IdentifierQuoteHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("IdentifierQuoteHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -18,18 +18,17 @@
#include <Processors/Formats/InputStreamFromInputFormat.h>
#include <common/logger_useful.h>
#include <Server/HTTP/HTMLForm.h>
#include "ODBCConnectionFactory.h"
#include <mutex>
#include <memory>
#include <nanodbc/nanodbc.h>
#if USE_ODBC
#include <Poco/Data/ODBC/SessionImpl.h>
#define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
#endif
namespace DB
{
namespace
{
std::unique_ptr<Block> parseColumns(std::string && column_string)
@ -42,37 +41,6 @@ namespace
}
}
using PocoSessionPoolConstructor = std::function<std::shared_ptr<Poco::Data::SessionPool>()>;
/** Is used to adjust max size of default Poco thread pool. See issue #750
* Acquire the lock, resize pool and construct new Session.
*/
static std::shared_ptr<Poco::Data::SessionPool> createAndCheckResizePocoSessionPool(PocoSessionPoolConstructor pool_constr)
{
static std::mutex mutex;
Poco::ThreadPool & pool = Poco::ThreadPool::defaultPool();
/// NOTE: The lock don't guarantee that external users of the pool don't change its capacity
std::unique_lock lock(mutex);
if (pool.available() == 0)
pool.addCapacity(2 * std::max(pool.capacity(), 1));
return pool_constr();
}
ODBCHandler::PoolPtr ODBCHandler::getPool(const std::string & connection_str)
{
std::lock_guard lock(mutex);
if (!pool_map->count(connection_str))
{
pool_map->emplace(connection_str, createAndCheckResizePocoSessionPool([connection_str]
{
return std::make_shared<Poco::Data::SessionPool>("ODBC", validateODBCConnectionString(connection_str));
}));
}
return pool_map->at(connection_str);
}
void ODBCHandler::processError(HTTPServerResponse & response, const std::string & message)
{
@ -82,6 +50,7 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
LOG_WARNING(log, message);
}
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
{
HTMLForm params(request);
@ -141,6 +110,10 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
try
{
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
if (mode == "write")
{
if (!params.has("db_name"))
@ -159,15 +132,12 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
auto quoting_style = IdentifierQuotingStyle::None;
#if USE_ODBC
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
quoting_style = getQuotingStyle(session.dbc().handle());
quoting_style = getQuotingStyle(*connection);
#endif
auto pool = getPool(connection_string);
auto & read_buf = request.getStream();
auto input_format = FormatFactory::instance().getInput(format, read_buf, *sample_block, getContext(), max_block_size);
auto input_stream = std::make_shared<InputStreamFromInputFormat>(input_format);
ODBCBlockOutputStream output_stream(pool->get(), db_name, table_name, *sample_block, quoting_style);
ODBCBlockOutputStream output_stream(*connection, db_name, table_name, *sample_block, getContext(), quoting_style);
copyData(*input_stream, output_stream);
writeStringBinary("Ok.", out);
}
@ -176,10 +146,8 @@ void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse
std::string query = params.get("query");
LOG_TRACE(log, "Query: {}", query);
BlockOutputStreamPtr writer
= FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext());
auto pool = getPool(connection_string);
ODBCBlockInputStream inp(pool->get(), query, *sample_block, max_block_size);
BlockOutputStreamPtr writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, out, *sample_block, getContext());
ODBCBlockInputStream inp(*connection, query, *sample_block, max_block_size);
copyData(inp, *writer);
}
}

View File

@ -2,13 +2,8 @@
#include <Interpreters/Context_fwd.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Poco/Logger.h>
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#include <Poco/Data/SessionPool.h>
#pragma GCC diagnostic pop
#include <mutex>
#include <unordered_map>
@ -24,16 +19,12 @@ namespace DB
class ODBCHandler : public HTTPRequestHandler, WithContext
{
public:
using PoolPtr = std::shared_ptr<Poco::Data::SessionPool>;
using PoolMap = std::unordered_map<std::string, PoolPtr>;
ODBCHandler(std::shared_ptr<PoolMap> pool_map_,
ODBCHandler(
size_t keep_alive_timeout_,
ContextPtr context_,
const String & mode_)
: WithContext(context_)
, log(&Poco::Logger::get("ODBCHandler"))
, pool_map(pool_map_)
, keep_alive_timeout(keep_alive_timeout_)
, mode(mode_)
{
@ -44,13 +35,11 @@ public:
private:
Poco::Logger * log;
std::shared_ptr<PoolMap> pool_map;
size_t keep_alive_timeout;
String mode;
static inline std::mutex mutex;
PoolPtr getPool(const std::string & connection_str);
void processError(HTTPServerResponse & response, const std::string & message);
};

View File

@ -1,5 +1,7 @@
#include "ODBCBlockInputStream.h"
#include <vector>
#include <IO/ReadBufferFromString.h>
#include <DataTypes/DataTypeNullable.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsNumber.h>
@ -14,137 +16,143 @@ namespace DB
{
namespace ErrorCodes
{
extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
extern const int UNKNOWN_TYPE;
}
ODBCBlockInputStream::ODBCBlockInputStream(
Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_)
: session{session_}
, statement{(this->session << query_str, Poco::Data::Keywords::now)}
, result{statement}
, iterator{result.begin()}
nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_)
: log(&Poco::Logger::get("ODBCBlockInputStream"))
, max_block_size{max_block_size_}
, log(&Poco::Logger::get("ODBCBlockInputStream"))
, connection(connection_)
, query(query_str)
{
if (sample_block.columns() != result.columnCount())
throw Exception{"RecordSet contains " + toString(result.columnCount()) + " columns while " + toString(sample_block.columns())
+ " expected",
ErrorCodes::NUMBER_OF_COLUMNS_DOESNT_MATCH};
description.init(sample_block);
}
namespace
{
using ValueType = ExternalResultDescription::ValueType;
void insertValue(IColumn & column, const ValueType type, const Poco::Dynamic::Var & value)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(value.convert<UInt64>());
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(value.convert<Int64>());
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(value.convert<Float64>());
break;
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(value.convert<String>());
break;
case ValueType::vtDate:
{
Poco::DateTime date = value.convert<Poco::DateTime>();
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate(date.year(), date.month(), date.day()).getDayNum()});
break;
}
case ValueType::vtDateTime:
{
Poco::DateTime datetime = value.convert<Poco::DateTime>();
assert_cast<ColumnUInt32 &>(column).insertValue(DateLUT::instance().makeDateTime(
datetime.year(), datetime.month(), datetime.day(), datetime.hour(), datetime.minute(), datetime.second()));
break;
}
case ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.convert<std::string>()));
break;
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
}
void insertDefaultValue(IColumn & column, const IColumn & sample_column) { column.insertFrom(sample_column, 0); }
result = execute(connection, NANODBC_TEXT(query));
}
Block ODBCBlockInputStream::readImpl()
{
if (iterator == result.end())
return {};
MutableColumns columns(description.sample_block.columns());
for (const auto i : ext::range(0, columns.size()))
columns[i] = description.sample_block.getByPosition(i).column->cloneEmpty();
if (finished)
return Block();
MutableColumns columns(description.sample_block.cloneEmptyColumns());
size_t num_rows = 0;
while (iterator != result.end())
while (true)
{
Poco::Data::Row & row = *iterator;
for (const auto idx : ext::range(0, row.fieldCount()))
if (!result.next())
{
/// TODO This is extremely slow.
const Poco::Dynamic::Var & value = row[idx];
finished = true;
break;
}
if (!value.isEmpty())
for (int idx = 0; idx < result.columns(); ++idx)
{
const auto & sample = description.sample_block.getByPosition(idx);
if (!result.is_null(idx))
{
if (description.types[idx].second)
bool is_nullable = description.types[idx].second;
if (is_nullable)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
insertValue(column_nullable.getNestedColumn(), description.types[idx].first, value);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), data_type.getNestedType(), description.types[idx].first, result, idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
insertValue(*columns[idx], description.types[idx].first, value);
{
insertValue(*columns[idx], sample.type, description.types[idx].first, result, idx);
}
}
else
insertDefaultValue(*columns[idx], *description.sample_block.getByPosition(idx).column);
insertDefaultValue(*columns[idx], *sample.column);
}
++iterator;
++num_rows;
if (num_rows == max_block_size)
if (++num_rows == max_block_size)
break;
}
return description.sample_block.cloneWithColumns(std::move(columns));
}
void ODBCBlockInputStream::insertValue(
IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx)
{
switch (type)
{
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(row.get<uint16_t>(idx));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(row.get<uint32_t>(idx));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(row.get<uint64_t>(idx));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(row.get<int16_t>(idx));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(row.get<int16_t>(idx));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(row.get<int32_t>(idx));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(row.get<int64_t>(idx));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(row.get<float>(idx));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(row.get<double>(idx));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtString:
assert_cast<ColumnString &>(column).insert(row.get<std::string>(idx));
break;
case ValueType::vtUUID:
{
auto value = row.get<std::string>(idx);
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
}
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{row.get<std::string>(idx)}.getDayNum()});
break;
case ValueType::vtDateTime:
{
auto value = row.get<std::string>(idx);
ReadBufferFromString in(value);
time_t time = 0;
readDateTimeText(time, in);
if (time < 0)
time = 0;
assert_cast<ColumnUInt32 &>(column).insertValue(time);
break;
}
case ValueType::vtDateTime64:[[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal256:
{
auto value = row.get<std::string>(idx);
ReadBufferFromString istr(value);
data_type->getDefaultSerialization()->deserializeWholeText(column, istr, FormatSettings{});
break;
}
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
}
}

View File

@ -3,10 +3,8 @@
#include <string>
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Poco/Data/RecordSet.h>
#include <Poco/Data/Session.h>
#include <Poco/Data/Statement.h>
#include <Core/ExternalResultDescription.h>
#include <nanodbc/nanodbc.h>
namespace DB
@ -15,25 +13,33 @@ namespace DB
class ODBCBlockInputStream final : public IBlockInputStream
{
public:
ODBCBlockInputStream(
Poco::Data::Session && session_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_);
ODBCBlockInputStream(nanodbc::connection & connection_, const std::string & query_str, const Block & sample_block, const UInt64 max_block_size_);
String getName() const override { return "ODBC"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
using QueryResult = std::shared_ptr<nanodbc::result>;
using ValueType = ExternalResultDescription::ValueType;
Block readImpl() override;
Poco::Data::Session session;
Poco::Data::Statement statement;
Poco::Data::RecordSet result;
Poco::Data::RecordSet::Iterator iterator;
static void insertValue(IColumn & column, const DataTypePtr data_type, const ValueType type, nanodbc::result & row, size_t idx);
static void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
Poco::Logger * log;
const UInt64 max_block_size;
ExternalResultDescription description;
Poco::Logger * log;
nanodbc::connection & connection;
nanodbc::result result;
String query;
bool finished = false;
};
}

View File

@ -8,16 +8,14 @@
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTIdentifier.h>
#include "getIdentifierQuote.h"
#include <IO/WriteHelpers.h>
#include <IO/Operators.h>
#include <Formats/FormatFactory.h>
namespace DB
{
namespace ErrorCodes
{
extern const int UNKNOWN_TYPE;
}
namespace
{
using ValueType = ExternalResultDescription::ValueType;
@ -40,69 +38,21 @@ namespace
return buf.str();
}
std::string getQuestionMarks(size_t n)
{
std::string result = "(";
for (size_t i = 0; i < n; ++i)
{
if (i > 0)
result += ",";
result += "?";
}
return result + ")";
}
Poco::Dynamic::Var getVarFromField(const Field & field, const ValueType type)
{
switch (type)
{
case ValueType::vtUInt8:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt16:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt32:
return Poco::Dynamic::Var(static_cast<UInt64>(field.get<UInt64>())).convert<UInt64>();
case ValueType::vtUInt64:
return Poco::Dynamic::Var(field.get<UInt64>()).convert<UInt64>();
case ValueType::vtInt8:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt16:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt32:
return Poco::Dynamic::Var(static_cast<Int64>(field.get<Int64>())).convert<Int64>();
case ValueType::vtInt64:
return Poco::Dynamic::Var(field.get<Int64>()).convert<Int64>();
case ValueType::vtFloat32:
return Poco::Dynamic::Var(field.get<Float64>()).convert<Float64>();
case ValueType::vtFloat64:
return Poco::Dynamic::Var(field.get<Float64>()).convert<Float64>();
case ValueType::vtString:
return Poco::Dynamic::Var(field.get<String>()).convert<String>();
case ValueType::vtDate:
return Poco::Dynamic::Var(LocalDate(DayNum(field.get<UInt64>())).toString()).convert<String>();
case ValueType::vtDateTime:
return Poco::Dynamic::Var(DateLUT::instance().timeToString(time_t(field.get<UInt64>()))).convert<String>();
case ValueType::vtUUID:
return Poco::Dynamic::Var(UUID(field.get<UInt128>()).toUnderType().toHexString()).convert<std::string>();
default:
throw Exception("Unsupported value type", ErrorCodes::UNKNOWN_TYPE);
}
__builtin_unreachable();
}
}
ODBCBlockOutputStream::ODBCBlockOutputStream(Poco::Data::Session && session_,
ODBCBlockOutputStream::ODBCBlockOutputStream(nanodbc::connection & connection_,
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const Block & sample_block_,
ContextPtr local_context_,
IdentifierQuotingStyle quoting_)
: session(session_)
: log(&Poco::Logger::get("ODBCBlockOutputStream"))
, connection(connection_)
, db_name(remote_database_name_)
, table_name(remote_table_name_)
, sample_block(sample_block_)
, local_context(local_context_)
, quoting(quoting_)
, log(&Poco::Logger::get("ODBCBlockOutputStream"))
{
description.init(sample_block);
}
@ -114,28 +64,12 @@ Block ODBCBlockOutputStream::getHeader() const
void ODBCBlockOutputStream::write(const Block & block)
{
ColumnsWithTypeAndName columns;
for (size_t i = 0; i < block.columns(); ++i)
columns.push_back({block.getColumns()[i], sample_block.getDataTypes()[i], sample_block.getNames()[i]});
WriteBufferFromOwnString values_buf;
auto writer = FormatFactory::instance().getOutputStream("Values", values_buf, sample_block, local_context);
writer->write(block);
std::vector<Poco::Dynamic::Var> row_to_insert(block.columns());
Poco::Data::Statement statement(session << getInsertQuery(db_name, table_name, columns, quoting) + getQuestionMarks(block.columns()));
for (size_t i = 0; i < block.columns(); ++i)
statement.addBind(Poco::Data::Keywords::use(row_to_insert[i]));
for (size_t i = 0; i < block.rows(); ++i)
{
for (size_t col_idx = 0; col_idx < block.columns(); ++col_idx)
{
Field val;
columns[col_idx].column->get(i, val);
if (val.isNull())
row_to_insert[col_idx] = Poco::Dynamic::Var();
else
row_to_insert[col_idx] = getVarFromField(val, description.types[col_idx].first);
}
statement.execute();
}
std::string query = getInsertQuery(db_name, table_name, block.getColumnsWithTypeAndName(), quoting) + values_buf.str();
execute(connection, query);
}
}

View File

@ -2,30 +2,41 @@
#include <Core/Block.h>
#include <DataStreams/IBlockOutputStream.h>
#include <Poco/Data/Session.h>
#include <Core/ExternalResultDescription.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <Interpreters/Context_fwd.h>
#include <nanodbc/nanodbc.h>
namespace DB
{
class ODBCBlockOutputStream : public IBlockOutputStream
{
public:
ODBCBlockOutputStream(Poco::Data::Session && session_, const std::string & remote_database_name_,
const std::string & remote_table_name_, const Block & sample_block_, IdentifierQuotingStyle quoting);
ODBCBlockOutputStream(
nanodbc::connection & connection_,
const std::string & remote_database_name_,
const std::string & remote_table_name_,
const Block & sample_block_,
ContextPtr local_context_,
IdentifierQuotingStyle quoting);
Block getHeader() const override;
void write(const Block & block) override;
private:
Poco::Data::Session session;
Poco::Logger * log;
nanodbc::connection & connection;
std::string db_name;
std::string table_name;
Block sample_block;
ContextPtr local_context;
IdentifierQuotingStyle quoting;
ExternalResultDescription description;
Poco::Logger * log;
};
}

View File

@ -0,0 +1,82 @@
#pragma once
#include <common/logger_useful.h>
#include <nanodbc/nanodbc.h>
#include <mutex>
#include <common/BorrowedObjectPool.h>
#include <unordered_map>
namespace nanodbc
{
static constexpr inline auto ODBC_CONNECT_TIMEOUT = 100;
using ConnectionPtr = std::shared_ptr<nanodbc::connection>;
using Pool = BorrowedObjectPool<ConnectionPtr>;
using PoolPtr = std::shared_ptr<Pool>;
class ConnectionHolder
{
public:
ConnectionHolder(const std::string & connection_string_, PoolPtr pool_) : connection_string(connection_string_), pool(pool_) {}
~ConnectionHolder()
{
if (connection)
pool->returnObject(std::move(connection));
}
nanodbc::connection & operator*()
{
if (!connection)
{
pool->borrowObject(connection, [&]()
{
return std::make_shared<nanodbc::connection>(connection_string, ODBC_CONNECT_TIMEOUT);
});
}
return *connection;
}
private:
std::string connection_string;
PoolPtr pool;
ConnectionPtr connection;
};
}
namespace DB
{
class ODBCConnectionFactory final : private boost::noncopyable
{
public:
static ODBCConnectionFactory & instance()
{
static ODBCConnectionFactory ret;
return ret;
}
nanodbc::ConnectionHolder get(const std::string & connection_string, size_t pool_size)
{
std::lock_guard lock(mutex);
if (!factory.count(connection_string))
factory.emplace(std::make_pair(connection_string, std::make_shared<nanodbc::Pool>(pool_size)));
return nanodbc::ConnectionHolder(connection_string, factory[connection_string]);
}
private:
/// [connection_settings_string] -> [connection_pool]
using PoolFactory = std::unordered_map<std::string, nanodbc::PoolPtr>;
PoolFactory factory;
std::mutex mutex;
};
}

View File

@ -2,33 +2,26 @@
#if USE_ODBC
# include <Server/HTTP/HTMLForm.h>
# include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
# include <IO/WriteHelpers.h>
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# include <Poco/Net/HTTPServerRequest.h>
# include <Poco/Net/HTTPServerResponse.h>
# include <common/logger_useful.h>
# include "validateODBCConnectionString.h"
#include <Server/HTTP/HTMLForm.h>
#include <Server/HTTP/WriteBufferFromHTTPServerResponse.h>
#include <IO/WriteHelpers.h>
#include <Poco/Net/HTTPServerRequest.h>
#include <Poco/Net/HTTPServerResponse.h>
#include <common/logger_useful.h>
#include "validateODBCConnectionString.h"
#include "ODBCConnectionFactory.h"
#include <sql.h>
#include <sqlext.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
namespace DB
{
namespace
{
bool isSchemaAllowed(SQLHDBC hdbc)
bool isSchemaAllowed(nanodbc::connection & connection)
{
SQLUINTEGER value;
SQLSMALLINT value_length = sizeof(value);
SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_SCHEMA_USAGE, &value, sizeof(value), &value_length);
if (POCO_SQL_ODBC_CLASS::Utility::isError(r))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
return value != 0;
uint32_t result = connection.get_info<uint32_t>(SQL_SCHEMA_USAGE);
return result != 0;
}
}
@ -55,10 +48,12 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
try
{
std::string connection_string = params.get("connection_string");
POCO_SQL_ODBC_CLASS::SessionImpl session(validateODBCConnectionString(connection_string), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC);
SQLHDBC hdbc = session.dbc().handle();
bool result = isSchemaAllowed(hdbc);
auto connection = ODBCConnectionFactory::instance().get(
validateODBCConnectionString(connection_string),
getContext()->getSettingsRef().odbc_bridge_connection_pool_size);
bool result = isSchemaAllowed(*connection);
WriteBufferFromHTTPServerResponse out(response, request.getMethod() == Poco::Net::HTTPRequest::HTTP_HEAD, keep_alive_timeout);
try

View File

@ -1,22 +1,25 @@
#pragma once
#include <Interpreters/Context.h>
#include <Server/HTTP/HTTPRequestHandler.h>
#include <Poco/Logger.h>
#if USE_ODBC
namespace DB
{
class Context;
/// This handler establishes connection to database, and retrieves whether schema is allowed.
class SchemaAllowedHandler : public HTTPRequestHandler
class SchemaAllowedHandler : public HTTPRequestHandler, WithContext
{
public:
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr)
: log(&Poco::Logger::get("SchemaAllowedHandler")), keep_alive_timeout(keep_alive_timeout_)
SchemaAllowedHandler(size_t keep_alive_timeout_, ContextPtr context_)
: WithContext(context_)
, log(&Poco::Logger::get("SchemaAllowedHandler"))
, keep_alive_timeout(keep_alive_timeout_)
{
}

View File

@ -2,11 +2,10 @@
#if USE_ODBC
# include <Poco/Data/ODBC/ODBCException.h>
# include <Poco/Data/ODBC/SessionImpl.h>
# include <Poco/Data/ODBC/Utility.h>
# define POCO_SQL_ODBC_CLASS Poco::Data::ODBC
#include <common/logger_useful.h>
#include <nanodbc/nanodbc.h>
#include <sql.h>
#include <sqlext.h>
namespace DB
@ -17,33 +16,16 @@ namespace ErrorCodes
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
std::string getIdentifierQuote(SQLHDBC hdbc)
std::string getIdentifierQuote(nanodbc::connection & connection)
{
std::string identifier;
SQLSMALLINT t;
SQLRETURN r = POCO_SQL_ODBC_CLASS::SQLGetInfo(hdbc, SQL_IDENTIFIER_QUOTE_CHAR, nullptr, 0, &t);
if (POCO_SQL_ODBC_CLASS::Utility::isError(r))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
if (t > 0)
{
// I have no idea, why to add '2' here, got from: contrib/poco/Data/ODBC/src/ODBCStatementImpl.cpp:60 (SQL_DRIVER_NAME)
identifier.resize(static_cast<std::size_t>(t) + 2);
if (POCO_SQL_ODBC_CLASS::Utility::isError(POCO_SQL_ODBC_CLASS::SQLGetInfo(
hdbc, SQL_IDENTIFIER_QUOTE_CHAR, &identifier[0], SQLSMALLINT((identifier.length() - 1) * sizeof(identifier[0])), &t)))
throw POCO_SQL_ODBC_CLASS::ConnectionException(hdbc);
identifier.resize(static_cast<std::size_t>(t));
}
return identifier;
return connection.get_info<std::string>(SQL_IDENTIFIER_QUOTE_CHAR);
}
IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc)
IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection)
{
auto identifier_quote = getIdentifierQuote(hdbc);
auto identifier_quote = getIdentifierQuote(connection);
if (identifier_quote.length() == 0)
return IdentifierQuotingStyle::None;
else if (identifier_quote[0] == '`')

View File

@ -2,20 +2,19 @@
#if USE_ODBC
# include <Interpreters/Context.h>
# include <Poco/Logger.h>
# include <Poco/Net/HTTPRequestHandler.h>
# include <Poco/Data/ODBC/Utility.h>
#include <Interpreters/Context.h>
#include <Poco/Logger.h>
#include <Poco/Net/HTTPRequestHandler.h>
#include <Parsers/IdentifierQuotingStyle.h>
#include <nanodbc/nanodbc.h>
namespace DB
{
std::string getIdentifierQuote(SQLHDBC hdbc);
std::string getIdentifierQuote(nanodbc::connection & connection);
IdentifierQuotingStyle getQuotingStyle(SQLHDBC hdbc);
IdentifierQuotingStyle getQuotingStyle(nanodbc::connection & connection);
}

View File

@ -173,18 +173,24 @@ int waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t
const int sleep_one_ms = 100;
int sleep_current_ms = 0;
int current_connections = 0;
while (sleep_current_ms < sleep_max_ms)
for (;;)
{
current_connections = 0;
for (auto & server : servers)
{
server.stop();
current_connections += server.currentConnections();
}
if (!current_connections)
break;
sleep_current_ms += sleep_one_ms;
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
if (sleep_current_ms < sleep_max_ms)
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
else
break;
}
return current_connections;
}
@ -879,10 +885,30 @@ int Server::main(const std::vector<std::string> & /*args*/)
servers_to_start_before_tables->emplace_back(
port_name,
std::make_unique<Poco::Net::TCPServer>(
new KeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
new KeeperTCPHandlerFactory(*this, false), server_pool, socket, new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections to Keeper (tcp): {}", address.toString());
});
const char * secure_port_name = "keeper_server.tcp_port_secure";
createServer(listen_host, secure_port_name, listen_try, [&](UInt16 port)
{
#if USE_SSL
Poco::Net::SecureServerSocket socket;
auto address = socketBindListen(socket, listen_host, port, /* secure = */ true);
socket.setReceiveTimeout(settings.receive_timeout);
socket.setSendTimeout(settings.send_timeout);
servers_to_start_before_tables->emplace_back(
secure_port_name,
std::make_unique<Poco::Net::TCPServer>(
new KeeperTCPHandlerFactory(*this, true), server_pool, socket, new Poco::Net::TCPServerParams));
LOG_INFO(log, "Listening for connections to Keeper with secure protocol (tcp_secure): {}", address.toString());
#else
UNUSED(port);
throw Exception{"SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
});
}
#else
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "ClickHouse server built without NuRaft library. Cannot use internal coordination.");
@ -931,6 +957,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->shutdownKeeperStorageDispatcher();
}
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
server_pool.joinAll();
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
* At this moment, no one could own shared part of Context.
*/

View File

@ -7,7 +7,20 @@
-->
<yandex>
<logger>
<!-- Possible levels: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105 -->
<!-- Possible levels [1]:
- none (turns off logging)
- fatal
- critical
- error
- warning
- notice
- information
- debug
- trace
[1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
-->
<level>trace</level>
<log>/var/log/clickhouse-server/clickhouse-server.log</log>
<errorlog>/var/log/clickhouse-server/clickhouse-server.err.log</errorlog>

View File

@ -551,6 +551,15 @@ void Connection::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
out->next();
}
void Connection::sendReadTaskResponse(const String & response)
{
writeVarUInt(Protocol::Client::ReadTaskResponse, *out);
writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out);
writeStringBinary(response, *out);
out->next();
}
void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name)
{
/// NOTE 'Throttler' is not used in this method (could use, but it's not important right now).
@ -807,6 +816,9 @@ Packet Connection::receivePacket()
readVectorBinary(res.part_uuids, *in);
return res;
case Protocol::Server::ReadTaskRequest:
return res;
default:
/// In unknown state, disconnect - to not leave unsynchronised connection.
disconnect();
@ -907,13 +919,13 @@ void Connection::setDescription()
}
std::unique_ptr<Exception> Connection::receiveException()
std::unique_ptr<Exception> Connection::receiveException() const
{
return std::make_unique<Exception>(readException(*in, "Received from " + getDescription(), true /* remote */));
}
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type) const
{
size_t num = Protocol::Server::stringsInMessage(msg_type);
std::vector<String> strings(num);
@ -923,7 +935,7 @@ std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
}
Progress Connection::receiveProgress()
Progress Connection::receiveProgress() const
{
Progress progress;
progress.read(*in, server_revision);
@ -931,7 +943,7 @@ Progress Connection::receiveProgress()
}
BlockStreamProfileInfo Connection::receiveProfileInfo()
BlockStreamProfileInfo Connection::receiveProfileInfo() const
{
BlockStreamProfileInfo profile_info;
profile_info.read(*in);

View File

@ -159,6 +159,8 @@ public:
/// Send parts' uuids to excluded them from query processing
void sendIgnoredPartUUIDs(const std::vector<UUID> & uuids);
void sendReadTaskResponse(const String &);
/// Send prepared block of data (serialized and, if need, compressed), that will be read from 'input'.
/// You could pass size of serialized/compressed block.
void sendPreparedData(ReadBuffer & input, size_t size, const String & name = "");
@ -269,7 +271,7 @@ private:
class LoggerWrapper
{
public:
LoggerWrapper(Connection & parent_)
explicit LoggerWrapper(Connection & parent_)
: log(nullptr), parent(parent_)
{
}
@ -304,10 +306,10 @@ private:
Block receiveLogData();
Block receiveDataImpl(BlockInputStreamPtr & stream);
std::vector<String> receiveMultistringMessage(UInt64 msg_type);
std::unique_ptr<Exception> receiveException();
Progress receiveProgress();
BlockStreamProfileInfo receiveProfileInfo();
std::vector<String> receiveMultistringMessage(UInt64 msg_type) const;
std::unique_ptr<Exception> receiveException() const;
Progress receiveProgress() const;
BlockStreamProfileInfo receiveProfileInfo() const;
void initInputBuffers();
void initBlockInput();

View File

@ -26,7 +26,7 @@ public:
using Entry = PoolBase<Connection>::Entry;
public:
virtual ~IConnectionPool() {}
virtual ~IConnectionPool() = default;
/// Selects the connection to work.
/// If force_connected is false, the client must manually ensure that returned connection is good.

View File

@ -14,6 +14,12 @@
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/** To receive data from multiple replicas (connections) from one shard asynchronously.
* The principe of Hedged Connections is used to reduce tail latency:
* if we don't receive data from replica and there is no progress in query execution
@ -84,6 +90,11 @@ public:
const ClientInfo & client_info,
bool with_pending_data) override;
void sendReadTaskResponse(const String &) override
{
throw Exception("sendReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR);
}
Packet receivePacket() override;
Packet receivePacketUnlocked(AsyncCallback async_callback) override;

View File

@ -24,6 +24,8 @@ public:
const ClientInfo & client_info,
bool with_pending_data) = 0;
virtual void sendReadTaskResponse(const String &) = 0;
/// Get packet from any replica.
virtual Packet receivePacket() = 0;

View File

@ -155,6 +155,15 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuid
}
}
void MultiplexedConnections::sendReadTaskResponse(const String & response)
{
std::lock_guard lock(cancel_mutex);
if (cancelled)
return;
current_connection->sendReadTaskResponse(response);
}
Packet MultiplexedConnections::receivePacket()
{
std::lock_guard lock(cancel_mutex);
@ -210,6 +219,7 @@ Packet MultiplexedConnections::drain()
switch (packet.type)
{
case Protocol::Server::ReadTaskRequest:
case Protocol::Server::PartUUIDs:
case Protocol::Server::Data:
case Protocol::Server::Progress:
@ -273,6 +283,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
switch (packet.type)
{
case Protocol::Server::ReadTaskRequest:
case Protocol::Server::PartUUIDs:
case Protocol::Server::Data:
case Protocol::Server::Progress:

View File

@ -39,6 +39,8 @@ public:
const ClientInfo & client_info,
bool with_pending_data) override;
void sendReadTaskResponse(const String &) override;
Packet receivePacket() override;
void disconnect() override;

View File

@ -6,7 +6,7 @@
#include <Poco/Mutex.h>
#include <Poco/Semaphore.h>
#include <Common/MoveOrCopyIfThrow.h>
#include <common/MoveOrCopyIfThrow.h>
/** A very simple thread-safe queue of limited size.
* If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.

View File

@ -150,7 +150,7 @@ void tryLogCurrentException(Poco::Logger * logger, const std::string & start_of_
///
/// And in this case the exception will not be logged, so let's block the
/// MemoryTracker until the exception will be logged.
MemoryTracker::LockExceptionInThread lock_memory_tracker;
MemoryTracker::LockExceptionInThread lock_memory_tracker(VariableContext::Global);
try
{

View File

@ -24,8 +24,8 @@ namespace
///
/// - when it is explicitly blocked with LockExceptionInThread
///
/// - to avoid std::terminate(), when stack unwinding is currently in progress
/// in this thread.
/// - when there are uncaught exceptions objects in the current thread
/// (to avoid std::terminate())
///
/// NOTE: that since C++11 destructor marked with noexcept by default, and
/// this means that any throw from destructor (that is not marked with

View File

@ -51,7 +51,7 @@ private:
*/
struct PoolEntryHelper
{
PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; }
explicit PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; }
~PoolEntryHelper()
{
std::unique_lock lock(data.pool.mutex);
@ -69,7 +69,7 @@ public:
public:
friend class PoolBase<Object>;
Entry() {} /// For deferred initialization.
Entry() = default; /// For deferred initialization.
/** The `Entry` object protects the resource from being used by another thread.
* The following methods are forbidden for `rvalue`, so you can not write a similar to
@ -99,10 +99,10 @@ public:
private:
std::shared_ptr<PoolEntryHelper> data;
Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
};
virtual ~PoolBase() {}
virtual ~PoolBase() = default;
/** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
Entry get(Poco::Timespan::TimeDiff timeout)

View File

@ -421,26 +421,38 @@ std::pair<ResponsePtr, Undo> TestKeeperMultiRequest::process(TestKeeper::Contain
try
{
for (const auto & request : requests)
auto request_it = requests.begin();
response.error = Error::ZOK;
while (request_it != requests.end())
{
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(*request);
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(**request_it);
++request_it;
auto [ cur_response, undo_action ] = concrete_request.process(container, zxid);
response.responses.emplace_back(cur_response);
if (cur_response->error != Error::ZOK)
{
response.error = cur_response->error;
for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it)
if (*it)
(*it)();
return { std::make_shared<MultiResponse>(response), {} };
break;
}
undo_actions.emplace_back(std::move(undo_action));
}
if (response.error != Error::ZOK)
{
for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it)
if (*it)
(*it)();
while (request_it != requests.end())
{
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(**request_it);
++request_it;
response.responses.emplace_back(concrete_request.createResponse());
response.responses.back()->error = Error::ZRUNTIMEINCONSISTENCY;
}
else
undo_actions.emplace_back(std::move(undo_action));
}
response.error = Error::ZOK;
return { std::make_shared<MultiResponse>(response), {} };
}
catch (...)

View File

@ -1,4 +1,9 @@
#include <Coordination/KeeperServer.h>
#if !defined(ARCADIA_BUILD)
# include "config_core.h"
#endif
#include <Coordination/LoggerWrapper.h>
#include <Coordination/KeeperStateMachine.h>
#include <Coordination/KeeperStateManager.h>
@ -9,6 +14,7 @@
#include <chrono>
#include <Common/ZooKeeper/ZooKeeperIO.h>
#include <string>
#include <Poco/Util/Application.h>
namespace DB
{
@ -16,6 +22,42 @@ namespace DB
namespace ErrorCodes
{
extern const int RAFT_ERROR;
extern const int NO_ELEMENTS_IN_CONFIG;
extern const int SUPPORT_IS_DISABLED;
}
namespace
{
#if USE_SSL
void setSSLParams(nuraft::asio_service::options & asio_opts)
{
const Poco::Util::LayeredConfiguration & config = Poco::Util::Application::instance().config();
String certificate_file_property = "openSSL.server.certificateFile";
String private_key_file_property = "openSSL.server.privateKeyFile";
String root_ca_file_property = "openSSL.server.caConfig";
if (!config.has(certificate_file_property))
throw Exception("Server certificate file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
if (!config.has(private_key_file_property))
throw Exception("Server private key file is not set.", ErrorCodes::NO_ELEMENTS_IN_CONFIG);
asio_opts.enable_ssl_ = true;
asio_opts.server_cert_file_ = config.getString(certificate_file_property);
asio_opts.server_key_file_ = config.getString(private_key_file_property);
if (config.has(root_ca_file_property))
asio_opts.root_cert_file_ = config.getString(root_ca_file_property);
if (config.getBool("openSSL.server.loadDefaultCAFile", false))
asio_opts.load_default_ca_file_ = true;
if (config.getString("openSSL.server.verificationMode", "none") == "none")
asio_opts.skip_verification_ = true;
}
#endif
}
KeeperServer::KeeperServer(
@ -72,6 +114,15 @@ void KeeperServer::startup()
params.return_method_ = nuraft::raft_params::blocking;
nuraft::asio_service::options asio_opts{};
if (state_manager->isSecure())
{
#if USE_SSL
setSSLParams(asio_opts);
#else
throw Exception{"SSL support for NuRaft is disabled because ClickHouse was built without SSL support.",
ErrorCodes::SUPPORT_IS_DISABLED};
#endif
}
launchRaftServer(params, asio_opts);

View File

@ -12,6 +12,7 @@ namespace ErrorCodes
KeeperStateManager::KeeperStateManager(int server_id_, const std::string & host, int port, const std::string & logs_path)
: my_server_id(server_id_)
, my_port(port)
, secure(false)
, log_store(nuraft::cs_new<KeeperLogStore>(logs_path, 5000, false))
, cluster_config(nuraft::cs_new<nuraft::cluster_config>())
{
@ -25,6 +26,7 @@ KeeperStateManager::KeeperStateManager(
const Poco::Util::AbstractConfiguration & config,
const CoordinationSettingsPtr & coordination_settings)
: my_server_id(my_server_id_)
, secure(config.getBool(config_prefix + ".raft_configuration.secure", false))
, log_store(nuraft::cs_new<KeeperLogStore>(
config.getString(config_prefix + ".log_storage_path", config.getString("path", DBMS_DEFAULT_PATH) + "coordination/logs"),
coordination_settings->rotate_log_storage_interval, coordination_settings->force_sync))
@ -37,6 +39,9 @@ KeeperStateManager::KeeperStateManager(
for (const auto & server_key : keys)
{
if (!startsWith(server_key, "server"))
continue;
std::string full_prefix = config_prefix + ".raft_configuration." + server_key;
int server_id = config.getInt(full_prefix + ".id");
std::string hostname = config.getString(full_prefix + ".hostname");
@ -44,6 +49,7 @@ KeeperStateManager::KeeperStateManager(
bool can_become_leader = config.getBool(full_prefix + ".can_become_leader", true);
int32_t priority = config.getInt(full_prefix + ".priority", 1);
bool start_as_follower = config.getBool(full_prefix + ".start_as_follower", false);
if (start_as_follower)
start_as_follower_servers.insert(server_id);
@ -57,6 +63,7 @@ KeeperStateManager::KeeperStateManager(
cluster_config->get_servers().push_back(peer_config);
}
if (!my_server_config)
throw Exception(ErrorCodes::RAFT_ERROR, "Our server id {} not found in raft_configuration section", my_server_id);

View File

@ -52,6 +52,11 @@ public:
return start_as_follower_servers.count(my_server_id);
}
bool isSecure() const
{
return secure;
}
nuraft::ptr<KeeperLogStore> getLogStore() const { return log_store; }
uint64_t getTotalServers() const { return total_servers; }
@ -59,6 +64,7 @@ public:
private:
int my_server_id;
int my_port;
bool secure;
uint64_t total_servers{0};
std::unordered_set<int> start_as_follower_servers;
nuraft::ptr<KeeperLogStore> log_store;

View File

@ -74,6 +74,9 @@
/// Minimum revision supporting OpenTelemetry
#define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442
#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1
/// Minimum revision supporting interserver secret.
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441

View File

@ -76,8 +76,10 @@ namespace Protocol
Log = 10, /// System logs of the query execution
TableColumns = 11, /// Columns' description for default values calculation
PartUUIDs = 12, /// List of unique parts ids.
MAX = PartUUIDs,
ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed
/// This is such an inverted logic, where server sends requests
/// And client returns back response
MAX = ReadTaskRequest,
};
/// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
@ -100,6 +102,7 @@ namespace Protocol
"Log",
"TableColumns",
"PartUUIDs",
"ReadTaskRequest"
};
return packet <= MAX
? data[packet]
@ -135,8 +138,9 @@ namespace Protocol
KeepAlive = 6, /// Keep the connection alive
Scalar = 7, /// A block of data (compressed or not).
IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing
ReadTaskResponse = 9, /// TODO:
MAX = IgnoredPartUUIDs,
MAX = ReadTaskResponse,
};
inline const char * toString(UInt64 packet)
@ -151,6 +155,7 @@ namespace Protocol
"KeepAlive",
"Scalar",
"IgnoredPartUUIDs",
"ReadTaskResponse",
};
return packet <= MAX
? data[packet]

View File

@ -229,7 +229,7 @@ class IColumn;
M(Seconds, http_connection_timeout, DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, "HTTP connection timeout.", 0) \
M(Seconds, http_send_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP send timeout", 0) \
M(Seconds, http_receive_timeout, DEFAULT_HTTP_READ_BUFFER_TIMEOUT, "HTTP receive timeout", 0) \
M(UInt64, http_max_uri_size, 16384, "Maximum URI length of HTTP request", 0) \
M(UInt64, http_max_uri_size, 1048576, "Maximum URI length of HTTP request", 0) \
M(Bool, optimize_throw_if_noop, false, "If setting is enabled and OPTIMIZE query didn't actually assign a merge then an explanatory exception is thrown", 0) \
M(Bool, use_index_for_in_with_subqueries, true, "Try using an index if there is a subquery or a table expression on the right side of the IN operator.", 0) \
M(Bool, joined_subquery_requires_alias, true, "Force joined subqueries and table functions to have aliases for correct name qualification.", 0) \
@ -373,6 +373,7 @@ class IColumn;
M(UInt64, postgresql_connection_pool_size, 16, "Connection pool size for PostgreSQL table engine and database engine.", 0) \
M(Int64, postgresql_connection_pool_wait_timeout, -1, "Connection pool push/pop timeout on empty pool for PostgreSQL table engine and database engine. By default it will block on empty pool.", 0) \
M(UInt64, glob_expansion_max_elements, 1000, "Maximum number of allowed addresses (For external storages, table functions, etc).", 0) \
M(UInt64, odbc_bridge_connection_pool_size, 16, "Connection pool size for each connection settings string in ODBC bridge.", 0) \
\
M(Seconds, distributed_replica_error_half_life, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_DECREASE_ERROR_PERIOD, "Time period reduces replica error counter by 2 times.", 0) \
M(UInt64, distributed_replica_error_cap, DBMS_CONNECTION_POOL_WITH_FAILOVER_MAX_ERROR_COUNT, "Max number of errors per replica, prevents piling up an incredible amount of errors if replica was offline for some time and allows it to be reconsidered in a shorter amount of time.", 0) \

View File

@ -8,7 +8,7 @@ namespace DB
/// Prints internal server logs
/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock()
/// NOTE: IRowOutputStream does not suite well for this case
/// NOTE: IRowOutputFormat does not suite well for this case
class InternalTextLogsRowOutputStream : public IBlockOutputStream
{
public:

View File

@ -311,7 +311,7 @@ void PushingToViewsBlockOutputStream::writeSuffix()
UInt64 milliseconds = main_watch.elapsedMilliseconds();
if (views.size() > 1)
{
LOG_TRACE(log, "Pushing from {} to {} views took {} ms.",
LOG_DEBUG(log, "Pushing from {} to {} views took {} ms.",
storage->getStorageID().getNameForLogs(), views.size(),
milliseconds);
}

View File

@ -22,20 +22,18 @@ namespace DB
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int UNKNOWN_PACKET_FROM_SERVER;
extern const int DUPLICATED_PART_UUIDS;
}
RemoteQueryExecutor::RemoteQueryExecutor(
Connection & connection,
const String & query_,
const Block & header_,
ContextPtr context_,
ThrottlerPtr throttler,
const Scalars & scalars_,
const Tables & external_tables_,
QueryProcessingStage::Enum stage_)
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
{
create_connections = [this, &connection, throttler]()
{
@ -45,14 +43,11 @@ RemoteQueryExecutor::RemoteQueryExecutor(
RemoteQueryExecutor::RemoteQueryExecutor(
std::vector<IConnectionPool::Entry> && connections_,
const String & query_,
const Block & header_,
ContextPtr context_,
const ThrottlerPtr & throttler,
const Scalars & scalars_,
const Tables & external_tables_,
QueryProcessingStage::Enum stage_)
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
{
create_connections = [this, connections_, throttler]() mutable {
return std::make_unique<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
@ -61,14 +56,11 @@ RemoteQueryExecutor::RemoteQueryExecutor(
RemoteQueryExecutor::RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool,
const String & query_,
const Block & header_,
ContextPtr context_,
const ThrottlerPtr & throttler,
const Scalars & scalars_,
const Tables & external_tables_,
QueryProcessingStage::Enum stage_)
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
: header(header_), query(query_), context(context_)
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
{
create_connections = [this, pool, throttler]()->std::unique_ptr<IConnections>
{
@ -307,6 +299,9 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
{
switch (packet.type)
{
case Protocol::Server::ReadTaskRequest:
processReadTaskRequest();
break;
case Protocol::Server::PartUUIDs:
if (!setPartUUIDs(packet.part_uuids))
got_duplicated_part_uuids = true;
@ -385,6 +380,14 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
return true;
}
void RemoteQueryExecutor::processReadTaskRequest()
{
if (!task_iterator)
throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR);
auto response = (*task_iterator)();
connections->sendReadTaskResponse(response);
}
void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
{
/** If one of:

View File

@ -26,6 +26,9 @@ using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & in
class RemoteQueryExecutorReadContext;
/// This is the same type as StorageS3Source::IteratorWrapper
using TaskIterator = std::function<String()>;
/// This class allows one to launch queries on remote replicas of one shard and get results
class RemoteQueryExecutor
{
@ -37,21 +40,21 @@ public:
Connection & connection,
const String & query_, const Block & header_, ContextPtr context_,
ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
/// Accepts several connections already taken from pool.
RemoteQueryExecutor(
std::vector<IConnectionPool::Entry> && connections_,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
/// Takes a pool and gets one or several connections from it.
RemoteQueryExecutor(
const ConnectionPoolWithFailoverPtr & pool,
const String & query_, const Block & header_, ContextPtr context_,
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
~RemoteQueryExecutor();
@ -119,6 +122,8 @@ private:
/// Temporary tables needed to be sent to remote servers
Tables external_tables;
QueryProcessingStage::Enum stage;
/// Initiator identifier for distributed task processing
std::shared_ptr<TaskIterator> task_iterator;
/// Streams for reading from temporary tables and following sending of data
/// to remote servers for GLOBAL-subqueries
@ -179,6 +184,8 @@ private:
/// Return true if duplicates found.
bool setPartUUIDs(const std::vector<UUID> & uuids);
void processReadTaskRequest();
/// Cancell query and restart it with info about duplicated UUIDs
/// only for `allow_experimental_query_deduplication`.
std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr);

View File

@ -49,6 +49,16 @@ void copyDataImpl(IBlockInputStream & from, IBlockOutputStream & to, TCancelCall
to.writeSuffix();
}
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<void(const Block & block)> & progress,
std::atomic<bool> * is_cancelled)
{
auto is_cancelled_pred = [is_cancelled] ()
{
return isAtomicSet(is_cancelled);
};
copyDataImpl(from, to, is_cancelled_pred, progress);
}
inline void doNothing(const Block &) {}

View File

@ -16,6 +16,9 @@ class Block;
*/
void copyData(IBlockInputStream & from, IBlockOutputStream & to, std::atomic<bool> * is_cancelled = nullptr);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<void(const Block & block)> & progress,
std::atomic<bool> * is_cancelled = nullptr);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled);
void copyData(IBlockInputStream & from, IBlockOutputStream & to, const std::function<bool()> & is_cancelled,

View File

@ -52,7 +52,7 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database)
{
Block header{{std::make_shared<DataTypeString>(), "table_name"}};
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = " + quoteString(database);
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database);
std::vector<String> tables_in_db;
MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE);

View File

@ -1,7 +1,8 @@
#pragma once
#include <common/BorrowedObjectPool.h>
#include <Core/Block.h>
#include <Common/BorrowedObjectPool.h>
#include <Interpreters/Context.h>
#include "IDictionarySource.h"

View File

@ -10,7 +10,6 @@
#include "DictionarySourceFactory.h"
#include "DictionarySourceHelpers.h"
#include "DictionaryStructure.h"
#include "LibraryDictionarySourceExternal.h"
#include "registerDictionaries.h"
#include <IO/WriteBufferFromString.h>
#include <IO/WriteHelpers.h>

View File

@ -122,7 +122,7 @@ void registerDiskS3(DiskFactory & factory)
throw Exception("S3 path must ends with '/', but '" + uri.key + "' doesn't.", ErrorCodes::BAD_ARGUMENTS);
client_configuration.connectTimeoutMs = config.getUInt(config_prefix + ".connect_timeout_ms", 10000);
client_configuration.httpRequestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000);
client_configuration.requestTimeoutMs = config.getUInt(config_prefix + ".request_timeout_ms", 5000);
client_configuration.maxConnections = config.getUInt(config_prefix + ".max_connections", 100);
client_configuration.endpointOverride = uri.endpoint;

View File

@ -1,18 +0,0 @@
#include <Formats/IRowInputStream.h>
#include <Common/Exception.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
void IRowInputStream::syncAfterError()
{
throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED);
}
}

View File

@ -1,51 +0,0 @@
#pragma once
#include <boost/noncopyable.hpp>
#include <memory>
#include <string>
#include <Columns/IColumn.h>
namespace DB
{
/// Contains extra information about read data.
struct RowReadExtension
{
/// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise.
/// It's used to attach defaults for partially filled rows.
/// Can be empty, this means that all columns are read.
std::vector<UInt8> read_columns;
};
/** Interface of stream, that allows to read data by rows.
*/
class IRowInputStream : private boost::noncopyable
{
public:
/** Read next row and append it to the columns.
* If no more rows - return false.
*/
virtual bool read(MutableColumns & columns, RowReadExtension & extra) = 0;
virtual void readPrefix() {} /// delimiter before begin of result
virtual void readSuffix() {} /// delimiter after end of result
/// Skip data until next row.
/// This is intended for text streams, that allow skipping of errors.
/// By default - throws not implemented exception.
virtual bool allowSyncAfterError() const { return false; }
virtual void syncAfterError();
/// In case of parse error, try to roll back and parse last one or two rows very carefully
/// and collect as much as possible diagnostic information about error.
/// If not implemented, returns empty string.
virtual std::string getDiagnosticInfo() { return {}; }
virtual ~IRowInputStream() {}
};
using RowInputStreamPtr = std::shared_ptr<IRowInputStream>;
}

View File

@ -1,37 +0,0 @@
#include <Common/Exception.h>
#include <Core/Block.h>
#include <Formats/IRowOutputStream.h>
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
void IRowOutputStream::write(const Block & block, size_t row_num)
{
size_t columns = block.columns();
writeRowStartDelimiter();
for (size_t i = 0; i < columns; ++i)
{
if (i != 0)
writeFieldDelimiter();
const auto & col = block.getByPosition(i);
writeField(*col.column, *col.type, row_num);
}
writeRowEndDelimiter();
}
void IRowOutputStream::writeField(const IColumn &, const IDataType &, size_t)
{
throw Exception("Method writeField is not implemented for output format", ErrorCodes::NOT_IMPLEMENTED);
}
}

View File

@ -1,63 +0,0 @@
#pragma once
#include <memory>
#include <cstdint>
#include <boost/noncopyable.hpp>
#include <common/types.h>
namespace DB
{
class Block;
class IColumn;
class IDataType;
struct Progress;
/** Interface of stream for writing data by rows (for example: for output to terminal).
*/
class IRowOutputStream : private boost::noncopyable
{
public:
/** Write a row.
* Default implementation calls methods to write single values and delimiters
* (except delimiter between rows (writeRowBetweenDelimiter())).
*/
virtual void write(const Block & block, size_t row_num);
/** Write single value. */
virtual void writeField(const IColumn & column, const IDataType & type, size_t row_num);
/** Write delimiter. */
virtual void writeFieldDelimiter() {} /// delimiter between values
virtual void writeRowStartDelimiter() {} /// delimiter before each row
virtual void writeRowEndDelimiter() {} /// delimiter after each row
virtual void writeRowBetweenDelimiter() {} /// delimiter between rows
virtual void writePrefix() {} /// delimiter before resultset
virtual void writeSuffix() {} /// delimiter after resultset
/** Flush output buffers if any. */
virtual void flush() {}
/** Methods to set additional information for output in formats, that support it.
*/
virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {}
virtual void setTotals(const Block & /*totals*/) {}
virtual void setExtremes(const Block & /*extremes*/) {}
/** Notify about progress. Method could be called from different threads.
* Passed value are delta, that must be summarized.
*/
virtual void onProgress(const Progress & /*progress*/) {}
/** Content-Type to set when sending HTTP response. */
virtual String getContentType() const { return "text/plain; charset=UTF-8"; }
virtual ~IRowOutputStream() {}
};
using RowOutputStreamPtr = std::shared_ptr<IRowOutputStream>;
}

View File

@ -13,8 +13,6 @@ PEERDIR(
SRCS(
FormatFactory.cpp
FormatSchemaInfo.cpp
IRowInputStream.cpp
IRowOutputStream.cpp
JSONEachRowUtils.cpp
MySQLBlockInputStream.cpp
NativeFormat.cpp

View File

@ -82,7 +82,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
sink.current_offset += slice.size;
}
else
throw Exception("Function writeSlice expect same column types for GenericArraySlice and GenericArraySink.",
throw Exception("Function writeSlice expects same column types for GenericArraySlice and GenericArraySink.",
ErrorCodes::LOGICAL_ERROR);
}
@ -162,7 +162,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, GenericArr
++sink.current_offset;
}
else
throw Exception("Function writeSlice expect same column types for GenericValueSlice and GenericArraySink.",
throw Exception("Function writeSlice expects same column types for GenericValueSlice and GenericArraySink.",
ErrorCodes::LOGICAL_ERROR);
}
@ -609,7 +609,7 @@ bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
{
/// Generic arrays should have the same type in order to use column.compareAt(...)
if (!first.elements->structureEquals(*second.elements))
return false;
throw Exception("Function sliceHas expects same column types for slices.", ErrorCodes::LOGICAL_ERROR);
auto impl = sliceHasImpl<search_type, GenericArraySlice, GenericArraySlice, sliceEqualElements, insliceEqualElements>;
return impl(first, second, nullptr, nullptr);
@ -670,7 +670,7 @@ void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnU
auto & data = result.getData();
for (auto row : ext::range(0, size))
{
data[row] = static_cast<UInt8>(sliceHas<search_type>(first.getWhole(), second.getWhole()) ? 1 : 0);
data[row] = static_cast<UInt8>(sliceHas<search_type>(first.getWhole(), second.getWhole()));
first.next();
second.next();
}

View File

@ -477,7 +477,7 @@ DataTypePtr FunctionOverloadResolverAdaptor::getReturnTypeDefaultImplementationF
}
if (null_presence.has_nullable)
{
Block nested_columns = createBlockWithNestedColumns(arguments);
auto nested_columns = Block(createBlockWithNestedColumns(arguments));
auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end()));
return makeNullable(return_type);
}

View File

@ -5,6 +5,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
@ -13,9 +14,9 @@
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/memcmpSmall.h>
#include <Common/assert_cast.h>
#include "Columns/ColumnLowCardinality.h"
#include "DataTypes/DataTypeLowCardinality.h"
#include "Interpreters/castColumn.h"
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Interpreters/castColumn.h>
namespace DB
@ -373,11 +374,10 @@ public:
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
if (!arguments[1]->onlyNull() && !allowArguments(array_type->getNestedType(), arguments[1]))
throw Exception("Types of array and 2nd argument of function \""
+ getName() + "\" must be identical up to nullability, cardinality, "
"numeric types, or Enum and numeric type. Passed: "
+ arguments[0]->getName() + " and " + arguments[1]->getName() + ".",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
"Types of array and 2nd argument of function `{}` must be identical up to nullability, cardinality, "
"numeric types, or Enum and numeric type. Passed: {} and {}.",
getName(), arguments[0]->getName(), arguments[1]->getName());
return std::make_shared<DataTypeNumber<ResultType>>();
}
@ -494,86 +494,13 @@ private:
inline void moveResult() { result_column = std::move(result); }
};
static inline bool allowNested(const DataTypePtr & left, const DataTypePtr & right)
{
return ((isNativeNumber(left) || isEnum(left)) && isNativeNumber(right)) || left->equals(*right);
}
static inline bool allowArguments(const DataTypePtr & array_inner_type, const DataTypePtr & arg)
{
if (allowNested(array_inner_type, arg))
return true;
auto inner_type_decayed = removeNullable(removeLowCardinality(array_inner_type));
auto arg_decayed = removeNullable(removeLowCardinality(arg));
/// Nullable
const bool array_is_nullable = array_inner_type->isNullable();
const bool arg_is_nullable = arg->isNullable();
const DataTypePtr arg_or_arg_nullable_nested = arg_is_nullable
? checkAndGetDataType<DataTypeNullable>(arg.get())->getNestedType()
: arg;
if (array_is_nullable) // comparing Array(Nullable(T)) elem and U
{
const DataTypePtr array_nullable_nested =
checkAndGetDataType<DataTypeNullable>(array_inner_type.get())->getNestedType();
// We also allow Nullable(T) and LC(U) if the Nullable(T) and U are allowed,
// the LC(U) will be converted to U.
return allowNested(
array_nullable_nested,
recursiveRemoveLowCardinality(arg_or_arg_nullable_nested));
}
else if (arg_is_nullable) // cannot compare Array(T) elem (namely, T) and Nullable(T)
return false;
/// LowCardinality
const auto * const array_lc_ptr = checkAndGetDataType<DataTypeLowCardinality>(array_inner_type.get());
const auto * const arg_lc_ptr = checkAndGetDataType<DataTypeLowCardinality>(arg.get());
const DataTypePtr array_lc_inner_type = recursiveRemoveLowCardinality(array_inner_type);
const DataTypePtr arg_lc_inner_type = recursiveRemoveLowCardinality(arg);
const bool array_is_lc = nullptr != array_lc_ptr;
const bool arg_is_lc = nullptr != arg_lc_ptr;
const bool array_lc_inner_type_is_nullable = array_is_lc && array_lc_inner_type->isNullable();
const bool arg_lc_inner_type_is_nullable = arg_is_lc && arg_lc_inner_type->isNullable();
if (array_is_lc) // comparing LC(T) and U
{
const DataTypePtr array_lc_nested_or_lc_nullable_nested = array_lc_inner_type_is_nullable
? checkAndGetDataType<DataTypeNullable>(array_lc_inner_type.get())->getNestedType()
: array_lc_inner_type;
if (arg_is_lc) // comparing LC(T) and LC(U)
{
const DataTypePtr arg_lc_nested_or_lc_nullable_nested = arg_lc_inner_type_is_nullable
? checkAndGetDataType<DataTypeNullable>(arg_lc_inner_type.get())->getNestedType()
: arg_lc_inner_type;
return allowNested(
array_lc_nested_or_lc_nullable_nested,
arg_lc_nested_or_lc_nullable_nested);
}
else if (arg_is_nullable) // Comparing LC(T) and Nullable(U)
{
if (!array_lc_inner_type_is_nullable)
return false; // Can't compare Array(LC(U)) elem and Nullable(T);
return allowNested(
array_lc_nested_or_lc_nullable_nested,
arg_or_arg_nullable_nested);
}
else // Comparing LC(T) and U (U neither Nullable nor LC)
return allowNested(array_lc_nested_or_lc_nullable_nested, arg);
}
if (arg_is_lc) // Allow T and LC(U) if U and T are allowed (the low cardinality column will be converted).
return allowNested(array_inner_type, arg_lc_inner_type);
return false;
return ((isNativeNumber(inner_type_decayed) || isEnum(inner_type_decayed)) && isNativeNumber(arg_decayed))
|| getLeastSupertype({inner_type_decayed, arg_decayed});
}
#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64
@ -1044,33 +971,38 @@ private:
if (!col)
return nullptr;
const IColumn & col_nested = col->getData();
DataTypePtr array_elements_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
const DataTypePtr & index_type = arguments[1].type;
DataTypePtr common_type = getLeastSupertype({array_elements_type, index_type});
ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type);
const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality();
const IColumn & item_arg = *right_ptr.get();
ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type);
auto col_res = ResultColumnType::create();
auto [null_map_data, null_map_item] = getNullMaps(arguments);
if (item_arg.onlyNull())
if (item_arg->onlyNull())
Impl::Null<ConcreteAction>::process(
col->getOffsets(),
col_res->getData(),
null_map_data);
else if (isColumnConst(item_arg))
else if (isColumnConst(*item_arg))
Impl::Main<ConcreteAction, true>::vector(
col_nested,
*col_nested,
col->getOffsets(),
typeid_cast<const ColumnConst &>(item_arg).getDataColumn(),
typeid_cast<const ColumnConst &>(*item_arg).getDataColumn(),
col_res->getData(), /// TODO This is wrong.
null_map_data,
nullptr);
else
Impl::Main<ConcreteAction>::vector(
col_nested,
*col_nested,
col->getOffsets(),
item_arg,
*item_arg,
col_res->getData(),
null_map_data,
null_map_item);

Some files were not shown because too many files have changed in this diff Show More