Mirror of https://github.com/ClickHouse/ClickHouse.git (synced 2024-11-17 13:13:36 +00:00)

Commit f6f0d001cd: Merge branch 'master' into speedup-intdiv
@@ -11,6 +11,8 @@

* Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums (a sketch of such a query follows this list). [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**:
    * `ATTACH PART[ITION]` queries may not work during a cluster upgrade.
    * It is not possible to roll back to an older ClickHouse version after executing an `ALTER ... ATTACH` query in the new version, as the old servers would fail to process the `ATTACH_PART` entry in the replicated log.
* In this version, an empty `<remote_url_allow_hosts></remote_url_allow_hosts>` element blocks all access to remote hosts, while in previous versions it did nothing. If you want to keep the old behaviour and have an empty `remote_url_allow_hosts` element in your configuration file, remove it. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).
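A minimal sketch of the kind of query these notes refer to, using a hypothetical table name and partition expression:

``` sql
-- Hypothetical example: re-attach a previously detached partition.
-- With this change, a replica of a Replicated*MergeTree table first searches its own
-- detached/ directory (matching parts by checksum) before fetching parts from other replicas.
ALTER TABLE events ATTACH PARTITION 202103;
```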
#### New Feature
@@ -132,7 +134,6 @@

* Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)).
* `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)).
* Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)).
* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)).

#### Build/Testing/Packaging Improvement
@@ -7,8 +7,7 @@

#include <condition_variable>

#include <common/defines.h>

#include <Common/MoveOrCopyIfThrow.h>
#include <common/MoveOrCopyIfThrow.h>

/** Pool for limited size objects that cannot be used from different threads simultaneously.
  * The main use case is to have a fixed set of objects that can be reused in different threads during their lifetime
debian/clickhouse-common-static.install (1 line changed, vendored)
@@ -3,4 +3,3 @@ usr/bin/clickhouse-odbc-bridge

usr/bin/clickhouse-library-bridge
usr/bin/clickhouse-extract-from-config
usr/share/bash-completion/completions
etc/security/limits.d/clickhouse.conf
debian/clickhouse-server.config (16 lines changed, vendored)
@@ -1,16 +0,0 @@

#!/bin/sh -e

test -f /usr/share/debconf/confmodule && . /usr/share/debconf/confmodule

db_fget clickhouse-server/default-password seen || true
password_seen="$RET"

if [ "$1" = "reconfigure" ]; then
    password_seen=false
fi

if [ "$password_seen" != "true" ]; then
    db_input high clickhouse-server/default-password || true
    db_go || true
fi
db_go || true
debian/clickhouse-server.postinst (8 lines changed, vendored)
@ -23,11 +23,13 @@ if [ ! -f "/etc/debian_version" ]; then
|
||||
fi
|
||||
|
||||
if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
|
||||
|
||||
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
|
||||
# if old rc.d service present - remove it
|
||||
if [ -x "/etc/init.d/clickhouse-server" ] && [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server remove
|
||||
echo "ClickHouse init script has migrated to systemd. Please manually stop old server and restart the service: sudo killall clickhouse-server && sleep 5 && sudo service clickhouse-server restart"
|
||||
fi
|
||||
|
||||
/bin/systemctl daemon-reload
|
||||
@ -38,10 +40,8 @@ if [ "$1" = configure ] || [ -n "$not_deb_os" ]; then
|
||||
if [ -x "/usr/sbin/update-rc.d" ]; then
|
||||
/usr/sbin/update-rc.d clickhouse-server defaults 19 19 >/dev/null || exit $?
|
||||
else
|
||||
echo # TODO [ "$OS" = "rhel" ] || [ "$OS" = "centos" ] || [ "$OS" = "fedora" ]
|
||||
echo # Other OS
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
${CLICKHOUSE_GENERIC_PROGRAM} install --user "${CLICKHOUSE_USER}" --group "${CLICKHOUSE_GROUP}" --pid-path "${CLICKHOUSE_PIDDIR}" --config-path "${CLICKHOUSE_CONFDIR}" --binary-path "${CLICKHOUSE_BINDIR}" --log-path "${CLICKHOUSE_LOGDIR}" --data-path "${CLICKHOUSE_DATADIR}"
|
||||
fi
|
||||
|
debian/clickhouse-server.preinst (8 lines changed, vendored)
@ -1,8 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
if [ "$1" = "upgrade" ]; then
|
||||
# Return etc/cron.d/clickhouse-server to original state
|
||||
service clickhouse-server disable_cron ||:
|
||||
fi
|
||||
|
||||
#DEBHELPER#
|
debian/clickhouse-server.prerm (6 lines changed, vendored)
@ -1,6 +0,0 @@
|
||||
#!/bin/sh
|
||||
|
||||
if [ "$1" = "upgrade" ] || [ "$1" = "remove" ]; then
|
||||
# Return etc/cron.d/clickhouse-server to original state
|
||||
service clickhouse-server disable_cron ||:
|
||||
fi
|
debian/clickhouse-server.templates (3 lines changed, vendored)
@ -1,3 +0,0 @@
|
||||
Template: clickhouse-server/default-password
|
||||
Type: password
|
||||
Description: Enter password for default user:
|
debian/clickhouse.limits (2 lines changed, vendored)
@ -1,2 +0,0 @@
|
||||
clickhouse soft nofile 262144
|
||||
clickhouse hard nofile 262144
|
debian/rules (3 lines changed, vendored)
@ -113,9 +113,6 @@ override_dh_install:
|
||||
ln -sf clickhouse-server.docs debian/clickhouse-client.docs
|
||||
ln -sf clickhouse-server.docs debian/clickhouse-common-static.docs
|
||||
|
||||
mkdir -p $(DESTDIR)/etc/security/limits.d
|
||||
cp debian/clickhouse.limits $(DESTDIR)/etc/security/limits.d/clickhouse.conf
|
||||
|
||||
# systemd compatibility
|
||||
mkdir -p $(DESTDIR)/etc/systemd/system/
|
||||
cp debian/clickhouse-server.service $(DESTDIR)/etc/systemd/system/
|
||||
|
debian/watch (2 lines changed, vendored)
@ -1,6 +1,6 @@
|
||||
version=4
|
||||
|
||||
opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)-stable\.tar\.gz%clickhouse-$1.tar.gz%" \
|
||||
https://github.com/yandex/clickhouse/tags \
|
||||
https://github.com/ClickHouse/ClickHouse/tags \
|
||||
(?:.*?/)?v?(\d[\d.]*)-stable\.tar\.gz debian uupdate
|
||||
|
||||
|
@ -366,6 +366,9 @@ function run_tests
|
||||
|
||||
# JSON functions
|
||||
01666_blns
|
||||
|
||||
# Depends on AWS
|
||||
01801_s3_cluster
|
||||
)
|
||||
|
||||
(time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"
|
||||
|
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf-8 -*-
|
||||
from multiprocessing import cpu_count
|
||||
from subprocess import Popen, call, STDOUT
|
||||
from subprocess import Popen, call, check_output, STDOUT
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
@ -85,10 +85,27 @@ def prepare_for_hung_check():
|
||||
# Issue #21004, live views are experimental, so let's just suppress it
|
||||
call("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """, shell=True, stderr=STDOUT)
|
||||
|
||||
# Wait for last queries to finish if any, not longer than 120 seconds
|
||||
# Kill other queries which known to be slow
|
||||
# It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds
|
||||
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """, shell=True, stderr=STDOUT)
|
||||
# Long query from 00084_external_agregation
|
||||
call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """, shell=True, stderr=STDOUT)
|
||||
|
||||
# Wait for last queries to finish if any, not longer than 300 seconds
|
||||
call("""clickhouse client -q "select sleepEachRow((
|
||||
select maxOrDefault(120 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 120
|
||||
) / 120) from numbers(120) format Null" """, shell=True, stderr=STDOUT)
|
||||
select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300
|
||||
) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT)
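For readability, the 300-second wait query used above, unfolded (the same statement, only reformatted):

``` sql
-- Each of the 300 rows sleeps for (longest remaining elapsed budget)/300 seconds,
-- so the SELECT blocks roughly until the slowest still-running query finishes,
-- but never much longer than ~300 seconds in total.
SELECT sleepEachRow(
    (
        SELECT maxOrDefault(300 - elapsed) + 1
        FROM system.processes
        WHERE query NOT LIKE '%from system.processes%' AND elapsed < 300
    ) / 300
)
FROM numbers(300)
FORMAT Null
```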
|
||||
|
||||
# Even if all clickhouse-test processes are finished, there are probably some sh scripts,
|
||||
# which still run some new queries. Let's ignore them.
|
||||
try:
|
||||
query = """clickhouse client -q "SELECT count() FROM system.processes where elapsed > 300" """
|
||||
output = check_output(query, shell=True, stderr=STDOUT).decode('utf-8').strip()
|
||||
if int(output) == 0:
|
||||
return False
|
||||
except:
|
||||
pass
|
||||
return True
|
||||
|
||||
if __name__ == "__main__":
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
|
||||
@ -119,12 +136,12 @@ if __name__ == "__main__":
|
||||
|
||||
logging.info("All processes finished")
|
||||
if args.hung_check:
|
||||
prepare_for_hung_check()
|
||||
have_long_running_queries = prepare_for_hung_check()
|
||||
logging.info("Checking if some queries hung")
|
||||
cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1")
|
||||
res = call(cmd, shell=True, stderr=STDOUT)
|
||||
hung_check_status = "No queries hung\tOK\n"
|
||||
if res != 0:
|
||||
if res != 0 and have_long_running_queries:
|
||||
logging.info("Hung check failed with exit code {}".format(res))
|
||||
hung_check_status = "Hung check failed\tFAIL\n"
|
||||
open(os.path.join(args.output_folder, "test_results.tsv"), 'w+').write(hung_check_status)
|
||||
|
@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
|
||||
INSERT 0 1
|
||||
|
||||
postgresql> SELECT * FROM test;
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
```
|
||||
|
||||
Table in ClickHouse, retrieving data from the PostgreSQL table created above:
|
||||
|
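The hunk above ends just before the docs show the corresponding ClickHouse-side table. A hedged sketch of what such a table could look like (the column list and connection parameters are assumptions, not taken from this diff):

``` sql
-- Hypothetical ClickHouse table reading from the PostgreSQL table `test` shown above.
CREATE TABLE default.postgresql_table
(
    `float_nullable` Nullable(Float32),
    `str` String,
    `int_id` Int32
)
ENGINE = PostgreSQL('localhost:5432', 'postgres_db', 'test', 'postgres_user', 'password');

-- Querying it fetches rows from PostgreSQL on the fly:
SELECT * FROM default.postgresql_table WHERE int_id = 1;
```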
@ -33,7 +33,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res);
|
||||
|
||||
``` text
|
||||
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
└─────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -437,13 +437,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) data type has
|
||||
**Example**
|
||||
|
||||
``` sql
|
||||
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type;
|
||||
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─MurmurHash3──────┬─type────────────┐
|
||||
│ 6�1�4"S5KT�~~q │ FixedString(16) │
|
||||
└──────────────────┴─────────────────┘
|
||||
┌─MurmurHash3──────────────────────┬─type───┐
|
||||
│ 368A1A311CB7342253354B548E7E7E71 │ String │
|
||||
└──────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## xxHash32, xxHash64 {#hash-functions-xxhash32}
|
||||
|
@ -65,9 +65,9 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
|
||||
INSERT 0 1
|
||||
|
||||
postgresql> SELECT * FROM test;
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
```
|
||||
|
||||
|
@ -35,7 +35,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res)
|
||||
|
||||
``` text
|
||||
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
└─────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -434,13 +434,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) データ型
|
||||
**例**
|
||||
|
||||
``` sql
|
||||
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type
|
||||
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─MurmurHash3──────┬─type────────────┐
|
||||
│ 6�1�4"S5KT�~~q │ FixedString(16) │
|
||||
└──────────────────┴─────────────────┘
|
||||
┌─MurmurHash3──────────────────────┬─type───┐
|
||||
│ 368A1A311CB7342253354B548E7E7E71 │ String │
|
||||
└──────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## xxHash32,xxHash64 {#hash-functions-xxhash32}
|
||||
|
@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
|
||||
INSERT 0 1
|
||||
|
||||
postgresql> SELECT * FROM test;
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
```
|
||||
|
||||
Таблица в ClickHouse, получение данных из PostgreSQL таблицы, созданной выше:
|
||||
|
@ -25,7 +25,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res);
|
||||
|
||||
``` text
|
||||
┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
│ │ AggregateFunction(groupBitmap, UInt8) │
|
||||
└─────┴──────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
|
@ -430,7 +430,7 @@ murmurHash3_128( expr )
|
||||
|
||||
**Аргументы**
|
||||
|
||||
- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа[String](../../sql-reference/functions/hash-functions.md).
|
||||
- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql-reference/functions/hash-functions.md).
|
||||
|
||||
**Возвращаемое значение**
|
||||
|
||||
@ -439,13 +439,13 @@ murmurHash3_128( expr )
|
||||
**Пример**
|
||||
|
||||
``` sql
|
||||
SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type;
|
||||
SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type;
|
||||
```
|
||||
|
||||
``` text
|
||||
┌─MurmurHash3──────┬─type────────────┐
|
||||
│ 6�1�4"S5KT�~~q │ FixedString(16) │
|
||||
└──────────────────┴─────────────────┘
|
||||
┌─MurmurHash3──────────────────────┬─type───┐
|
||||
│ 368A1A311CB7342253354B548E7E7E71 │ String │
|
||||
└──────────────────────────────────┴────────┘
|
||||
```
|
||||
|
||||
## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64}
|
||||
|
@ -65,10 +65,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
|
||||
INSERT 0 1
|
||||
|
||||
postgresql> SELECT * FROM test;
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
int_id | int_nullable | float | str | float_nullable
|
||||
--------+--------------+-------+------+----------------
|
||||
1 | | 2 | test |
|
||||
(1 row)
|
||||
```
|
||||
|
||||
Получение данных в ClickHouse:
|
||||
|
@ -109,7 +109,8 @@ def build_single_page_version(lang, args, nav, cfg):
|
||||
extra['single_page'] = True
|
||||
extra['is_amp'] = False
|
||||
|
||||
with open(os.path.join(args.docs_dir, lang, 'single.md'), 'w') as single_md:
|
||||
single_md_path = os.path.join(args.docs_dir, lang, 'single.md')
|
||||
with open(single_md_path, 'w') as single_md:
|
||||
concatenate(lang, args.docs_dir, single_md, nav)
|
||||
|
||||
with util.temp_dir() as site_temp:
|
||||
@ -221,3 +222,7 @@ def build_single_page_version(lang, args, nav, cfg):
|
||||
subprocess.check_call(' '.join(create_pdf_command), shell=True)
|
||||
|
||||
logging.info(f'Finished building single page version for {lang}')
|
||||
|
||||
if os.path.exists(single_md_path):
|
||||
os.unlink(single_md_path)
|
||||
|
@ -108,14 +108,6 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
|
||||
" UNION ALL "
|
||||
"SELECT cluster FROM system.clusters"
|
||||
" UNION ALL "
|
||||
"SELECT name FROM system.errors"
|
||||
" UNION ALL "
|
||||
"SELECT event FROM system.events"
|
||||
" UNION ALL "
|
||||
"SELECT metric FROM system.asynchronous_metrics"
|
||||
" UNION ALL "
|
||||
"SELECT metric FROM system.metrics"
|
||||
" UNION ALL "
|
||||
"SELECT macro FROM system.macros"
|
||||
" UNION ALL "
|
||||
"SELECT policy_name FROM system.storage_policies"
|
||||
@ -139,17 +131,12 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo
|
||||
|
||||
query << ") WHERE notEmpty(res)";
|
||||
|
||||
Settings settings;
|
||||
/// To show all rows from:
|
||||
/// - system.errors
|
||||
/// - system.events
|
||||
settings.system_events_show_zero_values = true;
|
||||
fetch(connection, timeouts, query.str(), settings);
|
||||
fetch(connection, timeouts, query.str());
|
||||
}
|
||||
|
||||
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings)
|
||||
void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query)
|
||||
{
|
||||
connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings);
|
||||
connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete);
|
||||
|
||||
while (true)
|
||||
{
|
||||
|
@ -33,7 +33,7 @@ public:
|
||||
private:
|
||||
|
||||
void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit);
|
||||
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings);
|
||||
void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query);
|
||||
void fillWordsFromBlock(const Block & block);
|
||||
|
||||
/// Words are fetched asynchronously.
|
||||
|
@ -71,6 +71,9 @@ namespace ErrorCodes
|
||||
|
||||
}
|
||||
|
||||
/// ANSI escape sequence for intense color in terminal.
|
||||
#define HILITE "\033[1m"
|
||||
#define END_HILITE "\033[0m"
|
||||
|
||||
using namespace DB;
|
||||
namespace po = boost::program_options;
|
||||
@ -559,20 +562,32 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
|
||||
bool stdin_is_a_tty = isatty(STDIN_FILENO);
|
||||
bool stdout_is_a_tty = isatty(STDOUT_FILENO);
|
||||
bool is_interactive = stdin_is_a_tty && stdout_is_a_tty;
|
||||
|
||||
/// dpkg or apt installers can ask for non-interactive work explicitly.
|
||||
|
||||
const char * debian_frontend_var = getenv("DEBIAN_FRONTEND");
|
||||
bool noninteractive = debian_frontend_var && debian_frontend_var == std::string_view("noninteractive");
|
||||
|
||||
bool is_interactive = !noninteractive && stdin_is_a_tty && stdout_is_a_tty;
|
||||
|
||||
/// We can ask password even if stdin is closed/redirected but /dev/tty is available.
|
||||
bool can_ask_password = !noninteractive && stdout_is_a_tty;
|
||||
|
||||
if (has_password_for_default_user)
|
||||
{
|
||||
fmt::print("Password for default user is already specified. To remind or reset, see {} and {}.\n",
|
||||
fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE "\n",
|
||||
users_config_file.string(), users_d.string());
|
||||
}
|
||||
else if (!is_interactive)
|
||||
else if (!can_ask_password)
|
||||
{
|
||||
fmt::print("Password for default user is empty string. See {} and {} to change it.\n",
|
||||
fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE "\n",
|
||||
users_config_file.string(), users_d.string());
|
||||
}
|
||||
else
|
||||
{
|
||||
/// NOTE: When installing debian package with dpkg -i, stdin is not a terminal but we are still being able to enter password.
|
||||
/// More sophisticated method with /dev/tty is used inside the `readpassphrase` function.
|
||||
|
||||
char buf[1000] = {};
|
||||
std::string password;
|
||||
if (auto * result = readpassphrase("Enter password for default user: ", buf, sizeof(buf), 0))
|
||||
@ -600,7 +615,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
"</yandex>\n";
|
||||
out.sync();
|
||||
out.finalize();
|
||||
fmt::print("Password for default user is saved in file {}.\n", password_file);
|
||||
fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file);
|
||||
#else
|
||||
out << "<yandex>\n"
|
||||
" <users>\n"
|
||||
@ -611,12 +626,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
"</yandex>\n";
|
||||
out.sync();
|
||||
out.finalize();
|
||||
fmt::print("Password for default user is saved in plaintext in file {}.\n", password_file);
|
||||
fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file);
|
||||
#endif
|
||||
has_password_for_default_user = true;
|
||||
}
|
||||
else
|
||||
fmt::print("Password for default user is empty string. See {} and {} to change it.\n",
|
||||
fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE "\n",
|
||||
users_config_file.string(), users_d.string());
|
||||
}
|
||||
|
||||
@ -641,7 +656,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
|
||||
" This is optional. Taskstats accounting will be disabled."
|
||||
" To enable taskstats accounting you may add the required capability later manually.\"",
|
||||
"/tmp/test_setcap.sh", fs::canonical(main_bin_path).string());
|
||||
fmt::print(" {}\n", command);
|
||||
executeScript(command);
|
||||
#endif
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES
|
||||
library-bridge.cpp
|
||||
library-log.cpp
|
||||
LibraryInterface.cpp
|
||||
LibraryBridge.cpp
|
||||
Handlers.cpp
|
||||
HandlerFactory.cpp
|
||||
|
@ -1,4 +1,5 @@
|
||||
#include "LibraryDictionarySourceExternal.h"
|
||||
#include "LibraryInterface.h"
|
||||
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace
|
@ -1,11 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#include <Common/StringUtils/StringUtils.h>
|
||||
#include <Dictionaries/LibraryDictionarySourceExternal.h>
|
||||
#include <Core/Block.h>
|
||||
#include <ext/bit_cast.h>
|
||||
#include <ext/range.h>
|
||||
|
||||
#include "LibraryInterface.h"
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
@ -1,66 +0,0 @@
|
||||
#include <Dictionaries/LibraryDictionarySourceExternal.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
namespace
|
||||
{
|
||||
const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal";
|
||||
}
|
||||
|
||||
namespace ClickHouseLibrary
|
||||
{
|
||||
|
||||
std::string_view LIBRARY_CREATE_NEW_FUNC_NAME = "ClickHouseDictionary_v3_libNew";
|
||||
std::string_view LIBRARY_CLONE_FUNC_NAME = "ClickHouseDictionary_v3_libClone";
|
||||
std::string_view LIBRARY_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_libDelete";
|
||||
|
||||
std::string_view LIBRARY_DATA_NEW_FUNC_NAME = "ClickHouseDictionary_v3_dataNew";
|
||||
std::string_view LIBRARY_DATA_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_dataDelete";
|
||||
|
||||
std::string_view LIBRARY_LOAD_ALL_FUNC_NAME = "ClickHouseDictionary_v3_loadAll";
|
||||
std::string_view LIBRARY_LOAD_IDS_FUNC_NAME = "ClickHouseDictionary_v3_loadIds";
|
||||
std::string_view LIBRARY_LOAD_KEYS_FUNC_NAME = "ClickHouseDictionary_v3_loadKeys";
|
||||
|
||||
std::string_view LIBRARY_IS_MODIFIED_FUNC_NAME = "ClickHouseDictionary_v3_isModified";
|
||||
std::string_view LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME = "ClickHouseDictionary_v3_supportsSelectiveLoad";
|
||||
|
||||
void log(LogLevel level, CString msg)
|
||||
{
|
||||
auto & logger = Poco::Logger::get(DICT_LOGGER_NAME);
|
||||
switch (level)
|
||||
{
|
||||
case LogLevel::TRACE:
|
||||
if (logger.trace())
|
||||
logger.trace(msg);
|
||||
break;
|
||||
case LogLevel::DEBUG:
|
||||
if (logger.debug())
|
||||
logger.debug(msg);
|
||||
break;
|
||||
case LogLevel::INFORMATION:
|
||||
if (logger.information())
|
||||
logger.information(msg);
|
||||
break;
|
||||
case LogLevel::NOTICE:
|
||||
if (logger.notice())
|
||||
logger.notice(msg);
|
||||
break;
|
||||
case LogLevel::WARNING:
|
||||
if (logger.warning())
|
||||
logger.warning(msg);
|
||||
break;
|
||||
case LogLevel::ERROR:
|
||||
if (logger.error())
|
||||
logger.error(msg);
|
||||
break;
|
||||
case LogLevel::CRITICAL:
|
||||
if (logger.critical())
|
||||
logger.critical(msg);
|
||||
break;
|
||||
case LogLevel::FATAL:
|
||||
if (logger.fatal())
|
||||
logger.fatal(msg);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -3,7 +3,7 @@
|
||||
#include <common/logger_useful.h>
|
||||
#include <nanodbc/nanodbc.h>
|
||||
#include <mutex>
|
||||
#include <Common/BorrowedObjectPool.h>
|
||||
#include <common/BorrowedObjectPool.h>
|
||||
#include <unordered_map>
|
||||
|
||||
|
||||
|
@ -173,18 +173,24 @@ int waitServersToFinish(std::vector<DB::ProtocolServerAdapter> & servers, size_t
|
||||
const int sleep_one_ms = 100;
|
||||
int sleep_current_ms = 0;
|
||||
int current_connections = 0;
|
||||
while (sleep_current_ms < sleep_max_ms)
|
||||
for (;;)
|
||||
{
|
||||
current_connections = 0;
|
||||
|
||||
for (auto & server : servers)
|
||||
{
|
||||
server.stop();
|
||||
current_connections += server.currentConnections();
|
||||
}
|
||||
|
||||
if (!current_connections)
|
||||
break;
|
||||
|
||||
sleep_current_ms += sleep_one_ms;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
|
||||
if (sleep_current_ms < sleep_max_ms)
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms));
|
||||
else
|
||||
break;
|
||||
}
|
||||
return current_connections;
|
||||
}
|
||||
@ -951,6 +957,9 @@ int Server::main(const std::vector<std::string> & /*args*/)
|
||||
global_context->shutdownKeeperStorageDispatcher();
|
||||
}
|
||||
|
||||
/// Wait server pool to avoid use-after-free of destroyed context in the handlers
|
||||
server_pool.joinAll();
|
||||
|
||||
/** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available.
|
||||
* At this moment, no one could own shared part of Context.
|
||||
*/
|
||||
|
@ -551,6 +551,15 @@ void Connection::sendIgnoredPartUUIDs(const std::vector<UUID> & uuids)
|
||||
out->next();
|
||||
}
|
||||
|
||||
|
||||
void Connection::sendReadTaskResponse(const String & response)
|
||||
{
|
||||
writeVarUInt(Protocol::Client::ReadTaskResponse, *out);
|
||||
writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out);
|
||||
writeStringBinary(response, *out);
|
||||
out->next();
|
||||
}
|
||||
|
||||
void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name)
|
||||
{
|
||||
/// NOTE 'Throttler' is not used in this method (could use, but it's not important right now).
|
||||
@ -807,6 +816,9 @@ Packet Connection::receivePacket()
|
||||
readVectorBinary(res.part_uuids, *in);
|
||||
return res;
|
||||
|
||||
case Protocol::Server::ReadTaskRequest:
|
||||
return res;
|
||||
|
||||
default:
|
||||
/// In unknown state, disconnect - to not leave unsynchronised connection.
|
||||
disconnect();
|
||||
@ -907,13 +919,13 @@ void Connection::setDescription()
|
||||
}
|
||||
|
||||
|
||||
std::unique_ptr<Exception> Connection::receiveException()
|
||||
std::unique_ptr<Exception> Connection::receiveException() const
|
||||
{
|
||||
return std::make_unique<Exception>(readException(*in, "Received from " + getDescription(), true /* remote */));
|
||||
}
|
||||
|
||||
|
||||
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
|
||||
std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type) const
|
||||
{
|
||||
size_t num = Protocol::Server::stringsInMessage(msg_type);
|
||||
std::vector<String> strings(num);
|
||||
@ -923,7 +935,7 @@ std::vector<String> Connection::receiveMultistringMessage(UInt64 msg_type)
|
||||
}
|
||||
|
||||
|
||||
Progress Connection::receiveProgress()
|
||||
Progress Connection::receiveProgress() const
|
||||
{
|
||||
Progress progress;
|
||||
progress.read(*in, server_revision);
|
||||
@ -931,7 +943,7 @@ Progress Connection::receiveProgress()
|
||||
}
|
||||
|
||||
|
||||
BlockStreamProfileInfo Connection::receiveProfileInfo()
|
||||
BlockStreamProfileInfo Connection::receiveProfileInfo() const
|
||||
{
|
||||
BlockStreamProfileInfo profile_info;
|
||||
profile_info.read(*in);
|
||||
|
@ -159,6 +159,8 @@ public:
|
||||
/// Send parts' uuids to exclude them from query processing
|
||||
void sendIgnoredPartUUIDs(const std::vector<UUID> & uuids);
|
||||
|
||||
void sendReadTaskResponse(const String &);
|
||||
|
||||
/// Send prepared block of data (serialized and, if need, compressed), that will be read from 'input'.
|
||||
/// You could pass size of serialized/compressed block.
|
||||
void sendPreparedData(ReadBuffer & input, size_t size, const String & name = "");
|
||||
@ -269,7 +271,7 @@ private:
|
||||
class LoggerWrapper
|
||||
{
|
||||
public:
|
||||
LoggerWrapper(Connection & parent_)
|
||||
explicit LoggerWrapper(Connection & parent_)
|
||||
: log(nullptr), parent(parent_)
|
||||
{
|
||||
}
|
||||
@ -304,10 +306,10 @@ private:
|
||||
Block receiveLogData();
|
||||
Block receiveDataImpl(BlockInputStreamPtr & stream);
|
||||
|
||||
std::vector<String> receiveMultistringMessage(UInt64 msg_type);
|
||||
std::unique_ptr<Exception> receiveException();
|
||||
Progress receiveProgress();
|
||||
BlockStreamProfileInfo receiveProfileInfo();
|
||||
std::vector<String> receiveMultistringMessage(UInt64 msg_type) const;
|
||||
std::unique_ptr<Exception> receiveException() const;
|
||||
Progress receiveProgress() const;
|
||||
BlockStreamProfileInfo receiveProfileInfo() const;
|
||||
|
||||
void initInputBuffers();
|
||||
void initBlockInput();
|
||||
|
@ -26,7 +26,7 @@ public:
|
||||
using Entry = PoolBase<Connection>::Entry;
|
||||
|
||||
public:
|
||||
virtual ~IConnectionPool() {}
|
||||
virtual ~IConnectionPool() = default;
|
||||
|
||||
/// Selects the connection to work.
|
||||
/// If force_connected is false, the client must manually ensure that returned connection is good.
|
||||
|
@ -14,6 +14,12 @@
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
|
||||
/** To receive data from multiple replicas (connections) from one shard asynchronously.
|
||||
* The principle of Hedged Connections is used to reduce tail latency:
|
||||
* if we don't receive data from replica and there is no progress in query execution
|
||||
@ -84,6 +90,11 @@ public:
|
||||
const ClientInfo & client_info,
|
||||
bool with_pending_data) override;
|
||||
|
||||
void sendReadTaskResponse(const String &) override
|
||||
{
|
||||
throw Exception("sendReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
Packet receivePacket() override;
|
||||
|
||||
Packet receivePacketUnlocked(AsyncCallback async_callback) override;
|
||||
|
@ -24,6 +24,8 @@ public:
|
||||
const ClientInfo & client_info,
|
||||
bool with_pending_data) = 0;
|
||||
|
||||
virtual void sendReadTaskResponse(const String &) = 0;
|
||||
|
||||
/// Get packet from any replica.
|
||||
virtual Packet receivePacket() = 0;
|
||||
|
||||
|
@ -155,6 +155,15 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector<UUID> & uuid
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void MultiplexedConnections::sendReadTaskResponse(const String & response)
|
||||
{
|
||||
std::lock_guard lock(cancel_mutex);
|
||||
if (cancelled)
|
||||
return;
|
||||
current_connection->sendReadTaskResponse(response);
|
||||
}
|
||||
|
||||
Packet MultiplexedConnections::receivePacket()
|
||||
{
|
||||
std::lock_guard lock(cancel_mutex);
|
||||
@ -210,6 +219,7 @@ Packet MultiplexedConnections::drain()
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::ReadTaskRequest:
|
||||
case Protocol::Server::PartUUIDs:
|
||||
case Protocol::Server::Data:
|
||||
case Protocol::Server::Progress:
|
||||
@ -273,6 +283,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac
|
||||
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::ReadTaskRequest:
|
||||
case Protocol::Server::PartUUIDs:
|
||||
case Protocol::Server::Data:
|
||||
case Protocol::Server::Progress:
|
||||
|
@ -39,6 +39,8 @@ public:
|
||||
const ClientInfo & client_info,
|
||||
bool with_pending_data) override;
|
||||
|
||||
void sendReadTaskResponse(const String &) override;
|
||||
|
||||
Packet receivePacket() override;
|
||||
|
||||
void disconnect() override;
|
||||
|
@ -6,7 +6,7 @@
|
||||
#include <Poco/Mutex.h>
|
||||
#include <Poco/Semaphore.h>
|
||||
|
||||
#include <Common/MoveOrCopyIfThrow.h>
|
||||
#include <common/MoveOrCopyIfThrow.h>
|
||||
|
||||
/** A very simple thread-safe queue of limited size.
|
||||
* If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty.
|
||||
|
@ -51,7 +51,7 @@ private:
|
||||
*/
|
||||
struct PoolEntryHelper
|
||||
{
|
||||
PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; }
|
||||
explicit PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; }
|
||||
~PoolEntryHelper()
|
||||
{
|
||||
std::unique_lock lock(data.pool.mutex);
|
||||
@ -69,7 +69,7 @@ public:
|
||||
public:
|
||||
friend class PoolBase<Object>;
|
||||
|
||||
Entry() {} /// For deferred initialization.
|
||||
Entry() = default; /// For deferred initialization.
|
||||
|
||||
/** The `Entry` object protects the resource from being used by another thread.
|
||||
* The following methods are forbidden for `rvalue`, so you can not write a similar to
|
||||
@ -99,10 +99,10 @@ public:
|
||||
private:
|
||||
std::shared_ptr<PoolEntryHelper> data;
|
||||
|
||||
Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
|
||||
explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
|
||||
};
|
||||
|
||||
virtual ~PoolBase() {}
|
||||
virtual ~PoolBase() = default;
|
||||
|
||||
/** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
|
||||
Entry get(Poco::Timespan::TimeDiff timeout)
|
||||
|
@ -421,26 +421,38 @@ std::pair<ResponsePtr, Undo> TestKeeperMultiRequest::process(TestKeeper::Contain
|
||||
|
||||
try
|
||||
{
|
||||
for (const auto & request : requests)
|
||||
auto request_it = requests.begin();
|
||||
response.error = Error::ZOK;
|
||||
while (request_it != requests.end())
|
||||
{
|
||||
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(*request);
|
||||
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(**request_it);
|
||||
++request_it;
|
||||
auto [ cur_response, undo_action ] = concrete_request.process(container, zxid);
|
||||
response.responses.emplace_back(cur_response);
|
||||
if (cur_response->error != Error::ZOK)
|
||||
{
|
||||
response.error = cur_response->error;
|
||||
|
||||
for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it)
|
||||
if (*it)
|
||||
(*it)();
|
||||
|
||||
return { std::make_shared<MultiResponse>(response), {} };
|
||||
break;
|
||||
}
|
||||
|
||||
undo_actions.emplace_back(std::move(undo_action));
|
||||
}
|
||||
|
||||
if (response.error != Error::ZOK)
|
||||
{
|
||||
for (auto it = undo_actions.rbegin(); it != undo_actions.rend(); ++it)
|
||||
if (*it)
|
||||
(*it)();
|
||||
|
||||
while (request_it != requests.end())
|
||||
{
|
||||
const TestKeeperRequest & concrete_request = dynamic_cast<const TestKeeperRequest &>(**request_it);
|
||||
++request_it;
|
||||
response.responses.emplace_back(concrete_request.createResponse());
|
||||
response.responses.back()->error = Error::ZRUNTIMEINCONSISTENCY;
|
||||
}
|
||||
else
|
||||
undo_actions.emplace_back(std::move(undo_action));
|
||||
}
|
||||
|
||||
response.error = Error::ZOK;
|
||||
return { std::make_shared<MultiResponse>(response), {} };
|
||||
}
|
||||
catch (...)
|
||||
|
@ -74,6 +74,9 @@
|
||||
/// Minimum revision supporting OpenTelemetry
|
||||
#define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442
|
||||
|
||||
|
||||
#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1
|
||||
|
||||
/// Minimum revision supporting interserver secret.
|
||||
#define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441
|
||||
|
||||
|
@ -76,8 +76,10 @@ namespace Protocol
|
||||
Log = 10, /// System logs of the query execution
|
||||
TableColumns = 11, /// Columns' description for default values calculation
|
||||
PartUUIDs = 12, /// List of unique parts ids.
|
||||
|
||||
MAX = PartUUIDs,
|
||||
ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed
|
||||
/// Note the inverted logic here: the server sends the request
/// and the client sends back the response.
|
||||
MAX = ReadTaskRequest,
|
||||
};
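The `ReadTaskRequest`/`ReadTaskResponse` packet pair added here carries the dynamic task hand-out used by distributed processing such as the `s3Cluster` table function (later in this diff, `RemoteQueryExecutor`'s `TaskIterator` is noted to be the same type as `StorageS3Source::IteratorWrapper`, and the `01801_s3_cluster` test is referenced). A hedged illustration of the kind of query that exercises it; the cluster name, bucket URL and schema are assumptions:

``` sql
-- Hypothetical query: the initiator enumerates S3 keys and hands them out to
-- cluster replicas one by one via ReadTaskRequest/ReadTaskResponse.
SELECT count()
FROM s3Cluster('my_cluster',
               'https://my-bucket.s3.amazonaws.com/data/*.csv',
               'CSV',
               'a UInt64, b String');
```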
|
||||
|
||||
/// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10
|
||||
@ -100,6 +102,7 @@ namespace Protocol
|
||||
"Log",
|
||||
"TableColumns",
|
||||
"PartUUIDs",
|
||||
"ReadTaskRequest"
|
||||
};
|
||||
return packet <= MAX
|
||||
? data[packet]
|
||||
@ -135,8 +138,9 @@ namespace Protocol
|
||||
KeepAlive = 6, /// Keep the connection alive
|
||||
Scalar = 7, /// A block of data (compressed or not).
|
||||
IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing
|
||||
ReadTaskResponse = 9, /// TODO:
|
||||
|
||||
MAX = IgnoredPartUUIDs,
|
||||
MAX = ReadTaskResponse,
|
||||
};
|
||||
|
||||
inline const char * toString(UInt64 packet)
|
||||
@ -151,6 +155,7 @@ namespace Protocol
|
||||
"KeepAlive",
|
||||
"Scalar",
|
||||
"IgnoredPartUUIDs",
|
||||
"ReadTaskResponse",
|
||||
};
|
||||
return packet <= MAX
|
||||
? data[packet]
|
||||
|
@ -8,7 +8,7 @@ namespace DB
|
||||
|
||||
/// Prints internal server logs
|
||||
/// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock()
|
||||
/// NOTE: IRowOutputStream does not suit this case well
/// NOTE: IRowOutputFormat does not suit this case well
|
||||
class InternalTextLogsRowOutputStream : public IBlockOutputStream
|
||||
{
|
||||
public:
|
||||
|
@ -22,20 +22,18 @@ namespace DB
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int UNKNOWN_PACKET_FROM_SERVER;
|
||||
extern const int DUPLICATED_PART_UUIDS;
|
||||
}
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
Connection & connection,
|
||||
const String & query_,
|
||||
const Block & header_,
|
||||
ContextPtr context_,
|
||||
ThrottlerPtr throttler,
|
||||
const Scalars & scalars_,
|
||||
const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_)
|
||||
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
{
|
||||
create_connections = [this, &connection, throttler]()
|
||||
{
|
||||
@ -45,14 +43,11 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
std::vector<IConnectionPool::Entry> && connections_,
|
||||
const String & query_,
|
||||
const Block & header_,
|
||||
ContextPtr context_,
|
||||
const ThrottlerPtr & throttler,
|
||||
const Scalars & scalars_,
|
||||
const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_)
|
||||
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
{
|
||||
create_connections = [this, connections_, throttler]() mutable {
|
||||
return std::make_unique<MultiplexedConnections>(std::move(connections_), context->getSettingsRef(), throttler);
|
||||
@ -61,14 +56,11 @@ RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
|
||||
RemoteQueryExecutor::RemoteQueryExecutor(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_,
|
||||
const Block & header_,
|
||||
ContextPtr context_,
|
||||
const ThrottlerPtr & throttler,
|
||||
const Scalars & scalars_,
|
||||
const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_)
|
||||
: header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_)
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_,
|
||||
QueryProcessingStage::Enum stage_, std::shared_ptr<TaskIterator> task_iterator_)
|
||||
: header(header_), query(query_), context(context_)
|
||||
, scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_)
|
||||
{
|
||||
create_connections = [this, pool, throttler]()->std::unique_ptr<IConnections>
|
||||
{
|
||||
@ -307,6 +299,9 @@ std::optional<Block> RemoteQueryExecutor::processPacket(Packet packet)
|
||||
{
|
||||
switch (packet.type)
|
||||
{
|
||||
case Protocol::Server::ReadTaskRequest:
|
||||
processReadTaskRequest();
|
||||
break;
|
||||
case Protocol::Server::PartUUIDs:
|
||||
if (!setPartUUIDs(packet.part_uuids))
|
||||
got_duplicated_part_uuids = true;
|
||||
@ -385,6 +380,14 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector<UUID> & uuids)
|
||||
return true;
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::processReadTaskRequest()
|
||||
{
|
||||
if (!task_iterator)
|
||||
throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR);
|
||||
auto response = (*task_iterator)();
|
||||
connections->sendReadTaskResponse(response);
|
||||
}
|
||||
|
||||
void RemoteQueryExecutor::finish(std::unique_ptr<ReadContext> * read_context)
|
||||
{
|
||||
/** If one of:
|
||||
|
@ -26,6 +26,9 @@ using ProfileInfoCallback = std::function<void(const BlockStreamProfileInfo & in
|
||||
|
||||
class RemoteQueryExecutorReadContext;
|
||||
|
||||
/// This is the same type as StorageS3Source::IteratorWrapper
|
||||
using TaskIterator = std::function<String()>;
|
||||
|
||||
/// This class allows one to launch queries on remote replicas of one shard and get results
|
||||
class RemoteQueryExecutor
|
||||
{
|
||||
@ -37,21 +40,21 @@ public:
|
||||
Connection & connection,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
|
||||
|
||||
/// Accepts several connections already taken from pool.
|
||||
RemoteQueryExecutor(
|
||||
std::vector<IConnectionPool::Entry> && connections_,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
|
||||
|
||||
/// Takes a pool and gets one or several connections from it.
|
||||
RemoteQueryExecutor(
|
||||
const ConnectionPoolWithFailoverPtr & pool,
|
||||
const String & query_, const Block & header_, ContextPtr context_,
|
||||
const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(),
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete);
|
||||
QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr<TaskIterator> task_iterator_ = {});
|
||||
|
||||
~RemoteQueryExecutor();
|
||||
|
||||
@ -119,6 +122,8 @@ private:
|
||||
/// Temporary tables needed to be sent to remote servers
|
||||
Tables external_tables;
|
||||
QueryProcessingStage::Enum stage;
|
||||
/// Initiator identifier for distributed task processing
|
||||
std::shared_ptr<TaskIterator> task_iterator;
|
||||
|
||||
/// Streams for reading from temporary tables and following sending of data
|
||||
/// to remote servers for GLOBAL-subqueries
|
||||
@ -179,6 +184,8 @@ private:
|
||||
/// Return true if duplicates found.
|
||||
bool setPartUUIDs(const std::vector<UUID> & uuids);
|
||||
|
||||
void processReadTaskRequest();
|
||||
|
||||
/// Cancel query and restart it with info about duplicated UUIDs
|
||||
/// only for `allow_experimental_query_deduplication`.
|
||||
std::variant<Block, int> restartQueryWithoutDuplicatedUUIDs(std::unique_ptr<ReadContext> * read_context = nullptr);
|
||||
|
@ -52,7 +52,7 @@ static std::unordered_map<String, String> fetchTablesCreateQuery(
|
||||
static std::vector<String> fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database)
|
||||
{
|
||||
Block header{{std::make_shared<DataTypeString>(), "table_name"}};
|
||||
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = " + quoteString(database);
|
||||
String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database);
|
||||
|
||||
std::vector<String> tables_in_db;
|
||||
MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE);
|
||||
|
@ -1,7 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
#include <common/BorrowedObjectPool.h>
|
||||
|
||||
#include <Core/Block.h>
|
||||
#include <Common/BorrowedObjectPool.h>
|
||||
#include <Interpreters/Context.h>
|
||||
|
||||
#include "IDictionarySource.h"
|
||||
|
@ -10,7 +10,6 @@
|
||||
#include "DictionarySourceFactory.h"
|
||||
#include "DictionarySourceHelpers.h"
|
||||
#include "DictionaryStructure.h"
|
||||
#include "LibraryDictionarySourceExternal.h"
|
||||
#include "registerDictionaries.h"
|
||||
#include <IO/WriteBufferFromString.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
@ -1,18 +0,0 @@
|
||||
#include <Formats/IRowInputStream.h>
|
||||
#include <Common/Exception.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
void IRowInputStream::syncAfterError()
|
||||
{
|
||||
throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
}
|
@ -1,51 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include <Columns/IColumn.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
/// Contains extra information about read data.
|
||||
struct RowReadExtension
|
||||
{
|
||||
/// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise.
|
||||
/// It's used to attach defaults for partially filled rows.
|
||||
/// Can be empty, this means that all columns are read.
|
||||
std::vector<UInt8> read_columns;
|
||||
};
|
||||
|
||||
/** Interface of stream, that allows to read data by rows.
|
||||
*/
|
||||
class IRowInputStream : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
/** Read next row and append it to the columns.
|
||||
* If no more rows - return false.
|
||||
*/
|
||||
virtual bool read(MutableColumns & columns, RowReadExtension & extra) = 0;
|
||||
|
||||
virtual void readPrefix() {} /// delimiter before begin of result
|
||||
virtual void readSuffix() {} /// delimiter after end of result
|
||||
|
||||
/// Skip data until next row.
|
||||
/// This is intended for text streams, that allow skipping of errors.
|
||||
/// By default - throws not implemented exception.
|
||||
virtual bool allowSyncAfterError() const { return false; }
|
||||
virtual void syncAfterError();
|
||||
|
||||
/// In case of parse error, try to roll back and parse last one or two rows very carefully
|
||||
/// and collect as much as possible diagnostic information about error.
|
||||
/// If not implemented, returns empty string.
|
||||
virtual std::string getDiagnosticInfo() { return {}; }
|
||||
|
||||
virtual ~IRowInputStream() {}
|
||||
};
|
||||
|
||||
using RowInputStreamPtr = std::shared_ptr<IRowInputStream>;
|
||||
|
||||
}
|
@ -1,37 +0,0 @@
|
||||
#include <Common/Exception.h>
|
||||
#include <Core/Block.h>
|
||||
#include <Formats/IRowOutputStream.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
}
|
||||
|
||||
|
||||
void IRowOutputStream::write(const Block & block, size_t row_num)
|
||||
{
|
||||
size_t columns = block.columns();
|
||||
|
||||
writeRowStartDelimiter();
|
||||
|
||||
for (size_t i = 0; i < columns; ++i)
|
||||
{
|
||||
if (i != 0)
|
||||
writeFieldDelimiter();
|
||||
|
||||
const auto & col = block.getByPosition(i);
|
||||
writeField(*col.column, *col.type, row_num);
|
||||
}
|
||||
|
||||
writeRowEndDelimiter();
|
||||
}
|
||||
|
||||
void IRowOutputStream::writeField(const IColumn &, const IDataType &, size_t)
|
||||
{
|
||||
throw Exception("Method writeField is not implemented for output format", ErrorCodes::NOT_IMPLEMENTED);
|
||||
}
|
||||
|
||||
}
|
@ -1,63 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <cstdint>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <common/types.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class Block;
|
||||
class IColumn;
|
||||
class IDataType;
|
||||
struct Progress;
|
||||
|
||||
|
||||
/** Interface of stream for writing data by rows (for example: for output to terminal).
|
||||
*/
|
||||
class IRowOutputStream : private boost::noncopyable
|
||||
{
|
||||
public:
|
||||
|
||||
/** Write a row.
|
||||
* Default implementation calls methods to write single values and delimiters
|
||||
* (except delimiter between rows (writeRowBetweenDelimiter())).
|
||||
*/
|
||||
virtual void write(const Block & block, size_t row_num);
|
||||
|
||||
/** Write single value. */
|
||||
virtual void writeField(const IColumn & column, const IDataType & type, size_t row_num);
|
||||
|
||||
/** Write delimiter. */
|
||||
virtual void writeFieldDelimiter() {} /// delimiter between values
|
||||
virtual void writeRowStartDelimiter() {} /// delimiter before each row
|
||||
virtual void writeRowEndDelimiter() {} /// delimiter after each row
|
||||
virtual void writeRowBetweenDelimiter() {} /// delimiter between rows
|
||||
virtual void writePrefix() {} /// delimiter before resultset
|
||||
virtual void writeSuffix() {} /// delimiter after resultset
|
||||
|
||||
/** Flush output buffers if any. */
|
||||
virtual void flush() {}
|
||||
|
||||
/** Methods to set additional information for output in formats, that support it.
|
||||
*/
|
||||
virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {}
|
||||
virtual void setTotals(const Block & /*totals*/) {}
|
||||
virtual void setExtremes(const Block & /*extremes*/) {}
|
||||
|
||||
/** Notify about progress. Method could be called from different threads.
|
||||
* Passed value are delta, that must be summarized.
|
||||
*/
|
||||
virtual void onProgress(const Progress & /*progress*/) {}
|
||||
|
||||
/** Content-Type to set when sending HTTP response. */
|
||||
virtual String getContentType() const { return "text/plain; charset=UTF-8"; }
|
||||
|
||||
virtual ~IRowOutputStream() {}
|
||||
};
|
||||
|
||||
using RowOutputStreamPtr = std::shared_ptr<IRowOutputStream>;
|
||||
|
||||
}
|
@ -13,8 +13,6 @@ PEERDIR(
|
||||
SRCS(
|
||||
FormatFactory.cpp
|
||||
FormatSchemaInfo.cpp
|
||||
IRowInputStream.cpp
|
||||
IRowOutputStream.cpp
|
||||
JSONEachRowUtils.cpp
|
||||
MySQLBlockInputStream.cpp
|
||||
NativeFormat.cpp
|
||||
|
@ -82,7 +82,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr
|
||||
sink.current_offset += slice.size;
|
||||
}
|
||||
else
|
||||
throw Exception("Function writeSlice expect same column types for GenericArraySlice and GenericArraySink.",
|
||||
throw Exception("Function writeSlice expects same column types for GenericArraySlice and GenericArraySink.",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
@ -162,7 +162,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, GenericArr
|
||||
++sink.current_offset;
|
||||
}
|
||||
else
|
||||
throw Exception("Function writeSlice expect same column types for GenericValueSlice and GenericArraySink.",
|
||||
throw Exception("Function writeSlice expects same column types for GenericValueSlice and GenericArraySink.",
|
||||
ErrorCodes::LOGICAL_ERROR);
|
||||
}
|
||||
|
||||
@ -609,7 +609,7 @@ bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second)
|
||||
{
|
||||
/// Generic arrays should have the same type in order to use column.compareAt(...)
|
||||
if (!first.elements->structureEquals(*second.elements))
|
||||
return false;
|
||||
throw Exception("Function sliceHas expects same column types for slices.", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto impl = sliceHasImpl<search_type, GenericArraySlice, GenericArraySlice, sliceEqualElements, insliceEqualElements>;
|
||||
return impl(first, second, nullptr, nullptr);
|
||||
@ -670,7 +670,7 @@ void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnU
|
||||
auto & data = result.getData();
|
||||
for (auto row : ext::range(0, size))
|
||||
{
|
||||
data[row] = static_cast<UInt8>(sliceHas<search_type>(first.getWhole(), second.getWhole()) ? 1 : 0);
|
||||
data[row] = static_cast<UInt8>(sliceHas<search_type>(first.getWhole(), second.getWhole()));
|
||||
first.next();
|
||||
second.next();
|
||||
}
|
||||
|
@ -477,7 +477,7 @@ DataTypePtr FunctionOverloadResolverAdaptor::getReturnTypeDefaultImplementationF
|
||||
}
|
||||
if (null_presence.has_nullable)
|
||||
{
|
||||
Block nested_columns = createBlockWithNestedColumns(arguments);
|
||||
auto nested_columns = Block(createBlockWithNestedColumns(arguments));
|
||||
auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end()));
|
||||
return makeNullable(return_type);
|
||||
}
|
||||
|
@@ -5,6 +5,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/getLeastSupertype.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnFixedString.h>
@@ -13,9 +14,9 @@
#include <Common/FieldVisitorsAccurateComparison.h>
#include <Common/memcmpSmall.h>
#include <Common/assert_cast.h>
#include "Columns/ColumnLowCardinality.h"
#include "DataTypes/DataTypeLowCardinality.h"
#include "Interpreters/castColumn.h"
#include <Columns/ColumnLowCardinality.h>
#include <DataTypes/DataTypeLowCardinality.h>
#include <Interpreters/castColumn.h>


namespace DB
@@ -373,11 +374,10 @@ public:
            ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

        if (!arguments[1]->onlyNull() && !allowArguments(array_type->getNestedType(), arguments[1]))
            throw Exception("Types of array and 2nd argument of function \""
                + getName() + "\" must be identical up to nullability, cardinality, "
                "numeric types, or Enum and numeric type. Passed: "
                + arguments[0]->getName() + " and " + arguments[1]->getName() + ".",
                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Types of array and 2nd argument of function `{}` must be identical up to nullability, cardinality, "
                "numeric types, or Enum and numeric type. Passed: {} and {}.",
                getName(), arguments[0]->getName(), arguments[1]->getName());

        return std::make_shared<DataTypeNumber<ResultType>>();
    }
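The change above swaps manual string concatenation for the fmt-style Exception constructor (error code first, then a format string with {} placeholders). A minimal sketch of the same pattern, assuming the fmt library and an illustrative exception type rather than DB::Exception:

#include <fmt/format.h>
#include <stdexcept>
#include <string>

/// Illustrative stand-in for DB::Exception; the real class also carries the error code.
struct IllegalTypeError : std::runtime_error
{
    explicit IllegalTypeError(const std::string & msg) : std::runtime_error(msg) {}
};

void checkSecondArgument(const std::string & function, const std::string & array_type, const std::string & arg_type, bool allowed)
{
    /// fmt::format substitutes the placeholders, so the call site stays readable
    /// and the message template can be reused verbatim in tests.
    if (!allowed)
        throw IllegalTypeError(fmt::format(
            "Types of array and 2nd argument of function `{}` must be identical up to nullability, cardinality, "
            "numeric types, or Enum and numeric type. Passed: {} and {}.",
            function, array_type, arg_type));
}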
@ -494,86 +494,13 @@ private:
|
||||
inline void moveResult() { result_column = std::move(result); }
|
||||
};
|
||||
|
||||
static inline bool allowNested(const DataTypePtr & left, const DataTypePtr & right)
|
||||
{
|
||||
return ((isNativeNumber(left) || isEnum(left)) && isNativeNumber(right)) || left->equals(*right);
|
||||
}
|
||||
|
||||
static inline bool allowArguments(const DataTypePtr & array_inner_type, const DataTypePtr & arg)
|
||||
{
|
||||
if (allowNested(array_inner_type, arg))
|
||||
return true;
|
||||
auto inner_type_decayed = removeNullable(removeLowCardinality(array_inner_type));
|
||||
auto arg_decayed = removeNullable(removeLowCardinality(arg));
|
||||
|
||||
/// Nullable
|
||||
|
||||
const bool array_is_nullable = array_inner_type->isNullable();
|
||||
const bool arg_is_nullable = arg->isNullable();
|
||||
|
||||
const DataTypePtr arg_or_arg_nullable_nested = arg_is_nullable
|
||||
? checkAndGetDataType<DataTypeNullable>(arg.get())->getNestedType()
|
||||
: arg;
|
||||
|
||||
if (array_is_nullable) // comparing Array(Nullable(T)) elem and U
|
||||
{
|
||||
const DataTypePtr array_nullable_nested =
|
||||
checkAndGetDataType<DataTypeNullable>(array_inner_type.get())->getNestedType();
|
||||
|
||||
// We also allow Nullable(T) and LC(U) if the Nullable(T) and U are allowed,
|
||||
// the LC(U) will be converted to U.
|
||||
return allowNested(
|
||||
array_nullable_nested,
|
||||
recursiveRemoveLowCardinality(arg_or_arg_nullable_nested));
|
||||
}
|
||||
else if (arg_is_nullable) // cannot compare Array(T) elem (namely, T) and Nullable(T)
|
||||
return false;
|
||||
|
||||
/// LowCardinality
|
||||
|
||||
const auto * const array_lc_ptr = checkAndGetDataType<DataTypeLowCardinality>(array_inner_type.get());
|
||||
const auto * const arg_lc_ptr = checkAndGetDataType<DataTypeLowCardinality>(arg.get());
|
||||
|
||||
const DataTypePtr array_lc_inner_type = recursiveRemoveLowCardinality(array_inner_type);
|
||||
const DataTypePtr arg_lc_inner_type = recursiveRemoveLowCardinality(arg);
|
||||
|
||||
const bool array_is_lc = nullptr != array_lc_ptr;
|
||||
const bool arg_is_lc = nullptr != arg_lc_ptr;
|
||||
|
||||
const bool array_lc_inner_type_is_nullable = array_is_lc && array_lc_inner_type->isNullable();
|
||||
const bool arg_lc_inner_type_is_nullable = arg_is_lc && arg_lc_inner_type->isNullable();
|
||||
|
||||
if (array_is_lc) // comparing LC(T) and U
|
||||
{
|
||||
const DataTypePtr array_lc_nested_or_lc_nullable_nested = array_lc_inner_type_is_nullable
|
||||
? checkAndGetDataType<DataTypeNullable>(array_lc_inner_type.get())->getNestedType()
|
||||
: array_lc_inner_type;
|
||||
|
||||
if (arg_is_lc) // comparing LC(T) and LC(U)
|
||||
{
|
||||
const DataTypePtr arg_lc_nested_or_lc_nullable_nested = arg_lc_inner_type_is_nullable
|
||||
? checkAndGetDataType<DataTypeNullable>(arg_lc_inner_type.get())->getNestedType()
|
||||
: arg_lc_inner_type;
|
||||
|
||||
return allowNested(
|
||||
array_lc_nested_or_lc_nullable_nested,
|
||||
arg_lc_nested_or_lc_nullable_nested);
|
||||
}
|
||||
else if (arg_is_nullable) // Comparing LC(T) and Nullable(U)
|
||||
{
|
||||
if (!array_lc_inner_type_is_nullable)
|
||||
return false; // Can't compare Array(LC(U)) elem and Nullable(T);
|
||||
|
||||
return allowNested(
|
||||
array_lc_nested_or_lc_nullable_nested,
|
||||
arg_or_arg_nullable_nested);
|
||||
}
|
||||
else // Comparing LC(T) and U (U neither Nullable nor LC)
|
||||
return allowNested(array_lc_nested_or_lc_nullable_nested, arg);
|
||||
}
|
||||
|
||||
if (arg_is_lc) // Allow T and LC(U) if U and T are allowed (the low cardinality column will be converted).
|
||||
return allowNested(array_inner_type, arg_lc_inner_type);
|
||||
|
||||
return false;
|
||||
return ((isNativeNumber(inner_type_decayed) || isEnum(inner_type_decayed)) && isNativeNumber(arg_decayed))
|
||||
|| getLeastSupertype({inner_type_decayed, arg_decayed});
|
||||
}
|
||||
|
||||
#define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64
|
||||
@ -1044,33 +971,38 @@ private:
|
||||
if (!col)
|
||||
return nullptr;
|
||||
|
||||
const IColumn & col_nested = col->getData();
|
||||
DataTypePtr array_elements_type = assert_cast<const DataTypeArray &>(*arguments[0].type).getNestedType();
|
||||
const DataTypePtr & index_type = arguments[1].type;
|
||||
|
||||
DataTypePtr common_type = getLeastSupertype({array_elements_type, index_type});
|
||||
|
||||
ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type);
|
||||
|
||||
const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality();
|
||||
const IColumn & item_arg = *right_ptr.get();
|
||||
ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type);
|
||||
|
||||
auto col_res = ResultColumnType::create();
|
||||
|
||||
auto [null_map_data, null_map_item] = getNullMaps(arguments);
|
||||
|
||||
if (item_arg.onlyNull())
|
||||
if (item_arg->onlyNull())
|
||||
Impl::Null<ConcreteAction>::process(
|
||||
col->getOffsets(),
|
||||
col_res->getData(),
|
||||
null_map_data);
|
||||
else if (isColumnConst(item_arg))
|
||||
else if (isColumnConst(*item_arg))
|
||||
Impl::Main<ConcreteAction, true>::vector(
|
||||
col_nested,
|
||||
*col_nested,
|
||||
col->getOffsets(),
|
||||
typeid_cast<const ColumnConst &>(item_arg).getDataColumn(),
|
||||
typeid_cast<const ColumnConst &>(*item_arg).getDataColumn(),
|
||||
col_res->getData(), /// TODO This is wrong.
|
||||
null_map_data,
|
||||
nullptr);
|
||||
else
|
||||
Impl::Main<ConcreteAction>::vector(
|
||||
col_nested,
|
||||
*col_nested,
|
||||
col->getOffsets(),
|
||||
item_arg,
|
||||
*item_arg,
|
||||
col_res->getData(),
|
||||
null_map_data,
|
||||
null_map_item);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <Interpreters/castColumn.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <ext/range.h>
|
||||
#include <ext/map.h>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -51,41 +52,13 @@ public:
|
||||
|
||||
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override
|
||||
{
|
||||
size_t rows = input_rows_count;
|
||||
size_t num_args = arguments.size();
|
||||
|
||||
DataTypePtr common_type = nullptr;
|
||||
auto commonType = [&common_type, &arguments]()
|
||||
{
|
||||
if (common_type == nullptr)
|
||||
{
|
||||
DataTypes data_types;
|
||||
data_types.reserve(arguments.size());
|
||||
for (const auto & argument : arguments)
|
||||
data_types.push_back(argument.type);
|
||||
|
||||
common_type = getLeastSupertype(data_types);
|
||||
}
|
||||
|
||||
return common_type;
|
||||
};
|
||||
DataTypePtr common_type = getLeastSupertype(ext::map(arguments, [](auto & arg) { return arg.type; }));
|
||||
|
||||
Columns preprocessed_columns(num_args);
|
||||
|
||||
for (size_t i = 0; i < num_args; ++i)
|
||||
{
|
||||
const auto & argument = arguments[i];
|
||||
ColumnPtr preprocessed_column = argument.column;
|
||||
|
||||
const auto * argument_type = typeid_cast<const DataTypeArray *>(argument.type.get());
|
||||
const auto & nested_type = argument_type->getNestedType();
|
||||
|
||||
/// Converts Array(Nothing) or Array(Nullable(Nothing) to common type. Example: hasAll([Null, 1], [Null]) -> 1
|
||||
if (typeid_cast<const DataTypeNothing *>(removeNullable(nested_type).get()))
|
||||
preprocessed_column = castColumn(argument, commonType());
|
||||
|
||||
preprocessed_columns[i] = std::move(preprocessed_column);
|
||||
}
|
||||
preprocessed_columns[i] = castColumn(arguments[i], common_type);
|
||||
|
||||
std::vector<std::unique_ptr<GatherUtils::IArraySource>> sources;
|
||||
|
||||
@ -100,12 +73,12 @@ public:
|
||||
}
|
||||
|
||||
if (const auto * argument_column_array = typeid_cast<const ColumnArray *>(argument_column.get()))
|
||||
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows));
|
||||
sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count));
|
||||
else
|
||||
throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR};
|
||||
}
|
||||
|
||||
auto result_column = ColumnUInt8::create(rows);
|
||||
auto result_column = ColumnUInt8::create(input_rows_count);
|
||||
auto * result_column_ptr = typeid_cast<ColumnUInt8 *>(result_column.get());
|
||||
GatherUtils::sliceHas(*sources[0], *sources[1], search_type, *result_column_ptr);
|
||||
|
||||
|
@@ -172,11 +172,12 @@ public:
            for (size_t group = 1; group <= groups_count; ++group)
                all_matches.push_back(matched_groups[group]);

            /// Additional limit to fail fast on supposedly incorrect usage.
            static constexpr size_t MAX_GROUPS_PER_ROW = 1000000;

            if (all_matches.size() > MAX_GROUPS_PER_ROW)
                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName());
            /// Additional limit to fail fast on supposedly incorrect usage, arbitrary value.
            static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
            if (matches_per_row > MAX_MATCHES_PER_ROW)
                throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE,
                    "Too many matches per row (> {}) in the result of function {}",
                    MAX_MATCHES_PER_ROW, getName());

            pos = matched_groups[0].data() + std::max<size_t>(1, matched_groups[0].size());
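The guard above now bounds the number of matches per row (MAX_MATCHES_PER_ROW = 1000) instead of the total number of captured groups, so an abusive pattern fails fast before the result array grows. A tiny sketch of the same fail-fast idea, with hypothetical names:

#include <cstddef>
#include <stdexcept>
#include <string>
#include <vector>

/// Hypothetical helper: stop collecting per-row matches as soon as the arbitrary cap is exceeded.
void appendMatch(std::vector<std::string> & row_matches, std::string match)
{
    static constexpr size_t MAX_MATCHES_PER_ROW = 1000;
    if (row_matches.size() >= MAX_MATCHES_PER_ROW)
        throw std::length_error("Too many matches per row");
    row_matches.push_back(std::move(match));
}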
@@ -17,7 +17,7 @@ private:
    bool nextImpl() override;

public:
    ReadBufferFromIStream(std::istream & istr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE);
    explicit ReadBufferFromIStream(std::istream & istr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE);
};

}
@@ -1,5 +1,6 @@
#pragma once

#include <Core/UUID.h>
#include <Poco/Net/SocketAddress.h>
#include <Common/UInt128.h>
#include <common/types.h>
@ -373,7 +373,7 @@ struct ContextSharedPart
|
||||
std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default)
|
||||
String format_schema_path; /// Path to a directory that contains schema files used by input formats.
|
||||
ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers
|
||||
std::optional<SystemLogs> system_logs; /// Used to log queries and operations on parts
|
||||
std::unique_ptr<SystemLogs> system_logs; /// Used to log queries and operations on parts
|
||||
std::optional<StorageS3Settings> storage_s3_settings; /// Settings of S3 storage
|
||||
|
||||
RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml
|
||||
@ -442,24 +442,32 @@ struct ContextSharedPart
|
||||
|
||||
DatabaseCatalog::shutdown();
|
||||
|
||||
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
|
||||
/// TODO: Get rid of this.
|
||||
std::unique_ptr<SystemLogs> delete_system_logs;
|
||||
{
|
||||
auto lock = std::lock_guard(mutex);
|
||||
|
||||
system_logs.reset();
|
||||
embedded_dictionaries.reset();
|
||||
external_dictionaries_loader.reset();
|
||||
models_repository_guard.reset();
|
||||
external_models_loader.reset();
|
||||
buffer_flush_schedule_pool.reset();
|
||||
schedule_pool.reset();
|
||||
distributed_schedule_pool.reset();
|
||||
message_broker_schedule_pool.reset();
|
||||
ddl_worker.reset();
|
||||
/// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference).
|
||||
/// TODO: Get rid of this.
|
||||
|
||||
/// Stop trace collector if any
|
||||
trace_collector.reset();
|
||||
/// Stop zookeeper connection
|
||||
zookeeper.reset();
|
||||
delete_system_logs = std::move(system_logs);
|
||||
embedded_dictionaries.reset();
|
||||
external_dictionaries_loader.reset();
|
||||
models_repository_guard.reset();
|
||||
external_models_loader.reset();
|
||||
buffer_flush_schedule_pool.reset();
|
||||
schedule_pool.reset();
|
||||
distributed_schedule_pool.reset();
|
||||
message_broker_schedule_pool.reset();
|
||||
ddl_worker.reset();
|
||||
|
||||
/// Stop trace collector if any
|
||||
trace_collector.reset();
|
||||
/// Stop zookeeper connection
|
||||
zookeeper.reset();
|
||||
}
|
||||
|
||||
/// Can be removed w/o context lock
|
||||
delete_system_logs.reset();
|
||||
}
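The shutdown reordering above follows a common pattern: detach `system_logs` from the shared state while the context mutex is held, but run its destructor only after the lock is released, since SystemLogs may hold a refcount back to ContextShared. A minimal sketch of that "move out under lock, destroy outside" idiom, with illustrative names only:

#include <memory>
#include <mutex>

struct SystemLogsSketch
{
    ~SystemLogsSketch() { /* may call back into the owning shared state */ }
};

struct SharedStateSketch
{
    std::mutex mutex;
    std::unique_ptr<SystemLogsSketch> system_logs;

    void shutdown()
    {
        std::unique_ptr<SystemLogsSketch> delete_system_logs;
        {
            std::lock_guard lock(mutex);
            /// Detach under the lock so no other thread observes a half-destroyed member.
            delete_system_logs = std::move(system_logs);
        }
        /// Run the (potentially re-entrant) destructor without holding the lock.
        delete_system_logs.reset();
    }
};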
|
||||
|
||||
bool hasTraceCollector() const
|
||||
@@ -1910,7 +1918,7 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptr<Clus
void Context::initializeSystemLogs()
{
    auto lock = getLock();
    shared->system_logs.emplace(getGlobalContext(), getConfigRef());
    shared->system_logs = std::make_unique<SystemLogs>(getGlobalContext(), getConfigRef());
}

void Context::initializeTraceCollector()
@@ -2615,6 +2623,20 @@ PartUUIDsPtr Context::getPartUUIDs()
    return part_uuids;
}


ReadTaskCallback Context::getReadTaskCallback() const
{
    if (!next_task_callback.has_value())
        throw Exception(fmt::format("Next task callback is not set for query {}", getInitialQueryId()), ErrorCodes::LOGICAL_ERROR);
    return next_task_callback.value();
}


void Context::setReadTaskCallback(ReadTaskCallback && callback)
{
    next_task_callback = callback;
}

PartUUIDsPtr Context::getIgnoredPartUUIDs()
{
    auto lock = getLock();
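The hunk above gives Context a per-query ReadTaskCallback (declared as std::function<String()> in the Context.h change further down): the setter stores it, and the getter deliberately throws if a distributed source asks for work before anything was registered. A condensed sketch of that contract, using a simplified stand-in rather than the real Context class:

#include <functional>
#include <optional>
#include <stdexcept>
#include <string>
#include <utility>

using ReadTaskCallback = std::function<std::string()>;

/// Hypothetical, trimmed-down stand-in for the fields added to Context in this diff.
struct QueryContextSketch
{
    std::optional<ReadTaskCallback> next_task_callback;

    void setReadTaskCallback(ReadTaskCallback && callback) { next_task_callback = std::move(callback); }

    ReadTaskCallback getReadTaskCallback() const
    {
        /// Fail loudly: only queries started through the distributed processing path register a callback.
        if (!next_task_callback.has_value())
            throw std::logic_error("Next task callback is not set");
        return next_task_callback.value();
    }
};

/// Usage (hypothetical helper names): the server registers a callback that asks the
/// initiator for the next key, and the reading pipeline pulls keys until an empty
/// string signals "no more work".
///   ctx.setReadTaskCallback([] { return requestNextKeyFromInitiator(); });
///   for (auto key = ctx.getReadTaskCallback()(); !key.empty(); key = ctx.getReadTaskCallback()()) { /* read `key` */ }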
@ -128,6 +128,9 @@ using InputInitializer = std::function<void(ContextPtr, const StoragePtr &)>;
|
||||
/// Callback for reading blocks of data from client for function input()
|
||||
using InputBlocksReader = std::function<Block(ContextPtr)>;
|
||||
|
||||
/// Used in distributed task processing
|
||||
using ReadTaskCallback = std::function<String()>;
|
||||
|
||||
/// An empty interface for an arbitrary object that may be attached by a shared pointer
|
||||
/// to query context, when using ClickHouse as a library.
|
||||
struct IHostContext
|
||||
@ -189,6 +192,9 @@ private:
|
||||
TemporaryTablesMapping external_tables_mapping;
|
||||
Scalars scalars;
|
||||
|
||||
/// Fields for distributed s3 function
|
||||
std::optional<ReadTaskCallback> next_task_callback;
|
||||
|
||||
/// Record entities accessed by current query, and store this information in system.query_log.
|
||||
struct QueryAccessInfo
|
||||
{
|
||||
@ -769,6 +775,10 @@ public:
|
||||
|
||||
PartUUIDsPtr getPartUUIDs();
|
||||
PartUUIDsPtr getIgnoredPartUUIDs();
|
||||
|
||||
ReadTaskCallback getReadTaskCallback() const;
|
||||
void setReadTaskCallback(ReadTaskCallback && callback);
|
||||
|
||||
private:
|
||||
std::unique_lock<std::recursive_mutex> getLock() const;
|
||||
|
||||
|
@ -26,9 +26,9 @@ struct DatabaseAndTableWithAlias
|
||||
UUID uuid = UUIDHelpers::Nil;
|
||||
|
||||
DatabaseAndTableWithAlias() = default;
|
||||
DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
|
||||
DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
|
||||
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");
|
||||
explicit DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
|
||||
explicit DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
|
||||
explicit DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");
|
||||
|
||||
/// "alias." or "table." if alias is empty
|
||||
String getQualifiedNamePrefix(bool with_dot = true) const;
|
||||
@ -80,7 +80,7 @@ private:
|
||||
void addAdditionalColumns(NamesAndTypesList & target, const NamesAndTypesList & addition)
|
||||
{
|
||||
target.insert(target.end(), addition.begin(), addition.end());
|
||||
for (auto & col : addition)
|
||||
for (const auto & col : addition)
|
||||
names.insert(col.name);
|
||||
}
|
||||
|
||||
|
@ -723,7 +723,7 @@ static void generateUUIDForTable(ASTCreateQuery & create)
|
||||
/// If destination table (to_table_id) is not specified for materialized view,
|
||||
/// then MV will create inner table. We should generate UUID of inner table here,
|
||||
/// so it will be the same on all hosts if query in ON CLUSTER or database engine is Replicated.
|
||||
bool need_uuid_for_inner_table = create.is_materialized_view && !create.to_table_id;
|
||||
bool need_uuid_for_inner_table = !create.attach && create.is_materialized_view && !create.to_table_id;
|
||||
if (need_uuid_for_inner_table && create.to_inner_uuid == UUIDHelpers::Nil)
|
||||
create.to_inner_uuid = UUIDHelpers::generateV4();
|
||||
}
|
||||
|
@ -152,6 +152,8 @@ public:
|
||||
void shutdown() override
|
||||
{
|
||||
stopFlushThread();
|
||||
if (table)
|
||||
table->shutdown();
|
||||
}
|
||||
|
||||
String getName() override
|
||||
|
@ -20,7 +20,7 @@ public:
|
||||
bool second_with_brackets;
|
||||
|
||||
public:
|
||||
ASTPair(bool second_with_brackets_)
|
||||
explicit ASTPair(bool second_with_brackets_)
|
||||
: second_with_brackets(second_with_brackets_)
|
||||
{
|
||||
}
|
||||
@ -49,7 +49,7 @@ public:
|
||||
/// Has brackets around arguments
|
||||
bool has_brackets;
|
||||
|
||||
ASTFunctionWithKeyValueArguments(bool has_brackets_ = true)
|
||||
explicit ASTFunctionWithKeyValueArguments(bool has_brackets_ = true)
|
||||
: has_brackets(has_brackets_)
|
||||
{
|
||||
}
|
||||
|
@ -45,7 +45,7 @@ protected:
|
||||
class ParserIdentifier : public IParserBase
|
||||
{
|
||||
public:
|
||||
ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {}
|
||||
explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {}
|
||||
protected:
|
||||
const char * getName() const override { return "identifier"; }
|
||||
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
|
||||
@ -59,7 +59,7 @@ protected:
|
||||
class ParserCompoundIdentifier : public IParserBase
|
||||
{
|
||||
public:
|
||||
ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false)
|
||||
explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false)
|
||||
: table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_)
|
||||
{
|
||||
}
|
||||
@ -85,7 +85,7 @@ public:
|
||||
using ColumnTransformers = MultiEnum<ColumnTransformer, UInt8>;
|
||||
static constexpr auto AllTransformers = ColumnTransformers{ColumnTransformer::APPLY, ColumnTransformer::EXCEPT, ColumnTransformer::REPLACE};
|
||||
|
||||
ParserColumnsTransformers(ColumnTransformers allowed_transformers_ = AllTransformers, bool is_strict_ = false)
|
||||
explicit ParserColumnsTransformers(ColumnTransformers allowed_transformers_ = AllTransformers, bool is_strict_ = false)
|
||||
: allowed_transformers(allowed_transformers_)
|
||||
, is_strict(is_strict_)
|
||||
{}
|
||||
@ -103,7 +103,7 @@ class ParserAsterisk : public IParserBase
|
||||
{
|
||||
public:
|
||||
using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers;
|
||||
ParserAsterisk(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
|
||||
explicit ParserAsterisk(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
|
||||
: allowed_transformers(allowed_transformers_)
|
||||
{}
|
||||
|
||||
@ -129,7 +129,7 @@ class ParserColumnsMatcher : public IParserBase
|
||||
{
|
||||
public:
|
||||
using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers;
|
||||
ParserColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
|
||||
explicit ParserColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
|
||||
: allowed_transformers(allowed_transformers_)
|
||||
{}
|
||||
|
||||
@ -149,7 +149,7 @@ protected:
|
||||
class ParserFunction : public IParserBase
|
||||
{
|
||||
public:
|
||||
ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false)
|
||||
explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false)
|
||||
: allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_)
|
||||
{
|
||||
}
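Most of the constructor changes in this file (ASTPair, ASTFunctionWithKeyValueArguments, ParserIdentifier, ParserColumnsTransformers, ParserAsterisk, ParserColumnsMatcher, ParserFunction) simply add `explicit`. The point is that a single-argument constructor without `explicit` also acts as an implicit conversion, so a stray bool could silently become a parser object. A small generic illustration (not ClickHouse code):

/// Without `explicit`, the bool argument converts implicitly at the call site.
struct ImplicitWidget
{
    ImplicitWidget(bool flag_ = true) : flag(flag_) {}
    bool flag;
};

/// With `explicit`, the conversion has to be spelled out.
struct ExplicitWidget
{
    explicit ExplicitWidget(bool flag_ = true) : flag(flag_) {}
    bool flag;
};

void takeImplicit(const ImplicitWidget &) {}
void takeExplicit(const ExplicitWidget &) {}

int main()
{
    takeImplicit(false);                    /// compiles, often by accident
    /// takeExplicit(false);                /// would not compile
    takeExplicit(ExplicitWidget(false));    /// intent is visible
    return 0;
}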
|
||||
|
@ -14,7 +14,7 @@ namespace DB
|
||||
/// Contains extra information about read data.
|
||||
struct RowReadExtension
|
||||
{
|
||||
/// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise.
|
||||
/// IRowInputFormat::read output. It contains non zero for columns that actually read from the source and zero otherwise.
|
||||
/// It's used to attach defaults for partially filled rows.
|
||||
std::vector<UInt8> read_columns;
|
||||
};
|
||||
|
@ -25,6 +25,7 @@
|
||||
#include <Interpreters/OpenTelemetrySpanLog.h>
|
||||
#include <Storages/StorageReplicatedMergeTree.h>
|
||||
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
|
||||
#include <Storages/StorageS3Cluster.h>
|
||||
#include <Core/ExternalTable.h>
|
||||
#include <Storages/ColumnDefault.h>
|
||||
#include <DataTypes/DataTypeLowCardinality.h>
|
||||
@ -33,6 +34,7 @@
|
||||
|
||||
#include <Processors/Executors/PullingAsyncPipelineExecutor.h>
|
||||
|
||||
#include "Core/Protocol.h"
|
||||
#include "TCPHandler.h"
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
@ -55,6 +57,7 @@ namespace ErrorCodes
|
||||
extern const int SOCKET_TIMEOUT;
|
||||
extern const int UNEXPECTED_PACKET_FROM_CLIENT;
|
||||
extern const int SUPPORT_IS_DISABLED;
|
||||
extern const int UNKNOWN_PROTOCOL;
|
||||
}
|
||||
|
||||
TCPHandler::TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_)
|
||||
@ -285,6 +288,14 @@ void TCPHandler::runImpl()
|
||||
|
||||
customizeContext(query_context);
|
||||
|
||||
/// This callback is needed for requesting read tasks inside pipeline for distributed processing
|
||||
query_context->setReadTaskCallback([this]() -> String
|
||||
{
|
||||
std::lock_guard lock(task_callback_mutex);
|
||||
sendReadTaskRequestAssumeLocked();
|
||||
return receiveReadTaskResponseAssumeLocked();
|
||||
});
|
||||
|
||||
bool may_have_embedded_data = client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA;
|
||||
/// Processing Query
|
||||
state.io = executeQuery(state.query, query_context, false, state.stage, may_have_embedded_data);
|
||||
@ -644,6 +655,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors()
|
||||
Block block;
|
||||
while (executor.pull(block, query_context->getSettingsRef().interactive_delay / 1000))
|
||||
{
|
||||
std::lock_guard lock(task_callback_mutex);
|
||||
|
||||
if (isQueryCancelled())
|
||||
{
|
||||
/// A packet was received requesting to stop execution of the request.
|
||||
@ -755,6 +768,13 @@ void TCPHandler::sendPartUUIDs()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::sendReadTaskRequestAssumeLocked()
|
||||
{
|
||||
writeVarUInt(Protocol::Server::ReadTaskRequest, *out);
|
||||
out->next();
|
||||
}
|
||||
|
||||
void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info)
|
||||
{
|
||||
writeVarUInt(Protocol::Server::ProfileInfo, *out);
|
||||
@ -963,10 +983,10 @@ bool TCPHandler::receivePacket()
|
||||
UInt64 packet_type = 0;
|
||||
readVarUInt(packet_type, *in);
|
||||
|
||||
// std::cerr << "Server got packet: " << Protocol::Client::toString(packet_type) << "\n";
|
||||
|
||||
switch (packet_type)
|
||||
{
|
||||
case Protocol::Client::ReadTaskResponse:
|
||||
throw Exception("ReadTaskResponse must be received only after requesting in callback", ErrorCodes::LOGICAL_ERROR);
|
||||
case Protocol::Client::IgnoredPartUUIDs:
|
||||
/// Part uuids packet if any comes before query.
|
||||
receiveIgnoredPartUUIDs();
|
||||
@ -1016,6 +1036,34 @@ void TCPHandler::receiveIgnoredPartUUIDs()
|
||||
query_context->getIgnoredPartUUIDs()->add(uuids);
|
||||
}
|
||||
|
||||
|
||||
String TCPHandler::receiveReadTaskResponseAssumeLocked()
|
||||
{
|
||||
UInt64 packet_type = 0;
|
||||
readVarUInt(packet_type, *in);
|
||||
if (packet_type != Protocol::Client::ReadTaskResponse)
|
||||
{
|
||||
if (packet_type == Protocol::Client::Cancel)
|
||||
{
|
||||
state.is_cancelled = true;
|
||||
return {};
|
||||
}
|
||||
else
|
||||
{
|
||||
throw Exception(fmt::format("Received {} packet after requesting read task",
|
||||
Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT);
|
||||
}
|
||||
}
|
||||
UInt64 version;
|
||||
readVarUInt(version, *in);
|
||||
if (version != DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION)
|
||||
throw Exception("Protocol version for distributed processing mismatched", ErrorCodes::UNKNOWN_PROTOCOL);
|
||||
String response;
|
||||
readStringBinary(response, *in);
|
||||
return response;
|
||||
}
|
||||
|
||||
|
||||
void TCPHandler::receiveClusterNameAndSalt()
|
||||
{
|
||||
readStringBinary(cluster, *in);
|
||||
|
@ -89,7 +89,7 @@ struct QueryState
|
||||
*this = QueryState();
|
||||
}
|
||||
|
||||
bool empty()
|
||||
bool empty() const
|
||||
{
|
||||
return is_empty;
|
||||
}
|
||||
@ -150,6 +150,7 @@ private:
|
||||
String cluster;
|
||||
String cluster_secret;
|
||||
|
||||
std::mutex task_callback_mutex;
|
||||
|
||||
/// At the moment, only one ongoing query in the connection is supported at a time.
|
||||
QueryState state;
|
||||
@ -169,9 +170,11 @@ private:
|
||||
bool receivePacket();
|
||||
void receiveQuery();
|
||||
void receiveIgnoredPartUUIDs();
|
||||
String receiveReadTaskResponseAssumeLocked();
|
||||
bool receiveData(bool scalar);
|
||||
bool readDataNext(const size_t & poll_interval, const int & receive_timeout);
|
||||
void readData(const Settings & connection_settings);
|
||||
void receiveClusterNameAndSalt();
|
||||
std::tuple<size_t, int> getReadTimeouts(const Settings & connection_settings);
|
||||
|
||||
[[noreturn]] void receiveUnexpectedData();
|
||||
@ -198,12 +201,11 @@ private:
|
||||
void sendLogs();
|
||||
void sendEndOfStream();
|
||||
void sendPartUUIDs();
|
||||
void sendReadTaskRequestAssumeLocked();
|
||||
void sendProfileInfo(const BlockStreamProfileInfo & info);
|
||||
void sendTotals(const Block & totals);
|
||||
void sendExtremes(const Block & extremes);
|
||||
|
||||
void receiveClusterNameAndSalt();
|
||||
|
||||
/// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled.
|
||||
void initBlockInput();
|
||||
void initBlockOutput(const Block & block);
|
||||
|
@ -1327,7 +1327,7 @@ String KeyCondition::toString() const
|
||||
* A range of tuples specifies some subset of this space.
|
||||
*
|
||||
* Hyperrectangles will be the subrange of an n-dimensional space that is a direct product of one-dimensional ranges.
|
||||
* In this case, the one-dimensional range can be:
|
||||
* In this case, the one-dimensional range can be:
|
||||
* a point, a segment, an open interval, a half-open interval;
|
||||
* unlimited on the left, unlimited on the right ...
|
||||
*
|
||||
|
@ -452,7 +452,8 @@ StorageDistributed::StorageDistributed(
|
||||
const DistributedSettings & distributed_settings_,
|
||||
bool attach,
|
||||
ClusterPtr owned_cluster_)
|
||||
: StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_, storage_policy_name_, relative_data_path_, distributed_settings_, attach, std::move(owned_cluster_))
|
||||
: StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_,
|
||||
storage_policy_name_, relative_data_path_, distributed_settings_, attach, std::move(owned_cluster_))
|
||||
{
|
||||
remote_table_function_ptr = std::move(remote_table_function_ptr_);
|
||||
}
|
||||
@ -473,20 +474,15 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
|
||||
ClusterPtr optimized_cluster = getOptimizedCluster(local_context, metadata_snapshot, query_info.query);
|
||||
if (optimized_cluster)
|
||||
{
|
||||
LOG_DEBUG(
|
||||
log,
|
||||
"Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
|
||||
makeFormattedListOfShards(optimized_cluster));
|
||||
LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}",
|
||||
makeFormattedListOfShards(optimized_cluster));
|
||||
cluster = optimized_cluster;
|
||||
query_info.optimized_cluster = cluster;
|
||||
}
|
||||
else
|
||||
{
|
||||
LOG_DEBUG(
|
||||
log,
|
||||
"Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the "
|
||||
"cluster{}",
|
||||
has_sharding_key ? "" : " (no sharding key)");
|
||||
LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}",
|
||||
has_sharding_key ? "" : " (no sharding key)");
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,10 +76,15 @@ StorageMaterializedView::StorageMaterializedView(
|
||||
storage_metadata.setSelectQuery(select);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
|
||||
bool point_to_itself_by_uuid = has_inner_table && query.to_inner_uuid != UUIDHelpers::Nil
|
||||
&& query.to_inner_uuid == table_id_.uuid;
|
||||
bool point_to_itself_by_name = !has_inner_table && query.to_table_id.database_name == table_id_.database_name
|
||||
&& query.to_table_id.table_name == table_id_.table_name;
|
||||
if (point_to_itself_by_uuid || point_to_itself_by_name)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName());
|
||||
|
||||
if (!has_inner_table)
|
||||
{
|
||||
if (query.to_table_id.database_name == table_id_.database_name && query.to_table_id.table_name == table_id_.table_name)
|
||||
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Materialized view {} cannot point to itself", table_id_.getFullTableName());
|
||||
target_table_id = query.to_table_id;
|
||||
}
|
||||
else if (attach_)
|
||||
|
@ -11,7 +11,7 @@ class StorageProxy : public IStorage
|
||||
{
|
||||
public:
|
||||
|
||||
StorageProxy(const StorageID & table_id_) : IStorage(table_id_) {}
|
||||
explicit StorageProxy(const StorageID & table_id_) : IStorage(table_id_) {}
|
||||
|
||||
virtual StoragePtr getNested() const = 0;
|
||||
|
||||
|
@ -45,154 +45,267 @@ namespace ErrorCodes
|
||||
extern const int UNEXPECTED_EXPRESSION;
|
||||
extern const int S3_ERROR;
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
class StorageS3Source::DisclosedGlobIterator::Impl
|
||||
{
|
||||
class StorageS3Source : public SourceWithProgress
|
||||
|
||||
public:
|
||||
Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_)
|
||||
: client(client_), globbed_uri(globbed_uri_)
|
||||
{
|
||||
public:
|
||||
std::lock_guard lock(mutex);
|
||||
|
||||
static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column)
|
||||
if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
|
||||
throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION);
|
||||
|
||||
const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
|
||||
|
||||
/// We don't have to list bucket, because there is no asterics.
|
||||
if (key_prefix.size() == globbed_uri.key.size())
|
||||
{
|
||||
if (with_path_column)
|
||||
sample_block.insert({DataTypeString().createColumn(), std::make_shared<DataTypeString>(), "_path"});
|
||||
if (with_file_column)
|
||||
sample_block.insert({DataTypeString().createColumn(), std::make_shared<DataTypeString>(), "_file"});
|
||||
|
||||
return sample_block;
|
||||
buffer.emplace_back(globbed_uri.key);
|
||||
buffer_iter = buffer.begin();
|
||||
is_finished = true;
|
||||
return;
|
||||
}
|
||||
|
||||
StorageS3Source(
|
||||
bool need_path,
|
||||
bool need_file,
|
||||
const String & format,
|
||||
String name_,
|
||||
const Block & sample_block,
|
||||
ContextPtr context,
|
||||
const ColumnsDescription & columns,
|
||||
UInt64 max_block_size,
|
||||
const CompressionMethod compression_method,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client,
|
||||
const String & bucket,
|
||||
const String & key)
|
||||
: SourceWithProgress(getHeader(sample_block, need_path, need_file))
|
||||
, name(std::move(name_))
|
||||
, with_file_column(need_file)
|
||||
, with_path_column(need_path)
|
||||
, file_path(bucket + "/" + key)
|
||||
{
|
||||
read_buf = wrapReadBufferWithCompressionMethod(std::make_unique<ReadBufferFromS3>(client, bucket, key), compression_method);
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
request.SetBucket(globbed_uri.bucket);
|
||||
request.SetPrefix(key_prefix);
|
||||
matcher = std::make_unique<re2::RE2>(makeRegexpPatternFromGlobs(globbed_uri.key));
|
||||
fillInternalBufferAssumeLocked();
|
||||
}
|
||||
|
||||
if (columns.hasDefaults())
|
||||
reader = std::make_shared<AddingDefaultsBlockInputStream>(reader, columns, context);
|
||||
String next()
|
||||
{
|
||||
std::lock_guard lock(mutex);
|
||||
return nextAssumeLocked();
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
String nextAssumeLocked()
|
||||
{
|
||||
if (buffer_iter != buffer.end())
|
||||
{
|
||||
auto answer = *buffer_iter;
|
||||
++buffer_iter;
|
||||
return answer;
|
||||
}
|
||||
|
||||
String getName() const override
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
Chunk generate() override
|
||||
{
|
||||
if (!reader)
|
||||
return {};
|
||||
|
||||
if (!initialized)
|
||||
{
|
||||
reader->readSuffix();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
if (auto block = reader->read())
|
||||
{
|
||||
auto columns = block.getColumns();
|
||||
UInt64 num_rows = block.rows();
|
||||
|
||||
if (with_path_column)
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
|
||||
if (with_file_column)
|
||||
{
|
||||
size_t last_slash_pos = file_path.find_last_of('/');
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path.substr(
|
||||
last_slash_pos + 1))->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
return Chunk(std::move(columns), num_rows);
|
||||
}
|
||||
|
||||
reader.reset();
|
||||
|
||||
if (is_finished)
|
||||
return {};
|
||||
}
|
||||
|
||||
private:
|
||||
String name;
|
||||
std::unique_ptr<ReadBuffer> read_buf;
|
||||
BlockInputStreamPtr reader;
|
||||
bool initialized = false;
|
||||
bool with_file_column = false;
|
||||
bool with_path_column = false;
|
||||
String file_path;
|
||||
};
|
||||
fillInternalBufferAssumeLocked();
|
||||
|
||||
class StorageS3BlockOutputStream : public IBlockOutputStream
|
||||
return nextAssumeLocked();
|
||||
}
|
||||
|
||||
void fillInternalBufferAssumeLocked()
|
||||
{
|
||||
public:
|
||||
StorageS3BlockOutputStream(
|
||||
const String & format,
|
||||
const Block & sample_block_,
|
||||
ContextPtr context,
|
||||
const CompressionMethod compression_method,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client,
|
||||
const String & bucket,
|
||||
const String & key,
|
||||
size_t min_upload_part_size,
|
||||
size_t max_single_part_upload_size)
|
||||
: sample_block(sample_block_)
|
||||
{
|
||||
write_buf = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3);
|
||||
writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
|
||||
}
|
||||
buffer.clear();
|
||||
|
||||
Block getHeader() const override
|
||||
{
|
||||
return sample_block;
|
||||
}
|
||||
outcome = client.ListObjectsV2(request);
|
||||
if (!outcome.IsSuccess())
|
||||
throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
|
||||
quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
|
||||
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
|
||||
|
||||
void write(const Block & block) override
|
||||
{
|
||||
writer->write(block);
|
||||
}
|
||||
const auto & result_batch = outcome.GetResult().GetContents();
|
||||
|
||||
void writePrefix() override
|
||||
buffer.reserve(result_batch.size());
|
||||
for (const auto & row : result_batch)
|
||||
{
|
||||
writer->writePrefix();
|
||||
String key = row.GetKey();
|
||||
if (re2::RE2::FullMatch(key, *matcher))
|
||||
buffer.emplace_back(std::move(key));
|
||||
}
|
||||
/// Set iterator only after the whole batch is processed
|
||||
buffer_iter = buffer.begin();
|
||||
|
||||
void flush() override
|
||||
{
|
||||
writer->flush();
|
||||
}
|
||||
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
|
||||
|
||||
void writeSuffix() override
|
||||
{
|
||||
writer->writeSuffix();
|
||||
writer->flush();
|
||||
write_buf->finalize();
|
||||
}
|
||||
/// It returns false when all objects were returned
|
||||
is_finished = !outcome.GetResult().GetIsTruncated();
|
||||
}
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
std::unique_ptr<WriteBuffer> write_buf;
|
||||
BlockOutputStreamPtr writer;
|
||||
};
|
||||
std::mutex mutex;
|
||||
Strings buffer;
|
||||
Strings::iterator buffer_iter;
|
||||
Aws::S3::S3Client client;
|
||||
S3::URI globbed_uri;
|
||||
Aws::S3::Model::ListObjectsV2Request request;
|
||||
Aws::S3::Model::ListObjectsV2Outcome outcome;
|
||||
std::unique_ptr<re2::RE2> matcher;
|
||||
bool is_finished{false};
|
||||
};
|
||||
|
||||
StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_)
|
||||
: pimpl(std::make_shared<StorageS3Source::DisclosedGlobIterator::Impl>(client_, globbed_uri_)) {}
|
||||
|
||||
String StorageS3Source::DisclosedGlobIterator::next()
|
||||
{
|
||||
return pimpl->next();
|
||||
}
|
||||
|
||||
|
||||
Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column)
|
||||
{
|
||||
if (with_path_column)
|
||||
sample_block.insert({DataTypeString().createColumn(), std::make_shared<DataTypeString>(), "_path"});
|
||||
if (with_file_column)
|
||||
sample_block.insert({DataTypeString().createColumn(), std::make_shared<DataTypeString>(), "_file"});
|
||||
|
||||
return sample_block;
|
||||
}
|
||||
|
||||
StorageS3Source::StorageS3Source(
|
||||
bool need_path,
|
||||
bool need_file,
|
||||
const String & format_,
|
||||
String name_,
|
||||
const Block & sample_block_,
|
||||
ContextPtr context_,
|
||||
const ColumnsDescription & columns_,
|
||||
UInt64 max_block_size_,
|
||||
const String compression_hint_,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client_,
|
||||
const String & bucket_,
|
||||
std::shared_ptr<IteratorWrapper> file_iterator_)
|
||||
: SourceWithProgress(getHeader(sample_block_, need_path, need_file))
|
||||
, WithContext(context_)
|
||||
, name(std::move(name_))
|
||||
, bucket(bucket_)
|
||||
, format(format_)
|
||||
, columns_desc(columns_)
|
||||
, max_block_size(max_block_size_)
|
||||
, compression_hint(compression_hint_)
|
||||
, client(client_)
|
||||
, sample_block(sample_block_)
|
||||
, with_file_column(need_file)
|
||||
, with_path_column(need_path)
|
||||
, file_iterator(file_iterator_)
|
||||
{
|
||||
initialize();
|
||||
}
|
||||
|
||||
|
||||
bool StorageS3Source::initialize()
|
||||
{
|
||||
String current_key = (*file_iterator)();
|
||||
if (current_key.empty())
|
||||
return false;
|
||||
|
||||
file_path = bucket + "/" + current_key;
|
||||
|
||||
read_buf = wrapReadBufferWithCompressionMethod(
|
||||
std::make_unique<ReadBufferFromS3>(client, bucket, current_key), chooseCompressionMethod(current_key, compression_hint));
|
||||
auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, getContext(), max_block_size);
|
||||
reader = std::make_shared<InputStreamFromInputFormat>(input_format);
|
||||
|
||||
if (columns_desc.hasDefaults())
|
||||
reader = std::make_shared<AddingDefaultsBlockInputStream>(reader, columns_desc, getContext());
|
||||
|
||||
initialized = false;
|
||||
return true;
|
||||
}
|
||||
|
||||
String StorageS3Source::getName() const
|
||||
{
|
||||
return name;
|
||||
}
|
||||
|
||||
Chunk StorageS3Source::generate()
|
||||
{
|
||||
if (!reader)
|
||||
return {};
|
||||
|
||||
if (!initialized)
|
||||
{
|
||||
reader->readPrefix();
|
||||
initialized = true;
|
||||
}
|
||||
|
||||
if (auto block = reader->read())
|
||||
{
|
||||
auto columns = block.getColumns();
|
||||
UInt64 num_rows = block.rows();
|
||||
|
||||
if (with_path_column)
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst());
|
||||
if (with_file_column)
|
||||
{
|
||||
size_t last_slash_pos = file_path.find_last_of('/');
|
||||
columns.push_back(DataTypeString().createColumnConst(num_rows, file_path.substr(
|
||||
last_slash_pos + 1))->convertToFullColumnIfConst());
|
||||
}
|
||||
|
||||
return Chunk(std::move(columns), num_rows);
|
||||
}
|
||||
|
||||
reader->readSuffix();
|
||||
reader.reset();
|
||||
read_buf.reset();
|
||||
|
||||
if (!initialize())
|
||||
return {};
|
||||
|
||||
return generate();
|
||||
}
|
||||
|
||||
|
||||
class StorageS3BlockOutputStream : public IBlockOutputStream
|
||||
{
|
||||
public:
|
||||
StorageS3BlockOutputStream(
|
||||
const String & format,
|
||||
const Block & sample_block_,
|
||||
ContextPtr context,
|
||||
const CompressionMethod compression_method,
|
||||
const std::shared_ptr<Aws::S3::S3Client> & client,
|
||||
const String & bucket,
|
||||
const String & key,
|
||||
size_t min_upload_part_size,
|
||||
size_t max_single_part_upload_size)
|
||||
: sample_block(sample_block_)
|
||||
{
|
||||
write_buf = wrapWriteBufferWithCompressionMethod(
|
||||
std::make_unique<WriteBufferFromS3>(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3);
|
||||
writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context);
|
||||
}
|
||||
|
||||
Block getHeader() const override
|
||||
{
|
||||
return sample_block;
|
||||
}
|
||||
|
||||
void write(const Block & block) override
|
||||
{
|
||||
writer->write(block);
|
||||
}
|
||||
|
||||
void writePrefix() override
|
||||
{
|
||||
writer->writePrefix();
|
||||
}
|
||||
|
||||
void flush() override
|
||||
{
|
||||
writer->flush();
|
||||
}
|
||||
|
||||
void writeSuffix() override
|
||||
{
|
||||
writer->writeSuffix();
|
||||
writer->flush();
|
||||
write_buf->finalize();
|
||||
}
|
||||
|
||||
private:
|
||||
Block sample_block;
|
||||
std::unique_ptr<WriteBuffer> write_buf;
|
||||
BlockOutputStreamPtr writer;
|
||||
};
|
||||
|
||||
|
||||
StorageS3::StorageS3(
|
||||
const S3::URI & uri_,
|
||||
const String & access_key_id_,
|
||||
@ -205,84 +318,23 @@ StorageS3::StorageS3(
|
||||
const ColumnsDescription & columns_,
|
||||
const ConstraintsDescription & constraints_,
|
||||
ContextPtr context_,
|
||||
const String & compression_method_)
|
||||
const String & compression_method_,
|
||||
bool distributed_processing_)
|
||||
: IStorage(table_id_)
|
||||
, WithContext(context_->getGlobalContext())
|
||||
, uri(uri_)
|
||||
, access_key_id(access_key_id_)
|
||||
, secret_access_key(secret_access_key_)
|
||||
, max_connections(max_connections_)
|
||||
, client_auth{uri_, access_key_id_, secret_access_key_, max_connections_, {}, {}} /// Client and settings will be updated later
|
||||
, format_name(format_name_)
|
||||
, min_upload_part_size(min_upload_part_size_)
|
||||
, max_single_part_upload_size(max_single_part_upload_size_)
|
||||
, compression_method(compression_method_)
|
||||
, name(uri_.storage_name)
|
||||
, distributed_processing(distributed_processing_)
|
||||
{
|
||||
getContext()->getRemoteHostFilter().checkURL(uri_.uri);
|
||||
context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri);
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
storage_metadata.setConstraints(constraints_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
updateAuthSettings(context_);
|
||||
}
|
||||
|
||||
|
||||
namespace
|
||||
{
|
||||
/* "Recursive" directory listing with matched paths as a result.
|
||||
* Have the same method in StorageFile.
|
||||
*/
|
||||
Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & globbed_uri)
|
||||
{
|
||||
if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos)
|
||||
{
|
||||
throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION);
|
||||
}
|
||||
|
||||
const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{"));
|
||||
if (key_prefix.size() == globbed_uri.key.size())
|
||||
{
|
||||
return {globbed_uri.key};
|
||||
}
|
||||
|
||||
Aws::S3::Model::ListObjectsV2Request request;
|
||||
request.SetBucket(globbed_uri.bucket);
|
||||
request.SetPrefix(key_prefix);
|
||||
|
||||
re2::RE2 matcher(makeRegexpPatternFromGlobs(globbed_uri.key));
|
||||
Strings result;
|
||||
Aws::S3::Model::ListObjectsV2Outcome outcome;
|
||||
int page = 0;
|
||||
do
|
||||
{
|
||||
++page;
|
||||
outcome = client.ListObjectsV2(request);
|
||||
if (!outcome.IsSuccess())
|
||||
{
|
||||
if (page > 1)
|
||||
throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, page {}, S3 exception: {}, message: {}",
|
||||
quoteString(request.GetBucket()), quoteString(request.GetPrefix()), page,
|
||||
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
|
||||
|
||||
throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}",
|
||||
quoteString(request.GetBucket()), quoteString(request.GetPrefix()),
|
||||
backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage()));
|
||||
}
|
||||
|
||||
for (const auto & row : outcome.GetResult().GetContents())
|
||||
{
|
||||
String key = row.GetKey();
|
||||
if (re2::RE2::FullMatch(key, matcher))
|
||||
result.emplace_back(std::move(key));
|
||||
}
|
||||
|
||||
request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken());
|
||||
}
|
||||
while (outcome.GetResult().GetIsTruncated());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
updateClientAndAuthSettings(context_, client_auth);
|
||||
}
|
||||
|
||||
|
||||
@ -295,7 +347,7 @@ Pipe StorageS3::read(
|
||||
size_t max_block_size,
|
||||
unsigned num_streams)
|
||||
{
|
||||
updateAuthSettings(local_context);
|
||||
updateClientAndAuthSettings(local_context, client_auth);
|
||||
|
||||
Pipes pipes;
|
||||
bool need_path_column = false;
|
||||
@ -308,7 +360,26 @@ Pipe StorageS3::read(
|
||||
need_file_column = true;
|
||||
}
|
||||
|
||||
for (const String & key : listFilesWithRegexpMatching(*client, uri))
|
||||
std::shared_ptr<StorageS3Source::IteratorWrapper> iterator_wrapper{nullptr};
|
||||
if (distributed_processing)
|
||||
{
|
||||
iterator_wrapper = std::make_shared<StorageS3Source::IteratorWrapper>(
|
||||
[callback = local_context->getReadTaskCallback()]() -> String {
|
||||
return callback();
|
||||
});
|
||||
}
|
||||
else
|
||||
{
|
||||
/// Iterate through disclosed globs and make a source for each file
|
||||
auto glob_iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);
|
||||
iterator_wrapper = std::make_shared<StorageS3Source::IteratorWrapper>([glob_iterator]()
|
||||
{
|
||||
return glob_iterator->next();
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_streams; ++i)
|
||||
{
|
||||
pipes.emplace_back(std::make_shared<StorageS3Source>(
|
||||
need_path_column,
|
||||
need_file_column,
|
||||
@ -318,63 +389,62 @@ Pipe StorageS3::read(
|
||||
local_context,
|
||||
metadata_snapshot->getColumns(),
|
||||
max_block_size,
|
||||
chooseCompressionMethod(uri.key, compression_method),
|
||||
client,
|
||||
uri.bucket,
|
||||
key));
|
||||
|
||||
compression_method,
|
||||
client_auth.client,
|
||||
client_auth.uri.bucket,
|
||||
iterator_wrapper));
|
||||
}
|
||||
auto pipe = Pipe::unitePipes(std::move(pipes));
|
||||
// It's possible to have many buckets read from s3, resize(num_streams) might open too many handles at the same time.
|
||||
// Using narrowPipe instead.
|
||||
|
||||
narrowPipe(pipe, num_streams);
|
||||
return pipe;
|
||||
}
|
||||
|
||||
BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context)
|
||||
{
|
||||
updateAuthSettings(local_context);
|
||||
updateClientAndAuthSettings(local_context, client_auth);
|
||||
return std::make_shared<StorageS3BlockOutputStream>(
|
||||
format_name,
|
||||
metadata_snapshot->getSampleBlock(),
|
||||
getContext(),
|
||||
chooseCompressionMethod(uri.key, compression_method),
|
||||
client,
|
||||
uri.bucket,
|
||||
uri.key,
|
||||
local_context,
|
||||
chooseCompressionMethod(client_auth.uri.key, compression_method),
|
||||
client_auth.client,
|
||||
client_auth.uri.bucket,
|
||||
client_auth.uri.key,
|
||||
min_upload_part_size,
|
||||
max_single_part_upload_size);
|
||||
}
|
||||
|
||||
void StorageS3::updateAuthSettings(ContextPtr local_context)
|
||||
void StorageS3::updateClientAndAuthSettings(ContextPtr ctx, StorageS3::ClientAuthentificaiton & upd)
|
||||
{
|
||||
auto settings = local_context->getStorageS3Settings().getSettings(uri.uri.toString());
|
||||
if (client && (!access_key_id.empty() || settings == auth_settings))
|
||||
auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString());
|
||||
if (upd.client && (!upd.access_key_id.empty() || settings == upd.auth_settings))
|
||||
return;
|
||||
|
||||
Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key);
|
||||
Aws::Auth::AWSCredentials credentials(upd.access_key_id, upd.secret_access_key);
|
||||
HeaderCollection headers;
|
||||
if (access_key_id.empty())
|
||||
if (upd.access_key_id.empty())
|
||||
{
|
||||
credentials = Aws::Auth::AWSCredentials(settings.access_key_id, settings.secret_access_key);
|
||||
headers = settings.headers;
|
||||
}
|
||||
|
||||
S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration(
|
||||
local_context->getRemoteHostFilter(), local_context->getGlobalContext()->getSettingsRef().s3_max_redirects);
|
||||
ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects);
|
||||
|
||||
client_configuration.endpointOverride = uri.endpoint;
|
||||
client_configuration.maxConnections = max_connections;
|
||||
client_configuration.endpointOverride = upd.uri.endpoint;
|
||||
client_configuration.maxConnections = upd.max_connections;
|
||||
|
||||
client = S3::ClientFactory::instance().create(
|
||||
upd.client = S3::ClientFactory::instance().create(
|
||||
client_configuration,
|
||||
uri.is_virtual_hosted_style,
|
||||
upd.uri.is_virtual_hosted_style,
|
||||
credentials.GetAWSAccessKeyId(),
|
||||
credentials.GetAWSSecretKey(),
|
||||
settings.server_side_encryption_customer_key_base64,
|
||||
std::move(headers),
|
||||
settings.use_environment_credentials.value_or(getContext()->getConfigRef().getBool("s3.use_environment_credentials", false)));
|
||||
settings.use_environment_credentials.value_or(ctx->getConfigRef().getBool("s3.use_environment_credentials", false)));
|
||||
|
||||
auth_settings = std::move(settings);
|
||||
upd.auth_settings = std::move(settings);
|
||||
}
|
||||
|
||||
void registerStorageS3Impl(const String & name, StorageFactory & factory)
|
||||
@ -385,7 +455,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory)
|
||||
|
||||
if (engine_args.size() < 2 || engine_args.size() > 5)
|
||||
throw Exception(
|
||||
"Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
"Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].",
|
||||
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
|
||||
|
||||
for (auto & engine_arg : engine_args)
|
||||
engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext());
|
||||
|
@ -4,11 +4,20 @@

#if USE_AWS_S3

#include <Core/Types.h>

#include <Compression/CompressionInfo.h>

#include <Storages/IStorage.h>
#include <Storages/StorageS3Settings.h>

#include <Processors/Sources/SourceWithProgress.h>
#include <Poco/URI.h>
#include <common/logger_useful.h>
#include <ext/shared_ptr_helper.h>
#include <IO/S3Common.h>
#include <IO/CompressionMethod.h>
#include <Interpreters/Context.h>

namespace Aws::S3
{
@ -18,6 +27,66 @@ namespace Aws::S3
namespace DB
{

class StorageS3SequentialSource;
class StorageS3Source : public SourceWithProgress, WithContext
{
public:
class DisclosedGlobIterator
{
public:
DisclosedGlobIterator(Aws::S3::S3Client &, const S3::URI &);
String next();
private:
class Impl;
/// shared_ptr to have copy constructor
std::shared_ptr<Impl> pimpl;
};

using IteratorWrapper = std::function<String()>;

static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column);

StorageS3Source(
bool need_path,
bool need_file,
const String & format,
String name_,
const Block & sample_block,
ContextPtr context_,
const ColumnsDescription & columns_,
UInt64 max_block_size_,
const String compression_hint_,
const std::shared_ptr<Aws::S3::S3Client> & client_,
const String & bucket,
std::shared_ptr<IteratorWrapper> file_iterator_);

String getName() const override;

Chunk generate() override;

private:
String name;
String bucket;
String file_path;
String format;
ColumnsDescription columns_desc;
UInt64 max_block_size;
String compression_hint;
std::shared_ptr<Aws::S3::S3Client> client;
Block sample_block;


std::unique_ptr<ReadBuffer> read_buf;
BlockInputStreamPtr reader;
bool initialized = false;
bool with_file_column = false;
bool with_path_column = false;
std::shared_ptr<IteratorWrapper> file_iterator;

/// Recreate ReadBuffer and BlockInputStream for each file.
bool initialize();
};

/**
* This class represents table engine for external S3 urls.
* It sends HTTP GET to server when select is called and
@ -37,7 +106,8 @@ public:
const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_,
ContextPtr context_,
const String & compression_method_ = "");
const String & compression_method_ = "",
bool distributed_processing_ = false);

String getName() const override
{
@ -58,20 +128,30 @@ public:
NamesAndTypesList getVirtuals() const override;

private:
const S3::URI uri;
const String access_key_id;
const String secret_access_key;
const UInt64 max_connections;

friend class StorageS3Cluster;
friend class TableFunctionS3Cluster;

struct ClientAuthentificaiton
{
const S3::URI uri;
const String access_key_id;
const String secret_access_key;
const UInt64 max_connections;
std::shared_ptr<Aws::S3::S3Client> client;
S3AuthSettings auth_settings;
};

ClientAuthentificaiton client_auth;

String format_name;
size_t min_upload_part_size;
size_t max_single_part_upload_size;
String compression_method;
std::shared_ptr<Aws::S3::S3Client> client;
String name;
S3AuthSettings auth_settings;
const bool distributed_processing;

void updateAuthSettings(ContextPtr context);
static void updateClientAndAuthSettings(ContextPtr, ClientAuthentificaiton &);
};

}
166 src/Storages/StorageS3Cluster.cpp Normal file
@ -0,0 +1,166 @@
#include "Storages/StorageS3Cluster.h"
|
||||
|
||||
#if !defined(ARCADIA_BUILD)
|
||||
#include <Common/config.h>
|
||||
#endif
|
||||
|
||||
#if USE_AWS_S3
|
||||
|
||||
#include "Common/Exception.h"
|
||||
#include <Common/Throttler.h>
|
||||
#include "Client/Connection.h"
|
||||
#include "Core/QueryProcessingStage.h"
|
||||
#include <Core/UUID.h>
|
||||
#include "DataStreams/RemoteBlockInputStream.h"
|
||||
#include <Columns/ColumnsNumber.h>
|
||||
#include <DataTypes/DataTypesNumber.h>
|
||||
#include <DataTypes/DataTypeString.h>
|
||||
#include <IO/ReadBufferFromS3.h>
|
||||
#include <IO/ReadHelpers.h>
|
||||
#include <IO/WriteBufferFromS3.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
#include <Interpreters/Context.h>
|
||||
#include <Interpreters/getHeaderForProcessingStage.h>
|
||||
#include <Interpreters/SelectQueryOptions.h>
|
||||
#include <Interpreters/InterpreterSelectQuery.h>
|
||||
#include <Interpreters/getTableExpressions.h>
|
||||
#include <Formats/FormatFactory.h>
|
||||
#include <DataStreams/IBlockOutputStream.h>
|
||||
#include <DataStreams/AddingDefaultsBlockInputStream.h>
|
||||
#include <DataStreams/narrowBlockInputStreams.h>
|
||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||
#include <Processors/Pipe.h>
|
||||
#include <Processors/Sources/SourceFromInputStream.h>
|
||||
#include "Processors/Sources/SourceWithProgress.h"
|
||||
#include <Processors/Sources/RemoteSource.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Storages/IStorage.h>
|
||||
#include <Storages/SelectQueryInfo.h>
|
||||
#include <common/logger_useful.h>
|
||||
|
||||
#include <aws/core/auth/AWSCredentials.h>
|
||||
#include <aws/s3/S3Client.h>
|
||||
#include <aws/s3/model/ListObjectsV2Request.h>
|
||||
|
||||
#include <ios>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <cassert>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
|
||||
StorageS3Cluster::StorageS3Cluster(
|
||||
const String & filename_,
|
||||
const String & access_key_id_,
|
||||
const String & secret_access_key_,
|
||||
const StorageID & table_id_,
|
||||
String cluster_name_,
|
||||
const String & format_name_,
|
||||
UInt64 max_connections_,
|
||||
const ColumnsDescription & columns_,
|
||||
const ConstraintsDescription & constraints_,
|
||||
ContextPtr context_,
|
||||
const String & compression_method_)
|
||||
: IStorage(table_id_)
|
||||
, client_auth{S3::URI{Poco::URI{filename_}}, access_key_id_, secret_access_key_, max_connections_, {}, {}}
|
||||
, filename(filename_)
|
||||
, cluster_name(cluster_name_)
|
||||
, format_name(format_name_)
|
||||
, compression_method(compression_method_)
|
||||
{
|
||||
StorageInMemoryMetadata storage_metadata;
|
||||
storage_metadata.setColumns(columns_);
|
||||
storage_metadata.setConstraints(constraints_);
|
||||
setInMemoryMetadata(storage_metadata);
|
||||
StorageS3::updateClientAndAuthSettings(context_, client_auth);
|
||||
}
|
||||
|
||||
/// The code executes on initiator
|
||||
Pipe StorageS3Cluster::read(
|
||||
const Names & column_names,
|
||||
const StorageMetadataPtr & metadata_snapshot,
|
||||
SelectQueryInfo & query_info,
|
||||
ContextPtr context,
|
||||
QueryProcessingStage::Enum processed_stage,
|
||||
size_t /*max_block_size*/,
|
||||
unsigned /*num_streams*/)
|
||||
{
|
||||
StorageS3::updateClientAndAuthSettings(context, client_auth);
|
||||
|
||||
auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings());
|
||||
S3::URI s3_uri(Poco::URI{filename});
|
||||
StorageS3::updateClientAndAuthSettings(context, client_auth);
|
||||
|
||||
auto iterator = std::make_shared<StorageS3Source::DisclosedGlobIterator>(*client_auth.client, client_auth.uri);
|
||||
auto callback = std::make_shared<StorageS3Source::IteratorWrapper>([iterator]() mutable -> String
|
||||
{
|
||||
return iterator->next();
|
||||
});
|
||||
|
||||
/// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*)
|
||||
Block header =
|
||||
InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock();
|
||||
|
||||
const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{};
|
||||
|
||||
Pipes pipes;
|
||||
connections.reserve(cluster->getShardCount());
|
||||
|
||||
const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState;
|
||||
|
||||
for (const auto & replicas : cluster->getShardsAddresses())
|
||||
{
|
||||
/// There will be only one replica, because we consider each replica as a shard
|
||||
for (const auto & node : replicas)
|
||||
{
|
||||
connections.emplace_back(std::make_shared<Connection>(
|
||||
node.host_name, node.port, context->getGlobalContext()->getCurrentDatabase(),
|
||||
node.user, node.password, node.cluster, node.cluster_secret,
|
||||
"S3ClusterInititiator",
|
||||
node.compression,
|
||||
node.secure
|
||||
));
|
||||
|
||||
/// For unknown reason global context is passed to IStorage::read() method
|
||||
/// So, task_identifier is passed as constructor argument. It is more obvious.
|
||||
auto remote_query_executor = std::make_shared<RemoteQueryExecutor>(
|
||||
*connections.back(), queryToString(query_info.query), header, context,
|
||||
/*throttler=*/nullptr, scalars, Tables(), processed_stage, callback);
|
||||
|
||||
pipes.emplace_back(std::make_shared<RemoteSource>(remote_query_executor, add_agg_info, false));
|
||||
}
|
||||
}
|
||||
|
||||
metadata_snapshot->check(column_names, getVirtuals(), getStorageID());
|
||||
return Pipe::unitePipes(std::move(pipes));
|
||||
}
|
||||
|
||||
QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage(
|
||||
ContextPtr context, QueryProcessingStage::Enum to_stage, SelectQueryInfo &) const
|
||||
{
|
||||
/// Initiator executes query on remote node.
|
||||
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY)
|
||||
if (to_stage >= QueryProcessingStage::Enum::WithMergeableState)
|
||||
return QueryProcessingStage::Enum::WithMergeableState;
|
||||
|
||||
/// Follower just reads the data.
|
||||
return QueryProcessingStage::Enum::FetchColumns;
|
||||
}
|
||||
|
||||
|
||||
NamesAndTypesList StorageS3Cluster::getVirtuals() const
|
||||
{
|
||||
return NamesAndTypesList{
|
||||
{"_path", std::make_shared<DataTypeString>()},
|
||||
{"_file", std::make_shared<DataTypeString>()}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
#endif
|
63 src/Storages/StorageS3Cluster.h Normal file
@ -0,0 +1,63 @@
#pragma once

#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif

#if USE_AWS_S3

#include "Client/Connection.h"
#include <Interpreters/Cluster.h>
#include <IO/S3Common.h>
#include <Storages/StorageS3.h>

#include <memory>
#include <optional>
#include "ext/shared_ptr_helper.h"

namespace DB
{

class Context;

struct ClientAuthentificationBuilder
{
String access_key_id;
String secret_access_key;
UInt64 max_connections;
};

class StorageS3Cluster : public ext::shared_ptr_helper<StorageS3Cluster>, public IStorage
{
friend struct ext::shared_ptr_helper<StorageS3Cluster>;
public:
std::string getName() const override { return "S3Cluster"; }

Pipe read(const Names &, const StorageMetadataPtr &, SelectQueryInfo &,
ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override;

QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, SelectQueryInfo &) const override;

NamesAndTypesList getVirtuals() const override;

protected:
StorageS3Cluster(
const String & filename_, const String & access_key_id_, const String & secret_access_key_, const StorageID & table_id_,
String cluster_name_, const String & format_name_, UInt64 max_connections_, const ColumnsDescription & columns_,
const ConstraintsDescription & constraints_, ContextPtr context_, const String & compression_method_);

private:
/// Connections from initiator to other nodes
std::vector<std::shared_ptr<Connection>> connections;
StorageS3::ClientAuthentificaiton client_auth;

String filename;
String cluster_name;
String format_name;
String compression_method;
};


}

#endif
@ -31,7 +31,7 @@ public:
unsigned num_streams) override;

protected:
StorageSystemOne(const StorageID & table_id_);
explicit StorageSystemOne(const StorageID & table_id_);
};

}

@ -18,7 +18,7 @@ namespace DB
class TableFunctionRemote : public ITableFunction
{
public:
TableFunctionRemote(const std::string & name_, bool secure_ = false);
explicit TableFunctionRemote(const std::string & name_, bool secure_ = false);

std::string getName() const override { return name; }

@ -17,7 +17,6 @@ namespace DB

namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}
@ -26,35 +25,53 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con
/// Parse args
ASTs & args_func = ast_function->children;

const auto message = fmt::format(
"The signature of table function {} could be the following:\n" \
" - url, format, structure\n" \
" - url, format, structure, compression_method\n" \
" - url, access_key_id, secret_access_key, format, structure\n" \
" - url, access_key_id, secret_access_key, format, structure, compression_method",
getName());

if (args_func.size() != 1)
throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR);
throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

ASTs & args = args_func.at(0)->children;

if (args.size() < 3 || args.size() > 6)
throw Exception("Table function '" + getName() + "' requires 3 to 6 arguments: url, [access_key_id, secret_access_key,] format, structure and [compression_method].",
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);

/// Size -> argument indexes
static auto size_to_args = std::map<size_t, std::map<String, size_t>>
{
{3, {{"format", 1}, {"structure", 2}}},
{4, {{"format", 1}, {"structure", 2}, {"compression_method", 3}}},
{5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}},
{6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}}
};

/// This argument is always the first
filename = args[0]->as<ASTLiteral &>().value.safeGet<String>();

if (args.size() < 5)
{
format = args[1]->as<ASTLiteral &>().value.safeGet<String>();
structure = args[2]->as<ASTLiteral &>().value.safeGet<String>();
}
else
{
access_key_id = args[1]->as<ASTLiteral &>().value.safeGet<String>();
secret_access_key = args[2]->as<ASTLiteral &>().value.safeGet<String>();
format = args[3]->as<ASTLiteral &>().value.safeGet<String>();
structure = args[4]->as<ASTLiteral &>().value.safeGet<String>();
}
auto & args_to_idx = size_to_args[args.size()];

if (args.size() == 4 || args.size() == 6)
compression_method = args.back()->as<ASTLiteral &>().value.safeGet<String>();
if (args_to_idx.contains("format"))
format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("structure"))
structure = args[args_to_idx["structure"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("compression_method"))
compression_method = args[args_to_idx["compression_method"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("access_key_id"))
access_key_id = args[args_to_idx["access_key_id"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("secret_access_key"))
secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
}

ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const
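As a usage sketch, the signatures listed in the message above correspond to calls like the one below; the endpoint, credentials and column list are copied from the integration test added later in this diff, not invented here:

SELECT count(*)
FROM s3(
    'http://minio1:9001/root/data/{clickhouse,database}/*',
    'minio', 'minio123', 'CSV',
    'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');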
144 src/TableFunctions/TableFunctionS3Cluster.cpp Normal file
@ -0,0 +1,144 @@
#if !defined(ARCADIA_BUILD)
#include <Common/config.h>
#endif

#if USE_AWS_S3

#include <Storages/StorageS3Cluster.h>

#include <DataTypes/DataTypeString.h>
#include <DataStreams/RemoteBlockInputStream.h>
#include <IO/S3Common.h>
#include <Storages/StorageS3.h>
#include <Interpreters/evaluateConstantExpression.h>
#include <Interpreters/Context.h>
#include <Interpreters/ClientInfo.h>
#include <TableFunctions/TableFunctionFactory.h>
#include <TableFunctions/TableFunctionS3.h>
#include <TableFunctions/TableFunctionS3Cluster.h>
#include <TableFunctions/parseColumnsListForTableFunction.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/IAST_fwd.h>
#include <Processors/Sources/SourceFromInputStream.h>

#include "registerTableFunctions.h"

#include <memory>
#include <thread>

namespace DB
{

namespace ErrorCodes
{
extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
}


void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, ContextPtr context)
{
/// Parse args
ASTs & args_func = ast_function->children;

if (args_func.size() != 1)
throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

ASTs & args = args_func.at(0)->children;

const auto message = fmt::format(
"The signature of table function {} could be the following:\n" \
" - cluster, url, format, structure\n" \
" - cluster, url, format, structure, compression_method\n" \
" - cluster, url, access_key_id, secret_access_key, format, structure\n" \
" - cluster, url, access_key_id, secret_access_key, format, structure, compression_method",
getName());

if (args.size() < 4 || args.size() > 7)
throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

for (auto & arg : args)
arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context);

/// These arguments are always the first
cluster_name = args[0]->as<ASTLiteral &>().value.safeGet<String>();
filename = args[1]->as<ASTLiteral &>().value.safeGet<String>();

/// Size -> argument indexes
static auto size_to_args = std::map<size_t, std::map<String, size_t>>
{
{4, {{"format", 2}, {"structure", 3}}},
{5, {{"format", 2}, {"structure", 3}, {"compression_method", 4}}},
{6, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}}},
{7, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}}
};

auto & args_to_idx = size_to_args[args.size()];

if (args_to_idx.contains("format"))
format = args[args_to_idx["format"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("structure"))
structure = args[args_to_idx["structure"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("compression_method"))
compression_method = args[args_to_idx["compression_method"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("access_key_id"))
access_key_id = args[args_to_idx["access_key_id"]]->as<ASTLiteral &>().value.safeGet<String>();

if (args_to_idx.contains("secret_access_key"))
secret_access_key = args[args_to_idx["secret_access_key"]]->as<ASTLiteral &>().value.safeGet<String>();
}


ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const
{
return parseColumnsListFromString(structure, context);
}

StoragePtr TableFunctionS3Cluster::executeImpl(
const ASTPtr & /*function*/, ContextPtr context,
const std::string & table_name, ColumnsDescription /*cached_columns*/) const
{
StoragePtr storage;
if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY)
{
/// On a worker node this filename won't contain globs
Poco::URI uri (filename);
S3::URI s3_uri (uri);
/// Actually these parameters are not used
UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size;
UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size;
UInt64 max_connections = context->getSettingsRef().s3_max_connections;
storage = StorageS3::create(
s3_uri, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name),
format, min_upload_part_size, max_single_part_upload_size, max_connections,
getActualTableStructure(context), ConstraintsDescription{},
context, compression_method, /*distributed_processing=*/true);
}
else
{
storage = StorageS3Cluster::create(
filename, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name),
cluster_name, format, context->getSettingsRef().s3_max_connections,
getActualTableStructure(context), ConstraintsDescription{},
context, compression_method);
}

storage->startup();

return storage;
}


void registerTableFunctionS3Cluster(TableFunctionFactory & factory)
{
factory.registerFunction<TableFunctionS3Cluster>();
}


}

#endif
56 src/TableFunctions/TableFunctionS3Cluster.h Normal file
@ -0,0 +1,56 @@
#pragma once

#include <Common/config.h>

#if USE_AWS_S3

#include <TableFunctions/ITableFunction.h>


namespace DB
{

class Context;

/**
* s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure)
* A table function, which allows to process many files from S3 on a specific cluster.
* On the initiator it creates a connection to _all_ nodes in the cluster, discloses asterisks
* in the S3 file path and dispatches each file dynamically.
* On a worker node it asks the initiator about the next task to process, and processes it.
* This is repeated until the tasks are finished.
*/
class TableFunctionS3Cluster : public ITableFunction
{
public:
static constexpr auto name = "s3Cluster";
std::string getName() const override
{
return name;
}
bool hasStaticStructure() const override { return true; }

protected:
StoragePtr executeImpl(
const ASTPtr & ast_function,
ContextPtr context,
const std::string & table_name,
ColumnsDescription cached_columns) const override;

const char * getStorageTypeName() const override { return "S3Cluster"; }

ColumnsDescription getActualTableStructure(ContextPtr) const override;
void parseArguments(const ASTPtr &, ContextPtr) override;

String cluster_name;
String filename;
String format;
String structure;
String access_key_id;
String secret_access_key;
String compression_method = "auto";
};

}

#endif
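For reference, a minimal invocation of the new table function, mirroring the integration test added later in this diff (the MinIO endpoint, credentials and column list come from that test, not from this header):

SELECT count(*)
FROM s3Cluster(
    'cluster_simple',
    'http://minio1:9001/root/data/{clickhouse,database}/*',
    'minio', 'minio123', 'CSV',
    'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))');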
@ -21,6 +21,7 @@ void registerTableFunctions()

#if USE_AWS_S3
registerTableFunctionS3(factory);
registerTableFunctionS3Cluster(factory);
registerTableFunctionCOS(factory);
#endif

@ -21,6 +21,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory);

#if USE_AWS_S3
void registerTableFunctionS3(TableFunctionFactory & factory);
void registerTableFunctionS3Cluster(TableFunctionFactory & factory);
void registerTableFunctionCOS(TableFunctionFactory & factory);
#endif
@ -117,6 +117,45 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam
mysql_node.query("DROP DATABASE test_database")


def materialize_mysql_database_with_views(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS test_database")
clickhouse_node.query("DROP DATABASE IF EXISTS test_database")
mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'")
# existed before the mapping was created

mysql_node.query("CREATE TABLE test_database.test_table_1 ("
"`key` INT NOT NULL PRIMARY KEY, "
"unsigned_tiny_int TINYINT UNSIGNED, tiny_int TINYINT, "
"unsigned_small_int SMALLINT UNSIGNED, small_int SMALLINT, "
"unsigned_medium_int MEDIUMINT UNSIGNED, medium_int MEDIUMINT, "
"unsigned_int INT UNSIGNED, _int INT, "
"unsigned_integer INTEGER UNSIGNED, _integer INTEGER, "
"unsigned_bigint BIGINT UNSIGNED, _bigint BIGINT, "
"/* Need ClickHouse support read mysql decimal unsigned_decimal DECIMAL(19, 10) UNSIGNED, _decimal DECIMAL(19, 10), */"
"unsigned_float FLOAT UNSIGNED, _float FLOAT, "
"unsigned_double DOUBLE UNSIGNED, _double DOUBLE, "
"_varchar VARCHAR(10), _char CHAR(10), binary_col BINARY(8), "
"/* Need ClickHouse support Enum('a', 'b', 'v') _enum ENUM('a', 'b', 'c'), */"
"_date Date, _datetime DateTime, _timestamp TIMESTAMP, _bool BOOLEAN) ENGINE = InnoDB;")

mysql_node.query("CREATE VIEW test_database.test_table_1_view AS SELECT SUM(tiny_int) FROM test_database.test_table_1 GROUP BY _date;")

# it already has some data
mysql_node.query("""
INSERT INTO test_database.test_table_1 VALUES(1, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 3.2, -3.2, 3.4, -3.4, 'varchar', 'char', 'binary',
'2020-01-01', '2020-01-01 00:00:00', '2020-01-01 00:00:00', true);
""")
clickhouse_node.query(
"CREATE DATABASE test_database ENGINE = MaterializeMySQL('{}:3306', 'test_database', 'root', 'clickhouse')".format(
service_name))

assert "test_database" in clickhouse_node.query("SHOW DATABASES")
check_query(clickhouse_node, "SHOW TABLES FROM test_database FORMAT TSV", "test_table_1\n")

clickhouse_node.query("DROP DATABASE test_database")
mysql_node.query("DROP DATABASE test_database")


def materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, mysql_node, service_name):
mysql_node.query("DROP DATABASE IF EXISTS test_database")
clickhouse_node.query("DROP DATABASE IF EXISTS test_database")
@ -150,12 +150,14 @@ def started_mysql_8_0():
@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_materialize_database_dml_with_mysql_5_7(started_cluster, started_mysql_5_7, clickhouse_node):
materialize_with_ddl.dml_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql1")
materialize_with_ddl.materialize_mysql_database_with_views(clickhouse_node, started_mysql_5_7, "mysql1")
materialize_with_ddl.materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, started_mysql_5_7, "mysql1")


@pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic])
def test_materialize_database_dml_with_mysql_8_0(started_cluster, started_mysql_8_0, clickhouse_node):
materialize_with_ddl.dml_with_materialize_mysql_database(clickhouse_node, started_mysql_8_0, "mysql8_0")
materialize_with_ddl.materialize_mysql_database_with_views(clickhouse_node, started_mysql_8_0, "mysql8_0")
materialize_with_ddl.materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, started_mysql_8_0, "mysql8_0")
24 tests/integration/test_s3_cluster/configs/cluster.xml Normal file
@ -0,0 +1,24 @@
<yandex>
|
||||
<remote_servers>
|
||||
<cluster_simple>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>s0_0_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
<replica>
|
||||
<host>s0_0_1</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
<shard>
|
||||
<replica>
|
||||
<host>s0_1_0</host>
|
||||
<port>9000</port>
|
||||
</replica>
|
||||
</shard>
|
||||
</cluster_simple>
|
||||
|
||||
</remote_servers>
|
||||
</yandex>
|
10 tests/integration/test_s3_cluster/data/clickhouse/part1.csv Normal file
@ -0,0 +1,10 @@
"fSRH",976027584,"[[(-1.5346513608456012e-204,-2.867937504545497e266),(3.1627675144114637e-231,-2.20343471241604e-54),(-1.866886218651809e-89,-7.695893036366416e100),(8.196307577166986e-169,-8.203793887684096e-263),(-1.6150328830402252e-215,8.531116551449711e-296),(4.3378407855931477e92,1.1313645428723989e117),(-4.238081208165573e137,-8.969951719788361e67)],[(-3.409639554701108e169,-7.277093176871153e-254),(1.1466207153308928e-226,3.429893348531029e96),(6.451302850199177e-189,-7.52379443153242e125),(-1.7132078539493614e-127,-2.3177814806867505e241),(1.4996520594989919e-257,4.271017883966942e128)],[(65460976657479156000,1.7055814145588595e253),(-1.921491101580189e154,3.2912740465446566e-286),(0.0008437955075350972,-5.143493717005472e-107),(8.637208599142187e-150,7.825076274945548e136),(1.8077733932468565e-159,5.51061479974026e-77),(1.300406236793709e-260,10669142.497111017),(-1.731981751951159e91,-1.270795062098902e102)],[(3.336706342781395e-7,-1.1919528866481513e266)]]"
"sX6>",733011552,"[[(-3.737863336077909e-44,3.066510481088993e-161),(-1.0047259170558555e-31,8.066145272086467e-274)],[(1.2261835328136691e-58,-6.154561379350395e258),(8.26019994651558e35,-6.736984599062694e-19),(-1.4143671344485664e-238,-1.220003479858045e203),(2.466089772925698e-207,1.0025476904532926e-242),(-6.3786667153054354e240,-7.010834902137467e-103),(-6.766918514324285e-263,7.404639608483947e188),(2.753493977757937e126,-4.089565842001999e-152)],[(4.339873790493155e239,-5.022554811588342e24),(-1.7712390083519473e-66,1.3290563068463308e112),(3.3648764781548893e233,1.1123394188044336e112),(-5.415278137684864e195,5.590597851016202e-270),(-2.1032310903543943e99,-2.2335799924679948e-184)]]"
"",2396526460,"[[(1.9925796792641788e-261,1.647618305107044e158),(3.014593666207223e-222,-9.016473078578002e-20),(-1.5307802021477097e-230,-7.867078587209265e-243),(-7.330317098800564e295,1.7496539408601967e-281)],[(2.2816938730052074e98,-3.3089122320442997e-136),(-4.930983789361344e-263,-6.526758521792829e59),(-2.6482873886835413e34,-4.1985691142515947e83),(1.5496810029349365e238,-4.790553105593492e71),(-7.597436233325566e83,-1.3791763752378415e137),(-1.917321980700588e-307,-1.5913257477581824e62)]]"
"=@ep",3618088392,"[[(-2.2303235811290024e-306,8.64070367587338e-13),(-7.403012423264767e-129,-1.0825508572345856e-147),(-3.6080301450167e286,1.7302718548299961e285),(-1.3839239794870825e-156,4.255424291564323e107),(2.3191305762555e-33,-2.873899421579949e-145),(7.237414513124649e-159,-4.926574547865783e178),(4.251831312243431e-199,1.2164714479391436e201)],[(-5.114074387943793e242,2.0119340496886292e295),(-3.3663670765548e-262,-6.1992631068472835e221),(1.1539386993255106e-261,1.582903697171063e-33),(-6.1914577817088e118,-1.0401495621681123e145)],[],[(-5.9815907467493136e82,4.369047439375412e219),(-4.485368440431237e89,-3.633023372434946e-59),(-2.087497331251707e-180,1.0524018118646965e257)],[(-1.2636503461000215e-228,-4.8426877075223456e204),(2.74943107551342e281,-7.453097760262003e-14)]]"
"",3467776823,"[]"
"b'zQ",484159052,"[[(3.041838095219909e276,-6.956822159518612e-87)],[(6.636906358770296e-97,1.0531865724169307e-214)],[(-8.429249069245283e-243,-2.134779842898037e243)],[(-0.4657586598569572,2.799768548127799e187),(-5.961335445789657e-129,2.560331789344886e293),(-3.139409694983184e45,2.8011384557268085e-47)]]"
"6xGw",4126129912,"[]"
"Q",3109335413,"[[(-2.8435266267772945e39,9.548278488724291e26),(-1.1682790407223344e46,-3.925561182768867e-266),(2.8381633655721614e-202,-3.472921303086527e40),(3.3968328275944204e-150,-2.2188876184777275e-69),(-1.2612795000783405e-88,-1.2942793285205966e-49),(1.3678466236967012e179,1.721664680964459e97),(-1.1020844667744628e198,-3.403142062758506e-47)],[],[(1.343149099058239e-279,9.397894929770352e-132),(-5.280854317597215e250,9.862550191577643e-292),(-7.11468799151533e-58,7.510011657942604e96),(1.183774454157175e-288,-1.5697197095936546e272),(-3.727289017361602e120,2.422831380775067e-107),(1.4345094301262986e-177,2.4990983297605437e-91)],[(9.195226893854516e169,6.546374357272709e-236),(2.320311199531441e-126,2.2257031285964243e-185),(3.351868475505779e-184,1.84394695526876e88)],[(1.6290814396647987e-112,-3.589542711073253e38),(4.0060174859833907e-261,-1.9900431208726192e-296),(2.047468933030435e56,8.483912759156179e-57),(3.1165727272872075e191,-1.5487136748040008e-156),(0.43564020198461034,4.618165048931035e-244),(-7.674951896752824e-214,1.1652522629091777e-105),(4.838653901829244e-89,5.3085904574780206e169)],[(1.8286703553352283e-246,2.0403170465657044e255),(2.040810692623279e267,4.3956975402250484e-8),(2.4101343663018673e131,-8.672394158504762e167),(3.092080945239809e-219,-3.775474693770226e293),(-1.527991241079512e-15,-1.2603969180963007e226),(9.17470637459212e-56,1.6021090930395906e-133),(7.877647227721046e58,3.2592118033868903e-108)],[(1.4334765313272463e170,2.6971234798957105e-50)]]"
"^ip",1015254922,"[[(-2.227414144223298e-63,1.2391785738638914e276),(1.2668491759136862e207,2.5656762953078853e-67),(2.385410876813441e-268,1.451107969531624e25),(-5.475956161647574e131,2239495689376746),(1.5591286361054593e180,3.672868971445151e117)]]"
"5N]",1720727300,"[[(-2.0670321228319122e-258,-2.6893477429616666e-32),(-2.2424105705209414e225,3.547832127050775e25),(4.452916756606404e-121,-3.71114618421911e156),(-1.966961937965055e-110,3.1217044497868816e227),(20636923519704216,1.3500210618276638e30),(3.3195926701816527e-276,1.5557140338374535e234)],[]]"
3 tests/integration/test_s3_cluster/data/clickhouse/part123.csv Normal file
@ -0,0 +1,3 @@
"b'zQ",2960084897,"[[(3.014593666207223e-222,-7.277093176871153e-254),(-1.5307802021477097e-230,3.429893348531029e96),(-7.330317098800564e295,-7.52379443153242e125),(2.2816938730052074e98,-2.3177814806867505e241),(-4.930983789361344e-263,4.271017883966942e128)],[(-2.6482873886835413e34,1.7055814145588595e253),(1.5496810029349365e238,3.2912740465446566e-286),(-7.597436233325566e83,-5.143493717005472e-107),(-1.917321980700588e-307,7.825076274945548e136)],[(-2.2303235811290024e-306,5.51061479974026e-77),(-7.403012423264767e-129,10669142.497111017),(-3.6080301450167e286,-1.270795062098902e102),(-1.3839239794870825e-156,-1.1919528866481513e266),(2.3191305762555e-33,3.066510481088993e-161),(7.237414513124649e-159,8.066145272086467e-274)],[(4.251831312243431e-199,-6.154561379350395e258),(-5.114074387943793e242,-6.736984599062694e-19),(-3.3663670765548e-262,-1.220003479858045e203),(1.1539386993255106e-261,1.0025476904532926e-242),(-6.1914577817088e118,-7.010834902137467e-103),(-5.9815907467493136e82,7.404639608483947e188),(-4.485368440431237e89,-4.089565842001999e-152)]]"
"6xGw",2107128550,"[[(-2.087497331251707e-180,-5.022554811588342e24),(-1.2636503461000215e-228,1.3290563068463308e112),(2.74943107551342e281,1.1123394188044336e112),(3.041838095219909e276,5.590597851016202e-270)],[],[(6.636906358770296e-97,-2.2335799924679948e-184),(-8.429249069245283e-243,1.647618305107044e158),(-0.4657586598569572,-9.016473078578002e-20)]]"
"Q",2713167232,"[[(-5.961335445789657e-129,-7.867078587209265e-243),(-3.139409694983184e45,1.7496539408601967e-281)],[(-2.8435266267772945e39,-3.3089122320442997e-136)]]"
5 tests/integration/test_s3_cluster/data/database/part2.csv Normal file
@ -0,0 +1,5 @@
"~m`",820408404,"[]"
"~E",3621610983,"[[(1.183772215004139e-238,-1.282774073199881e211),(1.6787305112393978e-46,7.500499989257719e25),(-2.458759475104641e-260,3.1724599388651864e-171),(-2.0163203163062471e118,-4.677226438945462e-162),(-5.52491070012707e-135,7.051780441780731e-236)]]"
"~1",1715555780,"[[(-6.847404226505131e-267,5.939552045362479e-272),(8.02275075985457e-160,8.369250185716419e-104),(-1.193940928527857e-258,-1.132580458849774e39)],[(1.1866087552639048e253,3.104988412734545e57),(-3.37278669639914e84,-2.387628643569968e287),(-2.452136349495753e73,3.194309776006896e-204),(-1001997440265471100,3.482122851077378e-182)],[],[(-5.754682082202988e-20,6.598766936241908e156)],[(8.386764833095757e300,1.2049637765877942e229),(3.136243074210055e53,5.764669663844127e-100),(-4.190632347661851e195,-5.053553379163823e302),(2.0805194731736336e-19,-1.0849036699112485e-271),(1.1292361211411365e227,-8.767824448179629e229),(-3.6938137156625264e-19,-5.387931698392423e109),(-1.2240482125885677e189,-1.5631467861525635e-103)],[(-2.3917431782202442e138,7.817228281030191e-242),(-1.1462343232899826e279,-1.971215065504208e-225),(5.4316119855340265e-62,3.761081156597423e-60),(8.111852137718836e306,8.115485489580134e-208)],[]]"
"~%]",1606443384,"[[]]"
"}or",726681547,"[]"
7 tests/integration/test_s3_cluster/data/database/partition675.csv Normal file
@ -0,0 +1,7 @@
"kvUES",4281162618,"[[(2.4538308454074088e303,1.2209370543175666e178),(1.4564007891121754e-186,2.340773478952682e-273),(-1.01791181533976e165,-3.9617466227377253e248)]]"
"Gu",4280623186,"[[(-1.623487579335014e38,-1.0633405021023563e225),(-4.373688812751571e180,2.5511550357717127e138)]]"
"J_u1",4277430503,"[[(2.981826196369429e-294,-6.059236590410922e236),(8.502045137575854e-296,3.0210403188125657e-91),(-9.370591842861745e175,4.150870185764185e129),(1.011801592194125e275,-9.236010982686472e266),(-3.1830638196303316e277,2.417706446545472e-105),(-1.4369143023804266e-201,4.7529126795899655e238)],[(-2.118789593804697e186,-1.8760231612433755e-280),(2.5982563179976053e200,-1.4683025762313524e-40)],[(-1.873397623255704e-240,1.4363190147949886e-283),(-1.5760337746177136e153,1.5272278536086246e-34),(-8.117473317695919e155,2.4375370926733504e150),(-1.179230972881795e99,1.7693459774706515e-259),(2.2102106250558424e-40,4.734162675762768e-56),(6.058833110550111e-8,8.892471775821198e164),(-1.8208740799996599e59,6.446958261080721e178)]]"
"s:\",4265055390,"[[(-3.291651377214531e-167,3.9198636942402856e185),(2.4897781692770126e176,2.579309759138358e188),(4.653945381397663e205,3.216314556208208e158),(-5.3373279440714224e-39,2.404386813826413e212),(-1.4217294382527138e307,8.874978978402512e-173)],[(8.527603121149904e-58,-5.0520795335878225e88),(-0.00022870878520550814,-3.2334214176860943e-68),(-6.97683613433404e304,-2.1573757788072144e-82),(-1.1394163455875937e36,-3.817990182461824e271),(2.4099027412881423e-209,8.542179392011098e-156),(3.2610511540394803e174,1.1692631657517616e-20)],[(3.625474290538107e261,-5.359205062039837e-193),(-3.574126569378072e-112,-5.421804160994412e265),(-4.873653931207849e-76,3219678918284.317),(-7.030770825898911e-57,1.4647389742249787e-274),(-4.4882439220492357e-203,6.569338333730439e-38)],[(-2.2418056002374865e-136,5.113251922954469e-16),(2.5156744571032497e297,-3.0536957683846124e-192)],[(1.861112291954516e306,-1.8160882143331256e129),(1.982573454900027e290,-2.451412311394593e170)],[(-2.8292230178712157e-18,1.2570198161962067e216),(6.24832495972797e-164,-2.0770908334330718e-273)],[(980143647.1858811,1.2738714961511727e106),(6.516450532397311e-184,4.088688742052062e31),(-2.246311532913914e269,-7.418103885850518e-179),(1.2222973942835046e-289,2.750544834553288e-46),(9.503169349701076e159,-1.355457053256579e215)]]"
":hzO",4263726959,"[[(-2.553206398375626e-90,1.6536977728640226e199),(1.5630078027143848e-36,2.805242683101373e-211),(2.2573933085983554e-92,3.450501333524858e292),(-1.215900901292646e-275,-3.860558658606121e272),(6.65716072773856e-145,2.5359010031217893e217)],[(-1.3308039625779135e308,1.7464622720773261e258),(-3.2986890093446374e179,3.9038871583175653e-69),(-4.3594764087383885e-95,4.229921973278908e-123),(-5.455694205415656e137,3.597894902167716e108),(1.2480860990110662e-29,-1.4873488392480292e-185),(7.563210285835444e55,-5624068447.488605)],[(3.9517937289943195e181,-3.2799189227094424e-68),(8.906762198487649e-167,3.952452177941537e-159)]]"
"a",4258301804,"[[(5.827965576703262e-281,2.2523852665173977e90)],[(-6.837604072282348e-97,8.125864241406046e-61)],[(-2.3047912084435663e53,-8.814499720685194e36),(1.2072558137199047e-79,1.2096862541827071e142),(2.2000026293774143e275,-3.2571689055108606e-199),(1.1822278574921316e134,2.9571188365006754e-86),(1.0448954272555034e-169,1.2182183489600953e-60)],[(-3.1366540817730525e89,9.327128058982966e-306),(6.588968210928936e73,-11533531378.938957),(-2.6715943555840563e44,-4.557428011172859e224),(-3.8334913754415923e285,-4.748721454106074e-173),(-1.6912052107425128e275,-4.789382438422238e-219),(1.8538365229016863e151,-3.5698172075468775e-37)],[(-2.1963131282037294e49,-5.53604352524995e-296)],[(-8.834414834987965e167,1.3186354307320576e247),(2.109209547987338e298,1.2191009105107557e-32),(-3.896880410603213e-92,-3.4589588698231044e-121),(-3.252529090888335e138,-7.862741341454407e204)],[(-9.673078095447289e-207,8.839303128607278e123),(2.6043620378793597e-244,-6.898328199987363e-308),(-2.5921142292355475e-54,1.0352159149517285e-143)]]"
"S+",4257734123,"[[(1.5714269203495863e245,-15651321.549208183),(-3.7292056272445236e-254,-4.556927533596056e-234),(-3.0309414401442555e-203,-3.84393827531526e-12)],[(1.7718777510571518e219,3.972086323144777e139),(1.5723805735454373e-67,-3.805243648123396e226),(154531069271292800000,1.1384408025183933e-285),(-2.009892367470994e-247,2.0325742976832167e81)],[(1.2145787097670788e55,-5.0579298233321666e-30),(5.05577441452021e-182,-2.968914705509665e-175),(-1.702335524921919e67,-2.852552828587631e-226),(-2.7664498327826963e-99,-1.2967072085088717e-305),(7.68881162387673e-68,-1.2506915095983359e-142),(-7.60308693295946e-40,5.414853590549086e218)],[(8.595602987813848e226,-3.9708286611967497e-206),(-5.80352787694746e-52,5.610493934761672e236),(2.1336999375861025e217,-5.431988994371099e-154),(-6.2758614367782974e29,-8.359901046980544e-55)],[(1.6910790690897504e54,9.798739710823911e197),(-6.530270107036228e-284,8.758552462406328e-302),(2.931625032390877e-118,2.8793800873550273e83),(-3.293986884112906e-88,11877326093331202),(0.0008071321465157103,1.0720860516457485e-298)]]"
129 tests/integration/test_s3_cluster/test.py Normal file
@ -0,0 +1,129 @@
import logging
import os

import pytest
from helpers.cluster import ClickHouseCluster
from helpers.test_tools import TSV

logging.getLogger().setLevel(logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler())

SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
S3_DATA = ['data/clickhouse/part1.csv', 'data/clickhouse/part123.csv', 'data/database/part2.csv', 'data/database/partition675.csv']

def create_buckets_s3(cluster):
minio = cluster.minio_client
for file in S3_DATA:
minio.fput_object(bucket_name=cluster.minio_bucket, object_name=file, file_path=os.path.join(SCRIPT_DIR, file))
for obj in minio.list_objects(cluster.minio_bucket, recursive=True):
print(obj.object_name)


@pytest.fixture(scope="module")
def started_cluster():
try:
cluster = ClickHouseCluster(__file__)
cluster.add_instance('s0_0_0', main_configs=["configs/cluster.xml"], with_minio=True)
cluster.add_instance('s0_0_1', main_configs=["configs/cluster.xml"])
cluster.add_instance('s0_1_0', main_configs=["configs/cluster.xml"])

logging.info("Starting cluster...")
cluster.start()
logging.info("Cluster started")

create_buckets_s3(cluster)

yield cluster
finally:
cluster.shutdown()


def test_select_all(started_cluster):
node = started_cluster.instances['s0_0_0']
pure_s3 = node.query("""
SELECT * from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
ORDER BY (name, value, polygon)""")
# print(pure_s3)
s3_distibuted = node.query("""
SELECT * from s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""")
# print(s3_distibuted)

assert TSV(pure_s3) == TSV(s3_distibuted)


def test_count(started_cluster):
node = started_cluster.instances['s0_0_0']
pure_s3 = node.query("""
SELECT count(*) from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""")
# print(pure_s3)
s3_distibuted = node.query("""
SELECT count(*) from s3Cluster(
'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""")
# print(s3_distibuted)

assert TSV(pure_s3) == TSV(s3_distibuted)


def test_union_all(started_cluster):
node = started_cluster.instances['s0_0_0']
pure_s3 = node.query("""
SELECT * FROM
(
SELECT * from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
UNION ALL
SELECT * from s3(
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
)
ORDER BY (name, value, polygon)
""")
# print(pure_s3)
s3_distibuted = node.query("""
SELECT * FROM
(
SELECT * from s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
UNION ALL
SELECT * from s3Cluster(
'cluster_simple',
'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV',
'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
)
ORDER BY (name, value, polygon)
""")
# print(s3_distibuted)

assert TSV(pure_s3) == TSV(s3_distibuted)


def test_wrong_cluster(started_cluster):
node = started_cluster.instances['s0_0_0']
error = node.query_and_get_error("""
SELECT count(*) from s3Cluster(
'non_existent_cluster',
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
UNION ALL
SELECT count(*) from s3Cluster(
'non_existent_cluster',
'http://minio1:9001/root/data/{clickhouse,database}/*',
'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""")

assert "not found" in error
@ -34,10 +34,6 @@
1
0
-
0
0
0
0
-
0
1
Some files were not shown because too many files have changed in this diff.