diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f895c7c482..cc1ec835a7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,8 @@ * Now replicas that are processing the `ALTER TABLE ATTACH PART[ITION]` command search in their `detached/` folders before fetching the data from other replicas. As an implementation detail, a new command `ATTACH_PART` is introduced in the replicated log. Parts are searched and compared by their checksums. [#18978](https://github.com/ClickHouse/ClickHouse/pull/18978) ([Mike Kot](https://github.com/myrrc)). **Note**: * `ATTACH PART[ITION]` queries may not work during cluster upgrade. * It's not possible to rollback to older ClickHouse version after executing `ALTER ... ATTACH` query in new version as the old servers would fail to pass the `ATTACH_PART` entry in the replicated log. +* In this version, empty `` will block all access to remote hosts while in previous versions it did nothing. If you want to keep old behaviour and you have empty `remote_url_allow_hosts` element in configuration file, remove it. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). + #### New Feature @@ -132,7 +134,6 @@ * Fix receive and send timeouts and non-blocking read in secure socket. [#21429](https://github.com/ClickHouse/ClickHouse/pull/21429) ([Kruglov Pavel](https://github.com/Avogar)). * `force_drop_table` flag didn't work for `MATERIALIZED VIEW`, it's fixed. Fixes [#18943](https://github.com/ClickHouse/ClickHouse/issues/18943). [#20626](https://github.com/ClickHouse/ClickHouse/pull/20626) ([tavplubix](https://github.com/tavplubix)). * Fix name clashes in `PredicateRewriteVisitor`. It caused incorrect `WHERE` filtration after full join. Close [#20497](https://github.com/ClickHouse/ClickHouse/issues/20497). [#20622](https://github.com/ClickHouse/ClickHouse/pull/20622) ([Vladimir](https://github.com/vdimir)). -* Fixed open behavior of remote host filter in case when there is `remote_url_allow_hosts` section in configuration but no entries there. [#20058](https://github.com/ClickHouse/ClickHouse/pull/20058) ([Vladimir Chebotarev](https://github.com/excitoon)). #### Build/Testing/Packaging Improvement diff --git a/src/Common/BorrowedObjectPool.h b/base/common/BorrowedObjectPool.h similarity index 99% rename from src/Common/BorrowedObjectPool.h rename to base/common/BorrowedObjectPool.h index d5263cf92a8..6a90a7e7122 100644 --- a/src/Common/BorrowedObjectPool.h +++ b/base/common/BorrowedObjectPool.h @@ -7,8 +7,7 @@ #include #include - -#include +#include /** Pool for limited size objects that cannot be used from different threads simultaneously. 
* The main use case is to have fixed size of objects that can be reused in difference threads during their lifetime diff --git a/src/Common/MoveOrCopyIfThrow.h b/base/common/MoveOrCopyIfThrow.h similarity index 100% rename from src/Common/MoveOrCopyIfThrow.h rename to base/common/MoveOrCopyIfThrow.h diff --git a/docker/test/fasttest/run.sh b/docker/test/fasttest/run.sh index 14c6ee0d337..a42cb25f6f0 100755 --- a/docker/test/fasttest/run.sh +++ b/docker/test/fasttest/run.sh @@ -366,6 +366,9 @@ function run_tests # JSON functions 01666_blns + + # Depends on AWS + 01801_s3_cluster ) (time clickhouse-test --hung-check -j 8 --order=random --use-skip-list --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 ||:) | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt" diff --git a/docker/test/stress/stress b/docker/test/stress/stress index 25a705ecbd1..4fbedceb0b8 100755 --- a/docker/test/stress/stress +++ b/docker/test/stress/stress @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- from multiprocessing import cpu_count -from subprocess import Popen, call, STDOUT +from subprocess import Popen, call, check_output, STDOUT import os import sys import shutil @@ -85,10 +85,27 @@ def prepare_for_hung_check(): # Issue #21004, live views are experimental, so let's just suppress it call("""clickhouse client -q "KILL QUERY WHERE upper(query) LIKE 'WATCH %'" """, shell=True, stderr=STDOUT) - # Wait for last queries to finish if any, not longer than 120 seconds + # Kill other queries which known to be slow + # It's query from 01232_preparing_sets_race_condition_long, it may take up to 1000 seconds in slow builds + call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'insert into tableB select %'" """, shell=True, stderr=STDOUT) + # Long query from 00084_external_agregation + call("""clickhouse client -q "KILL QUERY WHERE query LIKE 'SELECT URL, uniq(SearchPhrase) AS u FROM test.hits GROUP BY URL ORDER BY u %'" """, shell=True, stderr=STDOUT) + + # Wait for last queries to finish if any, not longer than 300 seconds call("""clickhouse client -q "select sleepEachRow(( - select maxOrDefault(120 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 120 - ) / 120) from numbers(120) format Null" """, shell=True, stderr=STDOUT) + select maxOrDefault(300 - elapsed) + 1 from system.processes where query not like '%from system.processes%' and elapsed < 300 + ) / 300) from numbers(300) format Null" """, shell=True, stderr=STDOUT) + + # Even if all clickhouse-test processes are finished, there are probably some sh scripts, + # which still run some new queries. Let's ignore them. 
+ try: + query = """clickhouse client -q "SELECT count() FROM system.processes where where elapsed > 300" """ + output = check_output(query, shell=True, stderr=STDOUT).decode('utf-8').strip() + if int(output) == 0: + return False + except: + pass + return True if __name__ == "__main__": logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') @@ -119,12 +136,12 @@ if __name__ == "__main__": logging.info("All processes finished") if args.hung_check: - prepare_for_hung_check() + have_long_running_queries = prepare_for_hung_check() logging.info("Checking if some queries hung") cmd = "{} {} {}".format(args.test_cmd, "--hung-check", "00001_select_1") res = call(cmd, shell=True, stderr=STDOUT) hung_check_status = "No queries hung\tOK\n" - if res != 0: + if res != 0 and have_long_running_queries: logging.info("Hung check failed with exit code {}".format(res)) hung_check_status = "Hung check failed\tFAIL\n" open(os.path.join(args.output_folder, "test_results.tsv"), 'w+').write(hung_check_status) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index ad5bebb3dea..4474b764d2e 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2); INSERT 0 1 postgresql> SELECT * FROM test; - int_id | int_nullable | float | str | float_nullable ---------+--------------+-------+------+---------------- - 1 | | 2 | test | -(1 row) + int_id | int_nullable | float | str | float_nullable + --------+--------------+-------+------+---------------- + 1 | | 2 | test | + (1 row) ``` Table in ClickHouse, retrieving data from the PostgreSQL table created above: diff --git a/docs/en/sql-reference/functions/bitmap-functions.md b/docs/en/sql-reference/functions/bitmap-functions.md index 7ec400949e9..4875532605e 100644 --- a/docs/en/sql-reference/functions/bitmap-functions.md +++ b/docs/en/sql-reference/functions/bitmap-functions.md @@ -33,7 +33,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res); ``` text ┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐ -│  │ AggregateFunction(groupBitmap, UInt8) │ +│ │ AggregateFunction(groupBitmap, UInt8) │ └─────┴──────────────────────────────────────────────┘ ``` diff --git a/docs/en/sql-reference/functions/hash-functions.md b/docs/en/sql-reference/functions/hash-functions.md index c60067b06af..0ea4cfd6fbe 100644 --- a/docs/en/sql-reference/functions/hash-functions.md +++ b/docs/en/sql-reference/functions/hash-functions.md @@ -437,13 +437,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) data type has **Example** ``` sql -SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type; +SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` ``` text -┌─MurmurHash3──────┬─type────────────┐ -│ 6�1�4"S5KT�~~q │ FixedString(16) │ -└──────────────────┴─────────────────┘ +┌─MurmurHash3──────────────────────┬─type───┐ +│ 368A1A311CB7342253354B548E7E7E71 │ String │ +└──────────────────────────────────┴────────┘ ``` ## xxHash32, xxHash64 {#hash-functions-xxhash32} diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md index bfb5fdf9be6..3eab572ac12 100644 --- a/docs/en/sql-reference/table-functions/postgresql.md +++ b/docs/en/sql-reference/table-functions/postgresql.md @@ 
-65,9 +65,9 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2); INSERT 0 1 postgresql> SELECT * FROM test; - int_id | int_nullable | float | str | float_nullable ---------+--------------+-------+------+---------------- - 1 | | 2 | test | + int_id | int_nullable | float | str | float_nullable + --------+--------------+-------+------+---------------- + 1 | | 2 | test | (1 row) ``` diff --git a/docs/ja/sql-reference/functions/bitmap-functions.md b/docs/ja/sql-reference/functions/bitmap-functions.md index cc57e762610..de3ce938444 100644 --- a/docs/ja/sql-reference/functions/bitmap-functions.md +++ b/docs/ja/sql-reference/functions/bitmap-functions.md @@ -35,7 +35,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res) ``` text ┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐ -│  │ AggregateFunction(groupBitmap, UInt8) │ +│ │ AggregateFunction(groupBitmap, UInt8) │ └─────┴──────────────────────────────────────────────┘ ``` diff --git a/docs/ja/sql-reference/functions/hash-functions.md b/docs/ja/sql-reference/functions/hash-functions.md index d48e6846bb4..a98ae60690d 100644 --- a/docs/ja/sql-reference/functions/hash-functions.md +++ b/docs/ja/sql-reference/functions/hash-functions.md @@ -434,13 +434,13 @@ A [FixedString(16)](../../sql-reference/data-types/fixedstring.md) データ型 **例** ``` sql -SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type +SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` ``` text -┌─MurmurHash3──────┬─type────────────┐ -│ 6�1�4"S5KT�~~q │ FixedString(16) │ -└──────────────────┴─────────────────┘ +┌─MurmurHash3──────────────────────┬─type───┐ +│ 368A1A311CB7342253354B548E7E7E71 │ String │ +└──────────────────────────────────┴────────┘ ``` ## xxHash32,xxHash64 {#hash-functions-xxhash32} diff --git a/docs/ru/engines/table-engines/integrations/postgresql.md b/docs/ru/engines/table-engines/integrations/postgresql.md index 064665d49c1..cb8e38ae5c9 100644 --- a/docs/ru/engines/table-engines/integrations/postgresql.md +++ b/docs/ru/engines/table-engines/integrations/postgresql.md @@ -94,10 +94,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2); INSERT 0 1 postgresql> SELECT * FROM test; - int_id | int_nullable | float | str | float_nullable ---------+--------------+-------+------+---------------- - 1 | | 2 | test | -(1 row) + int_id | int_nullable | float | str | float_nullable + --------+--------------+-------+------+---------------- + 1 | | 2 | test | + (1 row) ``` Таблица в ClickHouse, получение данных из PostgreSQL таблицы, созданной выше: diff --git a/docs/ru/sql-reference/functions/bitmap-functions.md b/docs/ru/sql-reference/functions/bitmap-functions.md index ddae2f3eb40..3da729664d0 100644 --- a/docs/ru/sql-reference/functions/bitmap-functions.md +++ b/docs/ru/sql-reference/functions/bitmap-functions.md @@ -25,7 +25,7 @@ SELECT bitmapBuild([1, 2, 3, 4, 5]) AS res, toTypeName(res); ``` text ┌─res─┬─toTypeName(bitmapBuild([1, 2, 3, 4, 5]))─────┐ -│  │ AggregateFunction(groupBitmap, UInt8) │ +│ │ AggregateFunction(groupBitmap, UInt8) │ └─────┴──────────────────────────────────────────────┘ ``` diff --git a/docs/ru/sql-reference/functions/hash-functions.md b/docs/ru/sql-reference/functions/hash-functions.md index 2efff9c3727..07c741e0588 100644 --- a/docs/ru/sql-reference/functions/hash-functions.md +++ b/docs/ru/sql-reference/functions/hash-functions.md @@ -430,7 +430,7 @@ murmurHash3_128( expr ) **Аргументы** -- `expr` — 
[выражение](../syntax.md#syntax-expressions), возвращающее значение типа[String](../../sql-reference/functions/hash-functions.md). +- `expr` — [выражение](../syntax.md#syntax-expressions), возвращающее значение типа [String](../../sql-reference/functions/hash-functions.md). **Возвращаемое значение** @@ -439,13 +439,13 @@ murmurHash3_128( expr ) **Пример** ``` sql -SELECT murmurHash3_128('example_string') AS MurmurHash3, toTypeName(MurmurHash3) AS type; +SELECT hex(murmurHash3_128('example_string')) AS MurmurHash3, toTypeName(MurmurHash3) AS type; ``` ``` text -┌─MurmurHash3──────┬─type────────────┐ -│ 6�1�4"S5KT�~~q │ FixedString(16) │ -└──────────────────┴─────────────────┘ +┌─MurmurHash3──────────────────────┬─type───┐ +│ 368A1A311CB7342253354B548E7E7E71 │ String │ +└──────────────────────────────────┴────────┘ ``` ## xxHash32, xxHash64 {#hash-functions-xxhash32-xxhash64} diff --git a/docs/ru/sql-reference/table-functions/postgresql.md b/docs/ru/sql-reference/table-functions/postgresql.md index 66637276726..2d8afe28f1e 100644 --- a/docs/ru/sql-reference/table-functions/postgresql.md +++ b/docs/ru/sql-reference/table-functions/postgresql.md @@ -65,10 +65,10 @@ postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2); INSERT 0 1 postgresql> SELECT * FROM test; - int_id | int_nullable | float | str | float_nullable ---------+--------------+-------+------+---------------- - 1 | | 2 | test | -(1 row) + int_id | int_nullable | float | str | float_nullable + --------+--------------+-------+------+---------------- + 1 | | 2 | test | + (1 row) ``` Получение данных в ClickHouse: diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index b88df5a03cb..a1e650d3ad3 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -109,7 +109,8 @@ def build_single_page_version(lang, args, nav, cfg): extra['single_page'] = True extra['is_amp'] = False - with open(os.path.join(args.docs_dir, lang, 'single.md'), 'w') as single_md: + single_md_path = os.path.join(args.docs_dir, lang, 'single.md') + with open(single_md_path, 'w') as single_md: concatenate(lang, args.docs_dir, single_md, nav) with util.temp_dir() as site_temp: @@ -221,3 +222,7 @@ def build_single_page_version(lang, args, nav, cfg): subprocess.check_call(' '.join(create_pdf_command), shell=True) logging.info(f'Finished building single page version for {lang}') + + if os.path.exists(single_md_path): + os.unlink(single_md_path) + \ No newline at end of file diff --git a/programs/client/Suggest.cpp b/programs/client/Suggest.cpp index dfa7048349e..8d4c0fdbd5a 100644 --- a/programs/client/Suggest.cpp +++ b/programs/client/Suggest.cpp @@ -108,14 +108,6 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo " UNION ALL " "SELECT cluster FROM system.clusters" " UNION ALL " - "SELECT name FROM system.errors" - " UNION ALL " - "SELECT event FROM system.events" - " UNION ALL " - "SELECT metric FROM system.asynchronous_metrics" - " UNION ALL " - "SELECT metric FROM system.metrics" - " UNION ALL " "SELECT macro FROM system.macros" " UNION ALL " "SELECT policy_name FROM system.storage_policies" @@ -139,17 +131,12 @@ void Suggest::loadImpl(Connection & connection, const ConnectionTimeouts & timeo query << ") WHERE notEmpty(res)"; - Settings settings; - /// To show all rows from: - /// - system.errors - /// - system.events - settings.system_events_show_zero_values = true; - fetch(connection, timeouts, query.str(), settings); + fetch(connection, timeouts, query.str()); } -void 
Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings) +void Suggest::fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query) { - connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete, &settings); + connection.sendQuery(timeouts, query, "" /* query_id */, QueryProcessingStage::Complete); while (true) { diff --git a/programs/client/Suggest.h b/programs/client/Suggest.h index 0049bc08ebf..03332088cbe 100644 --- a/programs/client/Suggest.h +++ b/programs/client/Suggest.h @@ -33,7 +33,7 @@ public: private: void loadImpl(Connection & connection, const ConnectionTimeouts & timeouts, size_t suggestion_limit); - void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query, Settings & settings); + void fetch(Connection & connection, const ConnectionTimeouts & timeouts, const std::string & query); void fillWordsFromBlock(const Block & block); /// Words are fetched asynchronously. diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 1179ce4cba3..11ae4231aa5 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -566,12 +566,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) if (has_password_for_default_user) { - fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE, + fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE "\n", users_config_file.string(), users_d.string()); } else if (!stdout_is_a_tty) { - fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE, + fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." END_HILITE "\n", users_config_file.string(), users_d.string()); } else @@ -606,7 +606,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv) "\n"; out.sync(); out.finalize(); - fmt::print("Password for default user is saved in file {}.\n", password_file); + fmt::print(HILITE "Password for default user is saved in file {}." END_HILITE "\n", password_file); #else out << "\n" " \n" @@ -617,12 +617,12 @@ int mainEntryClickHouseInstall(int argc, char ** argv) "\n"; out.sync(); out.finalize(); - fmt::print("Password for default user is saved in plaintext in file {}.\n", password_file); + fmt::print(HILITE "Password for default user is saved in plaintext in file {}." END_HILITE "\n", password_file); #endif has_password_for_default_user = true; } else - fmt::print("Password for default user is empty string. See {} and {} to change it.\n", + fmt::print(HILITE "Password for default user is empty string. See {} and {} to change it." 
END_HILITE "\n", users_config_file.string(), users_d.string()); } diff --git a/programs/library-bridge/CMakeLists.txt b/programs/library-bridge/CMakeLists.txt index 5ceff47ee0c..0913c6e4a9a 100644 --- a/programs/library-bridge/CMakeLists.txt +++ b/programs/library-bridge/CMakeLists.txt @@ -1,6 +1,6 @@ set (CLICKHOUSE_LIBRARY_BRIDGE_SOURCES library-bridge.cpp - library-log.cpp + LibraryInterface.cpp LibraryBridge.cpp Handlers.cpp HandlerFactory.cpp diff --git a/src/Dictionaries/LibraryDictionarySourceExternal.cpp b/programs/library-bridge/LibraryInterface.cpp similarity index 97% rename from src/Dictionaries/LibraryDictionarySourceExternal.cpp rename to programs/library-bridge/LibraryInterface.cpp index 259d0a2846a..3975368c17f 100644 --- a/src/Dictionaries/LibraryDictionarySourceExternal.cpp +++ b/programs/library-bridge/LibraryInterface.cpp @@ -1,4 +1,5 @@ -#include "LibraryDictionarySourceExternal.h" +#include "LibraryInterface.h" + #include namespace diff --git a/src/Dictionaries/LibraryDictionarySourceExternal.h b/programs/library-bridge/LibraryInterface.h similarity index 100% rename from src/Dictionaries/LibraryDictionarySourceExternal.h rename to programs/library-bridge/LibraryInterface.h diff --git a/programs/library-bridge/LibraryUtils.h b/programs/library-bridge/LibraryUtils.h index 359d1de93e3..8ced8df1c48 100644 --- a/programs/library-bridge/LibraryUtils.h +++ b/programs/library-bridge/LibraryUtils.h @@ -1,11 +1,12 @@ #pragma once #include -#include #include #include #include +#include "LibraryInterface.h" + namespace DB { diff --git a/programs/library-bridge/library-log.cpp b/programs/library-bridge/library-log.cpp deleted file mode 100644 index 89fb31623b3..00000000000 --- a/programs/library-bridge/library-log.cpp +++ /dev/null @@ -1,66 +0,0 @@ -#include -#include - -namespace -{ -const char DICT_LOGGER_NAME[] = "LibraryDictionarySourceExternal"; -} - -namespace ClickHouseLibrary -{ - -std::string_view LIBRARY_CREATE_NEW_FUNC_NAME = "ClickHouseDictionary_v3_libNew"; -std::string_view LIBRARY_CLONE_FUNC_NAME = "ClickHouseDictionary_v3_libClone"; -std::string_view LIBRARY_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_libDelete"; - -std::string_view LIBRARY_DATA_NEW_FUNC_NAME = "ClickHouseDictionary_v3_dataNew"; -std::string_view LIBRARY_DATA_DELETE_FUNC_NAME = "ClickHouseDictionary_v3_dataDelete"; - -std::string_view LIBRARY_LOAD_ALL_FUNC_NAME = "ClickHouseDictionary_v3_loadAll"; -std::string_view LIBRARY_LOAD_IDS_FUNC_NAME = "ClickHouseDictionary_v3_loadIds"; -std::string_view LIBRARY_LOAD_KEYS_FUNC_NAME = "ClickHouseDictionary_v3_loadKeys"; - -std::string_view LIBRARY_IS_MODIFIED_FUNC_NAME = "ClickHouseDictionary_v3_isModified"; -std::string_view LIBRARY_SUPPORTS_SELECTIVE_LOAD_FUNC_NAME = "ClickHouseDictionary_v3_supportsSelectiveLoad"; - -void log(LogLevel level, CString msg) -{ - auto & logger = Poco::Logger::get(DICT_LOGGER_NAME); - switch (level) - { - case LogLevel::TRACE: - if (logger.trace()) - logger.trace(msg); - break; - case LogLevel::DEBUG: - if (logger.debug()) - logger.debug(msg); - break; - case LogLevel::INFORMATION: - if (logger.information()) - logger.information(msg); - break; - case LogLevel::NOTICE: - if (logger.notice()) - logger.notice(msg); - break; - case LogLevel::WARNING: - if (logger.warning()) - logger.warning(msg); - break; - case LogLevel::ERROR: - if (logger.error()) - logger.error(msg); - break; - case LogLevel::CRITICAL: - if (logger.critical()) - logger.critical(msg); - break; - case LogLevel::FATAL: - if (logger.fatal()) - logger.fatal(msg); 
- break; - } -} - -} diff --git a/programs/odbc-bridge/ODBCConnectionFactory.h b/programs/odbc-bridge/ODBCConnectionFactory.h index 958cf03cfce..56961ddb2fb 100644 --- a/programs/odbc-bridge/ODBCConnectionFactory.h +++ b/programs/odbc-bridge/ODBCConnectionFactory.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 8a96612721d..e3b4316079c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -173,18 +173,24 @@ int waitServersToFinish(std::vector & servers, size_t const int sleep_one_ms = 100; int sleep_current_ms = 0; int current_connections = 0; - while (sleep_current_ms < sleep_max_ms) + for (;;) { current_connections = 0; + for (auto & server : servers) { server.stop(); current_connections += server.currentConnections(); } + if (!current_connections) break; + sleep_current_ms += sleep_one_ms; - std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms)); + if (sleep_current_ms < sleep_max_ms) + std::this_thread::sleep_for(std::chrono::milliseconds(sleep_one_ms)); + else + break; } return current_connections; } @@ -951,6 +957,9 @@ int Server::main(const std::vector & /*args*/) global_context->shutdownKeeperStorageDispatcher(); } + /// Wait server pool to avoid use-after-free of destroyed context in the handlers + server_pool.joinAll(); + /** Explicitly destroy Context. It is more convenient than in destructor of Server, because logger is still available. * At this moment, no one could own shared part of Context. */ diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 939a48d949f..70d8109545b 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -551,6 +551,15 @@ void Connection::sendIgnoredPartUUIDs(const std::vector & uuids) out->next(); } + +void Connection::sendReadTaskResponse(const String & response) +{ + writeVarUInt(Protocol::Client::ReadTaskResponse, *out); + writeVarUInt(DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION, *out); + writeStringBinary(response, *out); + out->next(); +} + void Connection::sendPreparedData(ReadBuffer & input, size_t size, const String & name) { /// NOTE 'Throttler' is not used in this method (could use, but it's not important right now). @@ -807,6 +816,9 @@ Packet Connection::receivePacket() readVectorBinary(res.part_uuids, *in); return res; + case Protocol::Server::ReadTaskRequest: + return res; + default: /// In unknown state, disconnect - to not leave unsynchronised connection. 
disconnect(); @@ -907,13 +919,13 @@ void Connection::setDescription() } -std::unique_ptr Connection::receiveException() +std::unique_ptr Connection::receiveException() const { return std::make_unique(readException(*in, "Received from " + getDescription(), true /* remote */)); } -std::vector Connection::receiveMultistringMessage(UInt64 msg_type) +std::vector Connection::receiveMultistringMessage(UInt64 msg_type) const { size_t num = Protocol::Server::stringsInMessage(msg_type); std::vector strings(num); @@ -923,7 +935,7 @@ std::vector Connection::receiveMultistringMessage(UInt64 msg_type) } -Progress Connection::receiveProgress() +Progress Connection::receiveProgress() const { Progress progress; progress.read(*in, server_revision); @@ -931,7 +943,7 @@ Progress Connection::receiveProgress() } -BlockStreamProfileInfo Connection::receiveProfileInfo() +BlockStreamProfileInfo Connection::receiveProfileInfo() const { BlockStreamProfileInfo profile_info; profile_info.read(*in); diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 65ed956a60b..b4b0d36fb1f 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -159,6 +159,8 @@ public: /// Send parts' uuids to excluded them from query processing void sendIgnoredPartUUIDs(const std::vector & uuids); + void sendReadTaskResponse(const String &); + /// Send prepared block of data (serialized and, if need, compressed), that will be read from 'input'. /// You could pass size of serialized/compressed block. void sendPreparedData(ReadBuffer & input, size_t size, const String & name = ""); @@ -269,7 +271,7 @@ private: class LoggerWrapper { public: - LoggerWrapper(Connection & parent_) + explicit LoggerWrapper(Connection & parent_) : log(nullptr), parent(parent_) { } @@ -304,10 +306,10 @@ private: Block receiveLogData(); Block receiveDataImpl(BlockInputStreamPtr & stream); - std::vector receiveMultistringMessage(UInt64 msg_type); - std::unique_ptr receiveException(); - Progress receiveProgress(); - BlockStreamProfileInfo receiveProfileInfo(); + std::vector receiveMultistringMessage(UInt64 msg_type) const; + std::unique_ptr receiveException() const; + Progress receiveProgress() const; + BlockStreamProfileInfo receiveProfileInfo() const; void initInputBuffers(); void initBlockInput(); diff --git a/src/Client/ConnectionPool.h b/src/Client/ConnectionPool.h index 9e1d5f78b03..bf73e9756d2 100644 --- a/src/Client/ConnectionPool.h +++ b/src/Client/ConnectionPool.h @@ -26,7 +26,7 @@ public: using Entry = PoolBase::Entry; public: - virtual ~IConnectionPool() {} + virtual ~IConnectionPool() = default; /// Selects the connection to work. /// If force_connected is false, the client must manually ensure that returned connection is good. diff --git a/src/Client/HedgedConnections.h b/src/Client/HedgedConnections.h index f1675108349..9f7d8837536 100644 --- a/src/Client/HedgedConnections.h +++ b/src/Client/HedgedConnections.h @@ -14,6 +14,12 @@ namespace DB { +namespace ErrorCodes +{ + extern const int LOGICAL_ERROR; +} + + /** To receive data from multiple replicas (connections) from one shard asynchronously. 
* The principe of Hedged Connections is used to reduce tail latency: * if we don't receive data from replica and there is no progress in query execution @@ -84,6 +90,11 @@ public: const ClientInfo & client_info, bool with_pending_data) override; + void sendReadTaskResponse(const String &) override + { + throw Exception("sendReadTaskResponse in not supported with HedgedConnections", ErrorCodes::LOGICAL_ERROR); + } + Packet receivePacket() override; Packet receivePacketUnlocked(AsyncCallback async_callback) override; diff --git a/src/Client/IConnections.h b/src/Client/IConnections.h index 38730922456..d251a5fb3ab 100644 --- a/src/Client/IConnections.h +++ b/src/Client/IConnections.h @@ -24,6 +24,8 @@ public: const ClientInfo & client_info, bool with_pending_data) = 0; + virtual void sendReadTaskResponse(const String &) = 0; + /// Get packet from any replica. virtual Packet receivePacket() = 0; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index 8b2b7c49f26..2992e991df7 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -155,6 +155,15 @@ void MultiplexedConnections::sendIgnoredPartUUIDs(const std::vector & uuid } } + +void MultiplexedConnections::sendReadTaskResponse(const String & response) +{ + std::lock_guard lock(cancel_mutex); + if (cancelled) + return; + current_connection->sendReadTaskResponse(response); +} + Packet MultiplexedConnections::receivePacket() { std::lock_guard lock(cancel_mutex); @@ -210,6 +219,7 @@ Packet MultiplexedConnections::drain() switch (packet.type) { + case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: case Protocol::Server::Progress: @@ -273,6 +283,7 @@ Packet MultiplexedConnections::receivePacketUnlocked(AsyncCallback async_callbac switch (packet.type) { + case Protocol::Server::ReadTaskRequest: case Protocol::Server::PartUUIDs: case Protocol::Server::Data: case Protocol::Server::Progress: diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index c04b06e525e..f642db1c4cd 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -39,6 +39,8 @@ public: const ClientInfo & client_info, bool with_pending_data) override; + void sendReadTaskResponse(const String &) override; + Packet receivePacket() override; void disconnect() override; diff --git a/src/Common/ConcurrentBoundedQueue.h b/src/Common/ConcurrentBoundedQueue.h index 7bc7f362095..cb29efc3349 100644 --- a/src/Common/ConcurrentBoundedQueue.h +++ b/src/Common/ConcurrentBoundedQueue.h @@ -6,7 +6,7 @@ #include #include -#include +#include /** A very simple thread-safe queue of limited size. * If you try to pop an item from an empty queue, the thread is blocked until the queue becomes nonempty. diff --git a/src/Common/PoolBase.h b/src/Common/PoolBase.h index 43f4fbff9fe..6fc5aee26dd 100644 --- a/src/Common/PoolBase.h +++ b/src/Common/PoolBase.h @@ -51,7 +51,7 @@ private: */ struct PoolEntryHelper { - PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; } + explicit PoolEntryHelper(PooledObject & data_) : data(data_) { data.in_use = true; } ~PoolEntryHelper() { std::unique_lock lock(data.pool.mutex); @@ -69,7 +69,7 @@ public: public: friend class PoolBase; - Entry() {} /// For deferred initialization. + Entry() = default; /// For deferred initialization. /** The `Entry` object protects the resource from being used by another thread. 
* The following methods are forbidden for `rvalue`, so you can not write a similar to @@ -99,10 +99,10 @@ public: private: std::shared_ptr data; - Entry(PooledObject & object) : data(std::make_shared(object)) {} + explicit Entry(PooledObject & object) : data(std::make_shared(object)) {} }; - virtual ~PoolBase() {} + virtual ~PoolBase() = default; /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */ Entry get(Poco::Timespan::TimeDiff timeout) diff --git a/src/Core/Defines.h b/src/Core/Defines.h index e7c1c86a23e..668a60f9be8 100644 --- a/src/Core/Defines.h +++ b/src/Core/Defines.h @@ -74,6 +74,9 @@ /// Minimum revision supporting OpenTelemetry #define DBMS_MIN_REVISION_WITH_OPENTELEMETRY 54442 + +#define DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION 1 + /// Minimum revision supporting interserver secret. #define DBMS_MIN_REVISION_WITH_INTERSERVER_SECRET 54441 diff --git a/src/Core/Protocol.h b/src/Core/Protocol.h index df51a0cb61a..92e780104b5 100644 --- a/src/Core/Protocol.h +++ b/src/Core/Protocol.h @@ -76,8 +76,10 @@ namespace Protocol Log = 10, /// System logs of the query execution TableColumns = 11, /// Columns' description for default values calculation PartUUIDs = 12, /// List of unique parts ids. - - MAX = PartUUIDs, + ReadTaskRequest = 13, /// String (UUID) describes a request for which next task is needed + /// This is such an inverted logic, where server sends requests + /// And client returns back response + MAX = ReadTaskRequest, }; /// NOTE: If the type of packet argument would be Enum, the comparison packet >= 0 && packet < 10 @@ -100,6 +102,7 @@ namespace Protocol "Log", "TableColumns", "PartUUIDs", + "ReadTaskRequest" }; return packet <= MAX ? data[packet] @@ -135,8 +138,9 @@ namespace Protocol KeepAlive = 6, /// Keep the connection alive Scalar = 7, /// A block of data (compressed or not). IgnoredPartUUIDs = 8, /// List of unique parts ids to exclude from query processing + ReadTaskResponse = 9, /// TODO: - MAX = IgnoredPartUUIDs, + MAX = ReadTaskResponse, }; inline const char * toString(UInt64 packet) @@ -151,6 +155,7 @@ namespace Protocol "KeepAlive", "Scalar", "IgnoredPartUUIDs", + "ReadTaskResponse", }; return packet <= MAX ? 
data[packet] diff --git a/src/DataStreams/InternalTextLogsRowOutputStream.h b/src/DataStreams/InternalTextLogsRowOutputStream.h index 0f333f70d18..8ade76b34a7 100644 --- a/src/DataStreams/InternalTextLogsRowOutputStream.h +++ b/src/DataStreams/InternalTextLogsRowOutputStream.h @@ -8,7 +8,7 @@ namespace DB /// Prints internal server logs /// Input blocks have to have the same structure as SystemLogsQueue::getSampleBlock() -/// NOTE: IRowOutputStream does not suite well for this case +/// NOTE: IRowOutputFormat does not suite well for this case class InternalTextLogsRowOutputStream : public IBlockOutputStream { public: diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index 4aa659854b9..0961dd41458 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -22,20 +22,18 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int UNKNOWN_PACKET_FROM_SERVER; extern const int DUPLICATED_PART_UUIDS; } RemoteQueryExecutor::RemoteQueryExecutor( Connection & connection, - const String & query_, - const Block & header_, - ContextPtr context_, - ThrottlerPtr throttler, - const Scalars & scalars_, - const Tables & external_tables_, - QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) + const String & query_, const Block & header_, ContextPtr context_, + ThrottlerPtr throttler, const Scalars & scalars_, const Tables & external_tables_, + QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + : header(header_), query(query_), context(context_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_) { create_connections = [this, &connection, throttler]() { @@ -45,14 +43,11 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( std::vector && connections_, - const String & query_, - const Block & header_, - ContextPtr context_, - const ThrottlerPtr & throttler, - const Scalars & scalars_, - const Tables & external_tables_, - QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) + const String & query_, const Block & header_, ContextPtr context_, + const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, + QueryProcessingStage::Enum stage_, std::shared_ptr task_iterator_) + : header(header_), query(query_), context(context_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_) { create_connections = [this, connections_, throttler]() mutable { return std::make_unique(std::move(connections_), context->getSettingsRef(), throttler); @@ -61,14 +56,11 @@ RemoteQueryExecutor::RemoteQueryExecutor( RemoteQueryExecutor::RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, - const String & query_, - const Block & header_, - ContextPtr context_, - const ThrottlerPtr & throttler, - const Scalars & scalars_, - const Tables & external_tables_, - QueryProcessingStage::Enum stage_) - : header(header_), query(query_), context(context_), scalars(scalars_), external_tables(external_tables_), stage(stage_) + const String & query_, const Block & header_, ContextPtr context_, + const ThrottlerPtr & throttler, const Scalars & scalars_, const Tables & external_tables_, + QueryProcessingStage::Enum stage_, std::shared_ptr 
task_iterator_) + : header(header_), query(query_), context(context_) + , scalars(scalars_), external_tables(external_tables_), stage(stage_), task_iterator(task_iterator_) { create_connections = [this, pool, throttler]()->std::unique_ptr { @@ -307,6 +299,9 @@ std::optional RemoteQueryExecutor::processPacket(Packet packet) { switch (packet.type) { + case Protocol::Server::ReadTaskRequest: + processReadTaskRequest(); + break; case Protocol::Server::PartUUIDs: if (!setPartUUIDs(packet.part_uuids)) got_duplicated_part_uuids = true; @@ -385,6 +380,14 @@ bool RemoteQueryExecutor::setPartUUIDs(const std::vector & uuids) return true; } +void RemoteQueryExecutor::processReadTaskRequest() +{ + if (!task_iterator) + throw Exception("Distributed task iterator is not initialized", ErrorCodes::LOGICAL_ERROR); + auto response = (*task_iterator)(); + connections->sendReadTaskResponse(response); +} + void RemoteQueryExecutor::finish(std::unique_ptr * read_context) { /** If one of: diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index 45a633230b7..a9cffd9cf97 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -26,6 +26,9 @@ using ProfileInfoCallback = std::function; + /// This class allows one to launch queries on remote replicas of one shard and get results class RemoteQueryExecutor { @@ -37,21 +40,21 @@ public: Connection & connection, const String & query_, const Block & header_, ContextPtr context_, ThrottlerPtr throttler_ = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); /// Accepts several connections already taken from pool. RemoteQueryExecutor( std::vector && connections_, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); /// Takes a pool and gets one or several connections from it. RemoteQueryExecutor( const ConnectionPoolWithFailoverPtr & pool, const String & query_, const Block & header_, ContextPtr context_, const ThrottlerPtr & throttler = nullptr, const Scalars & scalars_ = Scalars(), const Tables & external_tables_ = Tables(), - QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete); + QueryProcessingStage::Enum stage_ = QueryProcessingStage::Complete, std::shared_ptr task_iterator_ = {}); ~RemoteQueryExecutor(); @@ -119,6 +122,8 @@ private: /// Temporary tables needed to be sent to remote servers Tables external_tables; QueryProcessingStage::Enum stage; + /// Initiator identifier for distributed task processing + std::shared_ptr task_iterator; /// Streams for reading from temporary tables and following sending of data /// to remote servers for GLOBAL-subqueries @@ -179,6 +184,8 @@ private: /// Return true if duplicates found. bool setPartUUIDs(const std::vector & uuids); + void processReadTaskRequest(); + /// Cancell query and restart it with info about duplicated UUIDs /// only for `allow_experimental_query_deduplication`. 
std::variant restartQueryWithoutDuplicatedUUIDs(std::unique_ptr * read_context = nullptr); diff --git a/src/Databases/MySQL/MaterializeMetadata.cpp b/src/Databases/MySQL/MaterializeMetadata.cpp index a54d378f813..f5e648903ed 100644 --- a/src/Databases/MySQL/MaterializeMetadata.cpp +++ b/src/Databases/MySQL/MaterializeMetadata.cpp @@ -52,7 +52,7 @@ static std::unordered_map fetchTablesCreateQuery( static std::vector fetchTablesInDB(const mysqlxx::PoolWithFailover::Entry & connection, const std::string & database) { Block header{{std::make_shared(), "table_name"}}; - String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_SCHEMA = " + quoteString(database); + String query = "SELECT TABLE_NAME AS table_name FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE != 'VIEW' AND TABLE_SCHEMA = " + quoteString(database); std::vector tables_in_db; MySQLBlockInputStream input(connection, query, header, DEFAULT_BLOCK_SIZE); diff --git a/src/Dictionaries/ExecutablePoolDictionarySource.h b/src/Dictionaries/ExecutablePoolDictionarySource.h index 7f24e56257a..7a0b8681a21 100644 --- a/src/Dictionaries/ExecutablePoolDictionarySource.h +++ b/src/Dictionaries/ExecutablePoolDictionarySource.h @@ -1,7 +1,8 @@ #pragma once +#include + #include -#include #include #include "IDictionarySource.h" diff --git a/src/Dictionaries/LibraryDictionarySource.cpp b/src/Dictionaries/LibraryDictionarySource.cpp index f09869b5a30..a971ba4b1be 100644 --- a/src/Dictionaries/LibraryDictionarySource.cpp +++ b/src/Dictionaries/LibraryDictionarySource.cpp @@ -10,7 +10,6 @@ #include "DictionarySourceFactory.h" #include "DictionarySourceHelpers.h" #include "DictionaryStructure.h" -#include "LibraryDictionarySourceExternal.h" #include "registerDictionaries.h" #include #include diff --git a/src/Formats/IRowInputStream.cpp b/src/Formats/IRowInputStream.cpp deleted file mode 100644 index f3603982de5..00000000000 --- a/src/Formats/IRowInputStream.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include -#include - - -namespace DB -{ - -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - -void IRowInputStream::syncAfterError() -{ - throw Exception("Method syncAfterError is not implemented for input format", ErrorCodes::NOT_IMPLEMENTED); -} - -} diff --git a/src/Formats/IRowInputStream.h b/src/Formats/IRowInputStream.h deleted file mode 100644 index e0b8a574f17..00000000000 --- a/src/Formats/IRowInputStream.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once - -#include -#include -#include - -#include - - -namespace DB -{ - -/// Contains extra information about read data. -struct RowReadExtension -{ - /// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise. - /// It's used to attach defaults for partially filled rows. - /// Can be empty, this means that all columns are read. - std::vector read_columns; -}; - -/** Interface of stream, that allows to read data by rows. - */ -class IRowInputStream : private boost::noncopyable -{ -public: - /** Read next row and append it to the columns. - * If no more rows - return false. - */ - virtual bool read(MutableColumns & columns, RowReadExtension & extra) = 0; - - virtual void readPrefix() {} /// delimiter before begin of result - virtual void readSuffix() {} /// delimiter after end of result - - /// Skip data until next row. - /// This is intended for text streams, that allow skipping of errors. - /// By default - throws not implemented exception. 
- virtual bool allowSyncAfterError() const { return false; } - virtual void syncAfterError(); - - /// In case of parse error, try to roll back and parse last one or two rows very carefully - /// and collect as much as possible diagnostic information about error. - /// If not implemented, returns empty string. - virtual std::string getDiagnosticInfo() { return {}; } - - virtual ~IRowInputStream() {} -}; - -using RowInputStreamPtr = std::shared_ptr; - -} diff --git a/src/Formats/IRowOutputStream.cpp b/src/Formats/IRowOutputStream.cpp deleted file mode 100644 index f84d810b8e8..00000000000 --- a/src/Formats/IRowOutputStream.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include -#include -#include - - -namespace DB -{ -namespace ErrorCodes -{ - extern const int NOT_IMPLEMENTED; -} - - -void IRowOutputStream::write(const Block & block, size_t row_num) -{ - size_t columns = block.columns(); - - writeRowStartDelimiter(); - - for (size_t i = 0; i < columns; ++i) - { - if (i != 0) - writeFieldDelimiter(); - - const auto & col = block.getByPosition(i); - writeField(*col.column, *col.type, row_num); - } - - writeRowEndDelimiter(); -} - -void IRowOutputStream::writeField(const IColumn &, const IDataType &, size_t) -{ - throw Exception("Method writeField is not implemented for output format", ErrorCodes::NOT_IMPLEMENTED); -} - -} diff --git a/src/Formats/IRowOutputStream.h b/src/Formats/IRowOutputStream.h deleted file mode 100644 index 7cf6251cd0d..00000000000 --- a/src/Formats/IRowOutputStream.h +++ /dev/null @@ -1,63 +0,0 @@ -#pragma once - -#include -#include -#include -#include - - -namespace DB -{ - -class Block; -class IColumn; -class IDataType; -struct Progress; - - -/** Interface of stream for writing data by rows (for example: for output to terminal). - */ -class IRowOutputStream : private boost::noncopyable -{ -public: - - /** Write a row. - * Default implementation calls methods to write single values and delimiters - * (except delimiter between rows (writeRowBetweenDelimiter())). - */ - virtual void write(const Block & block, size_t row_num); - - /** Write single value. */ - virtual void writeField(const IColumn & column, const IDataType & type, size_t row_num); - - /** Write delimiter. */ - virtual void writeFieldDelimiter() {} /// delimiter between values - virtual void writeRowStartDelimiter() {} /// delimiter before each row - virtual void writeRowEndDelimiter() {} /// delimiter after each row - virtual void writeRowBetweenDelimiter() {} /// delimiter between rows - virtual void writePrefix() {} /// delimiter before resultset - virtual void writeSuffix() {} /// delimiter after resultset - - /** Flush output buffers if any. */ - virtual void flush() {} - - /** Methods to set additional information for output in formats, that support it. - */ - virtual void setRowsBeforeLimit(size_t /*rows_before_limit*/) {} - virtual void setTotals(const Block & /*totals*/) {} - virtual void setExtremes(const Block & /*extremes*/) {} - - /** Notify about progress. Method could be called from different threads. - * Passed value are delta, that must be summarized. - */ - virtual void onProgress(const Progress & /*progress*/) {} - - /** Content-Type to set when sending HTTP response. 
*/ - virtual String getContentType() const { return "text/plain; charset=UTF-8"; } - - virtual ~IRowOutputStream() {} -}; - -using RowOutputStreamPtr = std::shared_ptr; - -} diff --git a/src/Formats/ya.make b/src/Formats/ya.make index 8fe938be125..476e13f9a4f 100644 --- a/src/Formats/ya.make +++ b/src/Formats/ya.make @@ -13,8 +13,6 @@ PEERDIR( SRCS( FormatFactory.cpp FormatSchemaInfo.cpp - IRowInputStream.cpp - IRowOutputStream.cpp JSONEachRowUtils.cpp MySQLBlockInputStream.cpp NativeFormat.cpp diff --git a/src/Functions/GatherUtils/Algorithms.h b/src/Functions/GatherUtils/Algorithms.h index e174261d76e..1a962089d0c 100644 --- a/src/Functions/GatherUtils/Algorithms.h +++ b/src/Functions/GatherUtils/Algorithms.h @@ -82,7 +82,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericArraySlice & slice, GenericArr sink.current_offset += slice.size; } else - throw Exception("Function writeSlice expect same column types for GenericArraySlice and GenericArraySink.", + throw Exception("Function writeSlice expects same column types for GenericArraySlice and GenericArraySink.", ErrorCodes::LOGICAL_ERROR); } @@ -162,7 +162,7 @@ inline ALWAYS_INLINE void writeSlice(const GenericValueSlice & slice, GenericArr ++sink.current_offset; } else - throw Exception("Function writeSlice expect same column types for GenericValueSlice and GenericArraySink.", + throw Exception("Function writeSlice expects same column types for GenericValueSlice and GenericArraySink.", ErrorCodes::LOGICAL_ERROR); } @@ -609,7 +609,7 @@ bool sliceHas(const GenericArraySlice & first, const GenericArraySlice & second) { /// Generic arrays should have the same type in order to use column.compareAt(...) if (!first.elements->structureEquals(*second.elements)) - return false; + throw Exception("Function sliceHas expects same column types for slices.", ErrorCodes::LOGICAL_ERROR); auto impl = sliceHasImpl; return impl(first, second, nullptr, nullptr); @@ -670,7 +670,7 @@ void NO_INLINE arrayAllAny(FirstSource && first, SecondSource && second, ColumnU auto & data = result.getData(); for (auto row : ext::range(0, size)) { - data[row] = static_cast(sliceHas(first.getWhole(), second.getWhole()) ? 
1 : 0); + data[row] = static_cast(sliceHas(first.getWhole(), second.getWhole())); first.next(); second.next(); } diff --git a/src/Functions/IFunction.cpp b/src/Functions/IFunction.cpp index e4a1adb8525..0a8e8f426b0 100644 --- a/src/Functions/IFunction.cpp +++ b/src/Functions/IFunction.cpp @@ -477,7 +477,7 @@ DataTypePtr FunctionOverloadResolverAdaptor::getReturnTypeDefaultImplementationF } if (null_presence.has_nullable) { - Block nested_columns = createBlockWithNestedColumns(arguments); + auto nested_columns = Block(createBlockWithNestedColumns(arguments)); auto return_type = getter(ColumnsWithTypeAndName(nested_columns.begin(), nested_columns.end())); return makeNullable(return_type); } diff --git a/src/Functions/array/arrayIndex.h b/src/Functions/array/arrayIndex.h index fb9496e634f..1b58c9418bf 100644 --- a/src/Functions/array/arrayIndex.h +++ b/src/Functions/array/arrayIndex.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -13,9 +14,9 @@ #include #include #include -#include "Columns/ColumnLowCardinality.h" -#include "DataTypes/DataTypeLowCardinality.h" -#include "Interpreters/castColumn.h" +#include +#include +#include namespace DB @@ -373,11 +374,10 @@ public: ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!arguments[1]->onlyNull() && !allowArguments(array_type->getNestedType(), arguments[1])) - throw Exception("Types of array and 2nd argument of function \"" - + getName() + "\" must be identical up to nullability, cardinality, " - "numeric types, or Enum and numeric type. Passed: " - + arguments[0]->getName() + " and " + arguments[1]->getName() + ".", - ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Types of array and 2nd argument of function `{}` must be identical up to nullability, cardinality, " + "numeric types, or Enum and numeric type. Passed: {} and {}.", + getName(), arguments[0]->getName(), arguments[1]->getName()); return std::make_shared>(); } @@ -494,86 +494,13 @@ private: inline void moveResult() { result_column = std::move(result); } }; - static inline bool allowNested(const DataTypePtr & left, const DataTypePtr & right) - { - return ((isNativeNumber(left) || isEnum(left)) && isNativeNumber(right)) || left->equals(*right); - } - static inline bool allowArguments(const DataTypePtr & array_inner_type, const DataTypePtr & arg) { - if (allowNested(array_inner_type, arg)) - return true; + auto inner_type_decayed = removeNullable(removeLowCardinality(array_inner_type)); + auto arg_decayed = removeNullable(removeLowCardinality(arg)); - /// Nullable - - const bool array_is_nullable = array_inner_type->isNullable(); - const bool arg_is_nullable = arg->isNullable(); - - const DataTypePtr arg_or_arg_nullable_nested = arg_is_nullable - ? checkAndGetDataType(arg.get())->getNestedType() - : arg; - - if (array_is_nullable) // comparing Array(Nullable(T)) elem and U - { - const DataTypePtr array_nullable_nested = - checkAndGetDataType(array_inner_type.get())->getNestedType(); - - // We also allow Nullable(T) and LC(U) if the Nullable(T) and U are allowed, - // the LC(U) will be converted to U. 
- return allowNested( - array_nullable_nested, - recursiveRemoveLowCardinality(arg_or_arg_nullable_nested)); - } - else if (arg_is_nullable) // cannot compare Array(T) elem (namely, T) and Nullable(T) - return false; - - /// LowCardinality - - const auto * const array_lc_ptr = checkAndGetDataType(array_inner_type.get()); - const auto * const arg_lc_ptr = checkAndGetDataType(arg.get()); - - const DataTypePtr array_lc_inner_type = recursiveRemoveLowCardinality(array_inner_type); - const DataTypePtr arg_lc_inner_type = recursiveRemoveLowCardinality(arg); - - const bool array_is_lc = nullptr != array_lc_ptr; - const bool arg_is_lc = nullptr != arg_lc_ptr; - - const bool array_lc_inner_type_is_nullable = array_is_lc && array_lc_inner_type->isNullable(); - const bool arg_lc_inner_type_is_nullable = arg_is_lc && arg_lc_inner_type->isNullable(); - - if (array_is_lc) // comparing LC(T) and U - { - const DataTypePtr array_lc_nested_or_lc_nullable_nested = array_lc_inner_type_is_nullable - ? checkAndGetDataType(array_lc_inner_type.get())->getNestedType() - : array_lc_inner_type; - - if (arg_is_lc) // comparing LC(T) and LC(U) - { - const DataTypePtr arg_lc_nested_or_lc_nullable_nested = arg_lc_inner_type_is_nullable - ? checkAndGetDataType(arg_lc_inner_type.get())->getNestedType() - : arg_lc_inner_type; - - return allowNested( - array_lc_nested_or_lc_nullable_nested, - arg_lc_nested_or_lc_nullable_nested); - } - else if (arg_is_nullable) // Comparing LC(T) and Nullable(U) - { - if (!array_lc_inner_type_is_nullable) - return false; // Can't compare Array(LC(U)) elem and Nullable(T); - - return allowNested( - array_lc_nested_or_lc_nullable_nested, - arg_or_arg_nullable_nested); - } - else // Comparing LC(T) and U (U neither Nullable nor LC) - return allowNested(array_lc_nested_or_lc_nullable_nested, arg); - } - - if (arg_is_lc) // Allow T and LC(U) if U and T are allowed (the low cardinality column will be converted). - return allowNested(array_inner_type, arg_lc_inner_type); - - return false; + return ((isNativeNumber(inner_type_decayed) || isEnum(inner_type_decayed)) && isNativeNumber(arg_decayed)) + || getLeastSupertype({inner_type_decayed, arg_decayed}); } #define INTEGRAL_TPL_PACK UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64 @@ -1044,33 +971,38 @@ private: if (!col) return nullptr; - const IColumn & col_nested = col->getData(); + DataTypePtr array_elements_type = assert_cast(*arguments[0].type).getNestedType(); + const DataTypePtr & index_type = arguments[1].type; + + DataTypePtr common_type = getLeastSupertype({array_elements_type, index_type}); + + ColumnPtr col_nested = castColumn({ col->getDataPtr(), array_elements_type, "" }, common_type); const ColumnPtr right_ptr = arguments[1].column->convertToFullColumnIfLowCardinality(); - const IColumn & item_arg = *right_ptr.get(); + ColumnPtr item_arg = castColumn({ right_ptr, removeLowCardinality(index_type), "" }, common_type); auto col_res = ResultColumnType::create(); auto [null_map_data, null_map_item] = getNullMaps(arguments); - if (item_arg.onlyNull()) + if (item_arg->onlyNull()) Impl::Null::process( col->getOffsets(), col_res->getData(), null_map_data); - else if (isColumnConst(item_arg)) + else if (isColumnConst(*item_arg)) Impl::Main::vector( - col_nested, + *col_nested, col->getOffsets(), - typeid_cast(item_arg).getDataColumn(), + typeid_cast(*item_arg).getDataColumn(), col_res->getData(), /// TODO This is wrong. 
null_map_data, nullptr); else Impl::Main::vector( - col_nested, + *col_nested, col->getOffsets(), - item_arg, + *item_arg, col_res->getData(), null_map_data, null_map_item); diff --git a/src/Functions/array/hasAllAny.h b/src/Functions/array/hasAllAny.h index b35c5996652..1ad1df14020 100644 --- a/src/Functions/array/hasAllAny.h +++ b/src/Functions/array/hasAllAny.h @@ -13,6 +13,7 @@ #include #include #include +#include namespace DB @@ -51,41 +52,13 @@ public: ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override { - size_t rows = input_rows_count; size_t num_args = arguments.size(); - DataTypePtr common_type = nullptr; - auto commonType = [&common_type, &arguments]() - { - if (common_type == nullptr) - { - DataTypes data_types; - data_types.reserve(arguments.size()); - for (const auto & argument : arguments) - data_types.push_back(argument.type); - - common_type = getLeastSupertype(data_types); - } - - return common_type; - }; + DataTypePtr common_type = getLeastSupertype(ext::map(arguments, [](auto & arg) { return arg.type; })); Columns preprocessed_columns(num_args); - for (size_t i = 0; i < num_args; ++i) - { - const auto & argument = arguments[i]; - ColumnPtr preprocessed_column = argument.column; - - const auto * argument_type = typeid_cast(argument.type.get()); - const auto & nested_type = argument_type->getNestedType(); - - /// Converts Array(Nothing) or Array(Nullable(Nothing) to common type. Example: hasAll([Null, 1], [Null]) -> 1 - if (typeid_cast(removeNullable(nested_type).get())) - preprocessed_column = castColumn(argument, commonType()); - - preprocessed_columns[i] = std::move(preprocessed_column); - } + preprocessed_columns[i] = castColumn(arguments[i], common_type); std::vector> sources; @@ -100,12 +73,12 @@ public: } if (const auto * argument_column_array = typeid_cast(argument_column.get())) - sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, rows)); + sources.emplace_back(GatherUtils::createArraySource(*argument_column_array, is_const, input_rows_count)); else throw Exception{"Arguments for function " + getName() + " must be arrays.", ErrorCodes::LOGICAL_ERROR}; } - auto result_column = ColumnUInt8::create(rows); + auto result_column = ColumnUInt8::create(input_rows_count); auto * result_column_ptr = typeid_cast(result_column.get()); GatherUtils::sliceHas(*sources[0], *sources[1], search_type, *result_column_ptr); diff --git a/src/Functions/extractAllGroups.h b/src/Functions/extractAllGroups.h index c77d497cf17..289cff6ca4a 100644 --- a/src/Functions/extractAllGroups.h +++ b/src/Functions/extractAllGroups.h @@ -172,11 +172,12 @@ public: for (size_t group = 1; group <= groups_count; ++group) all_matches.push_back(matched_groups[group]); - /// Additional limit to fail fast on supposedly incorrect usage. - static constexpr size_t MAX_GROUPS_PER_ROW = 1000000; - - if (all_matches.size() > MAX_GROUPS_PER_ROW) - throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, "Too large array size in the result of function {}", getName()); + /// Additional limit to fail fast on supposedly incorrect usage, arbitrary value. 
+ static constexpr size_t MAX_MATCHES_PER_ROW = 1000; + if (matches_per_row > MAX_MATCHES_PER_ROW) + throw Exception(ErrorCodes::TOO_LARGE_ARRAY_SIZE, + "Too many matches per row (> {}) in the result of function {}", + MAX_MATCHES_PER_ROW, getName()); pos = matched_groups[0].data() + std::max(1, matched_groups[0].size()); diff --git a/src/IO/ReadBufferFromIStream.h b/src/IO/ReadBufferFromIStream.h index 7f804783ba2..67cc60c053f 100644 --- a/src/IO/ReadBufferFromIStream.h +++ b/src/IO/ReadBufferFromIStream.h @@ -17,7 +17,7 @@ private: bool nextImpl() override; public: - ReadBufferFromIStream(std::istream & istr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE); + explicit ReadBufferFromIStream(std::istream & istr_, size_t size = DBMS_DEFAULT_BUFFER_SIZE); }; } diff --git a/src/Interpreters/ClientInfo.h b/src/Interpreters/ClientInfo.h index 21aae45bfab..60abb0dd671 100644 --- a/src/Interpreters/ClientInfo.h +++ b/src/Interpreters/ClientInfo.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 187edf8843f..8c0d8f9d48b 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -373,7 +373,7 @@ struct ContextSharedPart std::atomic_size_t max_partition_size_to_drop = 50000000000lu; /// Protects MergeTree partitions from accidental DROP (50GB by default) String format_schema_path; /// Path to a directory that contains schema files used by input formats. ActionLocksManagerPtr action_locks_manager; /// Set of storages' action lockers - std::optional system_logs; /// Used to log queries and operations on parts + std::unique_ptr system_logs; /// Used to log queries and operations on parts std::optional storage_s3_settings; /// Settings of S3 storage RemoteHostFilter remote_host_filter; /// Allowed URL from config.xml @@ -442,24 +442,32 @@ struct ContextSharedPart DatabaseCatalog::shutdown(); - /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). - /// TODO: Get rid of this. + std::unique_ptr delete_system_logs; + { + auto lock = std::lock_guard(mutex); - system_logs.reset(); - embedded_dictionaries.reset(); - external_dictionaries_loader.reset(); - models_repository_guard.reset(); - external_models_loader.reset(); - buffer_flush_schedule_pool.reset(); - schedule_pool.reset(); - distributed_schedule_pool.reset(); - message_broker_schedule_pool.reset(); - ddl_worker.reset(); + /// Preemptive destruction is important, because these objects may have a refcount to ContextShared (cyclic reference). + /// TODO: Get rid of this. 
- /// Stop trace collector if any - trace_collector.reset(); - /// Stop zookeeper connection - zookeeper.reset(); + delete_system_logs = std::move(system_logs); + embedded_dictionaries.reset(); + external_dictionaries_loader.reset(); + models_repository_guard.reset(); + external_models_loader.reset(); + buffer_flush_schedule_pool.reset(); + schedule_pool.reset(); + distributed_schedule_pool.reset(); + message_broker_schedule_pool.reset(); + ddl_worker.reset(); + + /// Stop trace collector if any + trace_collector.reset(); + /// Stop zookeeper connection + zookeeper.reset(); + } + + /// Can be removed w/o context lock + delete_system_logs.reset(); } bool hasTraceCollector() const @@ -1910,7 +1918,7 @@ void Context::setCluster(const String & cluster_name, const std::shared_ptrsystem_logs.emplace(getGlobalContext(), getConfigRef()); + shared->system_logs = std::make_unique(getGlobalContext(), getConfigRef()); } void Context::initializeTraceCollector() @@ -2615,6 +2623,20 @@ PartUUIDsPtr Context::getPartUUIDs() return part_uuids; } + +ReadTaskCallback Context::getReadTaskCallback() const +{ + if (!next_task_callback.has_value()) + throw Exception(fmt::format("Next task callback is not set for query {}", getInitialQueryId()), ErrorCodes::LOGICAL_ERROR); + return next_task_callback.value(); +} + + +void Context::setReadTaskCallback(ReadTaskCallback && callback) +{ + next_task_callback = callback; +} + PartUUIDsPtr Context::getIgnoredPartUUIDs() { auto lock = getLock(); diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index b5912738833..680ee7c779f 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -128,6 +128,9 @@ using InputInitializer = std::function; /// Callback for reading blocks of data from client for function input() using InputBlocksReader = std::function; +/// Used in distributed task processing +using ReadTaskCallback = std::function; + /// An empty interface for an arbitrary object that may be attached by a shared pointer /// to query context, when using ClickHouse as a library. struct IHostContext @@ -189,6 +192,9 @@ private: TemporaryTablesMapping external_tables_mapping; Scalars scalars; + /// Fields for distributed s3 function + std::optional next_task_callback; + /// Record entities accessed by current query, and store this information in system.query_log. 
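
The shutdown() rewrite above follows a common pattern: while the context mutex is held, the system_logs pointer is moved into a local variable and the other members are reset; the moved-out object is then destroyed only after the lock is released, so its destructor cannot re-enter the context under the same mutex. A small, hedged Python sketch of that move-out-then-destroy-outside-the-lock pattern follows; the names are illustrative, not ClickHouse's.

import threading

class SharedPart:
    """Toy stand-in for a shared context: destroy heavy members outside the mutex."""

    def __init__(self):
        self.mutex = threading.Lock()
        self.system_logs = object()   # placeholder for the real logs object

    def shutdown(self):
        delete_system_logs = None
        with self.mutex:
            # Move the member out while the lock is held ...
            delete_system_logs, self.system_logs = self.system_logs, None
        # ... and let it be destroyed after the lock is released,
        # so its teardown can safely call back into this object.
        del delete_system_logs

SharedPart().shutdown()
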
struct QueryAccessInfo { @@ -769,6 +775,10 @@ public: PartUUIDsPtr getPartUUIDs(); PartUUIDsPtr getIgnoredPartUUIDs(); + + ReadTaskCallback getReadTaskCallback() const; + void setReadTaskCallback(ReadTaskCallback && callback); + private: std::unique_lock getLock() const; diff --git a/src/Interpreters/DatabaseAndTableWithAlias.h b/src/Interpreters/DatabaseAndTableWithAlias.h index d2b1d655de7..a4773ec435b 100644 --- a/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/src/Interpreters/DatabaseAndTableWithAlias.h @@ -26,9 +26,9 @@ struct DatabaseAndTableWithAlias UUID uuid = UUIDHelpers::Nil; DatabaseAndTableWithAlias() = default; - DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); - DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); - DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = ""); + explicit DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); + explicit DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); + explicit DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = ""); /// "alias." or "table." if alias is empty String getQualifiedNamePrefix(bool with_dot = true) const; @@ -80,7 +80,7 @@ private: void addAdditionalColumns(NamesAndTypesList & target, const NamesAndTypesList & addition) { target.insert(target.end(), addition.begin(), addition.end()); - for (auto & col : addition) + for (const auto & col : addition) names.insert(col.name); } diff --git a/src/Interpreters/SystemLog.h b/src/Interpreters/SystemLog.h index 2f7bdd4a22f..aa01ca3517b 100644 --- a/src/Interpreters/SystemLog.h +++ b/src/Interpreters/SystemLog.h @@ -152,6 +152,8 @@ public: void shutdown() override { stopFlushThread(); + if (table) + table->shutdown(); } String getName() override diff --git a/src/Parsers/ASTFunctionWithKeyValueArguments.h b/src/Parsers/ASTFunctionWithKeyValueArguments.h index 88ab712cc04..f5eaa33bfc7 100644 --- a/src/Parsers/ASTFunctionWithKeyValueArguments.h +++ b/src/Parsers/ASTFunctionWithKeyValueArguments.h @@ -20,7 +20,7 @@ public: bool second_with_brackets; public: - ASTPair(bool second_with_brackets_) + explicit ASTPair(bool second_with_brackets_) : second_with_brackets(second_with_brackets_) { } @@ -49,7 +49,7 @@ public: /// Has brackets around arguments bool has_brackets; - ASTFunctionWithKeyValueArguments(bool has_brackets_ = true) + explicit ASTFunctionWithKeyValueArguments(bool has_brackets_ = true) : has_brackets(has_brackets_) { } diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index cbbbd3f6d3b..f8b2408ac16 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -45,7 +45,7 @@ protected: class ParserIdentifier : public IParserBase { public: - ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} + explicit ParserIdentifier(bool allow_query_parameter_ = false) : allow_query_parameter(allow_query_parameter_) {} protected: const char * getName() const override { return "identifier"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; @@ -59,7 +59,7 @@ protected: class ParserCompoundIdentifier : public IParserBase { public: - ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) + 
explicit ParserCompoundIdentifier(bool table_name_with_optional_uuid_ = false, bool allow_query_parameter_ = false) : table_name_with_optional_uuid(table_name_with_optional_uuid_), allow_query_parameter(allow_query_parameter_) { } @@ -85,7 +85,7 @@ public: using ColumnTransformers = MultiEnum; static constexpr auto AllTransformers = ColumnTransformers{ColumnTransformer::APPLY, ColumnTransformer::EXCEPT, ColumnTransformer::REPLACE}; - ParserColumnsTransformers(ColumnTransformers allowed_transformers_ = AllTransformers, bool is_strict_ = false) + explicit ParserColumnsTransformers(ColumnTransformers allowed_transformers_ = AllTransformers, bool is_strict_ = false) : allowed_transformers(allowed_transformers_) , is_strict(is_strict_) {} @@ -103,7 +103,7 @@ class ParserAsterisk : public IParserBase { public: using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers; - ParserAsterisk(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) + explicit ParserAsterisk(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) : allowed_transformers(allowed_transformers_) {} @@ -129,7 +129,7 @@ class ParserColumnsMatcher : public IParserBase { public: using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers; - ParserColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) + explicit ParserColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers) : allowed_transformers(allowed_transformers_) {} @@ -149,7 +149,7 @@ protected: class ParserFunction : public IParserBase { public: - ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) + explicit ParserFunction(bool allow_function_parameters_ = true, bool is_table_function_ = false) : allow_function_parameters(allow_function_parameters_), is_table_function(is_table_function_) { } diff --git a/src/Processors/Formats/IRowInputFormat.h b/src/Processors/Formats/IRowInputFormat.h index c802bd3066b..8c600ad7285 100644 --- a/src/Processors/Formats/IRowInputFormat.h +++ b/src/Processors/Formats/IRowInputFormat.h @@ -14,7 +14,7 @@ namespace DB /// Contains extra information about read data. struct RowReadExtension { - /// IRowInputStream.read() output. It contains non zero for columns that actually read from the source and zero otherwise. + /// IRowInputFormat::read output. It contains non zero for columns that actually read from the source and zero otherwise. /// It's used to attach defaults for partially filled rows. 
std::vector read_columns; }; diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index d1a0ea61066..c6cd74f6c6a 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -33,6 +34,7 @@ #include +#include "Core/Protocol.h" #include "TCPHandler.h" #if !defined(ARCADIA_BUILD) @@ -55,6 +57,7 @@ namespace ErrorCodes extern const int SOCKET_TIMEOUT; extern const int UNEXPECTED_PACKET_FROM_CLIENT; extern const int SUPPORT_IS_DISABLED; + extern const int UNKNOWN_PROTOCOL; } TCPHandler::TCPHandler(IServer & server_, const Poco::Net::StreamSocket & socket_, bool parse_proxy_protocol_, std::string server_display_name_) @@ -285,6 +288,14 @@ void TCPHandler::runImpl() customizeContext(query_context); + /// This callback is needed for requesting read tasks inside pipeline for distributed processing + query_context->setReadTaskCallback([this]() -> String + { + std::lock_guard lock(task_callback_mutex); + sendReadTaskRequestAssumeLocked(); + return receiveReadTaskResponseAssumeLocked(); + }); + bool may_have_embedded_data = client_tcp_protocol_version >= DBMS_MIN_REVISION_WITH_CLIENT_SUPPORT_EMBEDDED_DATA; /// Processing Query state.io = executeQuery(state.query, query_context, false, state.stage, may_have_embedded_data); @@ -644,6 +655,8 @@ void TCPHandler::processOrdinaryQueryWithProcessors() Block block; while (executor.pull(block, query_context->getSettingsRef().interactive_delay / 1000)) { + std::lock_guard lock(task_callback_mutex); + if (isQueryCancelled()) { /// A packet was received requesting to stop execution of the request. @@ -755,6 +768,13 @@ void TCPHandler::sendPartUUIDs() } } + +void TCPHandler::sendReadTaskRequestAssumeLocked() +{ + writeVarUInt(Protocol::Server::ReadTaskRequest, *out); + out->next(); +} + void TCPHandler::sendProfileInfo(const BlockStreamProfileInfo & info) { writeVarUInt(Protocol::Server::ProfileInfo, *out); @@ -963,10 +983,10 @@ bool TCPHandler::receivePacket() UInt64 packet_type = 0; readVarUInt(packet_type, *in); -// std::cerr << "Server got packet: " << Protocol::Client::toString(packet_type) << "\n"; - switch (packet_type) { + case Protocol::Client::ReadTaskResponse: + throw Exception("ReadTaskResponse must be received only after requesting in callback", ErrorCodes::LOGICAL_ERROR); case Protocol::Client::IgnoredPartUUIDs: /// Part uuids packet if any comes before query. 
receiveIgnoredPartUUIDs(); @@ -1016,6 +1036,34 @@ void TCPHandler::receiveIgnoredPartUUIDs() query_context->getIgnoredPartUUIDs()->add(uuids); } + +String TCPHandler::receiveReadTaskResponseAssumeLocked() +{ + UInt64 packet_type = 0; + readVarUInt(packet_type, *in); + if (packet_type != Protocol::Client::ReadTaskResponse) + { + if (packet_type == Protocol::Client::Cancel) + { + state.is_cancelled = true; + return {}; + } + else + { + throw Exception(fmt::format("Received {} packet after requesting read task", + Protocol::Client::toString(packet_type)), ErrorCodes::UNEXPECTED_PACKET_FROM_CLIENT); + } + } + UInt64 version; + readVarUInt(version, *in); + if (version != DBMS_CLUSTER_PROCESSING_PROTOCOL_VERSION) + throw Exception("Protocol version for distributed processing mismatched", ErrorCodes::UNKNOWN_PROTOCOL); + String response; + readStringBinary(response, *in); + return response; +} + + void TCPHandler::receiveClusterNameAndSalt() { readStringBinary(cluster, *in); diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index e0160b82962..708d21c8251 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -89,7 +89,7 @@ struct QueryState *this = QueryState(); } - bool empty() + bool empty() const { return is_empty; } @@ -150,6 +150,7 @@ private: String cluster; String cluster_secret; + std::mutex task_callback_mutex; /// At the moment, only one ongoing query in the connection is supported at a time. QueryState state; @@ -169,9 +170,11 @@ private: bool receivePacket(); void receiveQuery(); void receiveIgnoredPartUUIDs(); + String receiveReadTaskResponseAssumeLocked(); bool receiveData(bool scalar); bool readDataNext(const size_t & poll_interval, const int & receive_timeout); void readData(const Settings & connection_settings); + void receiveClusterNameAndSalt(); std::tuple getReadTimeouts(const Settings & connection_settings); [[noreturn]] void receiveUnexpectedData(); @@ -198,12 +201,11 @@ private: void sendLogs(); void sendEndOfStream(); void sendPartUUIDs(); + void sendReadTaskRequestAssumeLocked(); void sendProfileInfo(const BlockStreamProfileInfo & info); void sendTotals(const Block & totals); void sendExtremes(const Block & extremes); - void receiveClusterNameAndSalt(); - /// Creates state.block_in/block_out for blocks read/write, depending on whether compression is enabled. 
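
The TCPHandler changes above add a small sub-protocol for distributed processing: whenever the worker's pipeline needs another file to read, the server sends a ReadTaskRequest packet and blocks until the client answers with a ReadTaskResponse carrying a protocol version and a string payload (the next task; an empty string means nothing is left). The Python sketch below models only that flow; the packet names, framing and the in-memory channel are invented for the example and are not the real wire format.

import queue, threading

READ_TASK_REQUEST, READ_TASK_RESPONSE, PROTOCOL_VERSION = "ReadTaskRequest", "ReadTaskResponse", 1

def server_request_next_task(to_client, from_client):
    to_client.put(READ_TASK_REQUEST)                     # sendReadTaskRequestAssumeLocked()
    packet, version, payload = from_client.get()         # receiveReadTaskResponseAssumeLocked()
    if packet != READ_TASK_RESPONSE:
        raise RuntimeError(f"unexpected {packet} packet after requesting a read task")
    if version != PROTOCOL_VERSION:
        raise RuntimeError("protocol version for distributed processing mismatched")
    return payload                                       # next task, or "" when nothing is left

def client_loop(from_server, to_server, tasks):
    it = iter(tasks)
    while from_server.get() == READ_TASK_REQUEST:
        key = next(it, "")
        to_server.put((READ_TASK_RESPONSE, PROTOCOL_VERSION, key))
        if key == "":
            return

to_client, to_server = queue.Queue(), queue.Queue()
threading.Thread(target=client_loop, args=(to_client, to_server, ["part1.csv", "part2.csv"]), daemon=True).start()
print([server_request_next_task(to_client, to_server) for _ in range(3)])   # ['part1.csv', 'part2.csv', '']
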
void initBlockInput(); void initBlockOutput(const Block & block); diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index e42e53d3f1b..5c82e5378e8 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -452,7 +452,8 @@ StorageDistributed::StorageDistributed( const DistributedSettings & distributed_settings_, bool attach, ClusterPtr owned_cluster_) - : StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_, storage_policy_name_, relative_data_path_, distributed_settings_, attach, std::move(owned_cluster_)) + : StorageDistributed(id_, columns_, constraints_, String{}, String{}, cluster_name_, context_, sharding_key_, + storage_policy_name_, relative_data_path_, distributed_settings_, attach, std::move(owned_cluster_)) { remote_table_function_ptr = std::move(remote_table_function_ptr_); } @@ -473,20 +474,15 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage( ClusterPtr optimized_cluster = getOptimizedCluster(local_context, metadata_snapshot, query_info.query); if (optimized_cluster) { - LOG_DEBUG( - log, - "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", - makeFormattedListOfShards(optimized_cluster)); + LOG_DEBUG(log, "Skipping irrelevant shards - the query will be sent to the following shards of the cluster (shard numbers): {}", + makeFormattedListOfShards(optimized_cluster)); cluster = optimized_cluster; query_info.optimized_cluster = cluster; } else { - LOG_DEBUG( - log, - "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the " - "cluster{}", - has_sharding_key ? "" : " (no sharding key)"); + LOG_DEBUG(log, "Unable to figure out irrelevant shards from WHERE/PREWHERE clauses - the query will be sent to all shards of the cluster{}", + has_sharding_key ? "" : " (no sharding key)"); } } diff --git a/src/Storages/StorageProxy.h b/src/Storages/StorageProxy.h index 6fd2a86b6eb..2c3e9d610b0 100644 --- a/src/Storages/StorageProxy.h +++ b/src/Storages/StorageProxy.h @@ -11,7 +11,7 @@ class StorageProxy : public IStorage { public: - StorageProxy(const StorageID & table_id_) : IStorage(table_id_) {} + explicit StorageProxy(const StorageID & table_id_) : IStorage(table_id_) {} virtual StoragePtr getNested() const = 0; diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index af4b1777223..a5cbd004d55 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -45,154 +45,267 @@ namespace ErrorCodes extern const int UNEXPECTED_EXPRESSION; extern const int S3_ERROR; } - - -namespace +class StorageS3Source::DisclosedGlobIterator::Impl { - class StorageS3Source : public SourceWithProgress + +public: + Impl(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) + : client(client_), globbed_uri(globbed_uri_) { - public: + std::lock_guard lock(mutex); - static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column) + if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) + throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); + + const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); + + /// We don't have to list bucket, because there is no asterics. 
+ if (key_prefix.size() == globbed_uri.key.size()) { - if (with_path_column) - sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); - if (with_file_column) - sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); - - return sample_block; + buffer.emplace_back(globbed_uri.key); + buffer_iter = buffer.begin(); + is_finished = true; + return; } - StorageS3Source( - bool need_path, - bool need_file, - const String & format, - String name_, - const Block & sample_block, - ContextPtr context, - const ColumnsDescription & columns, - UInt64 max_block_size, - const CompressionMethod compression_method, - const std::shared_ptr & client, - const String & bucket, - const String & key) - : SourceWithProgress(getHeader(sample_block, need_path, need_file)) - , name(std::move(name_)) - , with_file_column(need_file) - , with_path_column(need_path) - , file_path(bucket + "/" + key) - { - read_buf = wrapReadBufferWithCompressionMethod(std::make_unique(client, bucket, key), compression_method); - auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, context, max_block_size); - reader = std::make_shared(input_format); + request.SetBucket(globbed_uri.bucket); + request.SetPrefix(key_prefix); + matcher = std::make_unique(makeRegexpPatternFromGlobs(globbed_uri.key)); + fillInternalBufferAssumeLocked(); + } - if (columns.hasDefaults()) - reader = std::make_shared(reader, columns, context); + String next() + { + std::lock_guard lock(mutex); + return nextAssumeLocked(); + } + +private: + + String nextAssumeLocked() + { + if (buffer_iter != buffer.end()) + { + auto answer = *buffer_iter; + ++buffer_iter; + return answer; } - String getName() const override - { - return name; - } - - Chunk generate() override - { - if (!reader) - return {}; - - if (!initialized) - { - reader->readSuffix(); - initialized = true; - } - - if (auto block = reader->read()) - { - auto columns = block.getColumns(); - UInt64 num_rows = block.rows(); - - if (with_path_column) - columns.push_back(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); - if (with_file_column) - { - size_t last_slash_pos = file_path.find_last_of('/'); - columns.push_back(DataTypeString().createColumnConst(num_rows, file_path.substr( - last_slash_pos + 1))->convertToFullColumnIfConst()); - } - - return Chunk(std::move(columns), num_rows); - } - - reader.reset(); - + if (is_finished) return {}; - } - private: - String name; - std::unique_ptr read_buf; - BlockInputStreamPtr reader; - bool initialized = false; - bool with_file_column = false; - bool with_path_column = false; - String file_path; - }; + fillInternalBufferAssumeLocked(); - class StorageS3BlockOutputStream : public IBlockOutputStream + return nextAssumeLocked(); + } + + void fillInternalBufferAssumeLocked() { - public: - StorageS3BlockOutputStream( - const String & format, - const Block & sample_block_, - ContextPtr context, - const CompressionMethod compression_method, - const std::shared_ptr & client, - const String & bucket, - const String & key, - size_t min_upload_part_size, - size_t max_single_part_upload_size) - : sample_block(sample_block_) - { - write_buf = wrapWriteBufferWithCompressionMethod( - std::make_unique(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3); - writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context); - } + buffer.clear(); - Block getHeader() const 
override - { - return sample_block; - } + outcome = client.ListObjectsV2(request); + if (!outcome.IsSuccess()) + throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", + quoteString(request.GetBucket()), quoteString(request.GetPrefix()), + backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - void write(const Block & block) override - { - writer->write(block); - } + const auto & result_batch = outcome.GetResult().GetContents(); - void writePrefix() override + buffer.reserve(result_batch.size()); + for (const auto & row : result_batch) { - writer->writePrefix(); + String key = row.GetKey(); + if (re2::RE2::FullMatch(key, *matcher)) + buffer.emplace_back(std::move(key)); } + /// Set iterator only after the whole batch is processed + buffer_iter = buffer.begin(); - void flush() override - { - writer->flush(); - } + request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - void writeSuffix() override - { - writer->writeSuffix(); - writer->flush(); - write_buf->finalize(); - } + /// It returns false when all objects were returned + is_finished = !outcome.GetResult().GetIsTruncated(); + } - private: - Block sample_block; - std::unique_ptr write_buf; - BlockOutputStreamPtr writer; - }; + std::mutex mutex; + Strings buffer; + Strings::iterator buffer_iter; + Aws::S3::S3Client client; + S3::URI globbed_uri; + Aws::S3::Model::ListObjectsV2Request request; + Aws::S3::Model::ListObjectsV2Outcome outcome; + std::unique_ptr matcher; + bool is_finished{false}; +}; + +StorageS3Source::DisclosedGlobIterator::DisclosedGlobIterator(Aws::S3::S3Client & client_, const S3::URI & globbed_uri_) + : pimpl(std::make_shared(client_, globbed_uri_)) {} + +String StorageS3Source::DisclosedGlobIterator::next() +{ + return pimpl->next(); } +Block StorageS3Source::getHeader(Block sample_block, bool with_path_column, bool with_file_column) +{ + if (with_path_column) + sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_path"}); + if (with_file_column) + sample_block.insert({DataTypeString().createColumn(), std::make_shared(), "_file"}); + + return sample_block; +} + +StorageS3Source::StorageS3Source( + bool need_path, + bool need_file, + const String & format_, + String name_, + const Block & sample_block_, + ContextPtr context_, + const ColumnsDescription & columns_, + UInt64 max_block_size_, + const String compression_hint_, + const std::shared_ptr & client_, + const String & bucket_, + std::shared_ptr file_iterator_) + : SourceWithProgress(getHeader(sample_block_, need_path, need_file)) + , WithContext(context_) + , name(std::move(name_)) + , bucket(bucket_) + , format(format_) + , columns_desc(columns_) + , max_block_size(max_block_size_) + , compression_hint(compression_hint_) + , client(client_) + , sample_block(sample_block_) + , with_file_column(need_file) + , with_path_column(need_path) + , file_iterator(file_iterator_) +{ + initialize(); +} + + +bool StorageS3Source::initialize() +{ + String current_key = (*file_iterator)(); + if (current_key.empty()) + return false; + + file_path = bucket + "/" + current_key; + + read_buf = wrapReadBufferWithCompressionMethod( + std::make_unique(client, bucket, current_key), chooseCompressionMethod(current_key, compression_hint)); + auto input_format = FormatFactory::instance().getInput(format, *read_buf, sample_block, getContext(), max_block_size); + reader = std::make_shared(input_format); + + if 
(columns_desc.hasDefaults()) + reader = std::make_shared(reader, columns_desc, getContext()); + + initialized = false; + return true; +} + +String StorageS3Source::getName() const +{ + return name; +} + +Chunk StorageS3Source::generate() +{ + if (!reader) + return {}; + + if (!initialized) + { + reader->readPrefix(); + initialized = true; + } + + if (auto block = reader->read()) + { + auto columns = block.getColumns(); + UInt64 num_rows = block.rows(); + + if (with_path_column) + columns.push_back(DataTypeString().createColumnConst(num_rows, file_path)->convertToFullColumnIfConst()); + if (with_file_column) + { + size_t last_slash_pos = file_path.find_last_of('/'); + columns.push_back(DataTypeString().createColumnConst(num_rows, file_path.substr( + last_slash_pos + 1))->convertToFullColumnIfConst()); + } + + return Chunk(std::move(columns), num_rows); + } + + reader->readSuffix(); + reader.reset(); + read_buf.reset(); + + if (!initialize()) + return {}; + + return generate(); +} + + +class StorageS3BlockOutputStream : public IBlockOutputStream +{ +public: + StorageS3BlockOutputStream( + const String & format, + const Block & sample_block_, + ContextPtr context, + const CompressionMethod compression_method, + const std::shared_ptr & client, + const String & bucket, + const String & key, + size_t min_upload_part_size, + size_t max_single_part_upload_size) + : sample_block(sample_block_) + { + write_buf = wrapWriteBufferWithCompressionMethod( + std::make_unique(client, bucket, key, min_upload_part_size, max_single_part_upload_size), compression_method, 3); + writer = FormatFactory::instance().getOutputStreamParallelIfPossible(format, *write_buf, sample_block, context); + } + + Block getHeader() const override + { + return sample_block; + } + + void write(const Block & block) override + { + writer->write(block); + } + + void writePrefix() override + { + writer->writePrefix(); + } + + void flush() override + { + writer->flush(); + } + + void writeSuffix() override + { + writer->writeSuffix(); + writer->flush(); + write_buf->finalize(); + } + +private: + Block sample_block; + std::unique_ptr write_buf; + BlockOutputStreamPtr writer; +}; + + StorageS3::StorageS3( const S3::URI & uri_, const String & access_key_id_, @@ -205,84 +318,23 @@ StorageS3::StorageS3( const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, - const String & compression_method_) + const String & compression_method_, + bool distributed_processing_) : IStorage(table_id_) - , WithContext(context_->getGlobalContext()) - , uri(uri_) - , access_key_id(access_key_id_) - , secret_access_key(secret_access_key_) - , max_connections(max_connections_) + , client_auth{uri_, access_key_id_, secret_access_key_, max_connections_, {}, {}} /// Client and settings will be updated later , format_name(format_name_) , min_upload_part_size(min_upload_part_size_) , max_single_part_upload_size(max_single_part_upload_size_) , compression_method(compression_method_) , name(uri_.storage_name) + , distributed_processing(distributed_processing_) { - getContext()->getRemoteHostFilter().checkURL(uri_.uri); + context_->getGlobalContext()->getRemoteHostFilter().checkURL(uri_.uri); StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); storage_metadata.setConstraints(constraints_); setInMemoryMetadata(storage_metadata); - updateAuthSettings(context_); -} - - -namespace -{ - /* "Recursive" directory listing with matched paths as a result. - * Have the same method in StorageFile. 
- */ -Strings listFilesWithRegexpMatching(Aws::S3::S3Client & client, const S3::URI & globbed_uri) -{ - if (globbed_uri.bucket.find_first_of("*?{") != globbed_uri.bucket.npos) - { - throw Exception("Expression can not have wildcards inside bucket name", ErrorCodes::UNEXPECTED_EXPRESSION); - } - - const String key_prefix = globbed_uri.key.substr(0, globbed_uri.key.find_first_of("*?{")); - if (key_prefix.size() == globbed_uri.key.size()) - { - return {globbed_uri.key}; - } - - Aws::S3::Model::ListObjectsV2Request request; - request.SetBucket(globbed_uri.bucket); - request.SetPrefix(key_prefix); - - re2::RE2 matcher(makeRegexpPatternFromGlobs(globbed_uri.key)); - Strings result; - Aws::S3::Model::ListObjectsV2Outcome outcome; - int page = 0; - do - { - ++page; - outcome = client.ListObjectsV2(request); - if (!outcome.IsSuccess()) - { - if (page > 1) - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, page {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), page, - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - - throw Exception(ErrorCodes::S3_ERROR, "Could not list objects in bucket {} with prefix {}, S3 exception: {}, message: {}", - quoteString(request.GetBucket()), quoteString(request.GetPrefix()), - backQuote(outcome.GetError().GetExceptionName()), quoteString(outcome.GetError().GetMessage())); - } - - for (const auto & row : outcome.GetResult().GetContents()) - { - String key = row.GetKey(); - if (re2::RE2::FullMatch(key, matcher)) - result.emplace_back(std::move(key)); - } - - request.SetContinuationToken(outcome.GetResult().GetNextContinuationToken()); - } - while (outcome.GetResult().GetIsTruncated()); - - return result; -} - + updateClientAndAuthSettings(context_, client_auth); } @@ -295,7 +347,7 @@ Pipe StorageS3::read( size_t max_block_size, unsigned num_streams) { - updateAuthSettings(local_context); + updateClientAndAuthSettings(local_context, client_auth); Pipes pipes; bool need_path_column = false; @@ -308,7 +360,26 @@ Pipe StorageS3::read( need_file_column = true; } - for (const String & key : listFilesWithRegexpMatching(*client, uri)) + std::shared_ptr iterator_wrapper{nullptr}; + if (distributed_processing) + { + iterator_wrapper = std::make_shared( + [callback = local_context->getReadTaskCallback()]() -> String { + return callback(); + }); + } + else + { + /// Iterate through disclosed globs and make a source for each file + auto glob_iterator = std::make_shared(*client_auth.client, client_auth.uri); + iterator_wrapper = std::make_shared([glob_iterator]() + { + return glob_iterator->next(); + }); + } + + for (size_t i = 0; i < num_streams; ++i) + { pipes.emplace_back(std::make_shared( need_path_column, need_file_column, @@ -318,63 +389,62 @@ Pipe StorageS3::read( local_context, metadata_snapshot->getColumns(), max_block_size, - chooseCompressionMethod(uri.key, compression_method), - client, - uri.bucket, - key)); - + compression_method, + client_auth.client, + client_auth.uri.bucket, + iterator_wrapper)); + } auto pipe = Pipe::unitePipes(std::move(pipes)); - // It's possible to have many buckets read from s3, resize(num_streams) might open too many handles at the same time. - // Using narrowPipe instead. 
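
Taken together, the hunks above turn file discovery into a pull-based iterator: the glob is split into a literal key prefix plus a regular expression, ListObjectsV2 is paged with a continuation token, and each returned key is filtered through the regexp; StorageS3::read then wraps either that iterator (local reads) or the context's read-task callback (distributed processing) into a plain next() function shared by every stream. Below is a hedged Python sketch of that listing-and-dispatch idea, using fnmatch in place of makeRegexpPatternFromGlobs and a toy paginated client instead of the AWS SDK; all names and data are illustrative.

import fnmatch, re
from typing import Callable, Iterator

def glob_to_prefix_and_regex(glob_key: str):
    """Split an S3 glob into a literal listing prefix and a full-match regexp."""
    cut = min((glob_key.find(c) for c in "*?{" if c in glob_key), default=len(glob_key))
    return glob_key[:cut], re.compile(fnmatch.translate(glob_key))  # fnmatch ~ makeRegexpPatternFromGlobs

def disclosed_glob_iterator(list_page: Callable[[str, str], tuple], glob_key: str) -> Iterator[str]:
    prefix, matcher = glob_to_prefix_and_regex(glob_key)
    token = ""
    while True:
        keys, token, truncated = list_page(prefix, token)   # one ListObjectsV2-style page
        yield from (k for k in keys if matcher.fullmatch(k))
        if not truncated:
            return

# Toy "client": two pages of keys, like a paginated listing response.
PAGES = [(["data/part1.csv", "data/part1.csv.bak"], "tok", True), (["data/part2.csv"], "", False)]
def list_page(prefix, token):
    return PAGES[0] if token == "" else PAGES[1]

it = disclosed_glob_iterator(list_page, "data/part*.csv")
next_task = lambda: next(it, "")          # the iterator wrapper shared by every stream
print([next_task() for _ in range(4)])    # ['data/part1.csv', 'data/part2.csv', '', '']
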
+ narrowPipe(pipe, num_streams); return pipe; } BlockOutputStreamPtr StorageS3::write(const ASTPtr & /*query*/, const StorageMetadataPtr & metadata_snapshot, ContextPtr local_context) { - updateAuthSettings(local_context); + updateClientAndAuthSettings(local_context, client_auth); return std::make_shared( format_name, metadata_snapshot->getSampleBlock(), - getContext(), - chooseCompressionMethod(uri.key, compression_method), - client, - uri.bucket, - uri.key, + local_context, + chooseCompressionMethod(client_auth.uri.key, compression_method), + client_auth.client, + client_auth.uri.bucket, + client_auth.uri.key, min_upload_part_size, max_single_part_upload_size); } -void StorageS3::updateAuthSettings(ContextPtr local_context) +void StorageS3::updateClientAndAuthSettings(ContextPtr ctx, StorageS3::ClientAuthentificaiton & upd) { - auto settings = local_context->getStorageS3Settings().getSettings(uri.uri.toString()); - if (client && (!access_key_id.empty() || settings == auth_settings)) + auto settings = ctx->getStorageS3Settings().getSettings(upd.uri.uri.toString()); + if (upd.client && (!upd.access_key_id.empty() || settings == upd.auth_settings)) return; - Aws::Auth::AWSCredentials credentials(access_key_id, secret_access_key); + Aws::Auth::AWSCredentials credentials(upd.access_key_id, upd.secret_access_key); HeaderCollection headers; - if (access_key_id.empty()) + if (upd.access_key_id.empty()) { credentials = Aws::Auth::AWSCredentials(settings.access_key_id, settings.secret_access_key); headers = settings.headers; } S3::PocoHTTPClientConfiguration client_configuration = S3::ClientFactory::instance().createClientConfiguration( - local_context->getRemoteHostFilter(), local_context->getGlobalContext()->getSettingsRef().s3_max_redirects); + ctx->getRemoteHostFilter(), ctx->getGlobalContext()->getSettingsRef().s3_max_redirects); - client_configuration.endpointOverride = uri.endpoint; - client_configuration.maxConnections = max_connections; + client_configuration.endpointOverride = upd.uri.endpoint; + client_configuration.maxConnections = upd.max_connections; - client = S3::ClientFactory::instance().create( + upd.client = S3::ClientFactory::instance().create( client_configuration, - uri.is_virtual_hosted_style, + upd.uri.is_virtual_hosted_style, credentials.GetAWSAccessKeyId(), credentials.GetAWSSecretKey(), settings.server_side_encryption_customer_key_base64, std::move(headers), - settings.use_environment_credentials.value_or(getContext()->getConfigRef().getBool("s3.use_environment_credentials", false))); + settings.use_environment_credentials.value_or(ctx->getConfigRef().getBool("s3.use_environment_credentials", false))); - auth_settings = std::move(settings); + upd.auth_settings = std::move(settings); } void registerStorageS3Impl(const String & name, StorageFactory & factory) @@ -385,7 +455,8 @@ void registerStorageS3Impl(const String & name, StorageFactory & factory) if (engine_args.size() < 2 || engine_args.size() > 5) throw Exception( - "Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + "Storage S3 requires 2 to 5 arguments: url, [access_key_id, secret_access_key], name of used format and [compression_method].", + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & engine_arg : engine_args) engine_arg = evaluateConstantExpressionOrIdentifierAsLiteral(engine_arg, args.getLocalContext()); diff --git a/src/Storages/StorageS3.h b/src/Storages/StorageS3.h index 
14946f881f2..1e1d76fa6e3 100644 --- a/src/Storages/StorageS3.h +++ b/src/Storages/StorageS3.h @@ -4,11 +4,20 @@ #if USE_AWS_S3 +#include + +#include + #include #include + +#include #include #include #include +#include +#include +#include namespace Aws::S3 { @@ -18,6 +27,66 @@ namespace Aws::S3 namespace DB { +class StorageS3SequentialSource; +class StorageS3Source : public SourceWithProgress, WithContext +{ +public: + class DisclosedGlobIterator + { + public: + DisclosedGlobIterator(Aws::S3::S3Client &, const S3::URI &); + String next(); + private: + class Impl; + /// shared_ptr to have copy constructor + std::shared_ptr pimpl; + }; + + using IteratorWrapper = std::function; + + static Block getHeader(Block sample_block, bool with_path_column, bool with_file_column); + + StorageS3Source( + bool need_path, + bool need_file, + const String & format, + String name_, + const Block & sample_block, + ContextPtr context_, + const ColumnsDescription & columns_, + UInt64 max_block_size_, + const String compression_hint_, + const std::shared_ptr & client_, + const String & bucket, + std::shared_ptr file_iterator_); + + String getName() const override; + + Chunk generate() override; + +private: + String name; + String bucket; + String file_path; + String format; + ColumnsDescription columns_desc; + UInt64 max_block_size; + String compression_hint; + std::shared_ptr client; + Block sample_block; + + + std::unique_ptr read_buf; + BlockInputStreamPtr reader; + bool initialized = false; + bool with_file_column = false; + bool with_path_column = false; + std::shared_ptr file_iterator; + + /// Recreate ReadBuffer and BlockInputStream for each file. + bool initialize(); +}; + /** * This class represents table engine for external S3 urls. * It sends HTTP GET to server when select is called and @@ -37,7 +106,8 @@ public: const ColumnsDescription & columns_, const ConstraintsDescription & constraints_, ContextPtr context_, - const String & compression_method_ = ""); + const String & compression_method_ = "", + bool distributed_processing_ = false); String getName() const override { @@ -58,20 +128,30 @@ public: NamesAndTypesList getVirtuals() const override; private: - const S3::URI uri; - const String access_key_id; - const String secret_access_key; - const UInt64 max_connections; + + friend class StorageS3Cluster; + friend class TableFunctionS3Cluster; + + struct ClientAuthentificaiton + { + const S3::URI uri; + const String access_key_id; + const String secret_access_key; + const UInt64 max_connections; + std::shared_ptr client; + S3AuthSettings auth_settings; + }; + + ClientAuthentificaiton client_auth; String format_name; size_t min_upload_part_size; size_t max_single_part_upload_size; String compression_method; - std::shared_ptr client; String name; - S3AuthSettings auth_settings; + const bool distributed_processing; - void updateAuthSettings(ContextPtr context); + static void updateClientAndAuthSettings(ContextPtr, ClientAuthentificaiton &); }; } diff --git a/src/Storages/StorageS3Cluster.cpp b/src/Storages/StorageS3Cluster.cpp new file mode 100644 index 00000000000..8afc0e44023 --- /dev/null +++ b/src/Storages/StorageS3Cluster.cpp @@ -0,0 +1,166 @@ +#include "Storages/StorageS3Cluster.h" + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_AWS_S3 + +#include "Common/Exception.h" +#include +#include "Client/Connection.h" +#include "Core/QueryProcessingStage.h" +#include +#include "DataStreams/RemoteBlockInputStream.h" +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Processors/Sources/SourceWithProgress.h" +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + + +StorageS3Cluster::StorageS3Cluster( + const String & filename_, + const String & access_key_id_, + const String & secret_access_key_, + const StorageID & table_id_, + String cluster_name_, + const String & format_name_, + UInt64 max_connections_, + const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, + ContextPtr context_, + const String & compression_method_) + : IStorage(table_id_) + , client_auth{S3::URI{Poco::URI{filename_}}, access_key_id_, secret_access_key_, max_connections_, {}, {}} + , filename(filename_) + , cluster_name(cluster_name_) + , format_name(format_name_) + , compression_method(compression_method_) +{ + StorageInMemoryMetadata storage_metadata; + storage_metadata.setColumns(columns_); + storage_metadata.setConstraints(constraints_); + setInMemoryMetadata(storage_metadata); + StorageS3::updateClientAndAuthSettings(context_, client_auth); +} + +/// The code executes on initiator +Pipe StorageS3Cluster::read( + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + ContextPtr context, + QueryProcessingStage::Enum processed_stage, + size_t /*max_block_size*/, + unsigned /*num_streams*/) +{ + StorageS3::updateClientAndAuthSettings(context, client_auth); + + auto cluster = context->getCluster(cluster_name)->getClusterWithReplicasAsShards(context->getSettings()); + S3::URI s3_uri(Poco::URI{filename}); + StorageS3::updateClientAndAuthSettings(context, client_auth); + + auto iterator = std::make_shared(*client_auth.client, client_auth.uri); + auto callback = std::make_shared([iterator]() mutable -> String + { + return iterator->next(); + }); + + /// Calculate the header. This is significant, because some columns could be thrown away in some cases like query with count(*) + Block header = + InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); + + const Scalars & scalars = context->hasQueryContext() ? context->getQueryContext()->getScalars() : Scalars{}; + + Pipes pipes; + connections.reserve(cluster->getShardCount()); + + const bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; + + for (const auto & replicas : cluster->getShardsAddresses()) + { + /// There will be only one replica, because we consider each replica as a shard + for (const auto & node : replicas) + { + connections.emplace_back(std::make_shared( + node.host_name, node.port, context->getGlobalContext()->getCurrentDatabase(), + node.user, node.password, node.cluster, node.cluster_secret, + "S3ClusterInititiator", + node.compression, + node.secure + )); + + /// For unknown reason global context is passed to IStorage::read() method + /// So, task_identifier is passed as constructor argument. It is more obvious. 
+ auto remote_query_executor = std::make_shared( + *connections.back(), queryToString(query_info.query), header, context, + /*throttler=*/nullptr, scalars, Tables(), processed_stage, callback); + + pipes.emplace_back(std::make_shared(remote_query_executor, add_agg_info, false)); + } + } + + metadata_snapshot->check(column_names, getVirtuals(), getStorageID()); + return Pipe::unitePipes(std::move(pipes)); +} + +QueryProcessingStage::Enum StorageS3Cluster::getQueryProcessingStage( + ContextPtr context, QueryProcessingStage::Enum to_stage, SelectQueryInfo &) const +{ + /// Initiator executes query on remote node. + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::INITIAL_QUERY) + if (to_stage >= QueryProcessingStage::Enum::WithMergeableState) + return QueryProcessingStage::Enum::WithMergeableState; + + /// Follower just reads the data. + return QueryProcessingStage::Enum::FetchColumns; +} + + +NamesAndTypesList StorageS3Cluster::getVirtuals() const +{ + return NamesAndTypesList{ + {"_path", std::make_shared()}, + {"_file", std::make_shared()} + }; +} + + +} + +#endif diff --git a/src/Storages/StorageS3Cluster.h b/src/Storages/StorageS3Cluster.h new file mode 100644 index 00000000000..c98840d62fc --- /dev/null +++ b/src/Storages/StorageS3Cluster.h @@ -0,0 +1,63 @@ +#pragma once + +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_AWS_S3 + +#include "Client/Connection.h" +#include +#include +#include + +#include +#include +#include "ext/shared_ptr_helper.h" + +namespace DB +{ + +class Context; + +struct ClientAuthentificationBuilder +{ + String access_key_id; + String secret_access_key; + UInt64 max_connections; +}; + +class StorageS3Cluster : public ext::shared_ptr_helper, public IStorage +{ + friend struct ext::shared_ptr_helper; +public: + std::string getName() const override { return "S3Cluster"; } + + Pipe read(const Names &, const StorageMetadataPtr &, SelectQueryInfo &, + ContextPtr, QueryProcessingStage::Enum, size_t /*max_block_size*/, unsigned /*num_streams*/) override; + + QueryProcessingStage::Enum getQueryProcessingStage(ContextPtr, QueryProcessingStage::Enum, SelectQueryInfo &) const override; + + NamesAndTypesList getVirtuals() const override; + +protected: + StorageS3Cluster( + const String & filename_, const String & access_key_id_, const String & secret_access_key_, const StorageID & table_id_, + String cluster_name_, const String & format_name_, UInt64 max_connections_, const ColumnsDescription & columns_, + const ConstraintsDescription & constraints_, ContextPtr context_, const String & compression_method_); + +private: + /// Connections from initiator to other nodes + std::vector> connections; + StorageS3::ClientAuthentificaiton client_auth; + + String filename; + String cluster_name; + String format_name; + String compression_method; +}; + + +} + +#endif diff --git a/src/Storages/System/StorageSystemOne.h b/src/Storages/System/StorageSystemOne.h index a34f9562025..a14d5e15726 100644 --- a/src/Storages/System/StorageSystemOne.h +++ b/src/Storages/System/StorageSystemOne.h @@ -31,7 +31,7 @@ public: unsigned num_streams) override; protected: - StorageSystemOne(const StorageID & table_id_); + explicit StorageSystemOne(const StorageID & table_id_); }; } diff --git a/src/TableFunctions/TableFunctionRemote.h b/src/TableFunctions/TableFunctionRemote.h index 8fc918dfc20..845c36182dc 100644 --- a/src/TableFunctions/TableFunctionRemote.h +++ b/src/TableFunctions/TableFunctionRemote.h @@ -18,7 +18,7 @@ namespace DB class TableFunctionRemote : public 
ITableFunction { public: - TableFunctionRemote(const std::string & name_, bool secure_ = false); + explicit TableFunctionRemote(const std::string & name_, bool secure_ = false); std::string getName() const override { return name; } diff --git a/src/TableFunctions/TableFunctionS3.cpp b/src/TableFunctions/TableFunctionS3.cpp index 1a67bae48b0..2da597f49ff 100644 --- a/src/TableFunctions/TableFunctionS3.cpp +++ b/src/TableFunctions/TableFunctionS3.cpp @@ -17,7 +17,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -26,35 +25,53 @@ void TableFunctionS3::parseArguments(const ASTPtr & ast_function, ContextPtr con /// Parse args ASTs & args_func = ast_function->children; + const auto message = fmt::format( + "The signature of table function {} could be the following:\n" \ + " - url, format, structure\n" \ + " - url, format, structure, compression_method\n" \ + " - url, access_key_id, secret_access_key, format, structure\n" \ + " - url, access_key_id, secret_access_key, format, structure, compression_method", + getName()); + if (args_func.size() != 1) - throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::LOGICAL_ERROR); + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); ASTs & args = args_func.at(0)->children; if (args.size() < 3 || args.size() > 6) - throw Exception("Table function '" + getName() + "' requires 3 to 6 arguments: url, [access_key_id, secret_access_key,] format, structure and [compression_method].", - ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); for (auto & arg : args) arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + /// Size -> argument indexes + static auto size_to_args = std::map> + { + {3, {{"format", 1}, {"structure", 2}}}, + {4, {{"format", 1}, {"structure", 2}, {"compression_method", 3}}}, + {5, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}}}, + {6, {{"access_key_id", 1}, {"secret_access_key", 2}, {"format", 3}, {"structure", 4}, {"compression_method", 5}}} + }; + + /// This argument is always the first filename = args[0]->as().value.safeGet(); - if (args.size() < 5) - { - format = args[1]->as().value.safeGet(); - structure = args[2]->as().value.safeGet(); - } - else - { - access_key_id = args[1]->as().value.safeGet(); - secret_access_key = args[2]->as().value.safeGet(); - format = args[3]->as().value.safeGet(); - structure = args[4]->as().value.safeGet(); - } + auto & args_to_idx = size_to_args[args.size()]; - if (args.size() == 4 || args.size() == 6) - compression_method = args.back()->as().value.safeGet(); + if (args_to_idx.contains("format")) + format = args[args_to_idx["format"]]->as().value.safeGet(); + + if (args_to_idx.contains("structure")) + structure = args[args_to_idx["structure"]]->as().value.safeGet(); + + if (args_to_idx.contains("compression_method")) + compression_method = args[args_to_idx["compression_method"]]->as().value.safeGet(); + + if (args_to_idx.contains("access_key_id")) + access_key_id = args[args_to_idx["access_key_id"]]->as().value.safeGet(); + + if (args_to_idx.contains("secret_access_key")) + secret_access_key = args[args_to_idx["secret_access_key"]]->as().value.safeGet(); } ColumnsDescription TableFunctionS3::getActualTableStructure(ContextPtr context) const diff --git a/src/TableFunctions/TableFunctionS3Cluster.cpp 
b/src/TableFunctions/TableFunctionS3Cluster.cpp new file mode 100644 index 00000000000..26ef07ef97f --- /dev/null +++ b/src/TableFunctions/TableFunctionS3Cluster.cpp @@ -0,0 +1,144 @@ +#if !defined(ARCADIA_BUILD) +#include +#endif + +#if USE_AWS_S3 + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "registerTableFunctions.h" + +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + + +void TableFunctionS3Cluster::parseArguments(const ASTPtr & ast_function, ContextPtr context) +{ + /// Parse args + ASTs & args_func = ast_function->children; + + if (args_func.size() != 1) + throw Exception("Table function '" + getName() + "' must have arguments.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + ASTs & args = args_func.at(0)->children; + + const auto message = fmt::format( + "The signature of table function {} could be the following:\n" \ + " - cluster, url, format, structure\n" \ + " - cluster, url, format, structure, compression_method\n" \ + " - cluster, url, access_key_id, secret_access_key, format, structure\n" \ + " - cluster, url, access_key_id, secret_access_key, format, structure, compression_method", + getName()); + + if (args.size() < 4 || args.size() > 7) + throw Exception(message, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + for (auto & arg : args) + arg = evaluateConstantExpressionOrIdentifierAsLiteral(arg, context); + + /// This arguments are always the first + cluster_name = args[0]->as().value.safeGet(); + filename = args[1]->as().value.safeGet(); + + /// Size -> argument indexes + static auto size_to_args = std::map> + { + {4, {{"format", 2}, {"structure", 3}}}, + {5, {{"format", 2}, {"structure", 3}, {"compression_method", 4}}}, + {6, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}}}, + {7, {{"access_key_id", 2}, {"secret_access_key", 3}, {"format", 4}, {"structure", 5}, {"compression_method", 6}}} + }; + + auto & args_to_idx = size_to_args[args.size()]; + + if (args_to_idx.contains("format")) + format = args[args_to_idx["format"]]->as().value.safeGet(); + + if (args_to_idx.contains("structure")) + structure = args[args_to_idx["structure"]]->as().value.safeGet(); + + if (args_to_idx.contains("compression_method")) + compression_method = args[args_to_idx["compression_method"]]->as().value.safeGet(); + + if (args_to_idx.contains("access_key_id")) + access_key_id = args[args_to_idx["access_key_id"]]->as().value.safeGet(); + + if (args_to_idx.contains("secret_access_key")) + secret_access_key = args[args_to_idx["secret_access_key"]]->as().value.safeGet(); +} + + +ColumnsDescription TableFunctionS3Cluster::getActualTableStructure(ContextPtr context) const +{ + return parseColumnsListFromString(structure, context); +} + +StoragePtr TableFunctionS3Cluster::executeImpl( + const ASTPtr & /*function*/, ContextPtr context, + const std::string & table_name, ColumnsDescription /*cached_columns*/) const +{ + StoragePtr storage; + if (context->getClientInfo().query_kind == ClientInfo::QueryKind::SECONDARY_QUERY) + { + /// On worker node this filename won't contains globs + Poco::URI uri (filename); + S3::URI s3_uri (uri); + /// Actually this parameters are not used + UInt64 min_upload_part_size = context->getSettingsRef().s3_min_upload_part_size; + UInt64 max_single_part_upload_size = context->getSettingsRef().s3_max_single_part_upload_size; + 
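
Both parseArguments() rewrites above replace an if/else ladder with a lookup table from argument count to named positions, which keeps the optional access_key_id/secret_access_key and compression_method combinations in one place. A hedged Python sketch of the same table-driven parsing for the s3Cluster signature follows; the sample query and URL at the end are hypothetical, with made-up endpoint and column list.

# Table-driven parsing of s3Cluster(...) arguments, mirroring the size -> indexes map above.
# The query string at the bottom is only an example; endpoint and credentials are invented.

SIZE_TO_ARGS = {
    4: {"format": 2, "structure": 3},
    5: {"format": 2, "structure": 3, "compression_method": 4},
    6: {"access_key_id": 2, "secret_access_key": 3, "format": 4, "structure": 5},
    7: {"access_key_id": 2, "secret_access_key": 3, "format": 4, "structure": 5, "compression_method": 6},
}

def parse_s3_cluster_args(args: list) -> dict:
    if not 4 <= len(args) <= 7:
        raise ValueError("s3Cluster requires 4 to 7 arguments")
    parsed = {"cluster_name": args[0], "filename": args[1], "compression_method": "auto"}
    for name, idx in SIZE_TO_ARGS[len(args)].items():
        parsed[name] = args[idx]
    return parsed

print(parse_s3_cluster_args([
    "cluster_simple",
    "https://example-bucket.s3.example.com/data/*.csv",   # hypothetical URL
    "CSV",
    "name String, value UInt32",
]))
# A matching (hypothetical) query would look like:
#   SELECT * FROM s3Cluster('cluster_simple',
#       'https://example-bucket.s3.example.com/data/*.csv', 'CSV', 'name String, value UInt32')
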
UInt64 max_connections = context->getSettingsRef().s3_max_connections; + storage = StorageS3::create( + s3_uri, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name), + format, min_upload_part_size, max_single_part_upload_size, max_connections, + getActualTableStructure(context), ConstraintsDescription{}, + context, compression_method, /*distributed_processing=*/true); + } + else + { + storage = StorageS3Cluster::create( + filename, access_key_id, secret_access_key, StorageID(getDatabaseName(), table_name), + cluster_name, format, context->getSettingsRef().s3_max_connections, + getActualTableStructure(context), ConstraintsDescription{}, + context, compression_method); + } + + storage->startup(); + + return storage; +} + + +void registerTableFunctionS3Cluster(TableFunctionFactory & factory) +{ + factory.registerFunction(); +} + + +} + +#endif diff --git a/src/TableFunctions/TableFunctionS3Cluster.h b/src/TableFunctions/TableFunctionS3Cluster.h new file mode 100644 index 00000000000..cc857725ce6 --- /dev/null +++ b/src/TableFunctions/TableFunctionS3Cluster.h @@ -0,0 +1,56 @@ +#pragma once + +#include + +#if USE_AWS_S3 + +#include + + +namespace DB +{ + +class Context; + +/** + * s3Cluster(cluster_name, source, [access_key_id, secret_access_key,] format, structure) + * A table function, which allows to process many files from S3 on a specific cluster + * On initiator it creates a connection to _all_ nodes in cluster, discloses asterics + * in S3 file path and dispatch each file dynamically. + * On worker node it asks initiator about next task to process, processes it. + * This is repeated until the tasks are finished. + */ +class TableFunctionS3Cluster : public ITableFunction +{ +public: + static constexpr auto name = "s3Cluster"; + std::string getName() const override + { + return name; + } + bool hasStaticStructure() const override { return true; } + +protected: + StoragePtr executeImpl( + const ASTPtr & ast_function, + ContextPtr context, + const std::string & table_name, + ColumnsDescription cached_columns) const override; + + const char * getStorageTypeName() const override { return "S3Cluster"; } + + ColumnsDescription getActualTableStructure(ContextPtr) const override; + void parseArguments(const ASTPtr &, ContextPtr) override; + + String cluster_name; + String filename; + String format; + String structure; + String access_key_id; + String secret_access_key; + String compression_method = "auto"; +}; + +} + +#endif diff --git a/src/TableFunctions/registerTableFunctions.cpp b/src/TableFunctions/registerTableFunctions.cpp index 2e55c16d815..6cf40c4f090 100644 --- a/src/TableFunctions/registerTableFunctions.cpp +++ b/src/TableFunctions/registerTableFunctions.cpp @@ -21,6 +21,7 @@ void registerTableFunctions() #if USE_AWS_S3 registerTableFunctionS3(factory); + registerTableFunctionS3Cluster(factory); registerTableFunctionCOS(factory); #endif diff --git a/src/TableFunctions/registerTableFunctions.h b/src/TableFunctions/registerTableFunctions.h index 2654ab2afc2..c49fafc5f86 100644 --- a/src/TableFunctions/registerTableFunctions.h +++ b/src/TableFunctions/registerTableFunctions.h @@ -21,6 +21,7 @@ void registerTableFunctionGenerate(TableFunctionFactory & factory); #if USE_AWS_S3 void registerTableFunctionS3(TableFunctionFactory & factory); +void registerTableFunctionS3Cluster(TableFunctionFactory & factory); void registerTableFunctionCOS(TableFunctionFactory & factory); #endif diff --git a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py 
b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py index 1675b72e0c4..38574a81d0a 100644 --- a/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py +++ b/tests/integration/test_materialize_mysql_database/materialize_with_ddl.py @@ -117,6 +117,45 @@ def dml_with_materialize_mysql_database(clickhouse_node, mysql_node, service_nam mysql_node.query("DROP DATABASE test_database") +def materialize_mysql_database_with_views(clickhouse_node, mysql_node, service_name): + mysql_node.query("DROP DATABASE IF EXISTS test_database") + clickhouse_node.query("DROP DATABASE IF EXISTS test_database") + mysql_node.query("CREATE DATABASE test_database DEFAULT CHARACTER SET 'utf8'") + # existed before the mapping was created + + mysql_node.query("CREATE TABLE test_database.test_table_1 (" + "`key` INT NOT NULL PRIMARY KEY, " + "unsigned_tiny_int TINYINT UNSIGNED, tiny_int TINYINT, " + "unsigned_small_int SMALLINT UNSIGNED, small_int SMALLINT, " + "unsigned_medium_int MEDIUMINT UNSIGNED, medium_int MEDIUMINT, " + "unsigned_int INT UNSIGNED, _int INT, " + "unsigned_integer INTEGER UNSIGNED, _integer INTEGER, " + "unsigned_bigint BIGINT UNSIGNED, _bigint BIGINT, " + "/* Need ClickHouse support read mysql decimal unsigned_decimal DECIMAL(19, 10) UNSIGNED, _decimal DECIMAL(19, 10), */" + "unsigned_float FLOAT UNSIGNED, _float FLOAT, " + "unsigned_double DOUBLE UNSIGNED, _double DOUBLE, " + "_varchar VARCHAR(10), _char CHAR(10), binary_col BINARY(8), " + "/* Need ClickHouse support Enum('a', 'b', 'v') _enum ENUM('a', 'b', 'c'), */" + "_date Date, _datetime DateTime, _timestamp TIMESTAMP, _bool BOOLEAN) ENGINE = InnoDB;") + + mysql_node.query("CREATE VIEW test_database.test_table_1_view AS SELECT SUM(tiny_int) FROM test_database.test_table_1 GROUP BY _date;") + + # it already has some data + mysql_node.query(""" + INSERT INTO test_database.test_table_1 VALUES(1, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 3.2, -3.2, 3.4, -3.4, 'varchar', 'char', 'binary', + '2020-01-01', '2020-01-01 00:00:00', '2020-01-01 00:00:00', true); + """) + clickhouse_node.query( + "CREATE DATABASE test_database ENGINE = MaterializeMySQL('{}:3306', 'test_database', 'root', 'clickhouse')".format( + service_name)) + + assert "test_database" in clickhouse_node.query("SHOW DATABASES") + check_query(clickhouse_node, "SHOW TABLES FROM test_database FORMAT TSV", "test_table_1\n") + + clickhouse_node.query("DROP DATABASE test_database") + mysql_node.query("DROP DATABASE test_database") + + def materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, mysql_node, service_name): mysql_node.query("DROP DATABASE IF EXISTS test_database") clickhouse_node.query("DROP DATABASE IF EXISTS test_database") diff --git a/tests/integration/test_materialize_mysql_database/test.py b/tests/integration/test_materialize_mysql_database/test.py index 730305a6f16..3c41c0a2177 100644 --- a/tests/integration/test_materialize_mysql_database/test.py +++ b/tests/integration/test_materialize_mysql_database/test.py @@ -150,12 +150,14 @@ def started_mysql_8_0(): @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic]) def test_materialize_database_dml_with_mysql_5_7(started_cluster, started_mysql_5_7, clickhouse_node): materialize_with_ddl.dml_with_materialize_mysql_database(clickhouse_node, started_mysql_5_7, "mysql1") + materialize_with_ddl.materialize_mysql_database_with_views(clickhouse_node, started_mysql_5_7, "mysql1") 
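The new `materialize_mysql_database_with_views` check registered above verifies that a MySQL view defined over a replicated table is simply skipped by `MaterializeMySQL`: only the base table appears on the ClickHouse side. A condensed SQL sketch of what the Python helper drives; the `mysql1:3306` address and `root`/`clickhouse` credentials are the ones used by this integration test.

```sql
-- On the MySQL side the test creates test_database.test_table_1 and a view:
--   CREATE VIEW test_database.test_table_1_view AS
--       SELECT SUM(tiny_int) FROM test_database.test_table_1 GROUP BY _date;

-- On the ClickHouse side the database is then mapped:
CREATE DATABASE test_database
ENGINE = MaterializeMySQL('mysql1:3306', 'test_database', 'root', 'clickhouse');

-- The view is not replicated, so only the base table is expected here:
SHOW TABLES FROM test_database;  -- test_table_1
```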
materialize_with_ddl.materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, started_mysql_5_7, "mysql1") @pytest.mark.parametrize(('clickhouse_node'), [node_db_ordinary, node_db_atomic]) def test_materialize_database_dml_with_mysql_8_0(started_cluster, started_mysql_8_0, clickhouse_node): materialize_with_ddl.dml_with_materialize_mysql_database(clickhouse_node, started_mysql_8_0, "mysql8_0") + materialize_with_ddl.materialize_mysql_database_with_views(clickhouse_node, started_mysql_8_0, "mysql8_0") materialize_with_ddl.materialize_mysql_database_with_datetime_and_decimal(clickhouse_node, started_mysql_8_0, "mysql8_0") diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference b/tests/integration/test_s3_cluster/__init__.py similarity index 100% rename from tests/queries/0_stateless/01676_clickhouse_client_autocomplete.reference rename to tests/integration/test_s3_cluster/__init__.py diff --git a/tests/integration/test_s3_cluster/configs/cluster.xml b/tests/integration/test_s3_cluster/configs/cluster.xml new file mode 100644 index 00000000000..8334ace15eb --- /dev/null +++ b/tests/integration/test_s3_cluster/configs/cluster.xml @@ -0,0 +1,24 @@ + + + + + + + s0_0_0 + 9000 + + + s0_0_1 + 9000 + + + + + s0_1_0 + 9000 + + + + + + \ No newline at end of file diff --git a/tests/integration/test_s3_cluster/data/clickhouse/part1.csv b/tests/integration/test_s3_cluster/data/clickhouse/part1.csv new file mode 100644 index 00000000000..a44d3ca1ffb --- /dev/null +++ b/tests/integration/test_s3_cluster/data/clickhouse/part1.csv @@ -0,0 +1,10 @@ +"fSRH",976027584,"[[(-1.5346513608456012e-204,-2.867937504545497e266),(3.1627675144114637e-231,-2.20343471241604e-54),(-1.866886218651809e-89,-7.695893036366416e100),(8.196307577166986e-169,-8.203793887684096e-263),(-1.6150328830402252e-215,8.531116551449711e-296),(4.3378407855931477e92,1.1313645428723989e117),(-4.238081208165573e137,-8.969951719788361e67)],[(-3.409639554701108e169,-7.277093176871153e-254),(1.1466207153308928e-226,3.429893348531029e96),(6.451302850199177e-189,-7.52379443153242e125),(-1.7132078539493614e-127,-2.3177814806867505e241),(1.4996520594989919e-257,4.271017883966942e128)],[(65460976657479156000,1.7055814145588595e253),(-1.921491101580189e154,3.2912740465446566e-286),(0.0008437955075350972,-5.143493717005472e-107),(8.637208599142187e-150,7.825076274945548e136),(1.8077733932468565e-159,5.51061479974026e-77),(1.300406236793709e-260,10669142.497111017),(-1.731981751951159e91,-1.270795062098902e102)],[(3.336706342781395e-7,-1.1919528866481513e266)]]" +"sX6>",733011552,"[[(-3.737863336077909e-44,3.066510481088993e-161),(-1.0047259170558555e-31,8.066145272086467e-274)],[(1.2261835328136691e-58,-6.154561379350395e258),(8.26019994651558e35,-6.736984599062694e-19),(-1.4143671344485664e-238,-1.220003479858045e203),(2.466089772925698e-207,1.0025476904532926e-242),(-6.3786667153054354e240,-7.010834902137467e-103),(-6.766918514324285e-263,7.404639608483947e188),(2.753493977757937e126,-4.089565842001999e-152)],[(4.339873790493155e239,-5.022554811588342e24),(-1.7712390083519473e-66,1.3290563068463308e112),(3.3648764781548893e233,1.1123394188044336e112),(-5.415278137684864e195,5.590597851016202e-270),(-2.1032310903543943e99,-2.2335799924679948e-184)]]" 
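The CSV fixtures introduced here and continued below all share the three-column layout used by the `test_s3_cluster` integration test further down: `name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))`. A quick way to spot-check a single uploaded fixture is to read it with the plain `s3` table function; a minimal sketch, assuming the MinIO endpoint, `root` bucket and `minio`/`minio123` credentials from `test_s3_cluster/test.py`.

```sql
-- Hypothetical spot-check of one fixture file; the endpoint, bucket and
-- credentials below are the ones used by tests/integration/test_s3_cluster/test.py.
SELECT name, value, length(polygon) AS outer_rings
FROM s3(
    'http://minio1:9001/root/data/clickhouse/part1.csv',
    'minio', 'minio123', 'CSV',
    'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')
LIMIT 5;
```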
+"",2396526460,"[[(1.9925796792641788e-261,1.647618305107044e158),(3.014593666207223e-222,-9.016473078578002e-20),(-1.5307802021477097e-230,-7.867078587209265e-243),(-7.330317098800564e295,1.7496539408601967e-281)],[(2.2816938730052074e98,-3.3089122320442997e-136),(-4.930983789361344e-263,-6.526758521792829e59),(-2.6482873886835413e34,-4.1985691142515947e83),(1.5496810029349365e238,-4.790553105593492e71),(-7.597436233325566e83,-1.3791763752378415e137),(-1.917321980700588e-307,-1.5913257477581824e62)]]" +"=@ep",3618088392,"[[(-2.2303235811290024e-306,8.64070367587338e-13),(-7.403012423264767e-129,-1.0825508572345856e-147),(-3.6080301450167e286,1.7302718548299961e285),(-1.3839239794870825e-156,4.255424291564323e107),(2.3191305762555e-33,-2.873899421579949e-145),(7.237414513124649e-159,-4.926574547865783e178),(4.251831312243431e-199,1.2164714479391436e201)],[(-5.114074387943793e242,2.0119340496886292e295),(-3.3663670765548e-262,-6.1992631068472835e221),(1.1539386993255106e-261,1.582903697171063e-33),(-6.1914577817088e118,-1.0401495621681123e145)],[],[(-5.9815907467493136e82,4.369047439375412e219),(-4.485368440431237e89,-3.633023372434946e-59),(-2.087497331251707e-180,1.0524018118646965e257)],[(-1.2636503461000215e-228,-4.8426877075223456e204),(2.74943107551342e281,-7.453097760262003e-14)]]" +"",3467776823,"[]" +"b'zQ",484159052,"[[(3.041838095219909e276,-6.956822159518612e-87)],[(6.636906358770296e-97,1.0531865724169307e-214)],[(-8.429249069245283e-243,-2.134779842898037e243)],[(-0.4657586598569572,2.799768548127799e187),(-5.961335445789657e-129,2.560331789344886e293),(-3.139409694983184e45,2.8011384557268085e-47)]]" +"6xGw",4126129912,"[]" +"Q",3109335413,"[[(-2.8435266267772945e39,9.548278488724291e26),(-1.1682790407223344e46,-3.925561182768867e-266),(2.8381633655721614e-202,-3.472921303086527e40),(3.3968328275944204e-150,-2.2188876184777275e-69),(-1.2612795000783405e-88,-1.2942793285205966e-49),(1.3678466236967012e179,1.721664680964459e97),(-1.1020844667744628e198,-3.403142062758506e-47)],[],[(1.343149099058239e-279,9.397894929770352e-132),(-5.280854317597215e250,9.862550191577643e-292),(-7.11468799151533e-58,7.510011657942604e96),(1.183774454157175e-288,-1.5697197095936546e272),(-3.727289017361602e120,2.422831380775067e-107),(1.4345094301262986e-177,2.4990983297605437e-91)],[(9.195226893854516e169,6.546374357272709e-236),(2.320311199531441e-126,2.2257031285964243e-185),(3.351868475505779e-184,1.84394695526876e88)],[(1.6290814396647987e-112,-3.589542711073253e38),(4.0060174859833907e-261,-1.9900431208726192e-296),(2.047468933030435e56,8.483912759156179e-57),(3.1165727272872075e191,-1.5487136748040008e-156),(0.43564020198461034,4.618165048931035e-244),(-7.674951896752824e-214,1.1652522629091777e-105),(4.838653901829244e-89,5.3085904574780206e169)],[(1.8286703553352283e-246,2.0403170465657044e255),(2.040810692623279e267,4.3956975402250484e-8),(2.4101343663018673e131,-8.672394158504762e167),(3.092080945239809e-219,-3.775474693770226e293),(-1.527991241079512e-15,-1.2603969180963007e226),(9.17470637459212e-56,1.6021090930395906e-133),(7.877647227721046e58,3.2592118033868903e-108)],[(1.4334765313272463e170,2.6971234798957105e-50)]]" +"^ip",1015254922,"[[(-2.227414144223298e-63,1.2391785738638914e276),(1.2668491759136862e207,2.5656762953078853e-67),(2.385410876813441e-268,1.451107969531624e25),(-5.475956161647574e131,2239495689376746),(1.5591286361054593e180,3.672868971445151e117)]]" 
+"5N]",1720727300,"[[(-2.0670321228319122e-258,-2.6893477429616666e-32),(-2.2424105705209414e225,3.547832127050775e25),(4.452916756606404e-121,-3.71114618421911e156),(-1.966961937965055e-110,3.1217044497868816e227),(20636923519704216,1.3500210618276638e30),(3.3195926701816527e-276,1.5557140338374535e234)],[]]" diff --git a/tests/integration/test_s3_cluster/data/clickhouse/part123.csv b/tests/integration/test_s3_cluster/data/clickhouse/part123.csv new file mode 100644 index 00000000000..1ca3353b741 --- /dev/null +++ b/tests/integration/test_s3_cluster/data/clickhouse/part123.csv @@ -0,0 +1,3 @@ +"b'zQ",2960084897,"[[(3.014593666207223e-222,-7.277093176871153e-254),(-1.5307802021477097e-230,3.429893348531029e96),(-7.330317098800564e295,-7.52379443153242e125),(2.2816938730052074e98,-2.3177814806867505e241),(-4.930983789361344e-263,4.271017883966942e128)],[(-2.6482873886835413e34,1.7055814145588595e253),(1.5496810029349365e238,3.2912740465446566e-286),(-7.597436233325566e83,-5.143493717005472e-107),(-1.917321980700588e-307,7.825076274945548e136)],[(-2.2303235811290024e-306,5.51061479974026e-77),(-7.403012423264767e-129,10669142.497111017),(-3.6080301450167e286,-1.270795062098902e102),(-1.3839239794870825e-156,-1.1919528866481513e266),(2.3191305762555e-33,3.066510481088993e-161),(7.237414513124649e-159,8.066145272086467e-274)],[(4.251831312243431e-199,-6.154561379350395e258),(-5.114074387943793e242,-6.736984599062694e-19),(-3.3663670765548e-262,-1.220003479858045e203),(1.1539386993255106e-261,1.0025476904532926e-242),(-6.1914577817088e118,-7.010834902137467e-103),(-5.9815907467493136e82,7.404639608483947e188),(-4.485368440431237e89,-4.089565842001999e-152)]]" +"6xGw",2107128550,"[[(-2.087497331251707e-180,-5.022554811588342e24),(-1.2636503461000215e-228,1.3290563068463308e112),(2.74943107551342e281,1.1123394188044336e112),(3.041838095219909e276,5.590597851016202e-270)],[],[(6.636906358770296e-97,-2.2335799924679948e-184),(-8.429249069245283e-243,1.647618305107044e158),(-0.4657586598569572,-9.016473078578002e-20)]]" +"Q",2713167232,"[[(-5.961335445789657e-129,-7.867078587209265e-243),(-3.139409694983184e45,1.7496539408601967e-281)],[(-2.8435266267772945e39,-3.3089122320442997e-136)]]" diff --git a/tests/integration/test_s3_cluster/data/database/part2.csv b/tests/integration/test_s3_cluster/data/database/part2.csv new file mode 100644 index 00000000000..572676e47c6 --- /dev/null +++ b/tests/integration/test_s3_cluster/data/database/part2.csv @@ -0,0 +1,5 @@ +"~m`",820408404,"[]" +"~E",3621610983,"[[(1.183772215004139e-238,-1.282774073199881e211),(1.6787305112393978e-46,7.500499989257719e25),(-2.458759475104641e-260,3.1724599388651864e-171),(-2.0163203163062471e118,-4.677226438945462e-162),(-5.52491070012707e-135,7.051780441780731e-236)]]" 
+"~1",1715555780,"[[(-6.847404226505131e-267,5.939552045362479e-272),(8.02275075985457e-160,8.369250185716419e-104),(-1.193940928527857e-258,-1.132580458849774e39)],[(1.1866087552639048e253,3.104988412734545e57),(-3.37278669639914e84,-2.387628643569968e287),(-2.452136349495753e73,3.194309776006896e-204),(-1001997440265471100,3.482122851077378e-182)],[],[(-5.754682082202988e-20,6.598766936241908e156)],[(8.386764833095757e300,1.2049637765877942e229),(3.136243074210055e53,5.764669663844127e-100),(-4.190632347661851e195,-5.053553379163823e302),(2.0805194731736336e-19,-1.0849036699112485e-271),(1.1292361211411365e227,-8.767824448179629e229),(-3.6938137156625264e-19,-5.387931698392423e109),(-1.2240482125885677e189,-1.5631467861525635e-103)],[(-2.3917431782202442e138,7.817228281030191e-242),(-1.1462343232899826e279,-1.971215065504208e-225),(5.4316119855340265e-62,3.761081156597423e-60),(8.111852137718836e306,8.115485489580134e-208)],[]]" +"~%]",1606443384,"[[]]" +"}or",726681547,"[]" \ No newline at end of file diff --git a/tests/integration/test_s3_cluster/data/database/partition675.csv b/tests/integration/test_s3_cluster/data/database/partition675.csv new file mode 100644 index 00000000000..e8496680368 --- /dev/null +++ b/tests/integration/test_s3_cluster/data/database/partition675.csv @@ -0,0 +1,7 @@ +"kvUES",4281162618,"[[(2.4538308454074088e303,1.2209370543175666e178),(1.4564007891121754e-186,2.340773478952682e-273),(-1.01791181533976e165,-3.9617466227377253e248)]]" +"Gu",4280623186,"[[(-1.623487579335014e38,-1.0633405021023563e225),(-4.373688812751571e180,2.5511550357717127e138)]]" +"J_u1",4277430503,"[[(2.981826196369429e-294,-6.059236590410922e236),(8.502045137575854e-296,3.0210403188125657e-91),(-9.370591842861745e175,4.150870185764185e129),(1.011801592194125e275,-9.236010982686472e266),(-3.1830638196303316e277,2.417706446545472e-105),(-1.4369143023804266e-201,4.7529126795899655e238)],[(-2.118789593804697e186,-1.8760231612433755e-280),(2.5982563179976053e200,-1.4683025762313524e-40)],[(-1.873397623255704e-240,1.4363190147949886e-283),(-1.5760337746177136e153,1.5272278536086246e-34),(-8.117473317695919e155,2.4375370926733504e150),(-1.179230972881795e99,1.7693459774706515e-259),(2.2102106250558424e-40,4.734162675762768e-56),(6.058833110550111e-8,8.892471775821198e164),(-1.8208740799996599e59,6.446958261080721e178)]]" 
+"s:\",4265055390,"[[(-3.291651377214531e-167,3.9198636942402856e185),(2.4897781692770126e176,2.579309759138358e188),(4.653945381397663e205,3.216314556208208e158),(-5.3373279440714224e-39,2.404386813826413e212),(-1.4217294382527138e307,8.874978978402512e-173)],[(8.527603121149904e-58,-5.0520795335878225e88),(-0.00022870878520550814,-3.2334214176860943e-68),(-6.97683613433404e304,-2.1573757788072144e-82),(-1.1394163455875937e36,-3.817990182461824e271),(2.4099027412881423e-209,8.542179392011098e-156),(3.2610511540394803e174,1.1692631657517616e-20)],[(3.625474290538107e261,-5.359205062039837e-193),(-3.574126569378072e-112,-5.421804160994412e265),(-4.873653931207849e-76,3219678918284.317),(-7.030770825898911e-57,1.4647389742249787e-274),(-4.4882439220492357e-203,6.569338333730439e-38)],[(-2.2418056002374865e-136,5.113251922954469e-16),(2.5156744571032497e297,-3.0536957683846124e-192)],[(1.861112291954516e306,-1.8160882143331256e129),(1.982573454900027e290,-2.451412311394593e170)],[(-2.8292230178712157e-18,1.2570198161962067e216),(6.24832495972797e-164,-2.0770908334330718e-273)],[(980143647.1858811,1.2738714961511727e106),(6.516450532397311e-184,4.088688742052062e31),(-2.246311532913914e269,-7.418103885850518e-179),(1.2222973942835046e-289,2.750544834553288e-46),(9.503169349701076e159,-1.355457053256579e215)]]" +":hzO",4263726959,"[[(-2.553206398375626e-90,1.6536977728640226e199),(1.5630078027143848e-36,2.805242683101373e-211),(2.2573933085983554e-92,3.450501333524858e292),(-1.215900901292646e-275,-3.860558658606121e272),(6.65716072773856e-145,2.5359010031217893e217)],[(-1.3308039625779135e308,1.7464622720773261e258),(-3.2986890093446374e179,3.9038871583175653e-69),(-4.3594764087383885e-95,4.229921973278908e-123),(-5.455694205415656e137,3.597894902167716e108),(1.2480860990110662e-29,-1.4873488392480292e-185),(7.563210285835444e55,-5624068447.488605)],[(3.9517937289943195e181,-3.2799189227094424e-68),(8.906762198487649e-167,3.952452177941537e-159)]]" +"a",4258301804,"[[(5.827965576703262e-281,2.2523852665173977e90)],[(-6.837604072282348e-97,8.125864241406046e-61)],[(-2.3047912084435663e53,-8.814499720685194e36),(1.2072558137199047e-79,1.2096862541827071e142),(2.2000026293774143e275,-3.2571689055108606e-199),(1.1822278574921316e134,2.9571188365006754e-86),(1.0448954272555034e-169,1.2182183489600953e-60)],[(-3.1366540817730525e89,9.327128058982966e-306),(6.588968210928936e73,-11533531378.938957),(-2.6715943555840563e44,-4.557428011172859e224),(-3.8334913754415923e285,-4.748721454106074e-173),(-1.6912052107425128e275,-4.789382438422238e-219),(1.8538365229016863e151,-3.5698172075468775e-37)],[(-2.1963131282037294e49,-5.53604352524995e-296)],[(-8.834414834987965e167,1.3186354307320576e247),(2.109209547987338e298,1.2191009105107557e-32),(-3.896880410603213e-92,-3.4589588698231044e-121),(-3.252529090888335e138,-7.862741341454407e204)],[(-9.673078095447289e-207,8.839303128607278e123),(2.6043620378793597e-244,-6.898328199987363e-308),(-2.5921142292355475e-54,1.0352159149517285e-143)]]" 
+"S+",4257734123,"[[(1.5714269203495863e245,-15651321.549208183),(-3.7292056272445236e-254,-4.556927533596056e-234),(-3.0309414401442555e-203,-3.84393827531526e-12)],[(1.7718777510571518e219,3.972086323144777e139),(1.5723805735454373e-67,-3.805243648123396e226),(154531069271292800000,1.1384408025183933e-285),(-2.009892367470994e-247,2.0325742976832167e81)],[(1.2145787097670788e55,-5.0579298233321666e-30),(5.05577441452021e-182,-2.968914705509665e-175),(-1.702335524921919e67,-2.852552828587631e-226),(-2.7664498327826963e-99,-1.2967072085088717e-305),(7.68881162387673e-68,-1.2506915095983359e-142),(-7.60308693295946e-40,5.414853590549086e218)],[(8.595602987813848e226,-3.9708286611967497e-206),(-5.80352787694746e-52,5.610493934761672e236),(2.1336999375861025e217,-5.431988994371099e-154),(-6.2758614367782974e29,-8.359901046980544e-55)],[(1.6910790690897504e54,9.798739710823911e197),(-6.530270107036228e-284,8.758552462406328e-302),(2.931625032390877e-118,2.8793800873550273e83),(-3.293986884112906e-88,11877326093331202),(0.0008071321465157103,1.0720860516457485e-298)]]" diff --git a/tests/integration/test_s3_cluster/test.py b/tests/integration/test_s3_cluster/test.py new file mode 100644 index 00000000000..f60e6e6862f --- /dev/null +++ b/tests/integration/test_s3_cluster/test.py @@ -0,0 +1,129 @@ +import logging +import os + +import pytest +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV + +logging.getLogger().setLevel(logging.INFO) +logging.getLogger().addHandler(logging.StreamHandler()) + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) +S3_DATA = ['data/clickhouse/part1.csv', 'data/clickhouse/part123.csv', 'data/database/part2.csv', 'data/database/partition675.csv'] + +def create_buckets_s3(cluster): + minio = cluster.minio_client + for file in S3_DATA: + minio.fput_object(bucket_name=cluster.minio_bucket, object_name=file, file_path=os.path.join(SCRIPT_DIR, file)) + for obj in minio.list_objects(cluster.minio_bucket, recursive=True): + print(obj.object_name) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster = ClickHouseCluster(__file__) + cluster.add_instance('s0_0_0', main_configs=["configs/cluster.xml"], with_minio=True) + cluster.add_instance('s0_0_1', main_configs=["configs/cluster.xml"]) + cluster.add_instance('s0_1_0', main_configs=["configs/cluster.xml"]) + + logging.info("Starting cluster...") + cluster.start() + logging.info("Cluster started") + + create_buckets_s3(cluster) + + yield cluster + finally: + cluster.shutdown() + + +def test_select_all(started_cluster): + node = started_cluster.instances['s0_0_0'] + pure_s3 = node.query(""" + SELECT * from s3( + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + ORDER BY (name, value, polygon)""") + # print(pure_s3) + s3_distibuted = node.query(""" + SELECT * from s3Cluster( + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') ORDER BY (name, value, polygon)""") + # print(s3_distibuted) + + assert TSV(pure_s3) == TSV(s3_distibuted) + + +def test_count(started_cluster): + node = started_cluster.instances['s0_0_0'] + pure_s3 = node.query(""" + SELECT count(*) from s3( + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, 
Float64)))')""") + # print(pure_s3) + s3_distibuted = node.query(""" + SELECT count(*) from s3Cluster( + 'cluster_simple', 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""") + # print(s3_distibuted) + + assert TSV(pure_s3) == TSV(s3_distibuted) + + +def test_union_all(started_cluster): + node = started_cluster.instances['s0_0_0'] + pure_s3 = node.query(""" + SELECT * FROM + ( + SELECT * from s3( + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + UNION ALL + SELECT * from s3( + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + ) + ORDER BY (name, value, polygon) + """) + # print(pure_s3) + s3_distibuted = node.query(""" + SELECT * FROM + ( + SELECT * from s3Cluster( + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + UNION ALL + SELECT * from s3Cluster( + 'cluster_simple', + 'http://minio1:9001/root/data/{clickhouse,database}/*', 'minio', 'minio123', 'CSV', + 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + ) + ORDER BY (name, value, polygon) + """) + # print(s3_distibuted) + + assert TSV(pure_s3) == TSV(s3_distibuted) + + +def test_wrong_cluster(started_cluster): + node = started_cluster.instances['s0_0_0'] + error = node.query_and_get_error(""" + SELECT count(*) from s3Cluster( + 'non_existent_cluster', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))') + UNION ALL + SELECT count(*) from s3Cluster( + 'non_existent_cluster', + 'http://minio1:9001/root/data/{clickhouse,database}/*', + 'minio', 'minio123', 'CSV', 'name String, value UInt32, polygon Array(Array(Tuple(Float64, Float64)))')""") + + assert "not found" in error \ No newline at end of file diff --git a/tests/queries/0_stateless/00555_hasAll_hasAny.reference b/tests/queries/0_stateless/00555_hasAll_hasAny.reference index b33700bfa02..5608f7b970e 100644 --- a/tests/queries/0_stateless/00555_hasAll_hasAny.reference +++ b/tests/queries/0_stateless/00555_hasAll_hasAny.reference @@ -34,10 +34,6 @@ 1 0 - -0 -0 -0 -0 - 0 1 diff --git a/tests/queries/0_stateless/00555_hasAll_hasAny.sql b/tests/queries/0_stateless/00555_hasAll_hasAny.sql index 9df356dce2e..c8a6c3cecbd 100644 --- a/tests/queries/0_stateless/00555_hasAll_hasAny.sql +++ b/tests/queries/0_stateless/00555_hasAll_hasAny.sql @@ -39,10 +39,10 @@ select hasAny(['a', 'b'], ['a', 'c']); select hasAll(['a', 'b'], ['a', 'c']); select '-'; -select hasAny([1], ['a']); -select hasAll([1], ['a']); -select hasAll([[1, 2], [3, 4]], ['a', 'c']); -select hasAny([[1, 2], [3, 4]], ['a', 'c']); +select hasAny([1], ['a']); -- { serverError 386 } +select hasAll([1], ['a']); -- { serverError 386 } +select hasAll([[1, 2], [3, 4]], ['a', 'c']); -- { serverError 386 } +select hasAny([[1, 2], [3, 4]], ['a', 'c']); -- { serverError 386 } select '-'; select hasAll([[1, 2], [3, 4]], [[1, 2], [3, 5]]); diff --git a/tests/queries/0_stateless/00555_hasSubstr.reference b/tests/queries/0_stateless/00555_hasSubstr.reference index 1051fa28d6c..de97d19c932 100644 --- 
a/tests/queries/0_stateless/00555_hasSubstr.reference +++ b/tests/queries/0_stateless/00555_hasSubstr.reference @@ -20,8 +20,6 @@ 0 1 - -0 -0 1 1 0 diff --git a/tests/queries/0_stateless/00555_hasSubstr.sql b/tests/queries/0_stateless/00555_hasSubstr.sql index 04c70e4a43b..5f90a69c546 100644 --- a/tests/queries/0_stateless/00555_hasSubstr.sql +++ b/tests/queries/0_stateless/00555_hasSubstr.sql @@ -25,8 +25,8 @@ select hasSubstr(['a', 'b'], ['a', 'c']); select hasSubstr(['a', 'c', 'b'], ['a', 'c']); select '-'; -select hasSubstr([1], ['a']); -select hasSubstr([[1, 2], [3, 4]], ['a', 'c']); +select hasSubstr([1], ['a']); -- { serverError 386 } +select hasSubstr([[1, 2], [3, 4]], ['a', 'c']); -- { serverError 386 } select hasSubstr([[1, 2], [3, 4], [5, 8]], [[3, 4]]); select hasSubstr([[1, 2], [3, 4], [5, 8]], [[3, 4], [5, 8]]); select hasSubstr([[1, 2], [3, 4], [5, 8]], [[1, 2], [5, 8]]); diff --git a/tests/queries/0_stateless/00700_decimal_complex_types.reference b/tests/queries/0_stateless/00700_decimal_complex_types.reference index e81dd94513f..9c7c6fefefd 100644 --- a/tests/queries/0_stateless/00700_decimal_complex_types.reference +++ b/tests/queries/0_stateless/00700_decimal_complex_types.reference @@ -39,9 +39,33 @@ Tuple(Decimal(9, 1), Decimal(18, 1), Decimal(38, 1)) Decimal(9, 1) Decimal(18, 1 1 0 1 0 1 0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 1 0 2 0 3 0 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 +1 [0.100,0.200,0.300,0.400,0.500,0.600] Array(Decimal(18, 3)) [0.100,0.200,0.300,0.700,0.800,0.900] Array(Decimal(38, 3)) [0.400,0.500,0.600,0.700,0.800,0.900] Array(Decimal(38, 3)) diff --git a/tests/queries/0_stateless/00700_decimal_complex_types.sql b/tests/queries/0_stateless/00700_decimal_complex_types.sql index 2d506b124a2..f4b29e77be9 100644 --- a/tests/queries/0_stateless/00700_decimal_complex_types.sql +++ b/tests/queries/0_stateless/00700_decimal_complex_types.sql @@ -58,35 +58,35 @@ SELECT has(a, toDecimal32(0.1, 3)), has(a, toDecimal32(1.0, 3)) FROM decimal; SELECT has(b, toDecimal64(0.4, 3)), has(b, toDecimal64(1.0, 3)) FROM decimal; SELECT has(c, toDecimal128(0.7, 3)), has(c, toDecimal128(1.0, 3)) FROM decimal; -SELECT has(a, toDecimal32(0.1, 2)) FROM decimal; -- { serverError 43 } -SELECT has(a, toDecimal32(0.1, 4)) FROM decimal; -- { serverError 43 } -SELECT has(a, toDecimal64(0.1, 3)) FROM decimal; -- { serverError 43 } -SELECT has(a, toDecimal128(0.1, 3)) FROM decimal; -- { serverError 43 } -SELECT has(b, toDecimal32(0.4, 3)) FROM decimal; -- { serverError 43 } -SELECT has(b, toDecimal64(0.4, 2)) FROM decimal; -- { serverError 43 } -SELECT has(b, toDecimal64(0.4, 4)) FROM decimal; -- { serverError 43 } -SELECT has(b, toDecimal128(0.4, 3)) FROM decimal; -- { serverError 43 } -SELECT has(c, toDecimal32(0.7, 3)) FROM decimal; -- { serverError 43 } -SELECT has(c, toDecimal64(0.7, 3)) FROM decimal; -- { serverError 43 } -SELECT has(c, toDecimal128(0.7, 2)) FROM decimal; -- { serverError 43 } -SELECT has(c, toDecimal128(0.7, 4)) FROM decimal; -- { serverError 43 } +SELECT has(a, toDecimal32(0.1, 2)) FROM decimal; +SELECT has(a, toDecimal32(0.1, 4)) FROM decimal; +SELECT has(a, toDecimal64(0.1, 3)) FROM decimal; +SELECT has(a, toDecimal128(0.1, 3)) FROM decimal; +SELECT has(b, toDecimal32(0.4, 3)) FROM decimal; +SELECT has(b, toDecimal64(0.4, 2)) FROM decimal; +SELECT has(b, toDecimal64(0.4, 4)) FROM decimal; +SELECT has(b, toDecimal128(0.4, 3)) FROM decimal; +SELECT has(c, toDecimal32(0.7, 3)) FROM decimal; +SELECT has(c, toDecimal64(0.7, 3)) FROM decimal; +SELECT has(c, toDecimal128(0.7, 
2)) FROM decimal; +SELECT has(c, toDecimal128(0.7, 4)) FROM decimal; SELECT indexOf(a, toDecimal32(0.1, 3)), indexOf(a, toDecimal32(1.0, 3)) FROM decimal; SELECT indexOf(b, toDecimal64(0.5, 3)), indexOf(b, toDecimal64(1.0, 3)) FROM decimal; SELECT indexOf(c, toDecimal128(0.9, 3)), indexOf(c, toDecimal128(1.0, 3)) FROM decimal; -SELECT indexOf(a, toDecimal32(0.1, 2)) FROM decimal; -- { serverError 43 } -SELECT indexOf(a, toDecimal32(0.1, 4)) FROM decimal; -- { serverError 43 } -SELECT indexOf(a, toDecimal64(0.1, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(a, toDecimal128(0.1, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(b, toDecimal32(0.4, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(b, toDecimal64(0.4, 2)) FROM decimal; -- { serverError 43 } -SELECT indexOf(b, toDecimal64(0.4, 4)) FROM decimal; -- { serverError 43 } -SELECT indexOf(b, toDecimal128(0.4, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(c, toDecimal32(0.7, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(c, toDecimal64(0.7, 3)) FROM decimal; -- { serverError 43 } -SELECT indexOf(c, toDecimal128(0.7, 2)) FROM decimal; -- { serverError 43 } -SELECT indexOf(c, toDecimal128(0.7, 4)) FROM decimal; -- { serverError 43 } +SELECT indexOf(a, toDecimal32(0.1, 2)) FROM decimal; +SELECT indexOf(a, toDecimal32(0.1, 4)) FROM decimal; +SELECT indexOf(a, toDecimal64(0.1, 3)) FROM decimal; +SELECT indexOf(a, toDecimal128(0.1, 3)) FROM decimal; +SELECT indexOf(b, toDecimal32(0.4, 3)) FROM decimal; +SELECT indexOf(b, toDecimal64(0.4, 2)) FROM decimal; +SELECT indexOf(b, toDecimal64(0.4, 4)) FROM decimal; +SELECT indexOf(b, toDecimal128(0.4, 3)) FROM decimal; +SELECT indexOf(c, toDecimal32(0.7, 3)) FROM decimal; +SELECT indexOf(c, toDecimal64(0.7, 3)) FROM decimal; +SELECT indexOf(c, toDecimal128(0.7, 2)) FROM decimal; +SELECT indexOf(c, toDecimal128(0.7, 4)) FROM decimal; SELECT arrayConcat(a, b) AS x, toTypeName(x) FROM decimal; SELECT arrayConcat(a, c) AS x, toTypeName(x) FROM decimal; diff --git a/tests/queries/0_stateless/00918_has_unsufficient_type_check.reference b/tests/queries/0_stateless/00918_has_unsufficient_type_check.reference index 7938dcdde86..b261da18d51 100644 --- a/tests/queries/0_stateless/00918_has_unsufficient_type_check.reference +++ b/tests/queries/0_stateless/00918_has_unsufficient_type_check.reference @@ -1,3 +1,2 @@ -0 1 0 diff --git a/tests/queries/0_stateless/00918_has_unsufficient_type_check.sql b/tests/queries/0_stateless/00918_has_unsufficient_type_check.sql index f76fd446a8e..c40419e4d56 100644 --- a/tests/queries/0_stateless/00918_has_unsufficient_type_check.sql +++ b/tests/queries/0_stateless/00918_has_unsufficient_type_check.sql @@ -1,3 +1,3 @@ -SELECT hasAny([['Hello, world']], [[[]]]); +SELECT hasAny([['Hello, world']], [[[]]]); -- { serverError 386 } SELECT hasAny([['Hello, world']], [['Hello', 'world'], ['Hello, world']]); SELECT hasAll([['Hello, world']], [['Hello', 'world'], ['Hello, world']]); diff --git a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql index ca393c36617..24283a0e8e3 100644 --- a/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql +++ b/tests/queries/0_stateless/01181_db_atomic_drop_on_cluster.sql @@ -1,5 +1,4 @@ DROP TABLE IF EXISTS test_repl ON CLUSTER test_shard_localhost SYNC; - CREATE TABLE test_repl ON CLUSTER test_shard_localhost (n UInt64) ENGINE ReplicatedMergeTree('/clickhouse/test_01181/{database}/test_repl','r1') ORDER BY tuple(); DETACH 
TABLE test_repl ON CLUSTER test_shard_localhost SYNC; ATTACH TABLE test_repl ON CLUSTER test_shard_localhost; diff --git a/tests/queries/0_stateless/01661_extract_all_groups_throw_fast.sql b/tests/queries/0_stateless/01661_extract_all_groups_throw_fast.sql index 48d3baba0c5..a056d77896c 100644 --- a/tests/queries/0_stateless/01661_extract_all_groups_throw_fast.sql +++ b/tests/queries/0_stateless/01661_extract_all_groups_throw_fast.sql @@ -1 +1,2 @@ SELECT repeat('abcdefghijklmnopqrstuvwxyz', number * 100) AS haystack, extractAllGroupsHorizontal(haystack, '(\\w)') AS matches FROM numbers(1023); -- { serverError 128 } +SELECT count(extractAllGroupsHorizontal(materialize('a'), '(a)')) FROM numbers(1000000) FORMAT Null; -- shouldn't fail diff --git a/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.reference b/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh b/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh similarity index 80% rename from tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh rename to tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh index 08e07044841..1ed5c6be272 100755 --- a/tests/queries/0_stateless/01676_clickhouse_client_autocomplete.sh +++ b/tests/queries/0_stateless/01676_long_clickhouse_client_autocomplete.sh @@ -69,18 +69,6 @@ compwords_positive=( max_concurrent_queries_for_all_users # system.clusters test_shard_localhost - # system.errors, also it is very rare to cover system_events_show_zero_values - CONDITIONAL_TREE_PARENT_NOT_FOUND - # system.events, also it is very rare to cover system_events_show_zero_values - WriteBufferFromFileDescriptorWriteFailed - # system.asynchronous_metrics, also this metric has zero value - # - # NOTE: that there is no ability to complete metrics like - # jemalloc.background_thread.num_runs, due to "." is used as a word breaker - # (and this cannot be changed -- db.table) - ReplicasMaxAbsoluteDelay - # system.metrics - PartsPreCommitted # system.macros default_path_test # system.storage_policies, egh not uniq diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.config.xml b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.config.xml new file mode 100644 index 00000000000..2d0a480a375 --- /dev/null +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.config.xml @@ -0,0 +1,35 @@ + + + + trace + true + + + 9000 + + ./ + + 0 + + + + + + + ::/0 + + + default + default + 1 + + + + + + + + + + + diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.reference b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh new file mode 100755 index 00000000000..a4fd7529ab2 --- /dev/null +++ b/tests/queries/0_stateless/01737_clickhouse_server_wait_server_pool_long.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +server_opts=( + "--config-file=$CUR_DIR/$(basename "${BASH_SOURCE[0]}" .sh).config.xml" + "--" + # to avoid multiple listen sockets (complexity for port discovering) + "--listen_host=127.1" + # we will discover the real port later. + "--tcp_port=0" + "--shutdown_wait_unfinished=0" +) +CLICKHOUSE_WATCHDOG_ENABLE=0 $CLICKHOUSE_SERVER_BINARY "${server_opts[@]}" >& clickhouse-server.log & +server_pid=$! + +trap cleanup EXIT +function cleanup() +{ + kill -9 $server_pid + kill -9 $client_pid + + echo "Test failed. Server log:" + cat clickhouse-server.log + rm -f clickhouse-server.log + + exit 1 +} + +server_port= +i=0 retries=300 +# wait until server will start to listen (max 30 seconds) +while [[ -z $server_port ]] && [[ $i -lt $retries ]]; do + server_port=$(lsof -n -a -P -i tcp -s tcp:LISTEN -p $server_pid 2>/dev/null | awk -F'[ :]' '/LISTEN/ { print $(NF-1) }') + ((++i)) + sleep 0.1 +done +if [[ -z $server_port ]]; then + echo "Cannot wait for LISTEN socket" >&2 + exit 1 +fi + +# wait for the server to start accepting tcp connections (max 30 seconds) +i=0 retries=300 +while ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null -q 'select 1' 2>/dev/null && [[ $i -lt $retries ]]; do + sleep 0.1 +done +if ! $CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" --format Null -q 'select 1'; then + echo "Cannot wait until server will start accepting connections on " >&2 + exit 1 +fi + +query_id="$CLICKHOUSE_DATABASE-$SECONDS" +$CLICKHOUSE_CLIENT_BINARY --query_id "$query_id" --host 127.1 --port "$server_port" --format Null -q 'select sleepEachRow(1) from numbers(10)' 2>/dev/null & +client_pid=$! + +# wait until the query will appear in processlist (max 10 second) +# (it is enough to trigger the problem) +i=0 retries=1000 +while [[ $($CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" -q "select count() from system.processes where query_id = '$query_id'") != "1" ]] && [[ $i -lt $retries ]]; do + sleep 0.01 +done +if [[ $($CLICKHOUSE_CLIENT_BINARY --host 127.1 --port "$server_port" -q "select count() from system.processes where query_id = '$query_id'") != "1" ]]; then + echo "Cannot wait until the query will start" >&2 + exit 1 +fi + +# send TERM and save the error code to ensure that it is 0 (EXIT_SUCCESS) +kill $server_pid +wait $server_pid +return_code=$? + +wait $client_pid + +trap '' EXIT +if [ $return_code != 0 ]; then + cat clickhouse-server.log +fi +rm -f clickhouse-server.log + +exit $return_code diff --git a/tests/queries/0_stateless/01801_s3_cluster.reference b/tests/queries/0_stateless/01801_s3_cluster.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01801_s3_cluster.sh b/tests/queries/0_stateless/01801_s3_cluster.sh new file mode 100755 index 00000000000..215d5500be5 --- /dev/null +++ b/tests/queries/0_stateless/01801_s3_cluster.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash + +# NOTE: this is a partial copy of the 01683_dist_INSERT_block_structure_mismatch, +# but this test also checks the log messages + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + + +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="none" -q "SELECT * FROM s3('https://s3.mds.yandex.net/clickhouse-test-reports/*/*/functional_stateless_tests_(ubsan)/test_results.tsv', '$S3_ACCESS_KEY_ID', '$S3_SECRET_ACCESS', 'LineAsString', 'line String') limit 100 FORMAT Null;" +${CLICKHOUSE_CLIENT_BINARY} --send_logs_level="none" -q "SELECT * FROM s3Cluster('test_cluster_two_shards', 'https://s3.mds.yandex.net/clickhouse-test-reports/*/*/functional_stateless_tests_(ubsan)/test_results.tsv', '$S3_ACCESS_KEY_ID', '$S3_SECRET_ACCESS', 'LineAsString', 'line String') limit 100 FORMAT Null;" diff --git a/tests/queries/0_stateless/01812_basic_auth_http_server.reference b/tests/queries/0_stateless/01812_basic_auth_http_server.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01812_basic_auth_http_server.sh b/tests/queries/0_stateless/01812_basic_auth_http_server.sh new file mode 100755 index 00000000000..4b993137bbd --- /dev/null +++ b/tests/queries/0_stateless/01812_basic_auth_http_server.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash +# shellcheck disable=SC2046 + +# In very old (e.g. 1.1.54385) versions of ClickHouse there was a bug in Poco HTTP library: +# Basic HTTP authentication headers was not parsed if the size of URL is exactly 4077 + something bytes. +# So, the user may get authentication error if valid credentials are passed. +# This is a minor issue because it does not have security implications (at worse the user will be not allowed to access). + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# In this test we do the opposite: passing the invalid credentials while server is accepting default user without a password. +# And if the bug exists, they will be ignored (treat as empty credentials) and query succeed. 
+ +for i in {3950..4100}; do ${CLICKHOUSE_CURL} --user default:12345 "${CLICKHOUSE_URL}&query=SELECT+1"$(perl -e "print '+'x$i") | grep -v -F 'password' ||:; done + +# You can check that the bug exists in old version by running the old server in Docker: +# docker run --network host -it --rm yandex/clickhouse-server:1.1.54385 diff --git a/tests/queries/0_stateless/01812_has_generic.reference b/tests/queries/0_stateless/01812_has_generic.reference new file mode 100644 index 00000000000..e8183f05f5d --- /dev/null +++ b/tests/queries/0_stateless/01812_has_generic.reference @@ -0,0 +1,3 @@ +1 +1 +1 diff --git a/tests/queries/0_stateless/01812_has_generic.sql b/tests/queries/0_stateless/01812_has_generic.sql new file mode 100644 index 00000000000..9ab5b655102 --- /dev/null +++ b/tests/queries/0_stateless/01812_has_generic.sql @@ -0,0 +1,3 @@ +SELECT has([(1, 2), (3, 4)], (toUInt16(3), 4)); +SELECT hasAny([(1, 2), (3, 4)], [(toUInt16(3), 4)]); +SELECT hasAll([(1, 2), (3, 4)], [(toNullable(1), toUInt64(2)), (toUInt16(3), 4)]); diff --git a/tests/queries/0_stateless/arcadia_skip_list.txt b/tests/queries/0_stateless/arcadia_skip_list.txt index c8a0971bb28..0af6f1b72c1 100644 --- a/tests/queries/0_stateless/arcadia_skip_list.txt +++ b/tests/queries/0_stateless/arcadia_skip_list.txt @@ -229,3 +229,4 @@ 01791_dist_INSERT_block_structure_mismatch 01801_distinct_group_by_shard 01804_dictionary_decimal256_type +01801_s3_distributed diff --git a/tests/queries/shell_config.sh b/tests/queries/shell_config.sh index 5b942a95d02..ff8de80c0cc 100644 --- a/tests/queries/shell_config.sh +++ b/tests/queries/shell_config.sh @@ -23,14 +23,21 @@ export CLICKHOUSE_TEST_ZOOKEEPER_PREFIX="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DA [ -v CLICKHOUSE_LOG_COMMENT ] && CLICKHOUSE_BENCHMARK_OPT0+=" --log_comment='${CLICKHOUSE_LOG_COMMENT}' " export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"} +# client [ -x "$CLICKHOUSE_BINARY-client" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} [ -x "$CLICKHOUSE_BINARY" ] && CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY client} export CLICKHOUSE_CLIENT_BINARY=${CLICKHOUSE_CLIENT_BINARY:=$CLICKHOUSE_BINARY-client} export CLICKHOUSE_CLIENT_OPT="${CLICKHOUSE_CLIENT_OPT0:-} ${CLICKHOUSE_CLIENT_OPT:-}" export CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:="$CLICKHOUSE_CLIENT_BINARY ${CLICKHOUSE_CLIENT_OPT:-}"} +# local [ -x "${CLICKHOUSE_BINARY}-local" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"} [ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY} local"} export CLICKHOUSE_LOCAL=${CLICKHOUSE_LOCAL:="${CLICKHOUSE_BINARY}-local"} +# server +[ -x "${CLICKHOUSE_BINARY}-server" ] && CLICKHOUSE_SERVER_BINARY=${CLICKHOUSE_SERVER_BINARY:="${CLICKHOUSE_BINARY}-server"} +[ -x "${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_SERVER_BINARY=${CLICKHOUSE_SERVER_BINARY:="${CLICKHOUSE_BINARY} server"} +export CLICKHOUSE_SERVER_BINARY=${CLICKHOUSE_SERVER_BINARY:="${CLICKHOUSE_BINARY}-server"} +# others export CLICKHOUSE_OBFUSCATOR=${CLICKHOUSE_OBFUSCATOR:="${CLICKHOUSE_BINARY}-obfuscator"} export CLICKHOUSE_COMPRESSOR=${CLICKHOUSE_COMPRESSOR:="${CLICKHOUSE_BINARY}-compressor"} export CLICKHOUSE_BENCHMARK=${CLICKHOUSE_BENCHMARK:="${CLICKHOUSE_BINARY}-benchmark ${CLICKHOUSE_BENCHMARK_OPT0:-}"} diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index d41a41bd524..740a8d33ae7 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -696,6 +696,7 @@ 
"01682_cache_dictionary_complex_key", "01684_ssd_cache_dictionary_simple_key", "01685_ssd_cache_dictionary_complex_key", + "01737_clickhouse_server_wait_server_pool_long", // This test is fully compatible to run in parallel, however under ASAN processes are pretty heavy and may fail under flaky adress check. "01760_system_dictionaries", "01760_polygon_dictionaries", "01778_hierarchical_dictionaries",