Merge branch 'ClickHouse:master' into orc_dict_encode

李扬 2024-08-29 10:16:06 +08:00 committed by GitHub
commit 3d04f3d33a
6 changed files with 89 additions and 64 deletions

View File

@ -30,7 +30,6 @@
* Support more variants of JOIN strictness (`LEFT/RIGHT SEMI/ANTI/ANY JOIN`) with inequality conditions which involve columns from both left and right table. e.g. `t1.y < t2.y` (see the setting `allow_experimental_join_condition`). [#64281](https://github.com/ClickHouse/ClickHouse/pull/64281) ([lgbo](https://github.com/lgbo-ustc)).
* Interpret Hive-style partitioning for different engines (`File`, `URL`, `S3`, `AzureBlobStorage`, `HDFS`). Hive-style partitioning organizes data into partitioned sub-directories, making it efficient to query and manage large datasets. Currently, it only creates virtual columns with the appropriate name and data. The follow-up PR will introduce the appropriate data filtering (performance speedup). [#65997](https://github.com/ClickHouse/ClickHouse/pull/65997) ([Yarik Briukhovetskyi](https://github.com/yariks5s)).
* Add function `printf` for Spark compatibility (but you can use the existing `format` function; a usage sketch follows this list). [#66257](https://github.com/ClickHouse/ClickHouse/pull/66257) ([李扬](https://github.com/taiyang-li)).
* Added a new server setting, `disable_insertion_and_mutation`. If it is enabled, the server will deny all insertions and mutations. This includes asynchronous INSERTs. This setting can be used to create read-only replicas. [#66519](https://github.com/ClickHouse/ClickHouse/pull/66519) ([Xu Jia](https://github.com/XuJia0210)).
* Add options `restore_replace_external_engines_to_null` and `restore_replace_external_table_functions_to_null` to replace external engines and table_engines to `Null` engine that can be useful for testing. It should work for RESTORE and explicit table creation. [#66536](https://github.com/ClickHouse/ClickHouse/pull/66536) ([Ilya Yatsishin](https://github.com/qoega)).
* Added support for reading `MULTILINESTRING` geometry in `WKT` format using function `readWKTLineString`. [#67647](https://github.com/ClickHouse/ClickHouse/pull/67647) ([Jacob Reckhard](https://github.com/jacobrec)).
* Add a new table function `fuzzQuery`. This function allows the modification of a given query string with random variations. Example: `SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;`. [#67655](https://github.com/ClickHouse/ClickHouse/pull/67655) ([pufit](https://github.com/pufit)).
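A rough usage sketch for two of the entries above (`printf` and `fuzzQuery`); the exact `printf` format-specifier behaviour is an assumption based on Spark's `printf`, not something stated in the changelog:

```sql
-- Spark-style printf next to the existing format() function (shown for comparison)
SELECT
    printf('%s has %d stripes', 'orc', 3) AS spark_style,
    format('{} has {} stripes', 'orc', toString(3)) AS clickhouse_style;

-- fuzzQuery: random variations of a given query string (example from the entry above)
SELECT query FROM fuzzQuery('SELECT 1') LIMIT 5;
```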

View File

@ -56,6 +56,14 @@ Other upcoming meetups
* [Toronto Meetup (Shopify)](https://www.meetup.com/clickhouse-toronto-user-group/events/301490855/) - September 10
* [Austin Meetup](https://www.meetup.com/clickhouse-austin-user-group/events/302558689/) - September 17
* [London Meetup](https://www.meetup.com/clickhouse-london-user-group/events/302977267) - September 17
* [Tel Aviv Meetup](https://www.meetup.com/clickhouse-meetup-israel/events/303095121) - September 22
* [Madrid Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096564/) - October 22
* [Barcelona Meetup](https://www.meetup.com/clickhouse-spain-user-group/events/303096876/) - October 29
* [Oslo Meetup](https://www.meetup.com/open-source-real-time-data-warehouse-real-time-analytics/events/302938622) - October 31
* [Ghent Meetup](https://www.meetup.com/clickhouse-belgium-user-group/events/303049405/) - November 19
* [Dubai Meetup](https://www.meetup.com/clickhouse-dubai-meetup-group/events/303096989/) - November 21
## Recent Recordings
* **Recent Meetup Videos**: [Meetup Playlist](https://www.youtube.com/playlist?list=PL0Z2YDlm0b3iNDUzpY1S3L_iV4nARda_U) Whenever possible, recordings of the ClickHouse Community Meetups are edited and presented as individual talks. Currently featuring "Modern SQL in 2023", "Fast, Concurrent, and Consistent Asynchronous INSERTS in ClickHouse", and "Full-Text Indices: Design and Experiments".

View File

@ -49,7 +49,7 @@ Result:
## multiIf
Allows writing the [CASE](../../sql-reference/operators/index.md#operator_case) operator more compactly in the query.
Allows writing the [CASE](../../sql-reference/operators/index.md#conditional-expression) operator more compactly in the query.
**Syntax**
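A minimal sketch of the compact form described above, using only standard functions; the equivalent chained `CASE` expression is covered by the linked operator documentation:

```sql
SELECT
    number,
    multiIf(
        number % 15 = 0, 'fizzbuzz',
        number % 3 = 0,  'fizz',
        number % 5 = 0,  'buzz',
        toString(number)) AS label
FROM numbers(1, 15);
```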
@ -264,4 +264,4 @@ SELECT clamp(1, 2, 3) result, toTypeName(result) type;
┌─result─┬─type────┐
│      2 │ Float64 │
└────────┴─────────┘
```
```

View File

@ -13,6 +13,7 @@
#include <IO/ReadHelpers.h>
#include <Interpreters/Context.h>
#include <Core/Settings.h>
#include <Poco/Environment.h>
#pragma clang diagnostic ignored "-Wreserved-identifier"
@ -371,8 +372,8 @@ try
/// in case of double fault.
LOG_FATAL(log, "########## Short fault info ############");
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH,
LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}, architecture: {}) (from thread {}) Received signal {}",
VERSION_STRING, VERSION_OFFICIAL, daemon ? daemon->build_id : "", GIT_HASH, Poco::Environment::osArchitecture(),
thread_num, sig);
std::string signal_description = "Unknown signal";

View File

@ -1,17 +1,17 @@
1
CREATE DATABASE test_01114_1\nENGINE = Atomic
CREATE DATABASE test_01114_2\nENGINE = Atomic
CREATE DATABASE test_01114_3\nENGINE = Ordinary
test_01114_1 Atomic store 00001114-1000-4000-8000-000000000001 1
test_01114_2 Atomic store 00001114-1000-4000-8000-000000000002 1
test_01114_3 Ordinary test_01114_3 test_01114_3 1
CREATE DATABASE default_1\nENGINE = Atomic
CREATE DATABASE default_2\nENGINE = Atomic
CREATE DATABASE default_3\nENGINE = Ordinary
default_1 Atomic store 00001114-1000-4000-8000-000000000001 1
default_2 Atomic store 00001114-1000-4000-8000-000000000002 1
default_3 Ordinary default_3 default_3 1
110
100
CREATE TABLE test_01114_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
mt 00001114-0000-4000-8000-000000000002 CREATE TABLE test_01114_2.mt (`n` UInt64) ENGINE = MergeTree PARTITION BY n % 5 ORDER BY tuple() SETTINGS index_granularity = 8192
CREATE TABLE default_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
mt 00001114-0000-4000-8000-000000000002 CREATE TABLE default_2.mt (`n` UInt64) ENGINE = MergeTree PARTITION BY n % 5 ORDER BY tuple() SETTINGS index_granularity = 8192
110
CREATE TABLE test_01114_1.mt UUID \'00001114-0000-4000-8000-000000000001\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE test_01114_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default_1.mt UUID \'00001114-0000-4000-8000-000000000001\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
CREATE TABLE default_2.mt UUID \'00001114-0000-4000-8000-000000000002\'\n(\n `n` UInt64\n)\nENGINE = MergeTree\nPARTITION BY n % 5\nORDER BY tuple()\nSETTINGS index_granularity = 8192
5
dropped
110 5995

View File

@ -9,81 +9,98 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
DATABASE_1="${CLICKHOUSE_DATABASE}_1"
DATABASE_2="${CLICKHOUSE_DATABASE}_2"
DATABASE_3="${CLICKHOUSE_DATABASE}_3"
$CLICKHOUSE_CLIENT -nm -q "
DROP DATABASE IF EXISTS test_01114_1;
DROP DATABASE IF EXISTS test_01114_2;
DROP DATABASE IF EXISTS test_01114_3;
"
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE ${DATABASE_1} ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE"
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=0 -q "CREATE DATABASE test_01114_1 ENGINE=Ordinary" 2>&1| grep -Fac "UNKNOWN_DATABASE_ENGINE"
$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${DATABASE_1} ENGINE=Atomic"
$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${DATABASE_2}"
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE ${DATABASE_3} ENGINE=Ordinary"
$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_1 ENGINE=Atomic"
$CLICKHOUSE_CLIENT -q "CREATE DATABASE test_01114_2"
$CLICKHOUSE_CLIENT --allow_deprecated_database_ordinary=1 -q "CREATE DATABASE test_01114_3 ENGINE=Ordinary"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE ${DATABASE_1}"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE ${DATABASE_2}"
$CLICKHOUSE_CLIENT -q "SHOW CREATE DATABASE ${DATABASE_3}"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE test_01114_1"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=0 -q "SHOW CREATE DATABASE test_01114_2"
$CLICKHOUSE_CLIENT -q "SHOW CREATE DATABASE test_01114_3"
uuid_db_1=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='test_01114_1'"`
uuid_db_2=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='test_01114_2'"`
uuid_db_1=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='${DATABASE_1}'"`
uuid_db_2=`$CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.databases WHERE name='${DATABASE_2}'"`
$CLICKHOUSE_CLIENT -q "SELECT name,
engine,
splitByChar('/', data_path)[-2],
splitByChar('/', metadata_path)[-2] as uuid_path, ((splitByChar('/', metadata_path)[-3] as metadata) = substr(uuid_path, 1, 3)) OR metadata='metadata'
FROM system.databases WHERE name LIKE 'test_01114_%'" | sed "s/$uuid_db_1/00001114-1000-4000-8000-000000000001/g" | sed "s/$uuid_db_2/00001114-1000-4000-8000-000000000002/g"
FROM system.databases WHERE name LIKE '${CLICKHOUSE_DATABASE}_%'" | sed "s/$uuid_db_1/00001114-1000-4000-8000-000000000001/g" | sed "s/$uuid_db_2/00001114-1000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -nm -q "
CREATE TABLE test_01114_1.mt_tmp (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
INSERT INTO test_01114_1.mt_tmp SELECT * FROM numbers(100);
CREATE TABLE test_01114_3.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5);
INSERT INTO test_01114_3.mt SELECT * FROM numbers(110);
CREATE TABLE ${DATABASE_1}.mt_tmp (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
INSERT INTO ${DATABASE_1}.mt_tmp SELECT * FROM numbers(100);
CREATE TABLE ${DATABASE_3}.mt (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5);
INSERT INTO ${DATABASE_3}.mt SELECT * FROM numbers(110);
RENAME TABLE test_01114_1.mt_tmp TO test_01114_3.mt_tmp; /* move from Atomic to Ordinary */
RENAME TABLE test_01114_3.mt TO test_01114_1.mt; /* move from Ordinary to Atomic */
SELECT count() FROM test_01114_1.mt;
SELECT count() FROM test_01114_3.mt_tmp;
RENAME TABLE ${DATABASE_1}.mt_tmp TO ${DATABASE_3}.mt_tmp; /* move from Atomic to Ordinary */
RENAME TABLE ${DATABASE_3}.mt TO ${DATABASE_1}.mt; /* move from Ordinary to Atomic */
SELECT count() FROM ${DATABASE_1}.mt;
SELECT count() FROM ${DATABASE_3}.mt_tmp;
DROP DATABASE test_01114_3;
DROP DATABASE ${DATABASE_3};
"
explicit_uuid=$($CLICKHOUSE_CLIENT -q "SELECT generateUUIDv4()")
$CLICKHOUSE_CLIENT -q "CREATE TABLE test_01114_2.mt UUID '$explicit_uuid' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5)"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='test_01114_2'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -q "CREATE TABLE ${DATABASE_2}.mt UUID '$explicit_uuid' (n UInt64) ENGINE=MergeTree() ORDER BY tuple() PARTITION BY (n % 5)"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_2}.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -q "SELECT name, uuid, create_table_query FROM system.tables WHERE database='${DATABASE_2}'" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
RANDOM_COMMENT="$RANDOM"
$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM ${DATABASE_1}.mt) -- ${RANDOM_COMMENT}" & # 33s (1.5s * 22 rows per partition [Using 5 threads in parallel]), result: 110, 5995
$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO ${DATABASE_2}.mt SELECT number + sleepEachRow(1.5) FROM numbers(30) -- ${RANDOM_COMMENT}" & # 45s (1.5s * 30 rows)
$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT count(col), sum(col) FROM (SELECT n + sleepEachRow(1.5) AS col FROM test_01114_1.mt)" & # 33s (1.5s * 22 rows per partition), result: 110, 5995
$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "INSERT INTO test_01114_2.mt SELECT number + sleepEachRow(1.5) FROM numbers(30)" & # 45s (1.5s * 30 rows)
sleep 1 # SELECT and INSERT should start before the following RENAMEs
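# Poll system.processes (up to ~5 seconds) until both tagged background queries are visible,
# so the RENAMEs below run while the SELECT and INSERT are still in flight.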
it=0
while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_COMMENT}%'") -ne 2 ]]; do
    it=$((it+1))
    if [ $it -ge 50 ];
    then
        echo "Failed to wait for first batch of queries"
        $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_COMMENT}%'"
    fi
    sleep 0.1
done
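# With both queries still running, shuffle the tables back and forth between the two
# databases; the in-flight SELECT and INSERT are expected to keep working (checked below).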
$CLICKHOUSE_CLIENT -nm -q "
RENAME TABLE test_01114_1.mt TO test_01114_1.mt_tmp;
RENAME TABLE test_01114_1.mt_tmp TO test_01114_2.mt_tmp;
EXCHANGE TABLES test_01114_2.mt AND test_01114_2.mt_tmp;
RENAME TABLE test_01114_2.mt_tmp TO test_01114_1.mt;
EXCHANGE TABLES test_01114_1.mt AND test_01114_2.mt;
RENAME TABLE ${DATABASE_1}.mt TO ${DATABASE_1}.mt_tmp;
RENAME TABLE ${DATABASE_1}.mt_tmp TO ${DATABASE_2}.mt_tmp;
EXCHANGE TABLES ${DATABASE_2}.mt AND ${DATABASE_2}.mt_tmp;
RENAME TABLE ${DATABASE_2}.mt_tmp TO ${DATABASE_1}.mt;
EXCHANGE TABLES ${DATABASE_1}.mt AND ${DATABASE_2}.mt;
"
# Check that nothing changed
$CLICKHOUSE_CLIENT -q "SELECT count() FROM test_01114_1.mt"
uuid_mt1=$($CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.tables WHERE database='test_01114_1' AND name='mt'")
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_1.mt" | sed "s/$uuid_mt1/00001114-0000-4000-8000-000000000001/g"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE test_01114_2.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -q "SELECT count() FROM ${DATABASE_1}.mt"
uuid_mt1=$($CLICKHOUSE_CLIENT -q "SELECT uuid FROM system.tables WHERE database='${DATABASE_1}' AND name='mt'")
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_1}.mt" | sed "s/$uuid_mt1/00001114-0000-4000-8000-000000000001/g"
$CLICKHOUSE_CLIENT --show_table_uuid_in_table_create_query_if_not_nil=1 -q "SHOW CREATE TABLE ${DATABASE_2}.mt" | sed "s/$explicit_uuid/00001114-0000-4000-8000-000000000002/g"
$CLICKHOUSE_CLIENT -nm -q "
DROP TABLE test_01114_1.mt SETTINGS database_atomic_wait_for_drop_and_detach_synchronously=0;
CREATE TABLE test_01114_1.mt (s String) ENGINE=Log();
INSERT INTO test_01114_1.mt SELECT 's' || toString(number) FROM numbers(5);
SELECT count() FROM test_01114_1.mt
DROP TABLE ${DATABASE_1}.mt SETTINGS database_atomic_wait_for_drop_and_detach_synchronously=0;
CREATE TABLE ${DATABASE_1}.mt (s String) ENGINE=Log();
INSERT INTO ${DATABASE_1}.mt SELECT 's' || toString(number) FROM numbers(5);
SELECT count() FROM ${DATABASE_1}.mt
" # result: 5
$CLICKHOUSE_CLIENT --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM test_01114_1.mt" > /dev/null & # 15s (3s * 5 rows)
sleep 1
$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_1" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped"
RANDOM_TUPLE="${RANDOM}_tuple"
$CLICKHOUSE_CLIENT --max-threads 5 --function_sleep_max_microseconds_per_block 60000000 -q "SELECT tuple(s, sleepEachRow(3)) FROM ${DATABASE_1}.mt -- ${RANDOM_TUPLE}" > /dev/null & # 15s (3s * 5 rows)
it=0
while [[ $($CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_TUPLE}%'") -ne 1 ]]; do
    it=$((it+1))
    if [ $it -ge 50 ];
    then
        echo "Failed to wait for second batch of queries"
        $CLICKHOUSE_CLIENT -q "SELECT count() FROM system.processes WHERE query_id != queryID() AND current_database = currentDatabase() AND query LIKE '%-- ${RANDOM_TUPLE}%'"
    fi
    sleep 0.1
done
$CLICKHOUSE_CLIENT -q "DROP DATABASE ${DATABASE_1}" --database_atomic_wait_for_drop_and_detach_synchronously=0 && echo "dropped"
wait # for INSERT and SELECT
$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM test_01114_2.mt" # result: 30, 435
$CLICKHOUSE_CLIENT -q "DROP DATABASE test_01114_2" --database_atomic_wait_for_drop_and_detach_synchronously=0
$CLICKHOUSE_CLIENT -q "SELECT count(n), sum(n) FROM ${DATABASE_2}.mt" # result: 30, 435
$CLICKHOUSE_CLIENT -q "DROP DATABASE ${DATABASE_2}" --database_atomic_wait_for_drop_and_detach_synchronously=0