From 263be33297a2ada5e5c5281924b56e5ffaa3f80f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 13 Jun 2023 16:37:52 +0200 Subject: [PATCH 1/5] Fix tests for throttling by allowing more margin of error for throttling event Right now 02703_max_local_write_bandwidth is flaky, and the reason, I believe, is that the server spent some time somewhere else, which means that the throttler will sleep less. But what is important here is that the overall query duration time matches the expectation, so it is OK to match the LocalWriteThrottlerSleepMicroseconds/LocalReadThrottlerSleepMicroseconds with some error rate. Signed-off-by: Azat Khuzhin --- tests/queries/0_stateless/02703_max_local_read_bandwidth.sh | 2 +- tests/queries/0_stateless/02703_max_local_write_bandwidth.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh index d47e2f363bd..c78cd202f1b 100755 --- a/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_read_bandwidth.sh @@ -32,7 +32,7 @@ for read_method in "${read_methods[@]}"; do query_duration_ms >= 7e3, ProfileEvents['ReadBufferFromFileDescriptorReadBytes'] > 8e6, ProfileEvents['LocalReadThrottlerBytes'] > 8e6, - ProfileEvents['LocalReadThrottlerSleepMicroseconds'] > 7e6*0.9 + ProfileEvents['LocalReadThrottlerSleepMicroseconds'] > 7e6*0.5 FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart' " diff --git a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh index 41165d35d37..ccde0903278 100755 --- a/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh +++ b/tests/queries/0_stateless/02703_max_local_write_bandwidth.sh @@ -19,7 +19,7 @@ $CLICKHOUSE_CLIENT -nm -q " query_duration_ms >= 7e3, 
ProfileEvents['WriteBufferFromFileDescriptorWriteBytes'] > 8e6, ProfileEvents['LocalWriteThrottlerBytes'] > 8e6, - ProfileEvents['LocalWriteThrottlerSleepMicroseconds'] > 7e6*0.9 + ProfileEvents['LocalWriteThrottlerSleepMicroseconds'] > 7e6*0.5 FROM system.query_log WHERE current_database = '$CLICKHOUSE_DATABASE' AND query_id = '$query_id' AND type != 'QueryStart' " From 85db1eb6fcb4313d40b2c6a80e46d257fd44f7d1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Wed, 14 Jun 2023 14:55:11 +0200 Subject: [PATCH 2/5] Change submodule capnproto to its fork in ClickHouse --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index 8bf4f6e8975..151dc28c55b 100644 --- a/.gitmodules +++ b/.gitmodules @@ -19,7 +19,7 @@ url = https://github.com/google/googletest [submodule "contrib/capnproto"] path = contrib/capnproto - url = https://github.com/capnproto/capnproto + url = https://github.com/ClickHouse/capnproto [submodule "contrib/double-conversion"] path = contrib/double-conversion url = https://github.com/google/double-conversion From c49bde89615dddbfda33f23a99babe4e0c21225c Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 14 Jun 2023 07:11:28 -0600 Subject: [PATCH 3/5] Update postgresql.md Updated PostgreSQL documentation to better show example of using PostgreSQL() table engine for SELECT and INSERT, vs. 
copying data via postgresql() table function for copying data into ClickHouse --- .../table-engines/integrations/postgresql.md | 22 ++++++++++++++----- 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index 51b3048706f..aa8b4d074bc 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -136,7 +136,7 @@ postgresql> SELECT * FROM test; ### Creating Table in ClickHouse, and connecting to PostgreSQL table created above -This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table: +This example uses the [PostgreSQL table engine](/docs/en/engines/table-engines/integrations/postgresql.md) to connect the ClickHouse table to the PostgreSQL table and use both SELECT and INSERT statements to the PostgreSQL database: ``` sql CREATE TABLE default.postgresql_table @@ -150,10 +150,20 @@ ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postges_user', 'postgre ### Inserting initial data from PostgreSQL table into ClickHouse table, using a SELECT query -The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse: +The [postgresql table function](/docs/en/sql-reference/table-functions/postgresql.md) copies the data from PostgreSQL to ClickHouse, which is often used for improving the query performance of the data by querying or performing analytics in ClickHouse rather than in PostgreSQL, or can also be used for migrating data from PostgreSQL to ClickHouse. 
Since we will be copying the data from PostgreSQL to ClickHouse, we will use a MergeTree table engine in ClickHouse and call it postgresql_copy: ``` sql -INSERT INTO default.postgresql_table +CREATE TABLE default.postgresql_copy +( + `float_nullable` Nullable(Float32), + `str` String, + `int_id` Int32 +) +ENGINE = MergeTree; +``` + +``` sql +INSERT INTO default.postgresql_copy SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password'); ``` @@ -164,13 +174,13 @@ If then performing ongoing synchronization between the PostgreSQL table and Clic This would require keeping track of the max ID or timestamp previously added, such as the following: ``` sql -SELECT max(`int_id`) AS maxIntID FROM default.postgresql_table; +SELECT max(`int_id`) AS maxIntID FROM default.postgresql_copy; ``` Then inserting values from PostgreSQL table greater than the max ``` sql -INSERT INTO default.postgresql_table +INSERT INTO default.postgresql_copy SELECT * FROM postgresql('localhost:5432', 'public', 'test', 'postges_user', 'postgres_password'); WHERE int_id > maxIntID; ``` @@ -178,7 +188,7 @@ WHERE int_id > maxIntID; ### Selecting data from the resulting ClickHouse table ``` sql -SELECT * FROM postgresql_table WHERE str IN ('test'); +SELECT * FROM postgresql_copy WHERE str IN ('test'); ``` ``` text From 37245783abb01f63bf02d5ca445064b56bd92414 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 14 Jun 2023 08:03:18 -0600 Subject: [PATCH 4/5] Update mysql.md Updating mysql table function with example for copying data from MySQL into a ClickHouse MergeTree table --- .../en/sql-reference/table-functions/mysql.md | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md index 8d7656365f5..269a50ec8b7 100644 --- a/docs/en/sql-reference/table-functions/mysql.md +++ b/docs/en/sql-reference/table-functions/mysql.md @@ -107,6 +107,30 @@ SELECT * 
FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123'); └────────┴───────┘ ``` +Copying data from MySQL table into ClickHouse table: + +```sql +CREATE TABLE mysql_copy +( + `id` UInt64, + `datetime` DateTime('UTC'), + `description` String, +) +ENGINE = MergeTree +ORDER BY (id,datetime); + +INSERT INTO mysql_copy +SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password'); +``` + +Or if copying only an incremental batch from MySQL based on the max current id: + +```sql +INSERT INTO mysql_copy +SELECT * FROM mysql('host:port', 'database', 'table', 'user', 'password') +WHERE id > (SELECT max(id) from mysql_copy); +``` + **See Also** - [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md) From fb304c0b0567ce34e2ea399f7c3a1d1c43729129 Mon Sep 17 00:00:00 2001 From: Thom O'Connor Date: Wed, 14 Jun 2023 10:39:55 -0600 Subject: [PATCH 5/5] Update postgresql.md Fix table definition by adding ORDER BY() statement --- docs/en/engines/table-engines/integrations/postgresql.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/postgresql.md b/docs/en/engines/table-engines/integrations/postgresql.md index aa8b4d074bc..8eab6fdb421 100644 --- a/docs/en/engines/table-engines/integrations/postgresql.md +++ b/docs/en/engines/table-engines/integrations/postgresql.md @@ -159,7 +159,8 @@ CREATE TABLE default.postgresql_copy `str` String, `int_id` Int32 ) -ENGINE = MergeTree; +ENGINE = MergeTree +ORDER BY (int_id); ``` ``` sql