From 18c81ff68b51dac615acc90026ac4e446e35e7e8 Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Tue, 12 Jul 2022 20:20:58 +0200 Subject: [PATCH] Move benchmark into separate repository --- benchmark/.gitignore | 1 - benchmark/LICENSE | 360 ------- benchmark/README.md | 412 +------- benchmark/athena/README.md | 21 - benchmark/athena/create_partitioned.sql | 112 -- benchmark/athena/create_single.sql | 112 -- benchmark/athena/queries.sql | 43 - benchmark/athena/results/partitioned.json | 58 -- benchmark/athena/results/single.json | 58 -- benchmark/athena/run.sh | 9 - benchmark/aurora-mysql/README.md | 67 -- benchmark/aurora-mysql/create.sql | 109 -- benchmark/aurora-mysql/queries.sql | 43 - benchmark/aurora-mysql/results/16acu.json | 58 -- benchmark/aurora-mysql/run.sh | 9 - benchmark/aurora-postgresql/README.md | 59 -- benchmark/aurora-postgresql/create.sql | 109 -- benchmark/aurora-postgresql/queries.sql | 43 - .../aurora-postgresql/results/16acu.json | 58 -- benchmark/aurora-postgresql/run.sh | 10 - benchmark/bigquery/.gitignore | 1 - benchmark/bigquery/README.md | 38 - benchmark/bigquery/create.sql | 108 -- benchmark/bigquery/queries.sql | 43 - benchmark/bigquery/run.sh | 10 - benchmark/brytlytdb/README.md | 1 - benchmark/citus/benchmark.sh | 27 - benchmark/citus/create.sql | 110 -- benchmark/citus/queries.sql | 43 - benchmark/citus/results/c6a.4xlarge.json | 58 -- benchmark/citus/run.sh | 13 - benchmark/clickhouse-local/benchmark.sh | 14 - benchmark/clickhouse-local/create.sql | 109 -- benchmark/clickhouse-local/queries.sql | 43 - .../results/c6a.4xlarge.partitioned.json | 58 -- .../results/c6a.4xlarge.single.json | 58 -- benchmark/clickhouse-local/run.sh | 20 - benchmark/clickhouse/benchmark.sh | 22 - benchmark/clickhouse/create.sql | 110 -- benchmark/clickhouse/queries.sql | 43 - benchmark/clickhouse/results/c6a.4xlarge.json | 58 -- benchmark/clickhouse/results/c6a.metal.json | 58 -- benchmark/clickhouse/run.sh | 20 - benchmark/cratedb/benchmark.sh | 39 - benchmark/cratedb/create.sql | 109 -- benchmark/cratedb/queries.sql | 43 - benchmark/cratedb/results/c6a.4xlarge.json | 58 -- benchmark/cratedb/run.sh | 13 - benchmark/databend/README.md | 7 - benchmark/databend/benchmark.sh | 105 -- benchmark/databend/create.sql | 109 -- benchmark/databend/queries.sql | 43 - benchmark/databend/results/c6a.4xlarge.json | 58 -- benchmark/databend/run.sh | 20 - benchmark/druid/benchmark.sh | 43 - benchmark/druid/check.json | 1 - benchmark/druid/ingest.json | 573 ----------- benchmark/druid/queries.sql | 43 - benchmark/druid/results/c6a.4xlarge.json | 58 -- benchmark/druid/run.sh | 23 - benchmark/duckdb/README.md | 2 - benchmark/duckdb/benchmark.sh | 24 - benchmark/duckdb/create.sql | 109 -- benchmark/duckdb/load.py | 18 - benchmark/duckdb/queries.sql | 43 - benchmark/duckdb/query.py | 20 - benchmark/duckdb/results/c6a.4xlarge.json | 58 -- benchmark/duckdb/run.sh | 10 - benchmark/elasticsearch/README.md | 1 - benchmark/elasticsearch/benchmark.sh | 30 - benchmark/exasol/README.md | 3 - benchmark/generate-results.sh | 23 - benchmark/greenplum/benchmark.sh | 73 -- benchmark/greenplum/create.sql | 116 --- benchmark/greenplum/log.txt | 215 ---- benchmark/greenplum/queries.sql | 43 - benchmark/greenplum/results/c6a.4xlarge.json | 58 -- benchmark/greenplum/run.sh | 15 - benchmark/hardware.sh | 248 ++++- benchmark/hardware/benchmark-chyt.sh | 22 - benchmark/hardware/benchmark-new.sh | 29 - benchmark/hardware/benchmark-yql.sh | 19 - benchmark/hardware/benchmark_cloud.sh | 43 - benchmark/hardware/hardware.sh | 247 ----- benchmark/hardware/queries.sql | 43 - benchmark/heavyai/benchmark.sh | 50 - benchmark/heavyai/create.sql | 108 -- benchmark/heavyai/queries.sql | 43 - benchmark/heavyai/results/c6a.4xlarge.json | 58 -- benchmark/heavyai/run.sh | 13 - benchmark/index.html | 956 ------------------ benchmark/infobright/benchmark.sh | 38 - benchmark/infobright/create.sql | 108 -- benchmark/infobright/queries.sql | 43 - benchmark/infobright/results/c6a.4xlarge.json | 58 -- benchmark/infobright/run.sh | 12 - benchmark/locustdb/README.md | 24 - benchmark/locustdb/benchmark.sh | 46 - benchmark/mariadb-columnstore/README.md | 9 - benchmark/mariadb-columnstore/benchmark.sh | 36 - benchmark/mariadb-columnstore/create.sql | 108 -- benchmark/mariadb-columnstore/queries.sql | 43 - .../results/c6a.4xlarge.json | 58 -- benchmark/mariadb-columnstore/run.sh | 12 - benchmark/mariadb/benchmark.sh | 29 - benchmark/mariadb/create.sql | 109 -- benchmark/mariadb/queries.sql | 43 - .../mariadb/results/c6a.4xlarge.json.shame | 58 -- benchmark/mariadb/run.sh | 12 - benchmark/monetdb/benchmark.sh | 40 - benchmark/monetdb/create.sql | 108 -- benchmark/monetdb/queries.sql | 43 - benchmark/monetdb/query.expect | 10 - benchmark/monetdb/results/c6a.4xlarge.json | 58 -- benchmark/monetdb/run.sh | 12 - benchmark/mysql-myisam/benchmark.sh | 29 - benchmark/mysql-myisam/create.sql | 110 -- benchmark/mysql-myisam/queries.sql | 43 - .../mysql-myisam/results/c6a.4xlarge.json | 58 -- benchmark/mysql-myisam/run.sh | 12 - benchmark/mysql/benchmark.sh | 29 - benchmark/mysql/create.sql | 110 -- benchmark/mysql/queries.sql | 43 - benchmark/mysql/results/c6a.4xlarge.json | 58 -- benchmark/mysql/run.sh | 12 - benchmark/pinot/benchmark.sh | 44 - benchmark/pinot/local.yaml | 38 - benchmark/pinot/offline_table.json | 17 - benchmark/pinot/queries.sql | 43 - benchmark/pinot/results/c6a.4xlarge.json | 58 -- benchmark/pinot/run.sh | 15 - benchmark/pinot/schema.json | 437 -------- benchmark/pinot/splitted.yaml | 37 - benchmark/postgresql/benchmark.sh | 23 - benchmark/postgresql/create.sql | 109 -- benchmark/postgresql/queries.sql | 43 - benchmark/postgresql/results/c6a.4xlarge.json | 58 -- benchmark/postgresql/run.sh | 13 - benchmark/questdb/benchmark.sh | 29 - benchmark/questdb/create.sql | 108 -- benchmark/questdb/queries.sql | 43 - benchmark/questdb/results/c6a.4xlarge.json | 58 -- benchmark/questdb/run.sh | 19 - benchmark/redshift-serverless/README.md | 55 - benchmark/redshift-serverless/create.sql | 109 -- benchmark/redshift-serverless/queries.sql | 43 - .../results/serverless.json | 58 -- benchmark/redshift-serverless/run.sh | 10 - benchmark/redshift/README.md | 56 - benchmark/redshift/create.sql | 109 -- benchmark/redshift/queries.sql | 43 - benchmark/redshift/results/4x.ra3.xplus.json | 58 -- benchmark/redshift/run.sh | 10 - benchmark/singlestore/benchmark.sh | 44 - benchmark/singlestore/create.sql | 109 -- benchmark/singlestore/queries.sql | 43 - .../singlestore/results/c6a.4xlarge.json | 58 -- benchmark/singlestore/run.sh | 12 - benchmark/snowflake/NOTES.md | 67 -- benchmark/snowflake/README.md | 56 - benchmark/snowflake/create.sql | 109 -- benchmark/snowflake/queries.sql | 43 - benchmark/snowflake/results/2xl.json | 58 -- benchmark/snowflake/results/3xl.json | 58 -- benchmark/snowflake/results/4xl.json | 58 -- benchmark/snowflake/results/l.json | 58 -- benchmark/snowflake/results/m.json | 58 -- benchmark/snowflake/results/s.json | 58 -- benchmark/snowflake/results/xl.json | 58 -- benchmark/snowflake/results/xs.json | 58 -- benchmark/snowflake/run.sh | 3 - benchmark/sqlite/benchmark.sh | 20 - benchmark/sqlite/create.sql | 109 -- benchmark/sqlite/queries.sql | 43 - benchmark/sqlite/results/c6a.4xlarge.json | 58 -- benchmark/sqlite/run.sh | 13 - benchmark/starrocks/README.md | 3 - benchmark/starrocks/benchmark.sh | 3 - benchmark/timescaledb-compressed/benchmark.sh | 48 - benchmark/timescaledb-compressed/create.sql | 108 -- benchmark/timescaledb-compressed/queries.sql | 43 - .../results/c6a.4xlarge.json | 58 -- benchmark/timescaledb-compressed/run.sh | 13 - benchmark/timescaledb/benchmark.sh | 41 - benchmark/timescaledb/create.sql | 108 -- benchmark/timescaledb/queries.sql | 43 - .../timescaledb/results/c6a.4xlarge.json | 58 -- benchmark/timescaledb/run.sh | 13 - benchmark/trino/README.md | 1 - benchmark/trino/benchmark.sh | 10 - benchmark/trino/create_partitioned.sql | 112 -- benchmark/trino/create_single.sql | 107 -- benchmark/trino/queries.sql | 43 - benchmark/vertica/.gitignore | 1 - benchmark/vertica/README.md | 5 - benchmark/vertica/benchmark.sh | 26 - benchmark/vertica/create.sql | 109 -- benchmark/vertica/queries.sql | 43 - benchmark/vertica/run.sh | 13 - 199 files changed, 248 insertions(+), 12277 deletions(-) delete mode 100644 benchmark/.gitignore delete mode 100644 benchmark/LICENSE delete mode 100644 benchmark/athena/README.md delete mode 100644 benchmark/athena/create_partitioned.sql delete mode 100644 benchmark/athena/create_single.sql delete mode 100644 benchmark/athena/queries.sql delete mode 100644 benchmark/athena/results/partitioned.json delete mode 100644 benchmark/athena/results/single.json delete mode 100755 benchmark/athena/run.sh delete mode 100644 benchmark/aurora-mysql/README.md delete mode 100644 benchmark/aurora-mysql/create.sql delete mode 100644 benchmark/aurora-mysql/queries.sql delete mode 100644 benchmark/aurora-mysql/results/16acu.json delete mode 100755 benchmark/aurora-mysql/run.sh delete mode 100644 benchmark/aurora-postgresql/README.md delete mode 100644 benchmark/aurora-postgresql/create.sql delete mode 100644 benchmark/aurora-postgresql/queries.sql delete mode 100644 benchmark/aurora-postgresql/results/16acu.json delete mode 100755 benchmark/aurora-postgresql/run.sh delete mode 100644 benchmark/bigquery/.gitignore delete mode 100644 benchmark/bigquery/README.md delete mode 100644 benchmark/bigquery/create.sql delete mode 100644 benchmark/bigquery/queries.sql delete mode 100755 benchmark/bigquery/run.sh delete mode 100644 benchmark/brytlytdb/README.md delete mode 100755 benchmark/citus/benchmark.sh delete mode 100644 benchmark/citus/create.sql delete mode 100644 benchmark/citus/queries.sql delete mode 100644 benchmark/citus/results/c6a.4xlarge.json delete mode 100755 benchmark/citus/run.sh delete mode 100755 benchmark/clickhouse-local/benchmark.sh delete mode 100644 benchmark/clickhouse-local/create.sql delete mode 100644 benchmark/clickhouse-local/queries.sql delete mode 100644 benchmark/clickhouse-local/results/c6a.4xlarge.partitioned.json delete mode 100644 benchmark/clickhouse-local/results/c6a.4xlarge.single.json delete mode 100755 benchmark/clickhouse-local/run.sh delete mode 100755 benchmark/clickhouse/benchmark.sh delete mode 100644 benchmark/clickhouse/create.sql delete mode 100644 benchmark/clickhouse/queries.sql delete mode 100644 benchmark/clickhouse/results/c6a.4xlarge.json delete mode 100644 benchmark/clickhouse/results/c6a.metal.json delete mode 100755 benchmark/clickhouse/run.sh delete mode 100755 benchmark/cratedb/benchmark.sh delete mode 100644 benchmark/cratedb/create.sql delete mode 100644 benchmark/cratedb/queries.sql delete mode 100644 benchmark/cratedb/results/c6a.4xlarge.json delete mode 100755 benchmark/cratedb/run.sh delete mode 100644 benchmark/databend/README.md delete mode 100755 benchmark/databend/benchmark.sh delete mode 100644 benchmark/databend/create.sql delete mode 100644 benchmark/databend/queries.sql delete mode 100644 benchmark/databend/results/c6a.4xlarge.json delete mode 100755 benchmark/databend/run.sh delete mode 100755 benchmark/druid/benchmark.sh delete mode 100644 benchmark/druid/check.json delete mode 100644 benchmark/druid/ingest.json delete mode 100644 benchmark/druid/queries.sql delete mode 100644 benchmark/druid/results/c6a.4xlarge.json delete mode 100755 benchmark/druid/run.sh delete mode 100644 benchmark/duckdb/README.md delete mode 100755 benchmark/duckdb/benchmark.sh delete mode 100644 benchmark/duckdb/create.sql delete mode 100755 benchmark/duckdb/load.py delete mode 100644 benchmark/duckdb/queries.sql delete mode 100755 benchmark/duckdb/query.py delete mode 100644 benchmark/duckdb/results/c6a.4xlarge.json delete mode 100755 benchmark/duckdb/run.sh delete mode 100644 benchmark/elasticsearch/README.md delete mode 100755 benchmark/elasticsearch/benchmark.sh delete mode 100644 benchmark/exasol/README.md delete mode 100755 benchmark/generate-results.sh delete mode 100755 benchmark/greenplum/benchmark.sh delete mode 100644 benchmark/greenplum/create.sql delete mode 100644 benchmark/greenplum/log.txt delete mode 100644 benchmark/greenplum/queries.sql delete mode 100644 benchmark/greenplum/results/c6a.4xlarge.json delete mode 100755 benchmark/greenplum/run.sh mode change 120000 => 100755 benchmark/hardware.sh delete mode 100755 benchmark/hardware/benchmark-chyt.sh delete mode 100755 benchmark/hardware/benchmark-new.sh delete mode 100755 benchmark/hardware/benchmark-yql.sh delete mode 100755 benchmark/hardware/benchmark_cloud.sh delete mode 100755 benchmark/hardware/hardware.sh delete mode 100644 benchmark/hardware/queries.sql delete mode 100755 benchmark/heavyai/benchmark.sh delete mode 100644 benchmark/heavyai/create.sql delete mode 100644 benchmark/heavyai/queries.sql delete mode 100644 benchmark/heavyai/results/c6a.4xlarge.json delete mode 100755 benchmark/heavyai/run.sh delete mode 100644 benchmark/index.html delete mode 100755 benchmark/infobright/benchmark.sh delete mode 100644 benchmark/infobright/create.sql delete mode 100644 benchmark/infobright/queries.sql delete mode 100644 benchmark/infobright/results/c6a.4xlarge.json delete mode 100755 benchmark/infobright/run.sh delete mode 100644 benchmark/locustdb/README.md delete mode 100755 benchmark/locustdb/benchmark.sh delete mode 100644 benchmark/mariadb-columnstore/README.md delete mode 100755 benchmark/mariadb-columnstore/benchmark.sh delete mode 100644 benchmark/mariadb-columnstore/create.sql delete mode 100644 benchmark/mariadb-columnstore/queries.sql delete mode 100644 benchmark/mariadb-columnstore/results/c6a.4xlarge.json delete mode 100755 benchmark/mariadb-columnstore/run.sh delete mode 100755 benchmark/mariadb/benchmark.sh delete mode 100644 benchmark/mariadb/create.sql delete mode 100644 benchmark/mariadb/queries.sql delete mode 100644 benchmark/mariadb/results/c6a.4xlarge.json.shame delete mode 100755 benchmark/mariadb/run.sh delete mode 100755 benchmark/monetdb/benchmark.sh delete mode 100644 benchmark/monetdb/create.sql delete mode 100644 benchmark/monetdb/queries.sql delete mode 100755 benchmark/monetdb/query.expect delete mode 100644 benchmark/monetdb/results/c6a.4xlarge.json delete mode 100755 benchmark/monetdb/run.sh delete mode 100755 benchmark/mysql-myisam/benchmark.sh delete mode 100644 benchmark/mysql-myisam/create.sql delete mode 100644 benchmark/mysql-myisam/queries.sql delete mode 100644 benchmark/mysql-myisam/results/c6a.4xlarge.json delete mode 100755 benchmark/mysql-myisam/run.sh delete mode 100755 benchmark/mysql/benchmark.sh delete mode 100644 benchmark/mysql/create.sql delete mode 100644 benchmark/mysql/queries.sql delete mode 100644 benchmark/mysql/results/c6a.4xlarge.json delete mode 100755 benchmark/mysql/run.sh delete mode 100755 benchmark/pinot/benchmark.sh delete mode 100644 benchmark/pinot/local.yaml delete mode 100644 benchmark/pinot/offline_table.json delete mode 100644 benchmark/pinot/queries.sql delete mode 100644 benchmark/pinot/results/c6a.4xlarge.json delete mode 100755 benchmark/pinot/run.sh delete mode 100644 benchmark/pinot/schema.json delete mode 100644 benchmark/pinot/splitted.yaml delete mode 100755 benchmark/postgresql/benchmark.sh delete mode 100644 benchmark/postgresql/create.sql delete mode 100644 benchmark/postgresql/queries.sql delete mode 100644 benchmark/postgresql/results/c6a.4xlarge.json delete mode 100755 benchmark/postgresql/run.sh delete mode 100755 benchmark/questdb/benchmark.sh delete mode 100644 benchmark/questdb/create.sql delete mode 100644 benchmark/questdb/queries.sql delete mode 100644 benchmark/questdb/results/c6a.4xlarge.json delete mode 100755 benchmark/questdb/run.sh delete mode 100644 benchmark/redshift-serverless/README.md delete mode 100644 benchmark/redshift-serverless/create.sql delete mode 100644 benchmark/redshift-serverless/queries.sql delete mode 100644 benchmark/redshift-serverless/results/serverless.json delete mode 100755 benchmark/redshift-serverless/run.sh delete mode 100644 benchmark/redshift/README.md delete mode 100644 benchmark/redshift/create.sql delete mode 100644 benchmark/redshift/queries.sql delete mode 100644 benchmark/redshift/results/4x.ra3.xplus.json delete mode 100755 benchmark/redshift/run.sh delete mode 100755 benchmark/singlestore/benchmark.sh delete mode 100644 benchmark/singlestore/create.sql delete mode 100644 benchmark/singlestore/queries.sql delete mode 100644 benchmark/singlestore/results/c6a.4xlarge.json delete mode 100755 benchmark/singlestore/run.sh delete mode 100644 benchmark/snowflake/NOTES.md delete mode 100644 benchmark/snowflake/README.md delete mode 100644 benchmark/snowflake/create.sql delete mode 100644 benchmark/snowflake/queries.sql delete mode 100644 benchmark/snowflake/results/2xl.json delete mode 100644 benchmark/snowflake/results/3xl.json delete mode 100644 benchmark/snowflake/results/4xl.json delete mode 100644 benchmark/snowflake/results/l.json delete mode 100644 benchmark/snowflake/results/m.json delete mode 100644 benchmark/snowflake/results/s.json delete mode 100644 benchmark/snowflake/results/xl.json delete mode 100644 benchmark/snowflake/results/xs.json delete mode 100755 benchmark/snowflake/run.sh delete mode 100755 benchmark/sqlite/benchmark.sh delete mode 100644 benchmark/sqlite/create.sql delete mode 100644 benchmark/sqlite/queries.sql delete mode 100644 benchmark/sqlite/results/c6a.4xlarge.json delete mode 100755 benchmark/sqlite/run.sh delete mode 100644 benchmark/starrocks/README.md delete mode 100755 benchmark/starrocks/benchmark.sh delete mode 100755 benchmark/timescaledb-compressed/benchmark.sh delete mode 100644 benchmark/timescaledb-compressed/create.sql delete mode 100644 benchmark/timescaledb-compressed/queries.sql delete mode 100644 benchmark/timescaledb-compressed/results/c6a.4xlarge.json delete mode 100755 benchmark/timescaledb-compressed/run.sh delete mode 100755 benchmark/timescaledb/benchmark.sh delete mode 100644 benchmark/timescaledb/create.sql delete mode 100644 benchmark/timescaledb/queries.sql delete mode 100644 benchmark/timescaledb/results/c6a.4xlarge.json delete mode 100755 benchmark/timescaledb/run.sh delete mode 100644 benchmark/trino/README.md delete mode 100755 benchmark/trino/benchmark.sh delete mode 100644 benchmark/trino/create_partitioned.sql delete mode 100644 benchmark/trino/create_single.sql delete mode 100644 benchmark/trino/queries.sql delete mode 100644 benchmark/vertica/.gitignore delete mode 100644 benchmark/vertica/README.md delete mode 100755 benchmark/vertica/benchmark.sh delete mode 100644 benchmark/vertica/create.sql delete mode 100644 benchmark/vertica/queries.sql delete mode 100755 benchmark/vertica/run.sh diff --git a/benchmark/.gitignore b/benchmark/.gitignore deleted file mode 100644 index 751553b3acb..00000000000 --- a/benchmark/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.bak diff --git a/benchmark/LICENSE b/benchmark/LICENSE deleted file mode 100644 index 0f55d62d28d..00000000000 --- a/benchmark/LICENSE +++ /dev/null @@ -1,360 +0,0 @@ -Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International -Public License - -By exercising the Licensed Rights (defined below), You accept and agree -to be bound by the terms and conditions of this Creative Commons -Attribution-NonCommercial-ShareAlike 4.0 International Public License -("Public License"). To the extent this Public License may be -interpreted as a contract, You are granted the Licensed Rights in -consideration of Your acceptance of these terms and conditions, and the -Licensor grants You such rights in consideration of benefits the -Licensor receives from making the Licensed Material available under -these terms and conditions. - - -Section 1 -- Definitions. - - a. Adapted Material means material subject to Copyright and Similar - Rights that is derived from or based upon the Licensed Material - and in which the Licensed Material is translated, altered, - arranged, transformed, or otherwise modified in a manner requiring - permission under the Copyright and Similar Rights held by the - Licensor. For purposes of this Public License, where the Licensed - Material is a musical work, performance, or sound recording, - Adapted Material is always produced where the Licensed Material is - synched in timed relation with a moving image. - - b. Adapter's License means the license You apply to Your Copyright - and Similar Rights in Your contributions to Adapted Material in - accordance with the terms and conditions of this Public License. - - c. BY-NC-SA Compatible License means a license listed at - creativecommons.org/compatiblelicenses, approved by Creative - Commons as essentially the equivalent of this Public License. - - d. Copyright and Similar Rights means copyright and/or similar rights - closely related to copyright including, without limitation, - performance, broadcast, sound recording, and Sui Generis Database - Rights, without regard to how the rights are labeled or - categorized. For purposes of this Public License, the rights - specified in Section 2(b)(1)-(2) are not Copyright and Similar - Rights. - - e. Effective Technological Measures means those measures that, in the - absence of proper authority, may not be circumvented under laws - fulfilling obligations under Article 11 of the WIPO Copyright - Treaty adopted on December 20, 1996, and/or similar international - agreements. - - f. Exceptions and Limitations means fair use, fair dealing, and/or - any other exception or limitation to Copyright and Similar Rights - that applies to Your use of the Licensed Material. - - g. License Elements means the license attributes listed in the name - of a Creative Commons Public License. The License Elements of this - Public License are Attribution, NonCommercial, and ShareAlike. - - h. Licensed Material means the artistic or literary work, database, - or other material to which the Licensor applied this Public - License. - - i. Licensed Rights means the rights granted to You subject to the - terms and conditions of this Public License, which are limited to - all Copyright and Similar Rights that apply to Your use of the - Licensed Material and that the Licensor has authority to license. - - j. Licensor means the individual(s) or entity(ies) granting rights - under this Public License. - - k. NonCommercial means not primarily intended for or directed towards - commercial advantage or monetary compensation. For purposes of - this Public License, the exchange of the Licensed Material for - other material subject to Copyright and Similar Rights by digital - file-sharing or similar means is NonCommercial provided there is - no payment of monetary compensation in connection with the - exchange. - - l. Share means to provide material to the public by any means or - process that requires permission under the Licensed Rights, such - as reproduction, public display, public performance, distribution, - dissemination, communication, or importation, and to make material - available to the public including in ways that members of the - public may access the material from a place and at a time - individually chosen by them. - - m. Sui Generis Database Rights means rights other than copyright - resulting from Directive 96/9/EC of the European Parliament and of - the Council of 11 March 1996 on the legal protection of databases, - as amended and/or succeeded, as well as other essentially - equivalent rights anywhere in the world. - - n. You means the individual or entity exercising the Licensed Rights - under this Public License. Your has a corresponding meaning. - - -Section 2 -- Scope. - - a. License grant. - - 1. Subject to the terms and conditions of this Public License, - the Licensor hereby grants You a worldwide, royalty-free, - non-sublicensable, non-exclusive, irrevocable license to - exercise the Licensed Rights in the Licensed Material to: - - a. reproduce and Share the Licensed Material, in whole or - in part, for NonCommercial purposes only; and - - b. produce, reproduce, and Share Adapted Material for - NonCommercial purposes only. - - 2. Exceptions and Limitations. For the avoidance of doubt, where - Exceptions and Limitations apply to Your use, this Public - License does not apply, and You do not need to comply with - its terms and conditions. - - 3. Term. The term of this Public License is specified in Section - 6(a). - - 4. Media and formats; technical modifications allowed. The - Licensor authorizes You to exercise the Licensed Rights in - all media and formats whether now known or hereafter created, - and to make technical modifications necessary to do so. The - Licensor waives and/or agrees not to assert any right or - authority to forbid You from making technical modifications - necessary to exercise the Licensed Rights, including - technical modifications necessary to circumvent Effective - Technological Measures. For purposes of this Public License, - simply making modifications authorized by this Section 2(a) - (4) never produces Adapted Material. - - 5. Downstream recipients. - - a. Offer from the Licensor -- Licensed Material. Every - recipient of the Licensed Material automatically - receives an offer from the Licensor to exercise the - Licensed Rights under the terms and conditions of this - Public License. - - b. Additional offer from the Licensor -- Adapted Material. - Every recipient of Adapted Material from You - automatically receives an offer from the Licensor to - exercise the Licensed Rights in the Adapted Material - under the conditions of the Adapter's License You apply. - - c. No downstream restrictions. You may not offer or impose - any additional or different terms or conditions on, or - apply any Effective Technological Measures to, the - Licensed Material if doing so restricts exercise of the - Licensed Rights by any recipient of the Licensed - Material. - - 6. No endorsement. Nothing in this Public License constitutes or - may be construed as permission to assert or imply that You - are, or that Your use of the Licensed Material is, connected - with, or sponsored, endorsed, or granted official status by, - the Licensor or others designated to receive attribution as - provided in Section 3(a)(1)(A)(i). - - b. Other rights. - - 1. Moral rights, such as the right of integrity, are not - licensed under this Public License, nor are publicity, - privacy, and/or other similar personality rights; however, to - the extent possible, the Licensor waives and/or agrees not to - assert any such rights held by the Licensor to the limited - extent necessary to allow You to exercise the Licensed - Rights, but not otherwise. - - 2. Patent and trademark rights are not licensed under this - Public License. - - 3. To the extent possible, the Licensor waives any right to - collect royalties from You for the exercise of the Licensed - Rights, whether directly or through a collecting society - under any voluntary or waivable statutory or compulsory - licensing scheme. In all other cases the Licensor expressly - reserves any right to collect such royalties, including when - the Licensed Material is used other than for NonCommercial - purposes. - - -Section 3 -- License Conditions. - -Your exercise of the Licensed Rights is expressly made subject to the -following conditions. - - a. Attribution. - - 1. If You Share the Licensed Material (including in modified - form), You must: - - a. retain the following if it is supplied by the Licensor - with the Licensed Material: - - i. identification of the creator(s) of the Licensed - Material and any others designated to receive - attribution, in any reasonable manner requested by - the Licensor (including by pseudonym if - designated); - - ii. a copyright notice; - - iii. a notice that refers to this Public License; - - iv. a notice that refers to the disclaimer of - warranties; - - v. a URI or hyperlink to the Licensed Material to the - extent reasonably practicable; - - b. indicate if You modified the Licensed Material and - retain an indication of any previous modifications; and - - c. indicate the Licensed Material is licensed under this - Public License, and include the text of, or the URI or - hyperlink to, this Public License. - - 2. You may satisfy the conditions in Section 3(a)(1) in any - reasonable manner based on the medium, means, and context in - which You Share the Licensed Material. For example, it may be - reasonable to satisfy the conditions by providing a URI or - hyperlink to a resource that includes the required - information. - 3. If requested by the Licensor, You must remove any of the - information required by Section 3(a)(1)(A) to the extent - reasonably practicable. - - b. ShareAlike. - - In addition to the conditions in Section 3(a), if You Share - Adapted Material You produce, the following conditions also apply. - - 1. The Adapter's License You apply must be a Creative Commons - license with the same License Elements, this version or - later, or a BY-NC-SA Compatible License. - - 2. You must include the text of, or the URI or hyperlink to, the - Adapter's License You apply. You may satisfy this condition - in any reasonable manner based on the medium, means, and - context in which You Share Adapted Material. - - 3. You may not offer or impose any additional or different terms - or conditions on, or apply any Effective Technological - Measures to, Adapted Material that restrict exercise of the - rights granted under the Adapter's License You apply. - - -Section 4 -- Sui Generis Database Rights. - -Where the Licensed Rights include Sui Generis Database Rights that -apply to Your use of the Licensed Material: - - a. for the avoidance of doubt, Section 2(a)(1) grants You the right - to extract, reuse, reproduce, and Share all or a substantial - portion of the contents of the database for NonCommercial purposes - only; - - b. if You include all or a substantial portion of the database - contents in a database in which You have Sui Generis Database - Rights, then the database in which You have Sui Generis Database - Rights (but not its individual contents) is Adapted Material, - including for purposes of Section 3(b); and - - c. You must comply with the conditions in Section 3(a) if You Share - all or a substantial portion of the contents of the database. - -For the avoidance of doubt, this Section 4 supplements and does not -replace Your obligations under this Public License where the Licensed -Rights include other Copyright and Similar Rights. - - -Section 5 -- Disclaimer of Warranties and Limitation of Liability. - - a. UNLESS OTHERWISE SEPARATELY UNDERTAKEN BY THE LICENSOR, TO THE - EXTENT POSSIBLE, THE LICENSOR OFFERS THE LICENSED MATERIAL AS-IS - AND AS-AVAILABLE, AND MAKES NO REPRESENTATIONS OR WARRANTIES OF - ANY KIND CONCERNING THE LICENSED MATERIAL, WHETHER EXPRESS, - IMPLIED, STATUTORY, OR OTHER. THIS INCLUDES, WITHOUT LIMITATION, - WARRANTIES OF TITLE, MERCHANTABILITY, FITNESS FOR A PARTICULAR - PURPOSE, NON-INFRINGEMENT, ABSENCE OF LATENT OR OTHER DEFECTS, - ACCURACY, OR THE PRESENCE OR ABSENCE OF ERRORS, WHETHER OR NOT - KNOWN OR DISCOVERABLE. WHERE DISCLAIMERS OF WARRANTIES ARE NOT - ALLOWED IN FULL OR IN PART, THIS DISCLAIMER MAY NOT APPLY TO YOU. - - b. TO THE EXTENT POSSIBLE, IN NO EVENT WILL THE LICENSOR BE LIABLE - TO YOU ON ANY LEGAL THEORY (INCLUDING, WITHOUT LIMITATION, - NEGLIGENCE) OR OTHERWISE FOR ANY DIRECT, SPECIAL, INDIRECT, - INCIDENTAL, CONSEQUENTIAL, PUNITIVE, EXEMPLARY, OR OTHER LOSSES, - COSTS, EXPENSES, OR DAMAGES ARISING OUT OF THIS PUBLIC LICENSE OR - USE OF THE LICENSED MATERIAL, EVEN IF THE LICENSOR HAS BEEN - ADVISED OF THE POSSIBILITY OF SUCH LOSSES, COSTS, EXPENSES, OR - DAMAGES. WHERE A LIMITATION OF LIABILITY IS NOT ALLOWED IN FULL OR - IN PART, THIS LIMITATION MAY NOT APPLY TO YOU. - - c. The disclaimer of warranties and limitation of liability provided - above shall be interpreted in a manner that, to the extent - possible, most closely approximates an absolute disclaimer and - waiver of all liability. - - -Section 6 -- Term and Termination. - - a. This Public License applies for the term of the Copyright and - Similar Rights licensed here. However, if You fail to comply with - this Public License, then Your rights under this Public License - terminate automatically. - - b. Where Your right to use the Licensed Material has terminated under - Section 6(a), it reinstates: - - 1. automatically as of the date the violation is cured, provided - it is cured within 30 days of Your discovery of the - violation; or - - 2. upon express reinstatement by the Licensor. - - For the avoidance of doubt, this Section 6(b) does not affect any - right the Licensor may have to seek remedies for Your violations - of this Public License. - - c. For the avoidance of doubt, the Licensor may also offer the - Licensed Material under separate terms or conditions or stop - distributing the Licensed Material at any time; however, doing so - will not terminate this Public License. - - d. Sections 1, 5, 6, 7, and 8 survive termination of this Public - License. - - -Section 7 -- Other Terms and Conditions. - - a. The Licensor shall not be bound by any additional or different - terms or conditions communicated by You unless expressly agreed. - - b. Any arrangements, understandings, or agreements regarding the - Licensed Material not stated herein are separate from and - independent of the terms and conditions of this Public License. - - -Section 8 -- Interpretation. - - a. For the avoidance of doubt, this Public License does not, and - shall not be interpreted to, reduce, limit, restrict, or impose - conditions on any use of the Licensed Material that could lawfully - be made without permission under this Public License. - - b. To the extent possible, if any provision of this Public License is - deemed unenforceable, it shall be automatically reformed to the - minimum extent necessary to make it enforceable. If the provision - cannot be reformed, it shall be severed from this Public License - without affecting the enforceability of the remaining terms and - conditions. - - c. No term or condition of this Public License will be waived and no - failure to comply consented to unless expressly agreed to by the - Licensor. - - d. Nothing in this Public License constitutes or may be interpreted - as a limitation upon, or waiver of, any privileges and immunities - that apply to the Licensor or You, including from the legal - processes of any jurisdiction or authority. diff --git a/benchmark/README.md b/benchmark/README.md index fa178537628..ac43d967d5f 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -1,411 +1 @@ -# ClickBench: a Benchmark For Analytical Databases - -## Overview - -This benchmark represents typical workload in the following areas: click stream and traffic analysis, web analytics, machine-generated data, structured logs, and events data. It covers the typical queries in ad-hoc analytics and real-time dashboards. - -The dataset from this benchmark was obtained from the actual traffic recording of one of the world's largest web analytics platforms. It is anonymized while keeping all the essential distributions of the data. The set of queries was improvised to reflect the realistic workloads, while the queries are not directly from production. - -## Goals - -The main goals of this benchmark are: - -### Reproducibility - -You can quickly reproduce every test in as little as 20 minutes (those some systems may take several hours) in a semi-automated way. The test setup is documented and uses inexpensive cloud VMs. The test process is documented in the form of a shell script, covering the installation of every system, loading of the data, running the workload, and collecting the result numbers. The dataset is published and made available for download in multiple formats. - -### Compatibility - -The tables and queries use mostly standard SQL and require minimum or no adaptation for most SQL DBMS. The dataset has been filtered to avoid difficulties with parsing and loading. - -### Diversity - -The benchmark process is easy enough to cover a wide range of systems. It includes: modern and historical self-managed OLAP DBMS; traditional OLTP DBMS are included for comparison baseline; managed database-as-a-service offerings are included, as well as serverless cloud-native databases; some NoSQL, document, and specialized time-series databases are included as well for a reference, even if they should not be comparable on the same workload. - -### Realism - -The dataset is derived from accurate production data. The realistic data distributions allow to correctly account for compression, indices, codecs, custom data structures, etc. which is not possible with most of the random dataset generators. The workload consists of 43 queries and can test the efficiency of full scan and filtered scan, as well as index lookups, and main relational operations. It can test various aspects of hardware as well: some queries require high storage throughput; some queries benefit from a large number of CPU cores and some benefit from single-core speed; some queries benefit from high main memory bandwidth. - -## Limitations - -The limitations of this benchmark allow keeping it easy to reproduce and to include more systems in the comparison. The benchmark represents only a subset of all possible workloads and scenarios. While it aims to be as fair as possible, the focus on a specific subset of workloads may give an advantage to the systems specialized in that workloads. - -The following limitations should be acknowledged: - -1. The dataset is represented by one flat table. This is not representative of classical data warehouses which are using a normalized star or snowflake data model. The systems for classical data warehouses may get an unfair disadvantage on this benchmark. - -2. The table consists of exactly 99 997 497 records. This is rather small by modern standards but allows to perform tests in a reasonable time. - -3. While this benchmark allows testing distributed systems, and it includes multi-node and serverless cloud-native setups, most of the results so far have been obtained on a single node setup. - -4. The benchmark runs queries one after another and does not test a workload with concurrent requests; neither does it test for system capacity. Every query is run only a few times and this allows some variability in the results. - -6. Many setups and systems are different enough to make direct comparison tricky. It is not possible to test the efficiency of storage used for in-memory databases, or the time of data loading for stateless query engines. The goal of the benchmark is to give the numbers for comparison and let you derive the conclusions on your own. - -TLDR: *All Benchmarks Are ~~Bastards~~ Liars*. - -## Rules and Contribution - -### How To Add a New Result - -To introduce a new system, simply copy-paste one of the directories and edit the files accordingly: -- `benchmark.sh`: this is the main script to run the benchmark on a fresh VM; Ubuntu 22.04 or newer should be used by default, or any other system if specified in the comments. The script is not necessarily can be run in a fully automated manner - it is recommended always to copy-paste the commands one by one and observe the results. For managed databases, if the setup requires clicking in the UI, write a `README.md` instead. -- `README.md`: contains comments and observations if needed. For managed databases, it can describe the setup procedure instead of a shell script. -- `create.sql`: a CREATE TABLE statement. If it's a NoSQL system, another file like `wtf.json` can be presented. -- `queries.sql`: contains 43 queries to run; -- `run.sh`: a loop of running the queries; every query is run three times; if it's a database with local on-disk storage, the first query should be run after dropping the page cache; -- `results`: put the .json files with the results for every hardware configuration there. - -To introduce a new result for the existing system on different hardware configurations, add a new file to `results`. - -To introduce a new result for an existing system with a different usage scenario, either copy the whole directory and name it differently (e.g. `timescaledb`, `timescaledb-compression`) or add a new file to the `results` directory. - -### Installation And Fine-Tuning - -The systems can be installed or used in any reasonable way: from a binary distribution, from a Docker container, from the package manager, or compiled - whatever is more natural and simple or gives better results. - -It's better to use the default settings and avoid fine-tuning. Configuration changes can be applied if it is considered strictly necessary and documented. - -Fine-tuning and optimization for the benchmark are not recommended but allowed. In this case, add the results on vanilla configuration and fine-tuned configuration separately. - -### Data Loading - -The dataset is available in `CSV`, `TSV`, `JSONlines` and `Parquet` formats by the following links: - -- https://datasets.clickhouse.com/hits_compatible/hits.csv.gz -- https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz -- https://datasets.clickhouse.com/hits_compatible/hits.json.gz -- https://datasets.clickhouse.com/hits_compatible/hits.parquet - -The format of the source data can be selected up to convenience. - -Additional sources for stateless table engines are provided: -- https://datasets.clickhouse.com/hits_compatible/athena/hits.parquet (the same parquet file in its own subdirectory) -- https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{0..99}.parquet (100 files) - -To correctly compare the insertion time, the dataset should be downloaded and decompressed before loading (if it's using external compression; the parquet file includes internal compression and can be loaded as is). The dataset should be loaded as a single file in the most straightforward way. Splitting the dataset for parallel loading is not recommended, as it will make comparisons more difficult. Splitting the dataset is possible if the system cannot eat it as a whole due to its limitations. - -You should not wait for cool down after data loading or running OPTIMIZE / VACUUM before the main benchmark queries unless it is strictly required for the system. - -The used storage size can be measured without accounting for temporary data if there is temporary data that will be removed in the background. The built-in introspection capabilities can be used to measure the storage size, or it can be measured by checking the used space in the filesystem. - -### Indexing - -The benchmark table has one index - the primary key. The primary key is not necessary to be unique. The index of the primary key can be made clustered (ordered, partitioned, sharded). - -Manual creation of other indices is not recommended, although if the system creates indexes automatically, it is considered ok. - -### Preaggregation - -The creation of pre-aggregated tables or indices, projections, or materialized views is not recommended for the purpose of this benchmark. Although you can add results on fine-tuned setup for reference, they will be out of competition. - -If a system is of a "multidimensional OLAP" kind, so always or implicitly doing aggregations, it can be added for comparison. - -### Caching - -If the system contains a cache for query results, it should be disabled. - -It is okay if the system performs caching for source data (buffer pools and similar). If the cache or buffer pools can be flushed, it should be flushed before the first run of every query. - -If the system contains a cache for intermediate data, it should be disabled if this cache is located near the end of the query execution pipeline, thus similar to a query result cache. - -### Incomplete Results - -Many systems cannot run the full benchmark suite successfully due to OOMs, crashes, or unsupported queries. The partial results should be included nevertheless. Put `null` for the missing numbers. - -### If The Results Cannot Be Published - -Some vendors don't allow publishing the benchmark results due to the infamous [DeWitt Clause](https://cube.dev/blog/dewitt-clause-or-can-you-benchmark-a-database). Most of them are still allowed to use the system for benchmarks. In this case, please submit the full information about installation and reproduction, but without the `results` directory. A `.gitignore` file can be added to prevent accidental publishing. - -We allow both open-source and proprietary systems in our benchmark, as well as managed services, even if registration, credit card, or salesperson call is required - you still can submit the testing description if you don't violate the TOS. - -Please let us know if some results were published by mistake by opening an issue on GitHub. - -### If a Mistake Or Misrepresentation Is Found - -It is easy to accidentally misrepresent some systems. While acting in a good faith, the authors admit their lack of deep knowledge of most systems. Please send a pull request to correct the mistakes. - -### Results Usage And Scoreboards - -The results can be used for comparison of various systems, but always take them with a grain of salt due to the vast amount of caveats and hidden details. Always reference the original benchmark and this text. - -We allow but do not recommend creating scoreboards from this benchmark or tell that one system is better (faster, cheaper, etc) than another. - -There is a web page to navigate across benchmark results and present a summary report. It allows to filter out some systems, setups, or queries. E.g. if you found some subset of the 43 queries irrelevant, you can simply exclude them from the calculation and share the report without these queries. - -You can select the summary metric from one of the following: "Cold Run", "Hot Run", "Load Time", and "Data Size". If you select the "Load Time" or "Data Size", the entries will be simply ordered from best to worst, and additionally, the ratio to the best non-zero result will be shown (how times one system is worse than the best system in this metric). Load time can be zero for stateless query engines like `clickhouse-local` or `Amazon Athena`. - -If you select "Cold Run" or "Hot Run", the aggregation across the queries is performed in the following way: - -1. The first run for every query is selected for Cold Run. For Hot Run, the minimum from 2nd and 3rd run time is selected, if both runs are successful, or null if some were unsuccessful. - -By default, the "Hot Run" metric is selected, because it's not always possible to obtain a cold runtime for managed services, while for on-premise a quite slow EBS volume is used by default which makes the comparison slightly less interesting. - -2. For every query, find a system that demonstrated the best (fastest) query time and take it as a baseline. - -This gives us a point of comparison. Alternatively, we can take a benchmark entry like "ClickHouse on c6a.metal" as a baseline and divide all queries time by the times from a baseline. But it would be quite arbitrary and asymmetric. Instead, we take the best result for every query separately. - -3. For every query, if the result is present, calculate the ratio to the baseline, but add constant 10ms to the nominator and denominator, so the formula will be: `(10ms + query_time) / (10ms + baseline_query_time)`. This formula gives a value >= 1, which is equal to 1 for the best benchmark entry on this query. - -We are interested in relative query run times, not absolute. The benchmark has a broad set of queries, and there can be queries that typically run in 100ms (e.g. for interactive dashboards) and some queries that typically run in a minute (e.g. complex ad-hoc queries). And we want to treat these queries equally important in the benchmark, that's why we need relative values. - -The constant shift is needed to make the formula well-defined when query time approaches zero. E.g. some systems can get query results in 0 ms using table metadata lookup, and another in 10 ms by range scan. But this should not be treated as the infinite advantage of one system over the other. With the constant shift, we will treat it as only two times an advantage. - -4. For every query, if the result is not present, substitute it with a "penalty" calculated as follows: take the maximum query runtime for this benchmark entry across other queries that have a result, but if it is less than 300 seconds, put it 300 seconds. Then multiply the value by 2. Then calculate the ratio as explained above. - -For example, one system crashed while trying to run a query which can highlight the maturity, or lack of maturity, of a system. Or does not run a query due to limitations. If this system shows run times like 1..1000 sec. on other queries, we will substitute 2000 sec. instead of this missing result. - -5. Take the geometric mean of the ratios across the queries. It will be the summary rating. - -Why geometric mean? The ratios can only be naturally averaged in this way. Imagine there are two queries and two systems. The first system ran the first query in 1s and the second query in 20s. The second system ran the first query in 2s and the second query in 10s. So, the first system is two times faster on the first query and two times slower on the second query and vice-versa. The final score should be identical for these systems. - - -## History and Motivation - -The benchmark has been created in October 2013 for evaluating various DBMS to use for a web analytics system. It has been made by taking a 1/50th of one week of production pageviews (a.k.a. "hits") data and taking the first one billion, one hundred million, and ten million records from it. It has been run on a 3-node cluster of Xeon E2650v2 with 128 GiB RAM, 8x6TB HDD in md-RAID-6, and 10 Gbit network in a private datacenter in Finland. - -The following systems were tested in 2013: ClickHouse, MonetDB, InfiniDB, Infobright, LucidDB, Vertica, Hive and MySQL. To ensure fairness, the benchmark has been conducted by a person without ClickHouse experience. ClickHouse has been selected for production usage by the results of this benchmark. - -The benchmark continued to be occasionally used privately until 2016 when the results has been published with the ClickHouse release in open-source. While the results were made public, the datasets were not, as they contain customer data. - -We needed to publish the dataset to facilitate open-source development and testing, but it was not possible to do it as is. In 2019, the `clickhouse-obfuscator` tool has been introduced to anonymize the data, and the dataset has been published. Read more about the challenge of data obfuscation [here](https://habr.com/en/company/yandex/blog/485096/). - -More systems were included in the benchmark over time: Greenplum, MemSQL (now SingleStore), OmniSci (now HeavyAI), DuckDB, PostgreSQL, and TimescaleDB. - -In [2021](https://clickhouse.com/blog/introducing-click-house-inc/) the original cluster for benchmark stopped being used, and we were unable to add new results without rerunning the old results on different hardware. Rerunning the old results appeared to be difficult: due to the natural churn of the software, the old step-by-step instructions become stale. - -The original benchmark dataset included many details that were natural for ClickHouse and web analytics data but hard for other systems: unsigned integers (not supported by standard SQL), strings with zero bytes, fixed-length string data types, etc. Only ClickHouse is being able to load the dataset as is, while most other databases require non-trivial adjustments to the data and queries. - -The idea of the new benchmark is: -- normalize the dataset to a "common denominator", so it can be loaded to most of the systems without a hassle. -- normalize the queries to use only standard SQL - they will not use any advantages of ClickHouse but will be runnable on every system. -- ideally make it automated. At least make it simple - runnable by a short shell script that can be run by copy-pasting a few commands in the terminal, in the worst case. -- run everything on widely available cloud VMs and allow to record the results from various types of instances. - -The benchmark is created and used by the ClickHouse team. It can be surprising, but we [did not perform](https://clickhouse.com/blog/clickhouse-over-the-years-with-benchmarks/) any specific optimizations in ClickHouse for the queries in the benchmark, which allowed us to keep some reasonable sense of fairness with respect to other systems. - -Now the new benchmark is easy to use and the results for any system can be reproduced in around 20 minutes. - -We also introduced the [Hardware Benchmark](https://clickhouse.com/benchmark/hardware/) for testing servers and VMs. - -## Systems Included - -- [x] ClickHouse -- [ ] ClickHouse operating like "Athena" -- [x] clickhouse-local without data loading -- [x] MySQL InnoDB -- [x] MySQL MyISAM -- [ ] MariaDB -- [x] MariaDB ColumnStore -- [x] MemSQL/SingleStore -- [x] PostgreSQL -- [x] Greenplum -- [x] TimescaleDB -- [x] Citus -- [x] Vertica (without publishing) -- [x] QuestDB -- [x] DuckDB -- [x] MonetDB -- [x] mapD/Omnisci/HeavyAI -- [x] Databend -- [ ] Doris/PALO -- [x] Druid -- [x] Pinot -- [x] CrateDB -- [ ] Spark SQL -- [ ] Starrocks -- [ ] ShitholeDB -- [ ] Hive -- [ ] Impala -- [ ] Hyper -- [x] SQLite -- [x] Redshift -- [x] Redshift Serverless -- [ ] Presto/Trino -- [x] Amazon Athena -- [x] Bigquery (without publishing) -- [x] Snowflake -- [ ] Rockset -- [ ] CockroachDB -- [ ] CockroachDB Serverless -- [ ] Databricks -- [ ] Planetscale (without publishing) -- [ ] TiDB (TiFlash) -- [x] Amazon RDS Aurora for MySQL -- [x] Amazon RDS Aurora for Postgres -- [ ] InfluxDB -- [ ] VictoriaMetrics -- [ ] TDEngine -- [ ] MongoDB -- [ ] Cassandra -- [ ] ScyllaDB -- [ ] Elasticsearch -- [ ] Apache Ignite -- [ ] Infobright -- [ ] Actian Vector -- [ ] Manticore Search -- [x] Vertica (without publishing) -- [ ] Azure Synapse -- [ ] Starburst Galaxy -- [ ] MS SQL Server with Column Store Index (without publishing) -- [ ] Dremio (without publishing) -- [ ] Exasol -- [ ] LocustDB -- [ ] EventQL -- [ ] Apache Drill -- [ ] Apache Kudu -- [ ] Apache Kylin - -By default, all tests are run on c6a.4xlarge VM in AWS with 500 GB gp2. - -Please help us add more systems and run the benchmarks on more types of VMs. - -## Similar Projects - -Many alternative benchmarks are applicable to OLAP DBMS with their own advantages and disadvantages. - -### Brown University Mgbench - -https://github.com/crottyan/mgbench - -A new analytical benchmark for machine-generated log data. By Andrew Crottyan from Brown University. - -Advantages: -- somewhat realistic dataset; -- a diverse set of queries; -- good coverage of systems; -- easy to reproduce; - -Disadvantages: -- very small dataset size; -- favors in-memory databases; -- mostly abandoned. - -### UC Berkeley AMPLab Big Data Benchmark - -https://amplab.cs.berkeley.edu/benchmark/ - -Poor coverage of queries that are too simple. The benchmark is abandoned. - -### Mark Litwinschik's NYC Taxi - -https://tech.marksblogg.com/benchmarks.html - -Advantages: -- real-world dataset; -- good coverage of systems; many unusual entries; -- contains a story for every benchmark entry; - -Disadvantages: -- unreasonably small set of queries: 4 mostly trivial queries don't represent any realistic workload and are subjects for over-optimization; -- compares different systems on different hardware; -- many results are outdated; -- no automated or easy way to reproduce the results; -- while many results are performed independently of corporations or academia, some benchmark entries may have been sponsored; -- the dataset is not readily available for downloads: originally 1.1 billion records are used, while it's more than 4 billion records in 2022. - -### Database-like ops Benchmark from h2o.ai - -https://h2oai.github.io/db-benchmark/ - -A benchmark for data-frame libraries and embedded databases. Good coverage of data-frame libraries and a few full-featured DBMS as well. - -### A benchmark for querying large JSON datasets - -https://colab.research.google.com/github/dcmoura/spyql/blob/master/notebooks/json_benchmark.ipynb - -A good benchmark for command-line tools for processing semistructured data. Can be used to test DBMS as well. - -### Star Schema Benchmark - -Pat O'Neil, Betty O'Neil, Xuedong Chen -https://www.cs.umb.edu/~poneil/StarSchemaB.PDF - -It is a simplified version of TPC-H. - -Advantages: -- well-specified; -- popular in academia; - -Disadvantages: -- represents a classic data warehouse schema; -- database generator produces random distributions that are not realistic and the benchmark does not allow to capture the difference in various optimizations that matter on real-world data; -- many research systems in academia targeting for this benchmark which makes many aspects of it exhausted; - -### TPC-H - -A benchmark suite from Transaction Processing Council - one of the oldest organizations specializing in DBMS benchmarks. - -Advantages: -- well-specified; - -Disadvantages: -- requires official certification; -- represents a classic data warehouse schema; -- database generator produces random distributions that are not realistic and the benchmark does not allow to capture the difference in various optimizations that matter on real-world data; -- many systems are targeting this benchmark which makes many aspects of it exhausted; - -### TPC-DS - -More advanced than TPC-H, focused on complex ad-hoc queries. This also requires official certification. - -Advantages: -- an extensive collection of complex queries. - -Disadvantages: -- requires official certification; -- official results have only sparse coverage of systems; -- biased towards complex queries over many tables. - -### Ontime - -Introduced by Vadim Tkachenko from Percona [in 2009](https://www.percona.com/blog/2009/10/02/analyzing-air-traffic-performance-with-infobright-and-monetdb/). - -Based on the US Bureau of Transportation Statistics open data. - -Advantages: -- real-world dataset; - -Disadvantages: -- not widely used; -- the set of queries is not standardized; -- the table contains too much redundancy; - -### TSBS - -Time Series Benchmark Suite. https://github.com/timescale/tsbs -Originally from InfluxDB, and supported by TimescaleDB. - -Advantages: -- a benchmark for time-series scenarios; - -Disadvantages: -- not applicable for scenarios with data analytics. - -### Fair Database Benchmarks - -https://github.com/db-benchmarks/db-benchmarks - -A benchmark suite inspired by ClickHouse benchmarks. -Used mostly to compare search engines: Elasticsearch and Manticore. - -### STAC - -https://www.stacresearch.com/ - -Disadvantages: -- requires a paid membership. - -### More... - -Please let me know if you know more well-defined, realistic, and reproducible benchmarks for analytical workloads. - -In addition, I collect every benchmark that includes ClickHouse [here](https://github.com/ClickHouse/ClickHouse/issues/22398). - -## Additional Outcomes - -This benchmark can be used to collect the snippets for installation and data loading across a wide variety of DBMS. The usability and quality of the documentation can be compared. It has been used to improve the quality of the participants as demonstrated in [duckdb#3969](https://github.com/duckdb/duckdb/issues/3969), [timescaledb#4473](https://github.com/timescale/timescaledb/issues/4473), [mariadb-corporation#16](https://github.com/mariadb-corporation/mariadb-community-columnstore-docker/issues/16), [MonetDB#7309](https://github.com/duckdb/duckdb/issues/3969), [questdb#2272](https://github.com/questdb/questdb/issues/2272), [crate#12654](https://github.com/crate/crate/issues/12654), [LocustDB#152](https://github.com/cswinter/LocustDB/issues/152), etc; - -### References and Citation - -Alexey Milovidov, 2022. +Benchmark is located in a separate repository: https://github.com/ClickHouse/ClickBench diff --git a/benchmark/athena/README.md b/benchmark/athena/README.md deleted file mode 100644 index ff94029b429..00000000000 --- a/benchmark/athena/README.md +++ /dev/null @@ -1,21 +0,0 @@ -Data Sources -> AWS Data Catalog -> Create Table -> Use S3 bucket data - -Note: Athena does not support files. Only directories. - -Go to query editor and run `create.sql`. - -``` -sudo apt-get install -y jq -export OUTPUT='s3://athena-experiments-milovidov/' - -./run1.sh | tee ids.txt -``` - -Wait a few minutes. Then: - -``` -cat ids.txt | xargs -I{} aws --output json athena get-query-execution --query-execution-id {} | tee log.txt - -cat log.txt | grep -P 'TotalExecutionTimeInMillis|FAILED' | grep -oP '\d+|FAILED' | - awk '{ if ($1 == "ERROR") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : ($1 / 1000); if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' -``` diff --git a/benchmark/athena/create_partitioned.sql b/benchmark/athena/create_partitioned.sql deleted file mode 100644 index b0f9e5d0be5..00000000000 --- a/benchmark/athena/create_partitioned.sql +++ /dev/null @@ -1,112 +0,0 @@ -CREATE EXTERNAL TABLE IF NOT EXISTS `test`.`hits` ( -`watchid` bigint, -`javaenable` smallint, -`title` string, -`goodevent` smallint, -`eventtime` timestamp, -`eventdate` date, -`counterid` int, -`clientip` int, -`regionid` int, -`userid` bigint, -`counterclass` smallint, -`os` smallint, -`useragent` smallint, -`url` string, -`referer` string, -`isrefresh` smallint, -`referercategoryid` smallint, -`refererregionid` int, -`urlcategoryid` smallint, -`urlregionid` int, -`resolutionwidth` smallint, -`resolutionheight` smallint, -`resolutiondepth` smallint, -`flashmajor` smallint, -`flashminor` smallint, -`flashminor2` string, -`netmajor` smallint, -`netminor` smallint, -`useragentmajor` smallint, -`useragentminor` string, -`cookieenable` smallint, -`javascriptenable` smallint, -`ismobile` smallint, -`mobilephone` smallint, -`mobilephonemodel` string, -`params` string, -`ipnetworkid` int, -`traficsourceid` smallint, -`searchengineid` smallint, -`searchphrase` string, -`advengineid` smallint, -`isartifical` smallint, -`windowclientwidth` smallint, -`windowclientheight` smallint, -`clienttimezone` smallint, -`clienteventtime` timestamp, -`silverlightversion1` smallint, -`silverlightversion2` smallint, -`silverlightversion3` int, -`silverlightversion4` smallint, -`pagecharset` string, -`codeversion` int, -`islink` smallint, -`isdownload` smallint, -`isnotbounce` smallint, -`funiqid` bigint, -`originalurl` string, -`hid` int, -`isoldcounter` smallint, -`isevent` smallint, -`isparameter` smallint, -`dontcounthits` smallint, -`withhash` smallint, -`hitcolor` string, -`localeventtime` timestamp, -`age` smallint, -`sex` smallint, -`income` smallint, -`interests` smallint, -`robotness` smallint, -`remoteip` int, -`windowname` int, -`openername` int, -`historylength` smallint, -`browserlanguage` string, -`browsercountry` string, -`socialnetwork` string, -`socialaction` string, -`httperror` smallint, -`sendtiming` int, -`dnstiming` int, -`connecttiming` int, -`responsestarttiming` int, -`responseendtiming` int, -`fetchtiming` int, -`socialsourcenetworkid` smallint, -`socialsourcepage` string, -`paramprice` bigint, -`paramorderid` string, -`paramcurrency` string, -`paramcurrencyid` smallint, -`openstatservicename` string, -`openstatcampaignid` string, -`openstatadid` string, -`openstatsourceid` string, -`utmsource` string, -`utmmedium` string, -`utmcampaign` string, -`utmcontent` string, -`utmterm` string, -`fromtag` string, -`hasgclid` smallint, -`refererhash` bigint, -`urlhash` bigint, -`clid` int -) -ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' -WITH SERDEPROPERTIES ( -'serialization.format' = '1' -) LOCATION 's3://clickhouse-public-datasets/hits_compatible/athena_partitioned' -TBLPROPERTIES ('has_encrypted_data'='false'); diff --git a/benchmark/athena/create_single.sql b/benchmark/athena/create_single.sql deleted file mode 100644 index ef8e12a9915..00000000000 --- a/benchmark/athena/create_single.sql +++ /dev/null @@ -1,112 +0,0 @@ -CREATE EXTERNAL TABLE IF NOT EXISTS `test`.`hits` ( -`watchid` bigint, -`javaenable` smallint, -`title` string, -`goodevent` smallint, -`eventtime` timestamp, -`eventdate` date, -`counterid` int, -`clientip` int, -`regionid` int, -`userid` bigint, -`counterclass` smallint, -`os` smallint, -`useragent` smallint, -`url` string, -`referer` string, -`isrefresh` smallint, -`referercategoryid` smallint, -`refererregionid` int, -`urlcategoryid` smallint, -`urlregionid` int, -`resolutionwidth` smallint, -`resolutionheight` smallint, -`resolutiondepth` smallint, -`flashmajor` smallint, -`flashminor` smallint, -`flashminor2` string, -`netmajor` smallint, -`netminor` smallint, -`useragentmajor` smallint, -`useragentminor` string, -`cookieenable` smallint, -`javascriptenable` smallint, -`ismobile` smallint, -`mobilephone` smallint, -`mobilephonemodel` string, -`params` string, -`ipnetworkid` int, -`traficsourceid` smallint, -`searchengineid` smallint, -`searchphrase` string, -`advengineid` smallint, -`isartifical` smallint, -`windowclientwidth` smallint, -`windowclientheight` smallint, -`clienttimezone` smallint, -`clienteventtime` timestamp, -`silverlightversion1` smallint, -`silverlightversion2` smallint, -`silverlightversion3` int, -`silverlightversion4` smallint, -`pagecharset` string, -`codeversion` int, -`islink` smallint, -`isdownload` smallint, -`isnotbounce` smallint, -`funiqid` bigint, -`originalurl` string, -`hid` int, -`isoldcounter` smallint, -`isevent` smallint, -`isparameter` smallint, -`dontcounthits` smallint, -`withhash` smallint, -`hitcolor` string, -`localeventtime` timestamp, -`age` smallint, -`sex` smallint, -`income` smallint, -`interests` smallint, -`robotness` smallint, -`remoteip` int, -`windowname` int, -`openername` int, -`historylength` smallint, -`browserlanguage` string, -`browsercountry` string, -`socialnetwork` string, -`socialaction` string, -`httperror` smallint, -`sendtiming` int, -`dnstiming` int, -`connecttiming` int, -`responsestarttiming` int, -`responseendtiming` int, -`fetchtiming` int, -`socialsourcenetworkid` smallint, -`socialsourcepage` string, -`paramprice` bigint, -`paramorderid` string, -`paramcurrency` string, -`paramcurrencyid` smallint, -`openstatservicename` string, -`openstatcampaignid` string, -`openstatadid` string, -`openstatsourceid` string, -`utmsource` string, -`utmmedium` string, -`utmcampaign` string, -`utmcontent` string, -`utmterm` string, -`fromtag` string, -`hasgclid` smallint, -`refererhash` bigint, -`urlhash` bigint, -`clid` int -) -ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' -WITH SERDEPROPERTIES ( -'serialization.format' = '1' -) LOCATION 's3://clickhouse-public-datasets/hits_compatible/athena' -TBLPROPERTIES ('has_encrypted_data'='false'); diff --git a/benchmark/athena/queries.sql b/benchmark/athena/queries.sql deleted file mode 100644 index ef8c727ead6..00000000000 --- a/benchmark/athena/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END, URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC OFFSET 100 LIMIT 10; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC OFFSET 10000 LIMIT 10; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-14' AND EventDate <= DATE '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) OFFSET 1000 LIMIT 10; diff --git a/benchmark/athena/results/partitioned.json b/benchmark/athena/results/partitioned.json deleted file mode 100644 index 8a67c09bb47..00000000000 --- a/benchmark/athena/results/partitioned.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Athena (partitioned)", - "date": "2022-07-01", - "machine": "serverless", - "cluster_size": "serverless", - "comment": "", - - "tags": ["stateless", "managed", "Java", "column-oriented"], - - "load_time": 0, - "data_size": 13800000000, - - "result": [ -[2.777,3.275,2.925], -[1.503,3.136,4.003], -[4.544,3.833,3.64], -[3.9,2.514,3.522], -[3.46,2.186,3.244], -[3.624,2.742,3.185], -[2.21,1.984,3.123], -[3.207,2.403,2.685], -[2.936,2.014,3.869], -[8.333,7.102,4.434], -[7.401,4.697,3.155], -[4.214,3.065,4.748], -[6.207,4.213,2.576], -[3.428,3.085,3.401], -[2.92,3.3,3.278], -[2.205,2.558,2.419], -[4.641,3.888,2.155], -[3.219,2.822,3.292], -[3.23,3.579,4.31], -[2.288,3.543,3.95], -[3.032,2.859,2.807], -[3.926,3.247,2.928], -[4.477,4.048,4.392], -[7.407,6.375,6.123], -[2.611,2.872,2.827], -[2.566,2.567,3.6], -[3.673,3.733,2.925], -[2.426,3.218,2.78], -[5.125,3.778,4.25], -[4.565,4.03,4.066], -[3.628,3.219,2.953], -[6.207,5.973,3.158], -[4.339,5.601,4.234], -[2.618,3.107,3.433], -[4.661,2.79,2.846], -[2.373,1.629,2.734], -[2.721,2.15,1.962], -[3.207,2.154,2.186], -[2.453,2.477,3.217], -[2.691,4.732,3.584], -[2.589,2.613,3.231], -[1.926,3.617,1.82], -[1.506,2.404,2.343] -] -} diff --git a/benchmark/athena/results/single.json b/benchmark/athena/results/single.json deleted file mode 100644 index d2b84187f11..00000000000 --- a/benchmark/athena/results/single.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Athena (single)", - "date": "2022-07-01", - "machine": "serverless", - "cluster_size": "serverless", - "comment": "", - - "tags": ["stateless", "managed", "Java", "column-oriented"], - - "load_time": 0, - "data_size": 13800000000, - - "result": [ -[2.268,1.327,2.137], -[3.427,2.248,3.605], -[3.254,2.548,2.316], -[3.025,2.314,3.003], -[2.264,2.876,4.213], -[3.044,2.745,2.698], -[2.732,2.199,2.659], -[2.022,3.692,3.072], -[2.746,2.477,2.785], -[3.53,2.782,4.031], -[2.709,2.047,2.853], -[2.318,1.969,3.4], -[2.635,1.935,2.707], -[3.049,3.38,3.071], -[3.661,2.387,2.476], -[2.479,2.591,2.21], -[3.093,3.698,4.351], -[3.479,3.236,2.274], -[4.36,2.97,3.457], -[2.525,2.384,3.328], -[3.34,3.174,3.409], -[3.163,2.971,3.034], -[2.999,3.539,2.906], -[6.454,7.597,7.858], -[2.754,1.951,2.645], -[2.852,3.018,2.718], -[2.513,2.678,2.417], -[3.293,2.521,2.771], -[4.392,3.863,3.981], -[3.658,4.246,4.027], -[3.028,3.87,2.337], -[2.923,3.635,3.591], -[3.142,4.105,3.15], -[3.66,3.187,4.745], -[2.652,2.695,2.742], -[2.262,2.776,1.815], -[1.881,2.212,2.053], -[1.934,2.551,1.524], -[2.069,2.26,1.805], -[2.626,2.902,2.793], -[1.791,2.082,2.481], -[3.757,2.6,1.946], -[2.608,1.994,3.967] -] -} diff --git a/benchmark/athena/run.sh b/benchmark/athena/run.sh deleted file mode 100755 index f1ce446f0a8..00000000000 --- a/benchmark/athena/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - for i in $(seq 1 $TRIES); do - aws athena --output json start-query-execution --query-execution-context 'Database=test' --result-configuration "OutputLocation=${OUTPUT}" --query-string "${query}" | jq '.QueryExecutionId' - done -done diff --git a/benchmark/aurora-mysql/README.md b/benchmark/aurora-mysql/README.md deleted file mode 100644 index 48faddfa2b3..00000000000 --- a/benchmark/aurora-mysql/README.md +++ /dev/null @@ -1,67 +0,0 @@ -Select Aurora. -Select Aurora for MySQL. -Select the latest version 3.02.0 (compatible with MySQL 8.0.23) -Select Production template. - -Database: database-1 -User name: admin -Master password: vci43A32#1 - -Select serverless. -16 minimum and maximum ACU (32 GB RAM). -Don't create an Aurora replica. -Public access: yes. -Turn off DevOps Guru. - -Creation took around 15 seconds. -But creation of endpoints took longer. - -Find the writer instance endpoint. -Example: database-1.cluster-cnkeohbxcwr1.eu-central-1.rds.amazonaws.com - -``` -sudo apt-get update -sudo apt-get install -y mysql-client -``` - -Find "Security", click on the group in "VPC security groups". -Edit "Inbound rules". Add "Custom TCP", port 3306, from 0.0.0.0/0. - -``` -export HOST="database-1.cluster-cnkeohbxcwr1.eu-central-1.rds.amazonaws.com" -export PASSWORD="..." - -mysql -h "${HOST}" -u admin --password="${PASSWORD}" -e "CREATE DATABASE test" -``` - -Load the data - -``` -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -mysql -h "${HOST}" -u admin --password="${PASSWORD}" test < create.sql - -time mysql --local-infile=1 -h "${HOST}" -u admin --password="${PASSWORD}" test -e "LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits" -``` - -> 128m7.318s - -Go to "Monitoring", find "[Billed] Volume Bytes Used". - -> 83.46 GiB - -``` -./run.sh 2>&1 | tee log.txt - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' -``` - -You will get -> ERROR 1114 (HY000) at line 1: The table '/rdsdbdata/tmp/#sqlaff_e5_0' is full - -to some queries. diff --git a/benchmark/aurora-mysql/create.sql b/benchmark/aurora-mysql/create.sql deleted file mode 100644 index 1850bffedce..00000000000 --- a/benchmark/aurora-mysql/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/aurora-mysql/queries.sql b/benchmark/aurora-mysql/queries.sql deleted file mode 100644 index 3f04f648222..00000000000 --- a/benchmark/aurora-mysql/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') LIMIT 10 OFFSET 1000; diff --git a/benchmark/aurora-mysql/results/16acu.json b/benchmark/aurora-mysql/results/16acu.json deleted file mode 100644 index ef00d213842..00000000000 --- a/benchmark/aurora-mysql/results/16acu.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Aurora for MySQL", - "date": "2022-07-01", - "machine": "16acu", - "cluster_size": 1, - "comment": "Some queries cannot run due to ERROR 1114 (HY000) at line 1: The table '/rdsdbdata/tmp/#sqlaff_e5_0' is full", - - "tags": ["managed", "C++", "MySQL compatible", "row-oriented"], - - "load_time": 7687.318, - "data_size": 89614492631, - - "result": [ -[740.42,739.91,746.65], -[828.2,835.67,832.87], -[830.08,830.98,832.38], -[829.88,832.83,830.87], -[845.99,842.4,843.21], -[869.51,870.69,869.75], -[823.77,829.08,825.54], -[827.74,832.87,829.25], -[916.26,909.46,929.17], -[946.49,939.27,932.32], -[852.37,857.69,854.74], -[857.99,864.05,825.14], -[null,null,null], -[863.37,860.2,865.62], -[null,null,null], -[891.84,895.28,893.68], -[null,null,null], -[null,null,null], -[1420.12,1419.34,1445.08], -[28.94,0.21,0.21], -[917.64,917.56,916.92], -[923.47,921.7,923.82], -[919.95,918.37,920.17], -[1002.19,1002.07,1001.2], -[902.23,902.65,901.8], -[901.17,900.02,898.3], -[900.04,898.89,903.35], -[901.78,902.71,901.28], -[null,null,null], -[1153.29,1154,1156.46], -[862.57,863.35,859.69], -[923.14,921.1,923.92], -[1370.78,1401.72,1401.44], -[1454.67,1455.55,1458.79], -[1463.31,1466.75,1461.83], -[941.03,944.07,937.23], -[7.42,2.80,2.77], -[2.57,2.52,2.59], -[1.50,1.52,1.59], -[3.62,3.57,3.61], -[0.95,0.94,0.94], -[0.90,0.92,0.91], -[1.69,1.72,1.69] -] -} diff --git a/benchmark/aurora-mysql/run.sh b/benchmark/aurora-mysql/run.sh deleted file mode 100755 index c6e2bedd27b..00000000000 --- a/benchmark/aurora-mysql/run.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - for i in $(seq 1 $TRIES); do - mysql -h "${HOST}" -u admin --password="${PASSWORD}" test -vvv -e "${query}" - done; -done; diff --git a/benchmark/aurora-postgresql/README.md b/benchmark/aurora-postgresql/README.md deleted file mode 100644 index 906258f65c5..00000000000 --- a/benchmark/aurora-postgresql/README.md +++ /dev/null @@ -1,59 +0,0 @@ -Select Aurora. -Select Aurora for PostgreSQL. -Select the latest version PostgreSQL 14.3. -Select Production template. - -Database: database-2 -User name: postgres -Master password: vci43A32#1 - -Select serverless. -16 minimum and maximum ACU (32 GB RAM). -Don't create an Aurora replica. -Public access: yes. -Turn off DevOps Guru. - -Creation took around 15 seconds. -But creation of endpoints took longer (around 5..10 minutes). - -Find the writer instance endpoint. -Example: database-1.cluster-cnkeohbxcwr1.eu-central-1.rds.amazonaws.com - -``` -sudo apt-get update -sudo apt-get install -y postgresql-client -``` - -Find "Security", click on the group in "VPC security groups". -Edit "Inbound rules". Add "Custom TCP", port 5432, from 0.0.0.0/0. - -``` -export HOST="database-2-instance-1.cnkeohbxcwr1.eu-central-1.rds.amazonaws.com" -echo "*:*:*:*:..." > .pgpass -chmod 400 .pgpass -``` - -Load the data - -``` -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -psql -U postgres -h "${HOST}" -t -c 'CREATE DATABASE test' -psql -U postgres -h "${HOST}" test -t < create.sql -psql -U postgres -h "${HOST}" test -t -c '\timing' -c "\\copy hits FROM 'hits.tsv'" -``` - -> COPY 99997497 -> Time: 2126515.516 ms (35:26.516) - -Go to "Monitoring", find "[Billed] Volume Bytes Used". - -> 48.6 GiB - -``` -./run.sh 2>&1 | tee log.txt - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' -``` diff --git a/benchmark/aurora-postgresql/create.sql b/benchmark/aurora-postgresql/create.sql deleted file mode 100644 index 1850bffedce..00000000000 --- a/benchmark/aurora-postgresql/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/aurora-postgresql/queries.sql b/benchmark/aurora-postgresql/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/aurora-postgresql/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/aurora-postgresql/results/16acu.json b/benchmark/aurora-postgresql/results/16acu.json deleted file mode 100644 index 194329d7f4f..00000000000 --- a/benchmark/aurora-postgresql/results/16acu.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Aurora for PostgreSQL", - "date": "2022-07-01", - "machine": "16acu", - "cluster_size": 1, - "comment": "", - - "tags": ["managed", "C", "PostgreSQL compatible", "row-oriented"], - - "load_time": 2127, - "data_size": 52183852646, - - "result": [ -[12.8361,5.71812,5.8241], -[61.2565,62.1402,63.7173], -[68.0578,68.1218,67.609], -[7.83207,5.90193,6.0461], -[48.7194,48.0233,48.2198], -[289.492,304.639,282.436], -[6.30572,6.31857,6.21598], -[53.644,53.8931,53.5307], -[131.526,131.45,131.102], -[137.724,136.921,137.758], -[57.2079,56.2775,56.2152], -[56.5349,56.2048,55.9569], -[82.3897,82.8866,83.534], -[97.0569,97.1392,96.4731], -[85.6557,86.7783,86.2804], -[49.4325,42.4309,42.5743], -[111.537,114.59,111.807], -[88.4322,89.3756,87.7899], -[160.781,163.866,161.394], -[1025.04,2.10165,2.10065], -[106.741,56.2731,56.1535], -[59.2681,59.5272,59.536], -[58.6083,57.6054,57.3935], -[54.8271,55.1397,56.3487], -[54.718,52.469,53.271], -[53.5387,53.1926,52.4008], -[52.0042,51.9581,52.2453], -[60.1317,59.9695,59.2187], -[244.608,242.954,243.815], -[91.8674,92.4165,91.5884], -[63.7122,64.277,64.2783], -[69.2596,68.9535,69.4508], -[234.222,241.138,240.316], -[488.169,462.257,460.466], -[472.929,471.809,476.635], -[103.664,116.131,103.467], -[16.8124,3.34058,3.37782], -[0.852414,0.832073,0.859857], -[0.305464,0.31166,0.306694], -[4.55625,4.54098,4.58501], -[0.299746,0.297532,0.30334], -[0.275732,0.279817,0.27766], -[0.332107,0.324387,0.320099] -] -} diff --git a/benchmark/aurora-postgresql/run.sh b/benchmark/aurora-postgresql/run.sh deleted file mode 100755 index f2c694359f1..00000000000 --- a/benchmark/aurora-postgresql/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -U postgres -h "${HOST}" test -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/bigquery/.gitignore b/benchmark/bigquery/.gitignore deleted file mode 100644 index 1a06816d838..00000000000 --- a/benchmark/bigquery/.gitignore +++ /dev/null @@ -1 +0,0 @@ -results diff --git a/benchmark/bigquery/README.md b/benchmark/bigquery/README.md deleted file mode 100644 index 3563fd7a87b..00000000000 --- a/benchmark/bigquery/README.md +++ /dev/null @@ -1,38 +0,0 @@ -BigQuery has "DeWitt" clause that restricts from disclosing the benchmark results. -Nevertheless, it does not prevent from doing the benchmarks. - -It's very difficult to find, how to create a database. -Databases are named "datasets". You need to press on `⋮` near project. - -Create dataset `test`. -Go to the query editor and paste the contents of `create.sql`. -It will take two seconds to create a table. - -Download Google Cloud CLI: -``` -curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-392.0.0-linux-x86_64.tar.gz -tar -xf google-cloud-cli-392.0.0-linux-x86_64.tar.gz -./google-cloud-sdk/install.sh -source .bashrc -./google-cloud-sdk/bin/gcloud init -``` - -Load the data: -``` -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -time bq load --source_format CSV --allow_quoted_newlines=1 test.hits hits.csv -``` - -Run the benchmark: - -``` -./run.sh 2>&1 | tee log.txt - -cat log.txt | - grep -P '^real|^Error' | - sed -r -e 's/^Error.*$/null/; s/^real\s*([0-9.]+)m([0-9.]+)s$/\1 \2/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if ($1 == "null") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' -``` diff --git a/benchmark/bigquery/create.sql b/benchmark/bigquery/create.sql deleted file mode 100644 index 9012df89a77..00000000000 --- a/benchmark/bigquery/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE test.hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title String NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL String NOT NULL, - Referer String NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 String NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor String NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel String NOT NULL, - Params String NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase String NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset String NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL String NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor String NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage String NOT NULL, - BrowserCountry String NOT NULL, - SocialNetwork String NOT NULL, - SocialAction String NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage String NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID String NOT NULL, - ParamCurrency String NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName String NOT NULL, - OpenstatCampaignID String NOT NULL, - OpenstatAdID String NOT NULL, - OpenstatSourceID String NOT NULL, - UTMSource String NOT NULL, - UTMMedium String NOT NULL, - UTMCampaign String NOT NULL, - UTMContent String NOT NULL, - UTMTerm String NOT NULL, - FromTag String NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/bigquery/queries.sql b/benchmark/bigquery/queries.sql deleted file mode 100644 index 3dc8f405aaa..00000000000 --- a/benchmark/bigquery/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM test.hits; -SELECT COUNT(*) FROM test.hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM test.hits; -SELECT AVG(UserID) FROM test.hits; -SELECT COUNT(DISTINCT UserID) FROM test.hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM test.hits; -SELECT MIN(EventDate), MAX(EventDate) FROM test.hits; -SELECT AdvEngineID, COUNT(*) FROM test.hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM test.hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM test.hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM test.hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM test.hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM test.hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM test.hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM test.hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM test.hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM test.hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM test.hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM test.hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM test.hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM test.hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM test.hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM test.hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM test.hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM test.hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM test.hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM test.hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC(EventTime, MINUTE) AS M, COUNT(*) AS PageViews FROM test.hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY M ORDER BY M LIMIT 10 OFFSET 1000; diff --git a/benchmark/bigquery/run.sh b/benchmark/bigquery/run.sh deleted file mode 100755 index 1a48f9a1c9b..00000000000 --- a/benchmark/bigquery/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - echo "$query"; - for i in $(seq 1 $TRIES); do - time bq query --use_legacy_sql=false --use_cache=false <<< "$query" - done -done diff --git a/benchmark/brytlytdb/README.md b/benchmark/brytlytdb/README.md deleted file mode 100644 index f0b4e044940..00000000000 --- a/benchmark/brytlytdb/README.md +++ /dev/null @@ -1 +0,0 @@ -An attempt to use their service resulted in a failure. It showed "Error: cannot create connection" shortly after registration and advised to ask for support. I emailed to support, and they assured that the problem will be resolved soon. diff --git a/benchmark/citus/benchmark.sh b/benchmark/citus/benchmark.sh deleted file mode 100755 index c7b71b367c4..00000000000 --- a/benchmark/citus/benchmark.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt-get install -y docker.io -sudo apt-get install -y postgresql-client - -sudo docker run -d --name citus -p 5432:5432 -e POSTGRES_PASSWORD=mypass citusdata/citus:11.0 - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -echo "*:*:*:*:mypass" > .pgpass -chmod 400 .pgpass - -psql -U postgres -h localhost -d postgres --no-password -t -c 'CREATE DATABASE test' -psql -U postgres -h localhost -d postgres --no-password test -t < create.sql -psql -U postgres -h localhost -d postgres --no-password test -t -c '\timing' -c "\\copy hits FROM 'hits.tsv'" - -# COPY 99997497 -# Time: 1579203.482 ms (26:19.203) - -./run.sh 2>&1 | tee log.txt - -sudo docker exec -it citus du -bcs /var/lib/postgresql/data - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/citus/create.sql b/benchmark/citus/create.sql deleted file mode 100644 index ab013c71fd8..00000000000 --- a/benchmark/citus/create.sql +++ /dev/null @@ -1,110 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -) -USING COLUMNAR; diff --git a/benchmark/citus/queries.sql b/benchmark/citus/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/citus/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/citus/results/c6a.4xlarge.json b/benchmark/citus/results/c6a.4xlarge.json deleted file mode 100644 index 23d170aa869..00000000000 --- a/benchmark/citus/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Citus", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "PostgreSQL compatible", "column-oriented"], - - "load_time": 1579, - "data_size": 18980918899, - - "result": [ -[7.58503,6.70447,6.52499], -[6.33941,5.06063,5.00238], -[11.7488,9.86417,9.93223], -[12.6306,9.36305,9.17061], -[40.6101,39.0803,38.1187], -[117.654,113.912,113.441], -[10.3404,8.08936,7.70732], -[6.31542,4.72821,4.72989], -[82.5425,77.2124,76.9219], -[91.1776,83.4492,82.4727], -[14.5474,10.0815,10.3873], -[15.4899,11.2922,11.1877], -[19.9794,15.5002,17.4492], -[76.9216,72.5172,72.7915], -[21.5446,17.5691,18.561], -[56.9438,54.6387,53.5745], -[75.0977,69.7842,70.0259], -[31.3299,27.0267,26.3216], -[129.417,122.956,121.182], -[3.73386,2.14148,2.12737], -[34.6021,27.9727,28.6878], -[37.152,29.6193,29.2966], -[52.2157,37.8589,37.6994], -[181.955,149.08,148.471], -[15.4687,11.3138,10.3856], -[10.2779,8.46868,8.8324], -[14.4687,10.4076,11.4263], -[47.009,40.2969,39.6888], -[749.946,742.979,744.461], -[69.4383,67.5636,67.2128], -[27.0317,21.4008,20.9524], -[36.6675,25.6347,26.4408], -[140.424,130.546,129.738], -[106.959,92.033,90.1609], -[110.98,94.4787,96.2656], -[64.4474,60.1853,60.6816], -[6.17549,6.25376,5.87004], -[1.99153,1.81776,1.80596], -[1.00141,0.800271,0.801975], -[7.91778,7.70928,8.33299], -[0.929845,0.642076,0.638478], -[0.866536,0.683567,0.680218], -[0.937823,0.784747,0.765929] -] -} diff --git a/benchmark/citus/run.sh b/benchmark/citus/run.sh deleted file mode 100755 index 7adee7c460d..00000000000 --- a/benchmark/citus/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -U postgres -h localhost -d postgres --no-password -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/clickhouse-local/benchmark.sh b/benchmark/clickhouse-local/benchmark.sh deleted file mode 100755 index 9b660cc2cef..00000000000 --- a/benchmark/clickhouse-local/benchmark.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -# Install - -curl https://clickhouse.com/ | sh - -# wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.parquet' -seq 0 99 | xargs -P100 -I{} bash -c 'wget --continue https://datasets.clickhouse.com/hits_compatible/athena_partitioned/hits_{}.parquet' - -# Run the queries - -./run.sh - -du -b hits.parquet diff --git a/benchmark/clickhouse-local/create.sql b/benchmark/clickhouse-local/create.sql deleted file mode 100644 index 180a5806e46..00000000000 --- a/benchmark/clickhouse-local/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) -ENGINE = File(Parquet, 'hits_*.parquet'); diff --git a/benchmark/clickhouse-local/queries.sql b/benchmark/clickhouse-local/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/clickhouse-local/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/clickhouse-local/results/c6a.4xlarge.partitioned.json b/benchmark/clickhouse-local/results/c6a.4xlarge.partitioned.json deleted file mode 100644 index c44114d12c1..00000000000 --- a/benchmark/clickhouse-local/results/c6a.4xlarge.partitioned.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "clickhouse-local (partitioned)", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "column-oriented", "embedded", "stateless", "ClickHouse derivative"], - - "load_time": 0, - "data_size": 14737666736, - - "result": [ -[0.850, 0.102, 0.114], -[1.342, 0.090, 0.099], -[2.547, 0.179, 0.189], -[1.681, 0.245, 0.252], -[2.704, 1.680, 1.648], -[2.194, 1.460, 1.489], -[0.832, 0.113, 0.102], -[1.371, 0.106, 0.101], -[2.240, 0.790, 0.825], -[4.548, 1.021, 1.026], -[3.094, 0.552, 0.552], -[3.088, 0.623, 0.630], -[2.017, 1.170, 1.165], -[4.319, 1.677, 1.708], -[2.157, 1.496, 1.500], -[1.629, 1.138, 1.139], -[5.026, 3.267, 3.241], -[4.142, 2.303, 2.319], -[8.295, 5.569, 5.629], -[1.331, 0.255, 0.252], -[10.712, 3.668, 3.786], -[13.053, 4.185, 4.202], -[24.170, 7.935, 8.008], -[55.965, 23.933, 23.071], -[4.417, 0.947, 0.974], -[1.793, 0.698, 0.690], -[4.376, 0.955, 0.956], -[11.731, 4.385, 4.321], -[11.403, 8.549, 8.288], -[2.764, 2.754, 2.735], -[5.096, 1.262, 1.273], -[9.515, 1.682, 1.688], -[10.325, 6.745, 6.608], -[11.686, 6.261, 6.242], -[11.769, 6.301, 6.364], -[1.675, 1.490, 1.495], -[14.937, 3.631, 3.604], -[14.187, 3.609, 3.631], -[14.842, 3.769, 3.741], -[22.222, 6.355, 6.263], -[7.212, 0.836, 0.838], -[7.863, 0.716, 0.718], -[5.120, 0.587, 0.574] -] -} diff --git a/benchmark/clickhouse-local/results/c6a.4xlarge.single.json b/benchmark/clickhouse-local/results/c6a.4xlarge.single.json deleted file mode 100644 index 8d19a6d0ce4..00000000000 --- a/benchmark/clickhouse-local/results/c6a.4xlarge.single.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "clickhouse-local (single)", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "column-oriented", "embedded", "stateless", "ClickHouse derivative"], - - "load_time": 0, - "data_size": 14779976446, - - "result": [ -[1.176, 0.251, 0.249], -[1.037, 0.134, 0.111], -[1.609, 0.403, 0.369], -[1.616, 0.372, 0.370], -[3.008, 2.338, 2.266], -[9.061, 7.537, 7.535], -[1.206, 0.191, 0.187], -[0.882, 0.144, 0.135], -[4.610, 3.406, 3.256], -[6.712, 4.479, 4.469], -[4.081, 2.413, 2.394], -[3.894, 2.719, 2.691], -[7.651, 6.436, 6.243], -[10.765, 8.043, 7.894], -[9.860, 8.945, 8.235], -[7.159, 5.815, 5.814], -[20.916, 18.159, 18.013], -[20.952, 17.862, 17.850], -[37.585, 32.649, 32.487], -[1.767, 0.401, 0.393], -[23.713, 15.687, 15.755], -[28.700, 19.241, 19.198], -[50.740, 33.161, 33.011], -[152.485, 117.417, 118.178], -[7.606, 4.491, 5.326], -[4.331, 4.214, 3.587], -[6.743, 4.486, 5.357], -[22.910, 15.043, 15.183], -[43.342, 37.167, 36.842], -[11.807, 4.490, 4.546], -[9.557, 6.349, 6.263], -[13.964, 8.493, 8.464], -[38.110, 33.642, 33.996], -[41.266, 35.080, 27.073], -[34.056, 26.814, 26.902], -[8.855, 7.548, 7.475], -[22.596, 12.615, 12.669], -[23.217, 13.956, 13.831], -[22.528, 21.601, 13.207], -[37.890, 23.115, 22.955], -[6.490, 1.548, 1.522], -[6.413, 1.474, 1.416], -[3.796, 1.339, 1.316] -] -} diff --git a/benchmark/clickhouse-local/run.sh b/benchmark/clickhouse-local/run.sh deleted file mode 100755 index 6c6980ec68a..00000000000 --- a/benchmark/clickhouse-local/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -TRIES=3 -QUERY_NUM=1 -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - - echo -n "[" - for i in $(seq 1 $TRIES); do - RES=$(./clickhouse local --time --format Null --multiquery --query="$(cat create.sql); $query" 2>&1 | tail -n1) - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - - echo "${QUERY_NUM},${i},${RES}" >> result.csv - done - echo "]," - - QUERY_NUM=$((QUERY_NUM + 1)) -done diff --git a/benchmark/clickhouse/benchmark.sh b/benchmark/clickhouse/benchmark.sh deleted file mode 100755 index 2c06df90164..00000000000 --- a/benchmark/clickhouse/benchmark.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Install - -curl https://clickhouse.com/ | sh -sudo DEBIAN_FRONTEND=noninteractive ./clickhouse install -sudo clickhouse start - -# Load the data - -clickhouse-client < create.sql - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -clickhouse-client --time --query "INSERT INTO hits FORMAT TSV" < hits.tsv - -# Run the queries - -./run.sh - -clickhouse-client --query "SELECT total_bytes FROM system.tables WHERE name = 'hits' AND database = 'default'" diff --git a/benchmark/clickhouse/create.sql b/benchmark/clickhouse/create.sql deleted file mode 100644 index 0297e914a19..00000000000 --- a/benchmark/clickhouse/create.sql +++ /dev/null @@ -1,110 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -) -ENGINE = MergeTree; diff --git a/benchmark/clickhouse/queries.sql b/benchmark/clickhouse/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/clickhouse/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/clickhouse/results/c6a.4xlarge.json b/benchmark/clickhouse/results/c6a.4xlarge.json deleted file mode 100644 index 4bc9508ea6e..00000000000 --- a/benchmark/clickhouse/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "ClickHouse", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "column-oriented", "ClickHouse derivative"], - - "load_time": 475.529, - "data_size": 14345515782, - - "result": [ -[0.027, 0.001, 0.001], -[0.035, 0.015, 0.021], -[0.083, 0.034, 0.033], -[0.171, 0.044, 0.045], -[1.552, 1.495, 1.574], -[1.270, 1.075, 1.063], -[0.045, 0.026, 0.025], -[0.032, 0.016, 0.015], -[0.717, 0.615, 0.607], -[0.843, 0.821, 0.747], -[0.293, 0.219, 0.216], -[0.312, 0.226, 0.235], -[0.804, 0.694, 0.702], -[1.476, 1.047, 1.029], -[1.013, 0.898, 0.911], -[1.043, 0.964, 1.453], -[3.632, 2.715, 2.711], -[1.867, 1.750, 1.714], -[5.187, 4.797, 4.953], -[0.112, 0.068, 0.041], -[8.637, 1.761, 1.212], -[9.902, 0.902, 0.869], -[18.831, 2.067, 1.829], -[41.903, 4.476, 3.486], -[1.801, 0.254, 0.238], -[0.627, 0.214, 0.207], -[2.181, 0.241, 0.246], -[8.868, 0.748, 0.733], -[9.674, 6.891, 5.770], -[2.620, 2.355, 2.368], -[1.395, 0.533, 0.525], -[4.454, 0.730, 0.712], -[5.453, 4.990, 5.922], -[9.955, 3.968, 4.096], -[9.987, 4.035, 4.476], -[1.695, 1.236, 1.241], -[0.142, 0.079, 0.103], -[0.066, 0.033, 0.040], -[0.065, 0.030, 0.033], -[0.246, 0.207, 0.192], -[0.044, 0.019, 0.020], -[0.030, 0.023, 0.012], -[0.030, 0.018, 0.013] -] -} diff --git a/benchmark/clickhouse/results/c6a.metal.json b/benchmark/clickhouse/results/c6a.metal.json deleted file mode 100644 index 92727d881b8..00000000000 --- a/benchmark/clickhouse/results/c6a.metal.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "ClickHouse", - "date": "2022-07-01", - "machine": "c6a.metal, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "column-oriented", "ClickHouse derivative"], - - "load_time": 136.869, - "data_size": 14571706777, - - "result": [ -[0.011, 0.001, 0.001], -[0.040, 0.015, 0.013], -[0.045, 0.021, 0.023], -[0.090, 0.023, 0.023], -[1.922, 1.565, 1.576], -[0.961, 0.737, 0.739], -[0.040, 0.023, 0.018], -[0.032, 0.028, 0.028], -[0.321, 0.287, 0.275], -[0.632, 0.284, 0.287], -[0.166, 0.124, 0.118], -[0.235, 0.100, 0.102], -[1.006, 0.182, 0.159], -[1.637, 0.216, 0.213], -[0.871, 0.174, 0.177], -[0.258, 0.148, 0.148], -[1.804, 0.370, 0.358], -[1.235, 0.275, 0.278], -[3.143, 0.854, 0.815], -[0.071, 0.024, 0.016], -[8.816, 0.215, 0.155], -[10.239, 0.203, 0.173], -[19.179, 0.388, 0.357], -[43.152, 0.824, 0.823], -[1.821, 0.059, 0.052], -[0.992, 0.045, 0.051], -[2.539, 0.063, 0.058], -[9.258, 0.300, 0.278], -[7.923, 0.961, 0.936], -[0.445, 0.431, 0.428], -[1.367, 0.131, 0.113], -[4.819, 0.205, 0.175], -[3.808, 0.739, 0.726], -[8.935, 0.607, 0.600], -[8.988, 0.634, 0.615], -[0.242, 0.220, 0.226], -[0.075, 0.058, 0.056], -[0.038, 0.028, 0.026], -[0.043, 0.028, 0.021], -[0.172, 0.127, 0.119], -[0.028, 0.018, 0.017], -[0.027, 0.019, 0.014], -[0.018, 0.026, 0.015] -] -} diff --git a/benchmark/clickhouse/run.sh b/benchmark/clickhouse/run.sh deleted file mode 100755 index 86bc4c03f72..00000000000 --- a/benchmark/clickhouse/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -TRIES=3 -QUERY_NUM=1 -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - - echo -n "[" - for i in $(seq 1 $TRIES); do - RES=$(clickhouse-client --time --format=Null --query="$query" 2>&1 ||:) - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - - echo "${QUERY_NUM},${i},${RES}" >> result.csv - done - echo "]," - - QUERY_NUM=$((QUERY_NUM + 1)) -done diff --git a/benchmark/cratedb/benchmark.sh b/benchmark/cratedb/benchmark.sh deleted file mode 100755 index b45826ee861..00000000000 --- a/benchmark/cratedb/benchmark.sh +++ /dev/null @@ -1,39 +0,0 @@ -#!/bin/bash - -bash -c "$(curl -L https://try.crate.io/)" > crate.log 2>&1 & - -sudo apt-get update -sudo apt-get install -y postgresql-client - -psql -U crate -h localhost --no-password -t -c 'SELECT 1' - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -psql -U crate -h localhost --no-password -t < create.sql - -psql -U crate -h localhost --no-password -t -c '\timing' -c " - COPY hits - FROM 'file://$(pwd)/hits.tsv' - WITH - ( - "delimiter"=e'\t', - "format"='csv', - "header"=false, - "empty_string_as_null"=false - ) - RETURN SUMMARY;" - -# One record did not load: -# 99997496 -# {"Missing closing quote for value\n at [Source: UNKNOWN; line: 1, column: 1069]":{"count":1,"line_numbers":[93557187]}} -# Time: 10687056.069 ms (02:58:07.056) - -./run.sh 2>&1 | tee log.txt - -# For some queries it gives "Data too large". - -du -bcs crate-* - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms|ERROR' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if ($1 == "ERROR") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : ($1 / 1000); if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' diff --git a/benchmark/cratedb/create.sql b/benchmark/cratedb/create.sql deleted file mode 100644 index 3bf1815ceba..00000000000 --- a/benchmark/cratedb/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate TIMESTAMP NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor TEXT NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/cratedb/queries.sql b/benchmark/cratedb/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/cratedb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/cratedb/results/c6a.4xlarge.json b/benchmark/cratedb/results/c6a.4xlarge.json deleted file mode 100644 index 0e469b6ca27..00000000000 --- a/benchmark/cratedb/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "CrateDB", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "For some queries it gives \"Data too large\".", - - "tags": ["Java", "column-oriented"], - - "load_time": 10687, - "data_size": 109636633416, - - "result": [ -[0.008162,0.005118,0.002553], -[0.350014,0.39977,0.133775], -[2.58426,2.47192,2.59779], -[2.12939,0.532981,0.507246], -[null,null,null], -[null,null,null], -[1.18488,1.06603,1.07219], -[0.209264,0.073284,0.067912], -[null,null,null], -[null,null,null], -[1.68892,1.2866,1.47428], -[1.62976,1.43073,1.26904], -[12.7517,13.0334,13.2685], -[18.8587,null,18.6951], -[11.2982,11.2108,11.577], -[20.2964,20.4035,19.1076], -[null,null,null], -[null,null,null], -[null,null,null], -[0.202044,0.010009,0.005566], -[9.22964,4.54606,0.774149], -[1.41673,1.09885,0.789775], -[12.3933,8.06911,1.69671], -[1.45018,0.969528,0.979718], -[0.357589,0.14887,0.153326], -[0.189282,0.133963,0.130279], -[0.153222,0.140756,0.139861], -[27.5195,19.6862,20.1825], -[72.7575,68.2,67.1238], -[144.533,146.579,152.144], -[8.76866,9.00563,8.46917], -[17.6652,16.6755,16.0558], -[null,null,null], -[null,null,null], -[null,null,null], -[42.2967,44.9621,44.4386], -[0.786911,0.4904,0.508416], -[0.602075,0.226261,0.182399], -[0.131407,0.058958,0.054518], -[0.954736,1.1361,1.14233], -[0.23764,0.139109,0.134472], -[0.110253,0.057695,0.056073], -[0.124285,0.150479,0.066226] -] -} diff --git a/benchmark/cratedb/run.sh b/benchmark/cratedb/run.sh deleted file mode 100755 index 477f463289b..00000000000 --- a/benchmark/cratedb/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -U crate -h localhost --no-password -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/databend/README.md b/benchmark/databend/README.md deleted file mode 100644 index fd7877efb27..00000000000 --- a/benchmark/databend/README.md +++ /dev/null @@ -1,7 +0,0 @@ -It is written in Rust and is blazing. - -Update from @BohuTANG: - -> Thanks for the benchmark! -> Databend is a cloud warehouse designed for object storage(like Amazon S3), not the local file system. The FS model is only used for testing for some cases, we didn't do any optimization, and we know it has some performance issues. -> I believe that ClickHouse is also being designed for the cloud, and looking forward to the S3 benchmark results :) diff --git a/benchmark/databend/benchmark.sh b/benchmark/databend/benchmark.sh deleted file mode 100755 index 47ff4ff564c..00000000000 --- a/benchmark/databend/benchmark.sh +++ /dev/null @@ -1,105 +0,0 @@ -#!/bin/bash - -mkdir databend && cd databend -curl -LJO 'https://github.com/datafuselabs/databend/releases/download/v0.7.113-nightly/databend-v0.7.113-nightly-x86_64-unknown-linux-musl.tar.gz' -tar xzvf 'databend-v0.7.113-nightly-x86_64-unknown-linux-musl.tar.gz' - -echo 'dir = "metadata/_logs" -admin_api_address = "127.0.0.1:8101" -grpc_api_address = "127.0.0.1:9101" - -[raft_config] -id = 1 -single = true -raft_dir = "metadata/datas"' > databend-meta.toml - -./bin/databend-meta -c ./databend-meta.toml > meta.log 2>&1 & -curl -I 'http://127.0.0.1:8101/v1/health' - -echo '[log] -level = "INFO" -dir = "benddata/_logs" - -[query] -# For admin RESET API. -admin_api_address = "127.0.0.1:8001" - -# Metrics. -metric_api_address = "127.0.0.1:7071" - -# Cluster flight RPC. -flight_api_address = "127.0.0.1:9091" - -# Query MySQL Handler. -mysql_handler_host = "127.0.0.1" -mysql_handler_port = 3307 - -# Query ClickHouse Handler. -clickhouse_handler_host = "127.0.0.1" -clickhouse_handler_port = 9001 - -# Query ClickHouse HTTP Handler. -clickhouse_http_handler_host = "127.0.0.1" -clickhouse_http_handler_port = 8125 - -# Query HTTP Handler. -http_handler_host = "127.0.0.1" -http_handler_port = 8081 - -tenant_id = "tenant1" -cluster_id = "cluster1" - -[meta] -# databend-meta grpc api address. -address = "127.0.0.1:9101" -username = "root" -password = "root" - -[storage] -# fs|s3 -type = "fs" - -[storage.fs] -data_path = "benddata/datas"' > databend-query.toml - -./bin/databend-query -c ./databend-query.toml > query.log 2>&1 & - -curl https://clickhouse.com/ | sh -sudo ./clickhouse install - -# Load the data - -curl 'http://default@localhost:8124/' --data-binary @create.sql - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -# Note: if I run -# clickhouse-client --time --query "INSERT INTO hits FORMAT TSV" < hits.tsv -# it panics: -# ERROR common_tracing::panic_hook: panicked at 'called `Result::unwrap()` on an `Err` value: SendError - -# Note: if I run -# curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format CSV' -H 'skip_header: 0' -H 'field_delimiter: ,' -H 'record_delimiter: \n' -F 'upload=@"./hits.csv"' -# curl: (55) Send failure: Broken pipe - -# This is not entirely correct, but starts to work: -# curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format TSV' -H 'skip_header: 0' -H 'field_delimiter: \t' -H 'record_delimiter: \n' -F 'upload=@"./hits.tsv"' -# and fails after 7 minutes 38 seconds without loading any data: -# Code: 4000, displayText = invalid data (Expected to have terminated string literal.) (while in processor thread 5). -# the diagnostics is terrible. - -head -n 90000000 hits.tsv > hits90m.tsv -time curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into hits format TSV' -H 'skip_header: 0' -H 'field_delimiter: \t' -H 'record_delimiter: \n' -F 'upload=@"./hits90m.tsv"' - -# {"id":"08f59e6c-2924-483e-bb96-cbcb458588f5","state":"SUCCESS","stats":{"rows":90000000,"bytes":73152552024},"error":null} -# real 7m15.312s - -du -bcs _data -# 38714978944 - -# It does not support ClickHouse protocol well (it hangs on some queries if they are too long). - -./run.sh 2>&1 | tee log.txt - -# Note: divide every number by 0.9 as only 90% of the data was loaded successfully. diff --git a/benchmark/databend/create.sql b/benchmark/databend/create.sql deleted file mode 100644 index b446288b409..00000000000 --- a/benchmark/databend/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) -CLUSTER BY (CounterID, EventDate, UserID, EventTime, WatchID); diff --git a/benchmark/databend/queries.sql b/benchmark/databend/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/databend/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/databend/results/c6a.4xlarge.json b/benchmark/databend/results/c6a.4xlarge.json deleted file mode 100644 index a26bad5b49f..00000000000 --- a/benchmark/databend/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Databend", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Only 90% of data successfully loaded.", - - "tags": ["Rust", "column-oriented", "ClickHouse derivative"], - - "load_time": 484, - "data_size": 43016643271, - - "result": [ -[0.010087, 0.002961, 0.003271], -[0.127964, 0.080012, 0.075741], -[0.162388, 0.143967, 0.144762], -[0.252904, 0.217471, 0.217369], -[34.281026, 34.844158, 34.526942], -[25.290307, 25.793068, 25.620563], -[0.112484, 0.093867, 0.090891], -[0.086604, 0.07796, 0.076448], -[20.723203, 20.7483, 20.354869], -[20.81994, 20.72446, 20.696573], -[1.964378, 1.93559, 1.893824], -[1.846866, 1.789111, 1.763664], -[4.468158, 4.407959, 4.438036], -[19.947276, 19.8859, 19.853514], -[5.478573, 5.474461, 5.460604], -[5.509521, 5.513413, 5.363123], -[15.430359, 15.5406, 15.461211], -[14.905998, 15.029721, 15.019642], -[31.069663, 30.811763, 30.737336], -[0.281067, 0.220021, 0.217741], -[8.89374, 4.12692, 4.131689], -[10.38448, 4.603694, 4.571757], -[19.980572, 8.836322, 8.892694], -[59.786474, 52.452881, 39.941988], -[2.804019, 0.994794, 0.958224], -[0.765299, 0.730434, 0.723964], -[2.784648, 0.94665, 0.936684], -[8.905027, 5.418438, 5.386109], -[12.187652, 12.230066, 12.164123], -[3.35748, 3.395991, 3.319434], -[4.309389, 3.854977, 3.772506], -[9.958201, 7.027432, 6.888253], -[50.200569, 50.535126, 50.283066], -[24.469412, 21.222713, 21.010188], -[26.115852, 23.93507, 24.835342], -[7.511517, 7.296179, 7.324549], -[2.156784, 1.298258, 1.278441], -[2.155447, 1.314499, 1.331237], -[2.007053, 1.181676, 1.155612], -[null, null, null], -[0.485363, 0.420291, 0.416819], -[0.372131, 0.322068, 0.323578], -[null, null, null] -] -} diff --git a/benchmark/databend/run.sh b/benchmark/databend/run.sh deleted file mode 100755 index 5a87f07b2ec..00000000000 --- a/benchmark/databend/run.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -TRIES=3 -QUERY_NUM=1 -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - - echo -n "[" - for i in $(seq 1 $TRIES); do - RES=$(curl -w 'Time: %{time_total}\n' http://default@localhost:8124/ -d "${query}" 2>&1 | grep -P '^Time: ' | sed 's/Time: //') - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - - echo "${QUERY_NUM},${i},${RES}" >> result.csv - done - echo "]," - - QUERY_NUM=$((QUERY_NUM + 1)) -done diff --git a/benchmark/druid/benchmark.sh b/benchmark/druid/benchmark.sh deleted file mode 100755 index 5c237a85d03..00000000000 --- a/benchmark/druid/benchmark.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt install -y openjdk-8-jdk python2 -sudo update-alternatives --config java - -# Install - -VERSION=0.23.0 - -wget -O"apache-druid-${VERSION}-bin.tar.gz" "https://dlcdn.apache.org/druid/${VERSION}/apache-druid-${VERSION}-bin.tar.gz" -tar xf apache-druid-${VERSION}-bin.tar.gz -./apache-druid-${VERSION}/bin/verify-java - -# Have to increase indexer memory limit -sed -i 's MaxDirectMemorySize=1g MaxDirectMemorySize=5g g' apache-druid-$VERSION/conf/druid/single-server/medium/middleManager/runtime.properties - -# Disable cache to test query performance -sed -i 's druid.historical.cache.useCache=true druid.historical.cache.useCache=false g' apache-druid-$VERSION/conf/druid/single-server/medium/historical/runtime.properties -sed -i 's druid.historical.cache.populateCache=true druid.historical.cache.populateCache=false g' apache-druid-$VERSION/conf/druid/single-server/medium/historical/runtime.properties -sed -i 's druid.processing.buffer.sizeBytes=500MiB druid.processing.buffer.sizeBytes=1000MiB g' apache-druid-$VERSION/conf/druid/single-server/medium/historical/runtime.properties - -echo "druid.query.groupBy.maxMergingDictionarySize=5000000000" >> apache-druid-$VERSION/conf/druid/single-server/medium/historical/runtime.properties -# Druid launcher does not start Druid as a daemon. Run it in background -./apache-druid-${VERSION}/bin/start-single-server-medium & - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -./apache-druid-${VERSION}/bin/post-index-task --file ingest.json --url http://localhost:8081 - -# The command above will fail due to timeout but still continue to run in background. -# The loading time should be checked from the logs. - -# Run the queries -./run.sh - -# stop Druid services -kill %1 - -du -bcs ./apache-druid-${VERSION}/var diff --git a/benchmark/druid/check.json b/benchmark/druid/check.json deleted file mode 100644 index a61bc7bacd9..00000000000 --- a/benchmark/druid/check.json +++ /dev/null @@ -1 +0,0 @@ -{"query": "SELECT COUNT(*) from hits"} diff --git a/benchmark/druid/ingest.json b/benchmark/druid/ingest.json deleted file mode 100644 index 8d4c741934c..00000000000 --- a/benchmark/druid/ingest.json +++ /dev/null @@ -1,573 +0,0 @@ -{ - "type": "index_parallel", - "spec": { - "ioConfig": { - "type": "index_parallel", - "inputSource": { - "type": "local", - "baseDir": "../", - "filter": "hits.tsv" - }, - "inputFormat": { - "type": "tsv", - "findColumnsFromHeader": false, - "columns": [ - "WatchID", - "JavaEnable", - "Title", - "GoodEvent", - "EventTime", - "EventDate", - "CounterID", - "ClientIP", - "RegionID", - "UserID", - "CounterClass", - "OS", - "UserAgent", - "URL", - "Referer", - "IsRefresh", - "RefererCategoryID", - "RefererRegionID", - "URLCategoryID", - "URLRegionID", - "ResolutionWidth", - "ResolutionHeight", - "ResolutionDepth", - "FlashMajor", - "FlashMinor", - "FlashMinor2", - "NetMajor", - "NetMinor", - "UserAgentMajor", - "UserAgentMinor", - "CookieEnable", - "JavascriptEnable", - "IsMobile", - "MobilePhone", - "MobilePhoneModel", - "Params", - "IPNetworkID", - "TraficSourceID", - "SearchEngineID", - "SearchPhrase", - "AdvEngineID", - "IsArtifical", - "WindowClientWidth", - "WindowClientHeight", - "ClientTimeZone", - "ClientEventTime", - "SilverlightVersion1", - "SilverlightVersion2", - "SilverlightVersion3", - "SilverlightVersion4", - "PageCharset", - "CodeVersion", - "IsLink", - "IsDownload", - "IsNotBounce", - "FUniqID", - "OriginalURL", - "HID", - "IsOldCounter", - "IsEvent", - "IsParameter", - "DontCountHits", - "WithHash", - "HitColor", - "LocalEventTime", - "Age", - "Sex", - "Income", - "Interests", - "Robotness", - "RemoteIP", - "WindowName", - "OpenerName", - "HistoryLength", - "BrowserLanguage", - "BrowserCountry", - "SocialNetwork", - "SocialAction", - "HTTPError", - "SendTiming", - "DNSTiming", - "ConnectTiming", - "ResponseStartTiming", - "ResponseEndTiming", - "FetchTiming", - "SocialSourceNetworkID", - "SocialSourcePage", - "ParamPrice", - "ParamOrderID", - "ParamCurrency", - "ParamCurrencyID", - "OpenstatServiceName", - "OpenstatCampaignID", - "OpenstatAdID", - "OpenstatSourceID", - "UTMSource", - "UTMMedium", - "UTMCampaign", - "UTMContent", - "UTMTerm", - "FromTag", - "HasGCLID", - "RefererHash", - "URLHash", - "CLID" - ] - } - }, - "tuningConfig": { - "type": "index_parallel", - "partitionsSpec": { - "type": "hashed", - "partitionDimensions": [ - "CounterID", - "EventDate", - "UserID", - "EventTime", - "WatchID" - ] - }, - "forceGuaranteedRollup": true, - "logParseExceptions": true, - "maxParseExceptions": 1, - "maxNumConcurrentSubTasks": 10 - }, - "dataSchema": { - "dataSource": "hits", - "timestampSpec": { - "column": "EventTime", - "format": "yyyy-MM-dd HH:mm:ss" - }, - "dimensionsSpec": { - "dimensions": [ - { - "name": "WatchID", - "type": "long" - }, - { - "name": "JavaEnable", - "type": "long" - }, - { - "name": "Title", - "type": "string" - }, - { - "name": "GoodEvent", - "type": "long" - }, - { - "name": "EventDate", - "type": "string" - }, - { - "name": "CounterID", - "type": "long" - }, - { - "name": "ClientIP", - "type": "long" - }, - { - "name": "RegionID", - "type": "long" - }, - { - "name": "UserID", - "type": "long" - }, - { - "name": "CounterClass", - "type": "long" - }, - { - "name": "OS", - "type": "long" - }, - { - "name": "UserAgent", - "type": "long" - }, - { - "name": "URL", - "type": "string" - }, - { - "name": "Referer", - "type": "string" - }, - { - "name": "IsRefresh", - "type": "long" - }, - { - "name": "RefererCategoryID", - "type": "long" - }, - { - "name": "RefererRegionID", - "type": "long" - }, - { - "name": "URLCategoryID", - "type": "long" - }, - { - "name": "URLRegionID", - "type": "long" - }, - { - "name": "ResolutionWidth", - "type": "long" - }, - { - "name": "ResolutionHeight", - "type": "long" - }, - { - "name": "ResolutionDepth", - "type": "long" - }, - { - "name": "FlashMajor", - "type": "long" - }, - { - "name": "FlashMinor", - "type": "long" - }, - { - "name": "FlashMinor2", - "type": "string" - }, - { - "name": "NetMajor", - "type": "long" - }, - { - "name": "NetMinor", - "type": "long" - }, - { - "name": "UserAgentMajor", - "type": "long" - }, - { - "name": "UserAgentMinor", - "type": "string" - }, - { - "name": "CookieEnable", - "type": "long" - }, - { - "name": "JavascriptEnable", - "type": "long" - }, - { - "name": "IsMobile", - "type": "long" - }, - { - "name": "MobilePhone", - "type": "long" - }, - { - "name": "MobilePhoneModel", - "type": "string" - }, - { - "name": "Params", - "type": "string" - }, - { - "name": "IPNetworkID", - "type": "long" - }, - { - "name": "TraficSourceID", - "type": "long" - }, - { - "name": "SearchEngineID", - "type": "long" - }, - { - "name": "SearchPhrase", - "type": "string" - }, - { - "name": "AdvEngineID", - "type": "long" - }, - { - "name": "IsArtifical", - "type": "long" - }, - { - "name": "WindowClientWidth", - "type": "long" - }, - { - "name": "WindowClientHeight", - "type": "long" - }, - { - "name": "ClientTimeZone", - "type": "long" - }, - { - "name": "ClientEventTime", - "type": "string" - }, - { - "name": "SilverlightVersion1", - "type": "long" - }, - { - "name": "SilverlightVersion2", - "type": "long" - }, - { - "name": "SilverlightVersion3", - "type": "long" - }, - { - "name": "SilverlightVersion4", - "type": "long" - }, - { - "name": "PageCharset", - "type": "string" - }, - { - "name": "CodeVersion", - "type": "long" - }, - { - "name": "IsLink", - "type": "long" - }, - { - "name": "IsDownload", - "type": "long" - }, - { - "name": "IsNotBounce", - "type": "long" - }, - { - "name": "FUniqID", - "type": "long" - }, - { - "name": "OriginalURL", - "type": "string" - }, - { - "name": "HID", - "type": "long" - }, - { - "name": "IsOldCounter", - "type": "long" - }, - { - "name": "IsEvent", - "type": "long" - }, - { - "name": "IsParameter", - "type": "long" - }, - { - "name": "DontCountHits", - "type": "long" - }, - { - "name": "WithHash", - "type": "long" - }, - { - "name": "HitColor", - "type": "string" - }, - { - "name": "LocalEventTime", - "type": "string" - }, - { - "name": "Age", - "type": "long" - }, - { - "name": "Sex", - "type": "long" - }, - { - "name": "Income", - "type": "long" - }, - { - "name": "Interests", - "type": "long" - }, - { - "name": "Robotness", - "type": "long" - }, - { - "name": "RemoteIP", - "type": "long" - }, - { - "name": "WindowName", - "type": "long" - }, - { - "name": "OpenerName", - "type": "long" - }, - { - "name": "HistoryLength", - "type": "long" - }, - { - "name": "BrowserLanguage", - "type": "string" - }, - { - "name": "BrowserCountry", - "type": "string" - }, - { - "name": "SocialNetwork", - "type": "string" - }, - { - "name": "SocialAction", - "type": "string" - }, - { - "name": "HTTPError", - "type": "long" - }, - { - "name": "SendTiming", - "type": "long" - }, - { - "name": "DNSTiming", - "type": "long" - }, - { - "name": "ConnectTiming", - "type": "long" - }, - { - "name": "ResponseStartTiming", - "type": "long" - }, - { - "name": "ResponseEndTiming", - "type": "long" - }, - { - "name": "FetchTiming", - "type": "long" - }, - { - "name": "SocialSourceNetworkID", - "type": "long" - }, - { - "name": "SocialSourcePage", - "type": "string" - }, - { - "name": "ParamPrice", - "type": "long" - }, - { - "name": "ParamOrderID", - "type": "string" - }, - { - "name": "ParamCurrency", - "type": "string" - }, - { - "name": "ParamCurrencyID", - "type": "long" - }, - { - "name": "OpenstatServiceName", - "type": "string" - }, - { - "name": "OpenstatCampaignID", - "type": "string" - }, - { - "name": "OpenstatAdID", - "type": "string" - }, - { - "name": "OpenstatSourceID", - "type": "string" - }, - { - "name": "UTMSource", - "type": "string" - }, - { - "name": "UTMMedium", - "type": "string" - }, - { - "name": "UTMCampaign", - "type": "string" - }, - { - "name": "UTMContent", - "type": "string" - }, - { - "name": "UTMTerm", - "type": "string" - }, - { - "name": "FromTag", - "type": "string" - }, - { - "name": "HasGCLID", - "type": "long" - }, - { - "name": "RefererHash", - "type": "long" - }, - { - "name": "URLHash", - "type": "long" - }, - { - "name": "CLID", - "type": "long" - } - ] - }, - "granularitySpec": { - "queryGranularity": "none", - "rollup": false, - "segmentGranularity": "day" - } - } - } -} diff --git a/benchmark/druid/queries.sql b/benchmark/druid/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/druid/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/druid/results/c6a.4xlarge.json b/benchmark/druid/results/c6a.4xlarge.json deleted file mode 100644 index 4759a3a05b6..00000000000 --- a/benchmark/druid/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Druid", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Druid is killed and restarted after every query. Otherwise some queries make Druid degraded and results are incorrect. For example after Q13 even SELECT 1 works for 7 seconds", - - "tags": ["Java", "column-oriented"], - - "load_time": 19620, - "data_size": 45188608472, - - "result": [ -[0.032365, 0.016716, 0.016921], -[0.400766, 0.275591, 0.271057], -[0.382350, 0.152084, 0.151652], -[1.379492, 0.137201, 0.128902], -[3.278731, 2.527105, 2.515128], -[8.576382, 6.546540, 6.503001], -[null, null, null], -[0.563852, 0.273795, 0.275086], -[11.509993, 10.636571, 10.597993], -[13.357647, 12.421210, 12.337247], -[1.636875, 0.821300, 0.900056], -[1.692544, 0.512066, 0.440511], -[2.453274, 1.769806, 1.807207], -[8.503408, 7.261406, 7.334872], -[61.056041, 59.251083, 59.500549], -[8.620670, 8.236657, 8.225380], -[164.840762, null, null], -[24.165797, 22.308466, null], -[null, null, null], -[null, null, null], -[25.973369, 25.597864, 25.602509], -[null, null, null], -[null, null, null], -[7.805347, 6.629776, 6.947366], -[0.257845, 0.020327, 0.016976], -[null, null, null], -[null, null, null], -[32.948154, 31.046770, 29.221959], -[null, null, null], -[7.230865, 7.033713, 6.972421], -[20.546250, 19.237428, 19.258469], -[54.065945, 52.451318, 52.466653], -[null, null, null], -[17.499267, null, null], -[null, null, null], -[60.478315, 60.054940, 60.458946], -[1.698088, 1.490317, 1.461969], -[1.409572, 0.939003, 0.907252], -[0.866729, 0.329539, 0.287435], -[null, null, null], -[0.932473, 0.420781, 0.359095], -[0.723142, 0.325300, 0.296865], -[0.603483, 0.150892, 0.140716] - ] -} diff --git a/benchmark/druid/run.sh b/benchmark/druid/run.sh deleted file mode 100755 index faa88431bc0..00000000000 --- a/benchmark/druid/run.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -TRIES=3 -cat queries.sql | while read query; do - sync - for i in $(seq 1 100); do - CHECK=$(curl -o /dev/null -w '%{http_code}' -s -XPOST -H'Content-Type: application/json' http://localhost:8888/druid/v2/sql/ -d @check.json }) - [[ "$CHECK" == "200" ]] && break - sleep 1 - done - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - echo -n "[" - for i in $(seq 1 $TRIES); do - echo "{\"query\":\"$query\", \"context\": {\"timeout\": 1000000} }"| sed -e 's EventTime __time g' | tr -d ';' > query.json - curl -w '%{http_code} %{time_total}\n' -s -XPOST -H'Content-Type: application/json' http://localhost:8888/druid/v2/sql/ -d @query.json | awk '{ if ($1!="200") { printf "null" } }' - [[ "$i" != $TRIES ]] && echo -n ", " - done - echo "]," - - # Ugly hack to measure independently queries. Otherwise some queries make Druid degraded and results are incorrect. For example after Q13 even SELECT 1 works for 7 seconds - pkill -f historical - sleep 3 -done diff --git a/benchmark/duckdb/README.md b/benchmark/duckdb/README.md deleted file mode 100644 index d2d7b22c81b..00000000000 --- a/benchmark/duckdb/README.md +++ /dev/null @@ -1,2 +0,0 @@ -DuckDB cannot load parquet file due to OOM. -The only option is to load a CSV file, but sometimes it also fails with OOM. diff --git a/benchmark/duckdb/benchmark.sh b/benchmark/duckdb/benchmark.sh deleted file mode 100755 index 392f084c560..00000000000 --- a/benchmark/duckdb/benchmark.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y python3-pip -pip install duckdb psutil - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -./load.py -# 4216.5390389899985 seconds - -# Run the queries - -./run.sh 2>&1 | tee log.txt - -wc -c my-db.duckdb - -cat log.txt | grep -P '^\d|Killed|Segmentation' | sed -r -e 's/^.*(Killed|Segmentation).*$/null\nnull\nnull/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/duckdb/create.sql b/benchmark/duckdb/create.sql deleted file mode 100644 index 744d595ecac..00000000000 --- a/benchmark/duckdb/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT, - Referer TEXT, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT, - Params TEXT, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT, - BrowserCountry TEXT, - SocialNetwork TEXT, - SocialAction TEXT, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT, - ParamCurrency TEXT, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT, - OpenstatCampaignID TEXT, - OpenstatAdID TEXT, - OpenstatSourceID TEXT, - UTMSource TEXT, - UTMMedium TEXT, - UTMCampaign TEXT, - UTMContent TEXT, - UTMTerm TEXT, - FromTag TEXT, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/duckdb/load.py b/benchmark/duckdb/load.py deleted file mode 100755 index d4265d15fc3..00000000000 --- a/benchmark/duckdb/load.py +++ /dev/null @@ -1,18 +0,0 @@ -#!/usr/bin/env python3 - -import duckdb -import timeit -import psutil - -con = duckdb.connect(database="my-db.duckdb", read_only=False) -# See https://github.com/duckdb/duckdb/issues/3969 -con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4)) -con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False))) - -print("Will load the data") - -start = timeit.default_timer() -con.execute(open("create.sql").read()) -con.execute("INSERT INTO hits SELECT * FROM read_csv_auto('hits.csv')") -end = timeit.default_timer() -print(end - start) diff --git a/benchmark/duckdb/queries.sql b/benchmark/duckdb/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/duckdb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/duckdb/query.py b/benchmark/duckdb/query.py deleted file mode 100755 index 43739be56f3..00000000000 --- a/benchmark/duckdb/query.py +++ /dev/null @@ -1,20 +0,0 @@ -#!/usr/bin/env python3 - -import duckdb -import timeit -import psutil -import sys - -query = sys.stdin.read() -print(query) - -con = duckdb.connect(database="my-db.duckdb", read_only=False) -# See https://github.com/duckdb/duckdb/issues/3969 -con.execute("PRAGMA memory_limit='{}b'".format(psutil.virtual_memory().total / 4)) -con.execute("PRAGMA threads={}".format(psutil.cpu_count(logical=False))) - -for try_num in range(3): - start = timeit.default_timer() - con.execute(query) - end = timeit.default_timer() - print(end - start) diff --git a/benchmark/duckdb/results/c6a.4xlarge.json b/benchmark/duckdb/results/c6a.4xlarge.json deleted file mode 100644 index 40b9d88bc71..00000000000 --- a/benchmark/duckdb/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "DuckDB", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Many queries triggered OOM", - - "tags": ["C", "column-oriented", "embedded"], - - "load_time": 4217, - "data_size": 27241492480, - - "result": [ -[0.005694353996659629,0.003944558004150167,0.003837226002360694], -[0.16991353100456763,0.03919722700084094,0.03835860399703961], -[0.44898432699847035,0.04947217500011902,0.04852217998995911], -[0.07586832098604646,0.07051395199960098,0.07007493599667214], -[9.554053236002801,8.153356187991449,8.73448242500308], -[7.66042533799191,6.931124911992811,7.103380946995458], -[0.030703739990713075,0.027668555994750932,0.027583695002249442], -[0.1778664360026596,0.03942437999648973,0.03882004099432379], -[8.53439180701389,8.869582625004114,9.020313234999776], -[10.40215514000738,11.125320470004226,8.941559945000336], -[1.1747649609897053,1.04221136700653,1.004799570000614], -[1.2380354650085792,1.1211603130068397,2.4278587239969056], -[3.1751541379926493,0.9360461989999749,0.8868292279948946], -[6.855684430003748,7.300301584007684,5.712960822012974], -[3.70588762400439,1.0249276379909134,0.9473389159975341], -[2.1037107890006155,1.6215517020027619,1.5671920729946578], -[null,null,null], -[null,null,null], -[null,null,null], -[0.0002772739971987903,0.00016792300448287278,0.0001574420020915568], -[null,null,null], -[null,null,null], -[null,null,null], -[null,null,null], -[2.9310110910009826,0.19020285899750888,0.1736805049877148], -[2.939304119994631,0.18754731099761557,0.18073286200524308], -[2.8706370779982535,0.18822155400994234,0.17905898999015335], -[null,null,null], -[null,null,null], -[0.884408778991201,0.714329167996766,0.7135983259940986], -[5.3762675570033025,0.8803737630078103,0.8728962720051641], -[7.249190265996731,2.9648747390019707,2.866687831003219], -[null,null,null], -[null,null,null], -[null,null,null], -[4.515183198003797,4.030519469000865,4.014251719010645], -[0.11604027298744768,0.040539135996368714,0.04280066800129134], -[0.0457908230018802,0.021069509006338194,0.019683108999743126], -[0.0680370800109813,0.011889394998434,0.01056639499438461], -[0.22029169600864407,0.08547276000899728,0.09095505000732373], -[0.03759863799496088,0.008373684002435766,0.007633563989656977], -[0.025631797994719818,0.008081699008471332,0.007858585988287814], -[0.034359957004198804,0.025543516996549442,0.02533275399764534] -] -} diff --git a/benchmark/duckdb/run.sh b/benchmark/duckdb/run.sh deleted file mode 100755 index 68cea914794..00000000000 --- a/benchmark/duckdb/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - - # We need to restart and reinitialize DuckDB after every query, - # because it often fails with Segmentation fault (core dumped) - ./query.py <<< "${query}" -done diff --git a/benchmark/elasticsearch/README.md b/benchmark/elasticsearch/README.md deleted file mode 100644 index 82b0b2ff959..00000000000 --- a/benchmark/elasticsearch/README.md +++ /dev/null @@ -1 +0,0 @@ -Incomplete. diff --git a/benchmark/elasticsearch/benchmark.sh b/benchmark/elasticsearch/benchmark.sh deleted file mode 100755 index 740df544b3d..00000000000 --- a/benchmark/elasticsearch/benchmark.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -wget -qO - https://artifacts.elastic.co/GPG-KEY-elasticsearch | sudo gpg --dearmor -o /usr/share/keyrings/elasticsearch-keyring.gpg -sudo apt-get update && sudo apt-get install -y apt-transport-https -echo "deb [signed-by=/usr/share/keyrings/elasticsearch-keyring.gpg] https://artifacts.elastic.co/packages/8.x/apt stable main" | sudo tee /etc/apt/sources.list.d/elastic-8.x.list -sudo apt-get update && sudo apt-get install -y elasticsearch - -sudo systemctl start elasticsearch.service -sudo /usr/share/elasticsearch/bin/elasticsearch-reset-password -u elastic - -# Example: -# User: elastic -# Password: C0Qq9kNYMUunKTXMDOUZ - -export PASSWORD='...' - -curl -k -XGET 'https://localhost:9200' -u "elastic:${PASSWORD}" - -# This will create an index. -curl -k -XPUT -u "elastic:${PASSWORD}" 'https://localhost:9200/hits' - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.json.gz' -gzip -d hits.json.gz - -# Prevent 'curl' from OOM. - -split -l 1000000000 hits.json hits_ -for table in hits_*; do mv ${table} ${table}.json; done - -time for table in hits_*; do curl -k -H "Transfer-Encoding: chunked" -XPOST -u "elastic:${PASSWORD}" 'https://localhost:9200/_bulk' -T ${table}; done diff --git a/benchmark/exasol/README.md b/benchmark/exasol/README.md deleted file mode 100644 index abe9c5dfd67..00000000000 --- a/benchmark/exasol/README.md +++ /dev/null @@ -1,3 +0,0 @@ -EXASOL does not allow using the community edition for more than 10 GiB data, therefore testing is not possible. I advise you not to trust the unfounded claims of performance. - -https://github.com/exasol/docker-db diff --git a/benchmark/generate-results.sh b/benchmark/generate-results.sh deleted file mode 100755 index 7651fd6d17e..00000000000 --- a/benchmark/generate-results.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash -e - -# This script will substitute the benchmark results into the HTML page. -# Note: editing HTML with sed may look strange, but at least we avoid using node.js and npm, and that's good. - -( - sed '/^const data = \[$/q' index.html - - FIRST=1 - ls -1 */results/*.json | while read file - do - [ "${FIRST}" = "0" ] && echo -n ',' - jq --compact-output ". += {\"source\": \"${file}\"}" "${file}" - FIRST=0 - done - - echo ']; // end of data' - sed '0,/^\]; \/\/ end of data$/d' index.html - -) > index.html.new - -mv index.html index.html.bak -mv index.html.new index.html diff --git a/benchmark/greenplum/benchmark.sh b/benchmark/greenplum/benchmark.sh deleted file mode 100755 index a1802ee223b..00000000000 --- a/benchmark/greenplum/benchmark.sh +++ /dev/null @@ -1,73 +0,0 @@ -#!/bin/bash - -# NOTE: it requires Ubuntu 18.04 -# Greenplum does not install on any newer system. - -echo "This script must be run from gpadmin user. Press enter to continue." -read -sudo apt update -sudo apt install -y software-properties-common -sudo add-apt-repository ppa:greenplum/db -sudo apt update -sudo apt install greenplum-db-6 -sudo rm -rf /gpmaster /gpdata* -ssh-keygen -t rsa -b 4096 -cat /home/gpadmin/.ssh/id_rsa.pub >> /home/gpadmin/.ssh/authorized_keys -mod 600 ~/.ssh/authorized_keys -sudo echo "# kernel.shmall = _PHYS_PAGES / 2 # See Shared Memory Pages -kernel.shmall = 197951838 -# kernel.shmmax = kernel.shmall * PAGE_SIZE -kernel.shmmax = 810810728448 -kernel.shmmni = 4096 -vm.overcommit_memory = 2 # See Segment Host Memory -vm.overcommit_ratio = 95 # See Segment Host Memory - -net.ipv4.ip_local_port_range = 10000 65535 # See Port Settings -kernel.sem = 500 2048000 200 4096 -kernel.sysrq = 1 -kernel.core_uses_pid = 1 -kernel.msgmnb = 65536 -kernel.msgmax = 65536 -kernel.msgmni = 2048 -net.ipv4.tcp_syncookies = 1 -net.ipv4.conf.default.accept_source_route = 0 -net.ipv4.tcp_max_syn_backlog = 4096 -net.ipv4.conf.all.arp_filter = 1 -net.core.netdev_max_backlog = 10000 -net.core.rmem_max = 2097152 -net.core.wmem_max = 2097152 -vm.swappiness = 10 -vm.zone_reclaim_mode = 0 -vm.dirty_expire_centisecs = 500 -vm.dirty_writeback_centisecs = 100 -vm.dirty_background_ratio = 0 # See System Memory -vm.dirty_ratio = 0 -vm.dirty_background_bytes = 1610612736 -vm.dirty_bytes = 4294967296" |sudo tee -a /etc/sysctl.conf -sudo sysctl -p - -echo "* soft nofile 524288 -* hard nofile 524288 -* soft nproc 131072 -* hard nproc 131072" |sudo tee -a /etc/security/limits.conf -echo "RemoveIPC=no" |sudo tee -a /etc/systemd/logind.conf -echo "Now you need to reboot the machine. Press Enter if you already rebooted, or reboot now and run the script once again" -read -source /opt/greenplum-db-*.0/greenplum_path.sh -cp $GPHOME/docs/cli_help/gpconfigs/gpinitsystem_singlenode . -echo localhost > ./hostlist_singlenode -sed -i "s/MASTER_HOSTNAME=[a-z_]*/MASTER_HOSTNAME=$(hostname)/" gpinitsystem_singlenode -sed -i "s@declare -a DATA_DIRECTORY=(/gpdata1 /gpdata2)@declare -a DATA_DIRECTORY=(/gpdata1 /gpdata2 /gpdata3 /gpdata4 /gpdata5 /gpdata6 /gpdata7 /gpdata8 /gpdata9 /gpdata10 /gpdata11 /gpdata12 /gpdata13 /gpdata14)@" gpinitsystem_singlenode -sudo mkdir /gpmaster /gpdata1 /gpdata2 /gpdata3 /gpdata4 /gpdata5 /gpdata6 /gpdata7 /gpdata8 /gpdata9 /gpdata10 /gpdata11 /gpdata12 /gpdata13 /gpdata14 -sudo chmod 777 /gpmaster /gpdata1 /gpdata2 /gpdata3 /gpdata4 /gpdata5 /gpdata6 /gpdata7 /gpdata8 /gpdata9 /gpdata10 /gpdata11 /gpdata12 /gpdata13 /gpdata14 -gpinitsystem -ac gpinitsystem_singlenode -export MASTER_DATA_DIRECTORY=/gpmaster/gpsne-1/ -#wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -#gzip -d hits.tsv.gz -chmod 777 ~ hits.tsv -psql -d postgres -f create.sql -nohup gpfdist & -time psql -d postgres -t -c '\timing' -c "insert into hits select * from hits_ext;" -du -sh /gpdata* -./run.sh 2>&1 | tee log.txt -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' |awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/greenplum/create.sql b/benchmark/greenplum/create.sql deleted file mode 100644 index ddaf587d0f9..00000000000 --- a/benchmark/greenplum/create.sql +++ /dev/null @@ -1,116 +0,0 @@ -drop table if exists hits; -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) -with (appendoptimized=true,orientation=column,compresstype=zstd) -DISTRIBUTED RANDOMLY; -CREATE INDEX hits_idx on hits using btree (CounterID, EventDate, UserID, EventTime, WatchID); -drop external table if exists hits_ext; -CREATE EXTERNAL TABLE hits_ext (like hits) -LOCATION ('gpfdist://localhost:8080/hits.tsv') -FORMAT 'TEXT'; diff --git a/benchmark/greenplum/log.txt b/benchmark/greenplum/log.txt deleted file mode 100644 index 91eeeb8447e..00000000000 --- a/benchmark/greenplum/log.txt +++ /dev/null @@ -1,215 +0,0 @@ -3 -SELECT COUNT(*) FROM hits; -Time: 2200.088 ms -Time: 1458.828 ms -Time: 1135.728 ms -3 -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -Time: 1286.610 ms -Time: 1078.970 ms -Time: 1018.564 ms -3 -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -Time: 2258.198 ms -Time: 2421.929 ms -Time: 2596.925 ms -3 -SELECT AVG(UserID) FROM hits; -Time: 1893.709 ms -Time: 1924.597 ms -Time: 1738.663 ms -3 -SELECT COUNT(DISTINCT UserID) FROM hits; -Time: 9544.705 ms -Time: 9719.561 ms -Time: 10035.660 ms -3 -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -Time: 4559.571 ms -Time: 4101.119 ms -Time: 4704.585 ms -3 -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -Time: 1651.671 ms -Time: 1897.668 ms -Time: 1970.511 ms -3 -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -Time: 1025.366 ms -Time: 999.323 ms -Time: 1005.235 ms -3 -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -Time: 7897.922 ms -Time: 7757.179 ms -Time: 8012.193 ms -3 -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -Time: 11377.889 ms -Time: 11600.291 ms -Time: 11255.236 ms -3 -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -Time: 2119.548 ms -Time: 2050.025 ms -Time: 1955.304 ms -3 -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -Time: 2019.971 ms -Time: 1979.324 ms -Time: 1982.835 ms -3 -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -Time: 2886.663 ms -Time: 2928.279 ms -Time: 2968.219 ms -3 -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -Time: 4062.722 ms -Time: 4017.708 ms -Time: 4030.113 ms -3 -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -Time: 3463.033 ms -Time: 3247.791 ms -Time: 3064.752 ms -3 -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -Time: 8437.169 ms -Time: 8186.413 ms -Time: 7992.017 ms -3 -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -Time: 9021.759 ms -Time: 8915.718 ms -Time: 8927.481 ms -3 -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -Time: 8669.821 ms -Time: 8393.315 ms -Time: 8624.970 ms -3 -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -Time: 15132.099 ms -Time: 14950.178 ms -Time: 15234.422 ms -3 -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -Time: 1008.775 ms -Time: 934.589 ms -Time: 975.342 ms -3 -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -Time: 11058.183 ms -Time: 3258.070 ms -Time: 3176.875 ms -3 -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -Time: 12263.435 ms -Time: 3518.802 ms -Time: 3937.050 ms -3 -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -Time: 24152.214 ms -Time: 4863.278 ms -Time: 4887.644 ms -3 -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -Time: 67151.698 ms -Time: 20400.002 ms -Time: 20261.041 ms -3 -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -Time: 2579.895 ms -Time: 1661.144 ms -Time: 1904.318 ms -3 -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -Time: 1728.308 ms -Time: 1895.359 ms -Time: 1251.501 ms -3 -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -Time: 2441.544 ms -Time: 1465.950 ms -Time: 1882.380 ms -3 -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -Time: 11117.452 ms -Time: 6095.514 ms -Time: 6022.919 ms -3 -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -Time: 83040.984 ms -Time: 82978.396 ms -Time: 82867.763 ms -3 -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -Time: 82544.095 ms -Time: 82542.673 ms -Time: 83983.646 ms -3 -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -Time: 3096.198 ms -Time: 3222.251 ms -Time: 3194.314 ms -3 -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -Time: 5775.759 ms -Time: 4059.735 ms -Time: 4003.655 ms -3 -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -Time: 43682.613 ms -Time: 40437.523 ms -Time: 40107.640 ms -3 -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -Time: 18097.819 ms -Time: 13274.130 ms -Time: 12889.385 ms -3 -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -Time: 17318.672 ms -Time: 13541.070 ms -Time: 13592.715 ms -3 -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -Time: 9724.053 ms -Time: 9900.294 ms -Time: 10017.686 ms -3 -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -Time: 294.344 ms -Time: 169.606 ms -Time: 173.804 ms -3 -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -Time: 162.524 ms -Time: 117.489 ms -Time: 115.532 ms -3 -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -Time: 145.205 ms -Time: 98.342 ms -Time: 97.275 ms -3 -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -Time: 334.809 ms -Time: 275.365 ms -Time: 265.053 ms -3 -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -Time: 154.522 ms -Time: 107.654 ms -Time: 105.290 ms -3 -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -Time: 158.957 ms -Time: 117.284 ms -Time: 119.068 ms -3 -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; -Time: 193.756 ms -Time: 144.787 ms -Time: 145.485 ms diff --git a/benchmark/greenplum/queries.sql b/benchmark/greenplum/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/greenplum/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/greenplum/results/c6a.4xlarge.json b/benchmark/greenplum/results/c6a.4xlarge.json deleted file mode 100644 index 68052fdd12b..00000000000 --- a/benchmark/greenplum/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Greenplum", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "column-oriented", "PostgreSQL compatible"], - - "load_time": 1080, - "data_size": 42000000000, - - "result": [ -[2.20009,1.45883,1.13573], -[1.28661,1.07897,1.01856], -[2.2582,2.42193,2.59693], -[1.89371,1.9246,1.73866], -[9.54471,9.71956,10.0357], -[4.55957,4.10112,4.70458], -[1.65167,1.89767,1.97051], -[1.02537,0.999323,1.00524], -[7.89792,7.75718,8.01219], -[11.3779,11.6003,11.2552], -[2.11955,2.05003,1.9553], -[2.01997,1.97932,1.98284], -[2.88666,2.92828,2.96822], -[4.06272,4.01771,4.03011], -[3.46303,3.24779,3.06475], -[8.43717,8.18641,7.99202], -[9.02176,8.91572,8.92748], -[8.66982,8.39332,8.62497], -[15.1321,14.9502,15.2344], -[1.00877,0.934589,0.975342], -[11.0582,3.25807,3.17687], -[12.2634,3.5188,3.93705], -[24.1522,4.86328,4.88764], -[67.1517,20.4,20.261], -[2.5799,1.66114,1.90432], -[1.72831,1.89536,1.2515], -[2.44154,1.46595,1.88238], -[11.1175,6.09551,6.02292], -[83.041,82.9784,82.8678], -[82.5441,82.5427,83.9836], -[3.0962,3.22225,3.19431], -[5.77576,4.05973,4.00366], -[43.6826,40.4375,40.1076], -[18.0978,13.2741,12.8894], -[17.3187,13.5411,13.5927], -[9.72405,9.90029,10.0177], -[0.294344,0.169606,0.173804], -[0.162524,0.117489,0.115532], -[0.145205,0.098342,0.097275], -[0.334809,0.275365,0.265053], -[0.154522,0.107654,0.10529], -[0.158957,0.117284,0.119068], -[0.193756,0.144787,0.145485] -] -} diff --git a/benchmark/greenplum/run.sh b/benchmark/greenplum/run.sh deleted file mode 100755 index 1eb622de977..00000000000 --- a/benchmark/greenplum/run.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - echo '\timing' > /tmp/query_temp.sql - echo "$query" >> /tmp/query_temp.sql - psql -d postgres -t -f /tmp/query_temp.sql | grep 'Time' - done; -done; diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh deleted file mode 120000 index 8b5c8838fb3..00000000000 --- a/benchmark/hardware.sh +++ /dev/null @@ -1 +0,0 @@ -hardware/hardware.sh \ No newline at end of file diff --git a/benchmark/hardware.sh b/benchmark/hardware.sh new file mode 100755 index 00000000000..635f3a9eb61 --- /dev/null +++ b/benchmark/hardware.sh @@ -0,0 +1,247 @@ +#!/bin/bash -e + +QUERIES_FILE="queries.sql" +TRIES=3 + +mkdir -p clickhouse-benchmark +pushd clickhouse-benchmark + +# Download the binary +if [[ ! -x clickhouse ]]; then + curl https://clickhouse.com/ | sh +fi + +if [[ ! -f $QUERIES_FILE ]]; then + wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware/$QUERIES_FILE" +fi + +uptime + +echo "Starting clickhouse-server" + +./clickhouse server >/dev/null 2>&1 & +PID=$! + +function finish { + kill $PID + wait +} +trap finish EXIT + +echo "Waiting for clickhouse-server to start" + +for i in {1..30}; do + sleep 1 + ./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.' + if [[ $i == 30 ]]; then exit 1; fi +done + +if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then + echo "Dataset already downloaded" +else + echo "Will download the dataset" + if [ "`uname`" = "Darwin" ] + then + ./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query " + CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" + else + ./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query " + CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) + AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" + fi + ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits" +fi + +if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then + echo "Dataset already prepared" +else + echo "Will prepare the dataset" + ./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL" +fi + +echo +echo "Will perform benchmark. Results:" +echo + +>result.csv +QUERY_NUM=1 + +cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do + sync + if [ "`uname`" = "Darwin" ] + then + sudo purge > /dev/null + else + echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null + fi + + echo -n "[" + for i in $(seq 1 $TRIES); do + RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:) + [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" + [[ "$i" != $TRIES ]] && echo -n ", " + + echo "${QUERY_NUM},${i},${RES}" >> result.csv + done + echo "]," + + QUERY_NUM=$((QUERY_NUM + 1)) +done + + +echo +echo "Benchmark complete. System info:" +echo + +touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt + +if [ "`uname`" = "Darwin" ] +then + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}', version())" + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + sysctl hw.model | tee cpu_model.txt + sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt + echo '----Disk Free and Total--------' + df -h . | tee df.txt + echo '----Memory Free and Total-------' + vm_stat | tee memory.txt + echo '----Physical Memory Amount------' + ls -l /var/vm | tee memory_total.txt + echo '--------------------------------' +else + echo '----Version, build id-----------' + ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" + ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw + ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" + echo '----CPU-------------------------' + cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt + lscpu | tee cpu.txt + echo '----Block Devices---------------' + lsblk | tee blk.txt + echo '----Disk Free and Total--------' + df -h . | tee df.txt + echo '----Memory Free and Total-------' + free -h | tee memory.txt + echo '----Physical Memory Amount------' + cat /proc/meminfo | grep MemTotal | tee memory_total.txt + echo '----RAID Info-------------------' + cat /proc/mdstat| tee mdstat.txt + echo '--------------------------------' +fi +echo + +echo "Instance type from IMDS (if available):" +curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt +echo + +echo "Uploading the results (if possible)" + +UUID=$(./clickhouse local --query "SELECT generateUUIDv4()") + +./clickhouse local --query " + SELECT + '${UUID}' AS run_id, + version() AS version, + now() AS test_time, + (SELECT value FROM system.settings WHERE name = 'max_threads') AS threads, + filesystemCapacity() AS fs_capacity, + filesystemAvailable() AS fs_available, + file('cpu_model.txt') AS cpu_model, + file('cpu.txt') AS cpu, + file('df.txt') AS df, + file('memory.txt') AS memory, + file('memory_total.txt') AS memory_total, + file('blk.txt') AS blk, + file('mdstat.txt') AS mdstat, + file('instance.txt') AS instance +" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " + INSERT INTO benchmark_runs + (run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance) + FORMAT TSV" || echo "Cannot upload results." + +./clickhouse local --query " + SELECT + '${UUID}' AS run_id, + c1 AS query_num, + c2 AS try_num, + c3 AS time + FROM file('result.csv') +" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " + INSERT INTO benchmark_results + (run_id, query_num, try_num, time) + FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com" + +< 1 + ) + GROUP BY run_id + ORDER BY k ASC +) AS t +INNER JOIN benchmark_runs USING (run_id) + +//// diff --git a/benchmark/hardware/benchmark-chyt.sh b/benchmark/hardware/benchmark-chyt.sh deleted file mode 100755 index 778ce4f86ce..00000000000 --- a/benchmark/hardware/benchmark-chyt.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/usr/bin/env bash - -QUERIES_FILE="queries.sql" -TABLE=$1 -TRIES=3 - -cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do - - echo -n "[" - for i in $(seq 1 $TRIES); do - while true; do - RES=$(command time -f %e -o /dev/stdout curl -sS -G --data-urlencode "query=$query" --data "default_format=Null&max_memory_usage=100000000000&max_memory_usage_for_all_queries=100000000000&max_concurrent_queries_for_user=100&database=*$YT_CLIQUE_ID" --location-trusted -H "Authorization: OAuth $YT_TOKEN" "$YT_PROXY.yt.yandex.net/query" 2>/dev/null); - if [[ $? == 0 ]]; then - [[ $RES =~ 'fail|Exception' ]] || break; - fi - done - - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - done - echo "]," -done diff --git a/benchmark/hardware/benchmark-new.sh b/benchmark/hardware/benchmark-new.sh deleted file mode 100755 index 0c4cad6e5e3..00000000000 --- a/benchmark/hardware/benchmark-new.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash - -QUERIES_FILE="queries.sql" -TABLE=$1 -TRIES=3 - -if [ -x ./clickhouse ] -then - CLICKHOUSE_CLIENT="./clickhouse client" -elif command -v clickhouse-client >/dev/null 2>&1 -then - CLICKHOUSE_CLIENT="clickhouse-client" -else - echo "clickhouse-client is not found" - exit 1 -fi - -cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - - echo -n "[" - for i in $(seq 1 $TRIES); do - RES=$(${CLICKHOUSE_CLIENT} --time --format=Null --max_memory_usage=100G --query="$query" 2>&1) - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - done - echo "]," -done diff --git a/benchmark/hardware/benchmark-yql.sh b/benchmark/hardware/benchmark-yql.sh deleted file mode 100755 index 7d30d39e7d3..00000000000 --- a/benchmark/hardware/benchmark-yql.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/usr/bin/env bash - -QUERIES_FILE="queries.sql" -TABLE=$1 -TRIES=3 - -cat "$QUERIES_FILE" | sed "s|{table}|\"${TABLE}\"|g" | while read query; do - - echo -n "[" - for i in $(seq 1 $TRIES); do - while true; do - RES=$(command time -f %e -o time ./yql --clickhouse --syntax-version 1 -f empty <<< "USE chyt.hume; PRAGMA max_memory_usage = 100000000000; PRAGMA max_memory_usage_for_all_queries = 100000000000; $query" >/dev/null 2>&1 && cat time) && break; - done - - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - done - echo "]," -done diff --git a/benchmark/hardware/benchmark_cloud.sh b/benchmark/hardware/benchmark_cloud.sh deleted file mode 100755 index 01376e4009e..00000000000 --- a/benchmark/hardware/benchmark_cloud.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/usr/bin/env bash - -QUERIES_FILE="queries.sql" -TABLE=$1 -TRIES=3 - -PARAMS="--host ... --secure --password ..." - -if [ -x ./clickhouse ] -then - CLICKHOUSE_CLIENT="./clickhouse client" -elif command -v clickhouse-client >/dev/null 2>&1 -then - CLICKHOUSE_CLIENT="clickhouse-client" -else - echo "clickhouse-client is not found" - exit 1 -fi - -QUERY_ID_PREFIX="benchmark_$RANDOM" -QUERY_NUM=1 - -cat "$QUERIES_FILE" | sed "s/{table}/${TABLE}/g" | while read query - do - for i in $(seq 1 $TRIES) - do - QUERY_ID="${QUERY_ID_PREFIX}_${QUERY_NUM}_${i}" - ${CLICKHOUSE_CLIENT} ${PARAMS} --query_id "${QUERY_ID}" --format=Null --max_memory_usage=100G --query="$query" - echo -n '.' - done - QUERY_NUM=$((QUERY_NUM + 1)) - echo -done - -sleep 10 - -${CLICKHOUSE_CLIENT} ${PARAMS} --query " - WITH extractGroups(query_id, '(\d+)_(\d+)\$') AS num_run, num_run[1]::UInt8 AS num, num_run[2]::UInt8 AS run - SELECT groupArrayInsertAt(query_duration_ms / 1000, (run - 1)::UInt8)::String || ',' - FROM clusterAllReplicas(default, system.query_log) - WHERE event_date >= yesterday() AND type = 2 AND query_id LIKE '${QUERY_ID_PREFIX}%' - GROUP BY num ORDER BY num FORMAT TSV -" diff --git a/benchmark/hardware/hardware.sh b/benchmark/hardware/hardware.sh deleted file mode 100755 index 635f3a9eb61..00000000000 --- a/benchmark/hardware/hardware.sh +++ /dev/null @@ -1,247 +0,0 @@ -#!/bin/bash -e - -QUERIES_FILE="queries.sql" -TRIES=3 - -mkdir -p clickhouse-benchmark -pushd clickhouse-benchmark - -# Download the binary -if [[ ! -x clickhouse ]]; then - curl https://clickhouse.com/ | sh -fi - -if [[ ! -f $QUERIES_FILE ]]; then - wget "https://raw.githubusercontent.com/ClickHouse/ClickHouse/master/benchmark/hardware/$QUERIES_FILE" -fi - -uptime - -echo "Starting clickhouse-server" - -./clickhouse server >/dev/null 2>&1 & -PID=$! - -function finish { - kill $PID - wait -} -trap finish EXIT - -echo "Waiting for clickhouse-server to start" - -for i in {1..30}; do - sleep 1 - ./clickhouse client --query "SELECT 'Ok.'" 2>/dev/null && break || echo -n '.' - if [[ $i == 30 ]]; then exit 1; fi -done - -if [[ $(./clickhouse client --query "EXISTS hits") == '1' && $(./clickhouse client --query "SELECT count() FROM hits") == '100000000' ]]; then - echo "Dataset already downloaded" -else - echo "Will download the dataset" - if [ "`uname`" = "Darwin" ] - then - ./clickhouse client --receive_timeout 1000 --max_insert_threads $(sysctl -n hw.ncpu) --progress --query " - CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" - else - ./clickhouse client --receive_timeout 1000 --max_insert_threads $(nproc || 4) --progress --query " - CREATE OR REPLACE TABLE hits ENGINE = MergeTree PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDate, intHash32(UserID), EventTime) - AS SELECT * FROM url('https://datasets.clickhouse.com/hits/native/hits_100m_obfuscated_{0..255}.native.zst')" - fi - ./clickhouse client --query "SELECT 'The dataset size is: ', count() FROM hits" -fi - -if [[ $(./clickhouse client --query "SELECT count() FROM system.parts WHERE table = 'hits' AND database = 'default' AND active") == '1' ]]; then - echo "Dataset already prepared" -else - echo "Will prepare the dataset" - ./clickhouse client --receive_timeout 1000 --query "OPTIMIZE TABLE hits FINAL" -fi - -echo -echo "Will perform benchmark. Results:" -echo - ->result.csv -QUERY_NUM=1 - -cat "$QUERIES_FILE" | sed "s/{table}/hits/g" | while read query; do - sync - if [ "`uname`" = "Darwin" ] - then - sudo purge > /dev/null - else - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - fi - - echo -n "[" - for i in $(seq 1 $TRIES); do - RES=$(./clickhouse client --time --format=Null --query="$query" 2>&1 ||:) - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - - echo "${QUERY_NUM},${i},${RES}" >> result.csv - done - echo "]," - - QUERY_NUM=$((QUERY_NUM + 1)) -done - - -echo -echo "Benchmark complete. System info:" -echo - -touch {cpu_model,cpu,df,memory,memory_total,blk,mdstat,instance}.txt - -if [ "`uname`" = "Darwin" ] -then - echo '----Version, build id-----------' - ./clickhouse local --query "SELECT format('Version: {}', version())" - ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw - ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" - echo '----CPU-------------------------' - sysctl hw.model | tee cpu_model.txt - sysctl -a | grep -E 'hw.activecpu|hw.memsize|hw.byteorder|cachesize' | tee cpu.txt - echo '----Disk Free and Total--------' - df -h . | tee df.txt - echo '----Memory Free and Total-------' - vm_stat | tee memory.txt - echo '----Physical Memory Amount------' - ls -l /var/vm | tee memory_total.txt - echo '--------------------------------' -else - echo '----Version, build id-----------' - ./clickhouse local --query "SELECT format('Version: {}, build id: {}', version(), buildId())" - ./clickhouse local --query "SELECT format('The number of threads is: {}', value) FROM system.settings WHERE name = 'max_threads'" --output-format TSVRaw - ./clickhouse local --query "SELECT format('Current time: {}', toString(now(), 'UTC'))" - echo '----CPU-------------------------' - cat /proc/cpuinfo | grep -i -F 'model name' | uniq | tee cpu_model.txt - lscpu | tee cpu.txt - echo '----Block Devices---------------' - lsblk | tee blk.txt - echo '----Disk Free and Total--------' - df -h . | tee df.txt - echo '----Memory Free and Total-------' - free -h | tee memory.txt - echo '----Physical Memory Amount------' - cat /proc/meminfo | grep MemTotal | tee memory_total.txt - echo '----RAID Info-------------------' - cat /proc/mdstat| tee mdstat.txt - echo '--------------------------------' -fi -echo - -echo "Instance type from IMDS (if available):" -curl -s --connect-timeout 1 'http://169.254.169.254/latest/meta-data/instance-type' | tee instance.txt -echo - -echo "Uploading the results (if possible)" - -UUID=$(./clickhouse local --query "SELECT generateUUIDv4()") - -./clickhouse local --query " - SELECT - '${UUID}' AS run_id, - version() AS version, - now() AS test_time, - (SELECT value FROM system.settings WHERE name = 'max_threads') AS threads, - filesystemCapacity() AS fs_capacity, - filesystemAvailable() AS fs_available, - file('cpu_model.txt') AS cpu_model, - file('cpu.txt') AS cpu, - file('df.txt') AS df, - file('memory.txt') AS memory, - file('memory_total.txt') AS memory_total, - file('blk.txt') AS blk, - file('mdstat.txt') AS mdstat, - file('instance.txt') AS instance -" | tee meta.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " - INSERT INTO benchmark_runs - (run_id, version, test_time, threads, fs_capacity, fs_available, cpu_model, cpu, df, memory, memory_total, blk, mdstat, instance) - FORMAT TSV" || echo "Cannot upload results." - -./clickhouse local --query " - SELECT - '${UUID}' AS run_id, - c1 AS query_num, - c2 AS try_num, - c3 AS time - FROM file('result.csv') -" | tee results.tsv | ./clickhouse client --host play.clickhouse.com --secure --user benchmark --query " - INSERT INTO benchmark_results - (run_id, query_num, try_num, time) - FORMAT TSV" || echo "Cannot upload results. Please send the output to feedback@clickhouse.com" - -< 1 - ) - GROUP BY run_id - ORDER BY k ASC -) AS t -INNER JOIN benchmark_runs USING (run_id) - -//// diff --git a/benchmark/hardware/queries.sql b/benchmark/hardware/queries.sql deleted file mode 100644 index 89c4616c642..00000000000 --- a/benchmark/hardware/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT count() FROM {table}; -SELECT count() FROM {table} WHERE AdvEngineID != 0; -SELECT sum(AdvEngineID), count(), avg(ResolutionWidth) FROM {table} ; -SELECT sum(UserID) FROM {table} ; -SELECT uniq(UserID) FROM {table} ; -SELECT uniq(SearchPhrase) FROM {table} ; -SELECT min(EventDate), max(EventDate) FROM {table} ; -SELECT AdvEngineID, count() FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count() DESC; -SELECT RegionID, uniq(UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, sum(AdvEngineID), count() AS c, avg(ResolutionWidth), uniq(UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, uniq(UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, uniq(UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10; -SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10; -SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10; -SELECT UserID FROM {table} WHERE UserID = 12345678901234567890; -SELECT count() FROM {table} WHERE URL LIKE '%metrika%'; -SELECT SearchPhrase, any(URL), count() AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, any(URL), any(Title), count() AS c, uniq(UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, avg(length(URL)) AS l, count() AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING c > 100000 ORDER BY l DESC LIMIT 25; -SELECT domainWithoutWWW(Referer) AS key, avg(length(Referer)) AS l, count() AS c, any(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING c > 100000 ORDER BY l DESC LIMIT 25; -SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table}; -SELECT SearchEngineID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, count() AS c, sum(Refresh), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, count() AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, count() AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP AS x, x - 1, x - 2, x - 3, count() AS c FROM {table} GROUP BY x, x - 1, x - 2, x - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(URL) GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT DontCountHits AND NOT Refresh AND notEmpty(Title) GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND IsLink AND NOT IsDownload GROUP BY URL ORDER BY PageViews DESC LIMIT 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, ((SearchEngineID = 0 AND AdvEngineID = 0) ? Referer : '') AS Src, URL AS Dst, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000; -SELECT URLHash, EventDate, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND TraficSourceID IN (-1, 6) AND RefererHash = halfMD5('http://example.ru/') GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100; -SELECT WindowClientWidth, WindowClientHeight, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND NOT Refresh AND NOT DontCountHits AND URLHash = halfMD5('http://example.ru/') GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000; -SELECT toStartOfMinute(EventTime) AS Minute, count() AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND NOT Refresh AND NOT DontCountHits GROUP BY Minute ORDER BY Minute; diff --git a/benchmark/heavyai/benchmark.sh b/benchmark/heavyai/benchmark.sh deleted file mode 100755 index 366d19bac34..00000000000 --- a/benchmark/heavyai/benchmark.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt update -sudo apt install default-jre-headless -sudo apt install apt-transport-https -sudo useradd -U -m heavyai -sudo curl https://releases.heavy.ai/GPG-KEY-heavyai | sudo apt-key add - -echo "deb https://releases.heavy.ai/os/apt/ stable cpu" | sudo tee /etc/apt/sources.list.d/heavyai.list -sudo apt update -sudo apt install heavyai - -export HEAVYAI_USER=heavyai -export HEAVYAI_GROUP=heavyai -export HEAVYAI_STORAGE=/var/lib/heavyai -export HEAVYAI_PATH=/opt/heavyai -export HEAVYAI_LOG=/var/lib/heavyai/data/mapd_log - -cd $HEAVYAI_PATH/systemd -./install_heavy_systemd.sh - -# Press Enter multiple times. - -sudo systemctl start heavydb -sudo systemctl enable heavydb - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz -chmod 777 ~ hits.csv - -sudo bash -c "echo 'allowed-import-paths = [\"/home/ubuntu/\"]' > /var/lib/heavyai/heavy.conf_" -sudo bash -c "cat /var/lib/heavyai/heavy.conf >> /var/lib/heavyai/heavy.conf_" -sudo bash -c "mv /var/lib/heavyai/heavy.conf_ /var/lib/heavyai/heavy.conf && chown heavyai /var/lib/heavyai/heavy.conf" -sudo systemctl restart heavydb - -/opt/heavyai/bin/heavysql -t -p HyperInteractive < create.sql -time /opt/heavyai/bin/heavysql -t -p HyperInteractive <<< "COPY hits FROM '$(pwd)/hits.csv' WITH (HEADER = 'false');" - -# Loaded: 99997497 recs, Rejected: 0 recs in 572.633000 secs - -./run.sh 2>&1 | tee log.txt - -du -bcs /var/lib/heavyai/ - -cat log.txt | grep -P 'Total time|null' | sed -r -e 's/^.*Total time: ([0-9]+) ms$/\1/' | - awk '{ if ($1 == "null") { print } else { print $1 / 1000 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/heavyai/create.sql b/benchmark/heavyai/create.sql deleted file mode 100644 index 41c961c00fc..00000000000 --- a/benchmark/heavyai/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/heavyai/queries.sql b/benchmark/heavyai/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/heavyai/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/heavyai/results/c6a.4xlarge.json b/benchmark/heavyai/results/c6a.4xlarge.json deleted file mode 100644 index 61307fec311..00000000000 --- a/benchmark/heavyai/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "HeavyAI", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Previous names: OmniSci, mapD. Many queries cannot run due to errors and limitations.", - - "tags": ["C++", "column-oriented"], - - "load_time": 572.633, - "data_size": 50887437386, - - "result": [ -[6.525,0.022,0.029], -[0.301,0.042,0.04], -[0.287,0.095,0.093], -[2.572,0.039,0.04], -[null,null,null], -[null,null,null], -[7.327,0.093,0.097], -[0.244,0.043,0.038], -[null,null,null], -[null,null,null], -[null,null,null], -[null,null,null], -[2.939,0.295,0.294], -[null,null,null], -[null,null,null], -[null,null,null], -[null,null,null], -[4.716,3.91,3.955], -[null,null,null], -[0.154,0.083,0.106], -[14.426,0.07,0.071], -[null,null,null], -[null,null,null], -[null,null,null], -[2.276,0.258,0.272], -[null,null,null], -[null,null,null], -[null,null,null], -[null,null,null], -[1.832,1.64,1.602], -[null,null,null], -[null,null,null], -[null,null,null], -[14.811,0.494,0.497], -[null,null,null], -[null,null,null], -[1.941,0.255,0.255], -[5.457,0.172,0.283], -[0.476,0.269,0.256], -[14.239,0.179,0.178], -[3.992,0.112,0.112], -[1.031,0.116,0.116], -[1.365,0.089,0.088] -] -} diff --git a/benchmark/heavyai/run.sh b/benchmark/heavyai/run.sh deleted file mode 100755 index 97387bee42b..00000000000 --- a/benchmark/heavyai/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - /opt/heavyai/bin/heavysql -t -p HyperInteractive <<< "${query}" | grep 'Total time' || echo 'null' - done; -done; diff --git a/benchmark/index.html b/benchmark/index.html deleted file mode 100644 index ce4e6044353..00000000000 --- a/benchmark/index.html +++ /dev/null @@ -1,956 +0,0 @@ - - - - - ClickBench — a Benchmark For Analytical DBMS - - - - - - - - - - - -
- 🌚🌞 -

ClickBench — a Benchmark For Analytical DBMS

- Methodology | Reproduce and Validate the Results | Add a System | Report Mistake | Hardware Benchmark -
- - - - - - - - - - - - - - - - - - - - - - -
System: - All -
Type: - All -
Machine: - All -
Cluster size: - All -
Metric: - Cold Run - Hot Run - Load Time - Storage Size -
- - - - - - - - - - -
- System & Machine - - Relative time (lower is better) -
- -
Nothing selected
- -
-

Detailed Comparison

-
- - - - - - - - -
- - - - diff --git a/benchmark/infobright/benchmark.sh b/benchmark/infobright/benchmark.sh deleted file mode 100755 index 5d80afed863..00000000000 --- a/benchmark/infobright/benchmark.sh +++ /dev/null @@ -1,38 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y docker.io - -mkdir infobright -sudo docker run --name mysql_ib -e MYSQL_ROOT_PASSWORD=mypass -v $(pwd)/infobright:/mnt/mysql_data -p 5029:5029 -p 5555 -d flolas/infobright - -sudo docker run -it --rm --network host mysql:5 mysql --host 127.0.0.1 --port 5029 --user=root --password=mypass -e "CREATE DATABASE test" -sudo docker run -it --rm --network host mysql:5 mysql --host 127.0.0.1 --port 5029 --user=root --password=mypass --database=test -e "$(cat create.sql)" - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -# ERROR 2 (HY000) at line 1: Wrong data or column definition. Row: 93557187, field: 100. -head -n 90000000 hits.tsv > hits90m.tsv - -time sudo docker run -it --rm --volume $(pwd):/workdir --network host mysql:5 mysql --host 127.0.0.1 --port 5029 --user=root --password=mypass --database=test -e " - LOAD DATA LOCAL INFILE '/workdir/hits90m.tsv' INTO TABLE test.hits - FIELDS TERMINATED BY '\\t' ENCLOSED BY '' ESCAPED BY '\\\\' LINES TERMINATED BY '\\n' STARTING BY ''" - -# 38m37.466s - -sudo docker exec mysql_ib du -bcs /mnt/mysql_data/ /usr/local/infobright-4.0.7-x86_64/cache - -# 13 760 341 294 - -./run.sh 2>&1 | log - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) days? )?(([0-9.]+) hours? )?(([0-9.]+) min )?([0-9.]+) sec\).*?$/\2,\4,\6,\7/' | - awk -F, '{ if ($1 == "null") { print } else { print $1 * 86400 + $2 * 3600 + $3 * 60 + $4 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/infobright/create.sql b/benchmark/infobright/create.sql deleted file mode 100644 index 4d23eaac635..00000000000 --- a/benchmark/infobright/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT, - Referer TEXT, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT, - Params TEXT, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT, - BrowserCountry TEXT, - SocialNetwork TEXT, - SocialAction TEXT, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT, - ParamCurrency TEXT, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT, - OpenstatCampaignID TEXT, - OpenstatAdID TEXT, - OpenstatSourceID TEXT, - UTMSource TEXT, - UTMMedium TEXT, - UTMCampaign TEXT, - UTMContent TEXT, - UTMTerm TEXT, - FromTag TEXT, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/infobright/queries.sql b/benchmark/infobright/queries.sql deleted file mode 100644 index ea2bde47802..00000000000 --- a/benchmark/infobright/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') LIMIT 10 OFFSET 1000; diff --git a/benchmark/infobright/results/c6a.4xlarge.json b/benchmark/infobright/results/c6a.4xlarge.json deleted file mode 100644 index c9157d0b101..00000000000 --- a/benchmark/infobright/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Infobright", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Only 90% of data successfully loaded. Some queries run for days.", - - "tags": ["C++", "column-oriented", "MySQL compatible"], - - "load_time": 2317, - "data_size": 13760341294, - - "result": [ -[0.01, 0, 0], -[2.39, 2.4, 2.44], -[0, 0, 0], -[7.21, 6.04, 6.91], -[16.09, 16.86, 15.69], -[48.8, 42.37, 48.63], -[0, 0, 0], -[3.48, 2.42, 2.42], -[23.56, 24.78, 22.21], -[32.87, 31.71, 34.48], -[14.8, 14.83, 14.11], -[16.7, 16.53, 17.37], -[1752.91, 1999.88, 1961.4], -[1193.43, 1167, 1220.47], -[2184.81, 2316.12, 2224.14], -[32.58, 30.69, 31.58], -[300.17, 16221.33, 16168.44], -[122.4, 120.49, 124.67], -[78927.44, 79250.44, 78504.89], -[3.38, 1.22, 1.21], -[289.73, 302.3, 285.83], -[415.82, 389.23, 403.28], -[573.82, 590.81, 575.06], -[300.13, 293.96, 285.64], -[41.42, 37.48, 39.64], -[75.2, 75.37, 72.07], -[39.22, 41.52, 40.11], -[449.56, 445.03, 448.68], -[null, null, null], -[450.87, 488.3, 453.83], -[58.69, 59.29, 58.07], -[84.47, 78.92, 79.38], -[517.97, 520.29, 504.96], -[182468.89, 182468.89, 182468.89], -[182468.89, 182468.89, 182468.89], -[68.43, 66.93, 67.68], -[8.3, 3.62, 3.61], -[1.04, 0.62, 0.62], -[0.22, 0.18, 0.18], -[567.78, 566.52, 563.02], -[0.29, 0.14, 0.11], -[0.17, 0.08, 0.08], -[1.37, 1.34, 1.32] -] -} diff --git a/benchmark/infobright/run.sh b/benchmark/infobright/run.sh deleted file mode 100755 index 36e5fa167fb..00000000000 --- a/benchmark/infobright/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - sudo docker run --rm --network host mysql:5 mysql --host 127.0.0.1 --port 5029 --user=root --password=mypass --database=test -vvv -e "${query}" - done; -done; diff --git a/benchmark/locustdb/README.md b/benchmark/locustdb/README.md deleted file mode 100644 index fb40147cf72..00000000000 --- a/benchmark/locustdb/README.md +++ /dev/null @@ -1,24 +0,0 @@ -This system does not work at all: - -``` -locustdb> SELECT * FROM default LIMIT 1 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -``` - -It is memory-safe and blazing fast. diff --git a/benchmark/locustdb/benchmark.sh b/benchmark/locustdb/benchmark.sh deleted file mode 100755 index 89cb950d90f..00000000000 --- a/benchmark/locustdb/benchmark.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash - -# https://rustup.rs/ -curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -source $HOME/.cargo/env - -sudo apt-get update -sudo apt-get install -y git - -git clone https://github.com/cswinter/LocustDB.git -cd LocustDB - -sudo apt-get install -y g++ capnproto libclang-14-dev - -cargo build --features "enable_rocksdb" --features "enable_lz4" --release - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -target/release/repl --load hits.csv --db-path db - -# Loaded data in 920s. -# Table `default` (99997496 rows, 15.0GiB) - -# SELECT * FROM default LIMIT 1 - -# And it immediately panicked and hung: - -#locustdb> SELECT * FROM default LIMIT 1 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 -#thread '' panicked at 'index out of bounds: the len is 65536 but the index is 65536', src/stringpack.rs:91:15 diff --git a/benchmark/mariadb-columnstore/README.md b/benchmark/mariadb-columnstore/README.md deleted file mode 100644 index 6be1ce43e2b..00000000000 --- a/benchmark/mariadb-columnstore/README.md +++ /dev/null @@ -1,9 +0,0 @@ -MariaDB ColumnStore failed after 5 minutes of data loading: - -``` -ubuntu@ip-172-31-4-179:~$ time mysql --password="${PASSWORD}" --host 127.0.0.1 test -e "LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits" -ERROR 1030 (HY000) at line 1: Got error -1 "Internal error < 0 (Not system error)" from storage engine ColumnStore -``` - -They don't have an issue tracker on GitHub, only JIRA. -JIRA requires login, but does not support SSO. diff --git a/benchmark/mariadb-columnstore/benchmark.sh b/benchmark/mariadb-columnstore/benchmark.sh deleted file mode 100755 index 450120af282..00000000000 --- a/benchmark/mariadb-columnstore/benchmark.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y docker.io -sudo docker run -d -p 3306:3306 -e ANALYTICS_ONLY=1 --name mcs_container mariadb/columnstore - -export PASSWORD="tsFgm457%3cj" -sudo docker exec mcs_container mariadb -e "GRANT ALL PRIVILEGES ON *.* TO 'ubuntu'@'%' IDENTIFIED BY '${PASSWORD}';" - -sudo apt-get install -y mariadb-client - -mysql --password="${PASSWORD}" --host 127.0.0.1 -e "CREATE DATABASE test" -mysql --password="${PASSWORD}" --host 127.0.0.1 test < create.sql - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -time mysql --password="${PASSWORD}" --host 127.0.0.1 test -e " - LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits - FIELDS TERMINATED BY '\\t' ENCLOSED BY '' ESCAPED BY '\\\\' LINES TERMINATED BY '\\n' STARTING BY ''" - -# 41m47.856s - -./run.sh 2>&1 | tee log.txt - -sudo docker exec mcs_container du -bcs /var/lib/columnstore - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/mariadb-columnstore/create.sql b/benchmark/mariadb-columnstore/create.sql deleted file mode 100644 index 565596f04c9..00000000000 --- a/benchmark/mariadb-columnstore/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT, - Referer TEXT, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT, - Params TEXT, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT, - BrowserCountry TEXT, - SocialNetwork TEXT, - SocialAction TEXT, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT, - ParamCurrency TEXT, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT, - OpenstatCampaignID TEXT, - OpenstatAdID TEXT, - OpenstatSourceID TEXT, - UTMSource TEXT, - UTMMedium TEXT, - UTMCampaign TEXT, - UTMContent TEXT, - UTMTerm TEXT, - FromTag TEXT, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) ENGINE=Columnstore; diff --git a/benchmark/mariadb-columnstore/queries.sql b/benchmark/mariadb-columnstore/queries.sql deleted file mode 100644 index ea2bde47802..00000000000 --- a/benchmark/mariadb-columnstore/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') LIMIT 10 OFFSET 1000; diff --git a/benchmark/mariadb-columnstore/results/c6a.4xlarge.json b/benchmark/mariadb-columnstore/results/c6a.4xlarge.json deleted file mode 100644 index e318b64da41..00000000000 --- a/benchmark/mariadb-columnstore/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "MariaDB ColumnStore", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Previous name: InfiniDB.", - - "tags": ["C++", "column-oriented", "MySQL compatible"], - - "load_time": 2507.8, - "data_size": 19712857022, - - "result": [ -[0.151,0.158,0.148], -[0.100,0.101,0.106], -[1.221,1.233,1.226], -[0.739,0.736,0.741], -[2.025,2.046,2.004], -[3.725,4.801,3.755], -[0.871,0.749,0.736], -[0.118,0.108,0.103], -[2.108,2.029,2.029], -[4.225,4.271,4.288], -[1.711,1.402,1.407], -[1.526,1.435,1.420], -[5.339,4.172,3.610], -[4.692,4.729,4.960], -[4.013,3.860,3.918], -[3.236,2.680,2.629], -[null,null,null], -[null,null,null], -[null,null,null], -[0.137,0.109,0.147], -[12.331,6.069,8.619], -[2.162,2.178,2.192], -[16.849,30.463,26.639], -[92,90.208,92.814], -[3.042,1.763,1.791], -[1.779,1.772,1.749], -[1.793,1.821,1.888], -[13.036,10.747,9.590], -[null,null,null], -[78.224,79.141,77.806], -[2.837,2.654,2.675], -[5.833,4.552,3.678], -[null,null,null], -[null,null,null], -[null,null,null], -[3.626,3.546,3.709], -[1.719,1.787,1.876], -[1.345,0.906,0.910], -[0.117,0.091,0.093], -[1.217,1.133,1.133], -[0.114,0.063,0.062], -[0.100,0.062,0.061], -[null,null,null] -] -} diff --git a/benchmark/mariadb-columnstore/run.sh b/benchmark/mariadb-columnstore/run.sh deleted file mode 100755 index b917a815d2c..00000000000 --- a/benchmark/mariadb-columnstore/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - mysql --password="${PASSWORD}" --host 127.0.0.1 -vvv test -e "${query}" - done; -done; diff --git a/benchmark/mariadb/benchmark.sh b/benchmark/mariadb/benchmark.sh deleted file mode 100755 index 33f69f45c39..00000000000 --- a/benchmark/mariadb/benchmark.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y mariadb-server -sudo bash -c "echo -e '[mysql]\nlocal-infile=1\n\n[mysqld]\nlocal-infile=1\n' > /etc/mysql/conf.d/local_infile.cnf" -sudo service mariadb restart - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -sudo mariadb -e "CREATE DATABASE test" -sudo mariadb test < create.sql -time sudo mariadb test -e "LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits" - -# 2:23:45 elapsed - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/mysql - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) days? )?(([0-9.]+) hours? )?(([0-9.]+) min )?([0-9.]+) sec\).*?$/\2,\4,\6,\7/' | - awk -F, '{ if ($1 == "null") { print } else { print $1 * 86400 + $2 * 3600 + $3 * 60 + $4 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/mariadb/create.sql b/benchmark/mariadb/create.sql deleted file mode 100644 index 1850bffedce..00000000000 --- a/benchmark/mariadb/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/mariadb/queries.sql b/benchmark/mariadb/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/mariadb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/mariadb/results/c6a.4xlarge.json.shame b/benchmark/mariadb/results/c6a.4xlarge.json.shame deleted file mode 100644 index e402032e20c..00000000000 --- a/benchmark/mariadb/results/c6a.4xlarge.json.shame +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "MariaDB", - "date": "2022-07-12", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "For some reason many queries run in almost exactly one hour. Most likely a coincidence. It is installed from Debian packages, which might explain the abysmal performance.", - - "tags": ["C++", "row-oriented", "MySQL compatible"], - - "load_time": 8625, - "data_size": 107252505530, - - "result": [ -[7910.23,10229.8,3610.54], -[3595.79,3595.59,3595.21], -[3596.12,3595.34,3595.49], -[3597.22,3595.7,3595.09], -[3600.57,3596.08,3595.82], -[3596.57,3597.76,3598.47], -[3595.94,3595.36,3596.2], -[3595.95,3595.38,3595.45], -[3649.15,3624.26,3623.71], -[3645.39,3763.75,3630.71], -[4519.77,4552.84,4552.7], -[4628.21,4629.55,4628.16], -[3773.52,3774.44,3779.05], -[12198,12192.4,12192.6], -[3779.67,3774.97,3777.86], -[4277.99,4302.56,4275.29], -[4697.19,4367.3,4372.73], -[4401.2,4340.43,4310.13], -[5335.93,5373.04,5361.51], -[3596.91,3598.61,3598.71], -[3598.76,3598.56,3598.68], -[3598.76,3598.63,3599.39], -[3604.28,3603.71,3603.66], -[3598.83,3598.2,3598.19], -[3599.11,3598.1,3598.17], -[3599.04,3598.46,3598.13], -[3598.76,3598.71,3598.22], -[3599.02,3599.76,3598.18], -[3644.12,3716.36,3725.92], -[3933.26,3854.55,3784.74], -[3392.13,3402.26,3602.15], -[3717.3,3735.06,3723.07], -[6126.66,6135.63,6193.45], -[4029.78,3998.89,3952.7], -[3955.34,4029.28,3941.84], -[4881.07,4782.53,4900.72], -[24.455,23.862,25.173], -[24.195,23.529,23.473], -[20.719,21.629,21.582], -[24.577,25.877,25.953], -[20.686,20.787,21.199], -[20.036,17.973,22.759], -[null,null,null] -] -} diff --git a/benchmark/mariadb/run.sh b/benchmark/mariadb/run.sh deleted file mode 100755 index 5f80a539bbe..00000000000 --- a/benchmark/mariadb/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - sudo mariadb test -vvv -e "${query}" - done; -done; diff --git a/benchmark/monetdb/benchmark.sh b/benchmark/monetdb/benchmark.sh deleted file mode 100755 index f0afb400802..00000000000 --- a/benchmark/monetdb/benchmark.sh +++ /dev/null @@ -1,40 +0,0 @@ -#!/bin/bash - -# Install - -echo "deb https://dev.monetdb.org/downloads/deb/ $(lsb_release -cs) monetdb" | sudo tee /etc/apt/sources.list.d/monetdb.list - -sudo wget --output-document=/etc/apt/trusted.gpg.d/monetdb.gpg https://www.monetdb.org/downloads/MonetDB-GPG-KEY.gpg -sudo apt-get update -sudo apt-get install -y monetdb5-sql monetdb-client dos2unix - -sudo systemctl enable monetdbd -sudo systemctl start monetdbd - -sudo monetdbd create /var/lib/monetdb -sudo monetdbd start /var/lib/monetdb -sudo usermod -a -G monetdb $USER - -sudo monetdb create test -sudo monetdb release test - -sudo apt-get install -y expect - -./query.expect "$(cat create.sql)" - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz -chmod 777 ~ hits.tsv - -./query.expect "COPY INTO hits FROM '$(pwd)/hits.tsv' USING DELIMITERS '\t'" - -# 99997497 affected rows -# clk: 15:39 min - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/monetdb5/ - -cat log.txt | dos2unix -f | grep -P 'clk|tuple' | - awk '/tuple/ { ok = 1 } /clk/ { if (ok) { if ($3 == "ms") { print $2 / 1000 } else { print $2 } } else { print "null" }; ok = 0 }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/monetdb/create.sql b/benchmark/monetdb/create.sql deleted file mode 100644 index 41c961c00fc..00000000000 --- a/benchmark/monetdb/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/monetdb/queries.sql b/benchmark/monetdb/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/monetdb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/monetdb/query.expect b/benchmark/monetdb/query.expect deleted file mode 100755 index 8e8e43cea98..00000000000 --- a/benchmark/monetdb/query.expect +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/expect - -set timeout 3600 -set query [lindex $argv 0] - -spawn mclient -u monetdb -d test; -expect "password:"; send "monetdb\r"; -expect "sql>"; send "\\t clock\r"; -expect "sql>"; send "$query;\r"; -expect "sql>"; send "\4" diff --git a/benchmark/monetdb/results/c6a.4xlarge.json b/benchmark/monetdb/results/c6a.4xlarge.json deleted file mode 100644 index eecb21b37d7..00000000000 --- a/benchmark/monetdb/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "MonetDB", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "column-oriented"], - - "load_time": 939, - "data_size": 49696606499, - - "result": [ -[0.000218,0.000157,0.000155], -[0.101903,0.019908,0.018439], -[0.282431,0.035987,0.034938], -[2.868,0.029387,0.029207], -[4.675,4.515,4.511], -[6.584,4.269,4.650], -[0.528827,0.063135,0.065742], -[0.506863,0.020966,0.021687], -[8.343,4.457,4.408], -[7.224,6.548,7.576], -[0.267003,0.233353,0.230444], -[0.347206,0.28358,0.266085], -[5.389,3.099,3.074], -[7.653,7.759,8.596], -[3.276,3.326,3.292], -[5.310,3.465,3.578], -[9.341,9.143,9.536], -[9.584,9.604,9.419], -[19.539,19.783,19.611], -[0.004509,0.000702,0.000643], -[20.801,1.570,1.603], -[2.752,0.418221,0.395884], -[14.717,0.800894,0.395477], -[14.429,1.804,1.869], -[1.386,0.159602,0.156426], -[0.189736,0.167664,0.168781], -[0.164681,0.176666,0.17126], -[3.005,3.113,3.882], -[null,null,null], -[2.751,2.846,2.676], -[7.937,2.579,2.447], -[5.120,3.492,3.467], -[22.862,22.567,23.211], -[33.437,18.889,19.043], -[18.898,19.583,19.047], -[14.774,12.984,13.803], -[3.865,0.322143,0.323117], -[0.192149,0.177791,0.175984], -[0.194173,0.159398,0.165201], -[0.680778,0.592252,0.560738], -[0.106465,0.10638,0.102692], -[0.154871,0.153752,0.155782], -[0.11459,0.09639,0.095594] -] -} diff --git a/benchmark/monetdb/run.sh b/benchmark/monetdb/run.sh deleted file mode 100755 index c1eadeab22c..00000000000 --- a/benchmark/monetdb/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - ./query.expect "$query" 2>&1 - done; -done; diff --git a/benchmark/mysql-myisam/benchmark.sh b/benchmark/mysql-myisam/benchmark.sh deleted file mode 100755 index fa948d86132..00000000000 --- a/benchmark/mysql-myisam/benchmark.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y mysql-server-8.0 -sudo bash -c "echo -e '[mysql]\nlocal-infile=1\n\n[mysqld]\nlocal-infile=1\n' > /etc/mysql/conf.d/local_infile.cnf" -sudo service mysql restart - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -sudo mysql -e "CREATE DATABASE test" -sudo mysql test < create.sql -time sudo mysql test -e "LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits" - -# 41m8.979s - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/mysql - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/mysql-myisam/create.sql b/benchmark/mysql-myisam/create.sql deleted file mode 100644 index 961a0fea486..00000000000 --- a/benchmark/mysql-myisam/create.sql +++ /dev/null @@ -1,110 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -) -ENGINE = MyISAM; diff --git a/benchmark/mysql-myisam/queries.sql b/benchmark/mysql-myisam/queries.sql deleted file mode 100644 index ea2bde47802..00000000000 --- a/benchmark/mysql-myisam/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') LIMIT 10 OFFSET 1000; diff --git a/benchmark/mysql-myisam/results/c6a.4xlarge.json b/benchmark/mysql-myisam/results/c6a.4xlarge.json deleted file mode 100644 index b4493f1fb4a..00000000000 --- a/benchmark/mysql-myisam/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "MySQL (MyISAM)", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "row-oriented", "MySQL compatible"], - - "load_time": 2512, - "data_size": 121588958061, - - "result": [ -[0.00,0.00,0.00], -[283.32,276.83,274.52], -[276.93,278.29,283.27], -[28.83,23.63,21.55], -[46.41,40.81,40.93], -[467.04,467.39,469.08], -[31.02,25.89,24.20], -[277.89,275.3,277.3], -[329.34,325.8,325.35], -[342.86,338.43,336.95], -[282.03,279.87,281.22], -[277.74,282.68,282], -[335.66,334.83,336.44], -[305.24,310.39,307.3], -[337.41,338.52,342.94], -[308.66,307.34,306.27], -[738.38,748.44,740.75], -[738.75,734.01,738.25], -[867.01,872.92,868.84], -[25.65,20.61,18.46], -[312.39,313.67,306.66], -[301.66,305.12,308.01], -[298.12,298.44,312.4], -[311.34,309.9,311.85], -[281.87,278.5,275], -[277.46,277.46,277.46], -[280.75,278.04,281.76], -[263.9,417.39,406.88], -[707.21,711.96,705], -[668.1,668.33,665.96], -[330.31,333.36,331.94], -[506.57,506.18,500.53], -[2604.49,2681.96,2703.12], -[830.65,832.88,831.14], -[831.98,830.46,833.41], -[608.49,608.51,613.68], -[4.56,4.13,4.16], -[3.80,3.80,3.70], -[1.65,1.45,1.46], -[6.33,5.14,6.15], -[1.60,1.41,1.41], -[1.56,1.42,1.39], -[7.04,1.17,1.13] -] -} diff --git a/benchmark/mysql-myisam/run.sh b/benchmark/mysql-myisam/run.sh deleted file mode 100755 index 31f31682f32..00000000000 --- a/benchmark/mysql-myisam/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - sudo mysql test -vvv -e "${query}" - done; -done; diff --git a/benchmark/mysql/benchmark.sh b/benchmark/mysql/benchmark.sh deleted file mode 100755 index ab44a3ce5d7..00000000000 --- a/benchmark/mysql/benchmark.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y mysql-server-8.0 -sudo bash -c "echo -e '[mysql]\nlocal-infile=1\n\n[mysqld]\nlocal-infile=1\n' > /etc/mysql/conf.d/local_infile.cnf" -sudo service mysql restart - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -sudo mysql -e "CREATE DATABASE test" -sudo mysql test < create.sql -time sudo mysql test -e "LOAD DATA LOCAL INFILE 'hits.tsv' INTO TABLE hits" - -# 2:37:52 elapsed - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/mysql - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/mysql/create.sql b/benchmark/mysql/create.sql deleted file mode 100644 index 7c1b36b4f4a..00000000000 --- a/benchmark/mysql/create.sql +++ /dev/null @@ -1,110 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -) -ENGINE = InnoDB; diff --git a/benchmark/mysql/queries.sql b/benchmark/mysql/queries.sql deleted file mode 100644 index ea2bde47802..00000000000 --- a/benchmark/mysql/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') ORDER BY DATE_FORMAT(EventTime, '%Y-%m-%d %H:00:00') LIMIT 10 OFFSET 1000; diff --git a/benchmark/mysql/results/c6a.4xlarge.json b/benchmark/mysql/results/c6a.4xlarge.json deleted file mode 100644 index 18b8509215f..00000000000 --- a/benchmark/mysql/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "MySQL", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C++", "row-oriented", "MySQL compatible"], - - "load_time": 9472, - "data_size": 171953585825, - - "result": [ -[339.77,339.88,339.77], -[364.91,371.86,367.55], -[366.2,368.91,389.66], -[364.39,377.53,571.45], -[377.69,390.02,384.86], -[569.48,576.51,574.68], -[367.4,368.23,370.41], -[371.29,384.02,613.22], -[478.85,683.22,495.68], -[489.9,635.96,662.43], -[386.07,396.49,640.15], -[389.13,412.55,444.12], -[447.97,455.54,448.06], -[423.22,845.44,813.6], -[452.48,460.07,453.98], -[577.54,623.21,586.49], -[852.07,856.36,862.66], -[838.09,848.92,851.12], -[1006.37,1011.16,1023.17], -[369.76,375.61,415.28], -[412.45,419.9,456.62], -[411.65,432.88,482.2], -[412.73,420.73,429.5], -[551.16,577.62,545.45], -[382.89,394.76,386.37], -[380.9,391.4,385.05], -[385.3,394.67,460.32], -[388.95,394.7,387.21], -[800.33,807.90,807.11], -[706.03,745.27,718.9], -[450.9,489.59,530.97], -[625.5,651.93,647.32], -[2721.13,2792.12,2819.26], -[945.9,954.94,957.54], -[945.42,953.78,965.16], -[684.36,716.29,708.75], -[10.01,3.79,3.77], -[7.48,3.32,3.27], -[5.09,0.98,0.96], -[8.70,4.77,4.68], -[4.82,0.76,0.74], -[4.46,0.77,0.75], -[7.04,1.17,1.13] -] -} diff --git a/benchmark/mysql/run.sh b/benchmark/mysql/run.sh deleted file mode 100755 index 31f31682f32..00000000000 --- a/benchmark/mysql/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - sudo mysql test -vvv -e "${query}" - done; -done; diff --git a/benchmark/pinot/benchmark.sh b/benchmark/pinot/benchmark.sh deleted file mode 100755 index fffcfe6702d..00000000000 --- a/benchmark/pinot/benchmark.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt install openjdk-11-jdk jq -y -sudo update-alternatives --config java - -# Install - -PINOT_VERSION=0.10.0 - -wget https://downloads.apache.org/pinot/apache-pinot-$PINOT_VERSION/apache-pinot-$PINOT_VERSION-bin.tar.gz -tar -zxvf apache-pinot-$PINOT_VERSION-bin.tar.gz - -./apache-pinot-$PINOT_VERSION-bin/bin/pinot-admin.sh QuickStart -type batch & -sleep 30 -./apache-pinot-$PINOT_VERSION-bin/bin/pinot-admin.sh AddTable -tableConfigFile offline_table.json -schemaFile schema.json -exec - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -# Pinot was unable to load data as a single file wihout any errors returned. We have to split the data -split -d --additional-suffix .tsv --verbose -n l/100 hits.tsv parts - -# Pinot can't load value '"tatuirovarki_redmond' so we need to fix this row to make it work -sed parts93.tsv -e 's "tatuirovarki_redmond tatuirovarki_redmond g' -i - -# Fix path to local directory -sed splitted.yaml 's PWD_DIR_PLACEHOLDER '$PWD' g' -i -sed local.yaml 's PWD_DIR_PLACEHOLDER '$PWD' g' -i - -# Load data -./apache-pinot-$PINOT_VERSION-bin/bin/pinot-admin.sh LaunchDataIngestionJob -jobSpecFile splitted.yaml - -# After upload it shows 94465149 rows instead of 99997497 in the dataset - -# Run the queries -./run.sh - -# stop Druid services -kill %1 - -du -bcs ./batch diff --git a/benchmark/pinot/local.yaml b/benchmark/pinot/local.yaml deleted file mode 100644 index e2bae09526f..00000000000 --- a/benchmark/pinot/local.yaml +++ /dev/null @@ -1,38 +0,0 @@ -executionFrameworkSpec: - name: 'standalone' - segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner' - segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner' - segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner' - segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner' -jobType: SegmentCreationAndTarPush -inputDirURI: 'PWD_DIR_PLACEHOLDER' -includeFileNamePattern: 'glob:PWD_DIR_PLACEHOLDER/hits.tsv' -outputDirURI: 'batch/hits/segments' -overwriteOutput: true -segmentCreationJobParallelism: 10 -pinotFSSpecs: - - scheme: file - className: org.apache.pinot.spi.filesystem.LocalPinotFS -recordReaderSpec: - dataFormat: 'csv' - className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader' - configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig' - configs: - fileFormat: 'default' - delimiter: ' ' - multiValueDelimiter: '' - header: 'WatchID JavaEnable Title GoodEvent EventTime EventDate CounterID ClientIP RegionID UserID CounterClass OS UserAgent URL Referer IsRefresh RefererCategoryID RefererRegionID URLCategoryID URLRegionID ResolutionWidth ResolutionHeight ResolutionDepth FlashMajor FlashMinor FlashMinor2 NetMajor NetMinor UserAgentMajor UserAgentMinor CookieEnable JavascriptEnable IsMobile MobilePhone MobilePhoneModel Params IPNetworkID TraficSourceID SearchEngineID SearchPhrase AdvEngineID IsArtifical WindowClientWidth WindowClientHeight ClientTimeZone ClientEventTime SilverlightVersion1 SilverlightVersion2 SilverlightVersion3 SilverlightVersion4 PageCharset CodeVersion IsLink IsDownload IsNotBounce FUniqID OriginalURL HID IsOldCounter IsEvent IsParameter DontCountHits WithHash HitColor LocalEventTime Age Sex Income Interests Robotness RemoteIP WindowName OpenerName HistoryLength BrowserLanguage BrowserCountry SocialNetwork SocialAction HTTPError SendTiming DNSTiming ConnectTiming ResponseStartTiming ResponseEndTiming FetchTiming SocialSourceNetworkID SocialSourcePage ParamPrice ParamOrderID ParamCurrency ParamCurrencyID OpenstatServiceName OpenstatCampaignID OpenstatAdID OpenstatSourceID UTMSource UTMMedium UTMCampaign UTMContent UTMTerm FromTag HasGCLID RefererHash URLHash' - - -tableSpec: - tableName: 'hits' - schemaURI: 'http://localhost:9000/tables/hits/schema' - tableConfigURI: 'http://localhost:9000/tables/hits' - -pinotClusterSpecs: - - controllerURI: 'http://localhost:9000' - -pushJobSpec: - pushAttempts: 10 - pushRetryIntervalMillis: 1000 - segmentUriPrefix: 'file://' diff --git a/benchmark/pinot/offline_table.json b/benchmark/pinot/offline_table.json deleted file mode 100644 index da386d53d38..00000000000 --- a/benchmark/pinot/offline_table.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "tableName": "hits", - "tableType": "OFFLINE", - "segmentsConfig": { - "segmentPushType": "APPEND", - "segmentAssignmentStrategy": "BalanceNumSegmentAssignmentStrategy", - "schemaName": "hitsSchema", - "replication": "1" - }, - "tableIndexConfig":{}, - "metadata":{}, - "tenants": - { - "server": "DefaultTenant", - "broker": "DefaultTenant" - } -} diff --git a/benchmark/pinot/queries.sql b/benchmark/pinot/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/pinot/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/pinot/results/c6a.4xlarge.json b/benchmark/pinot/results/c6a.4xlarge.json deleted file mode 100644 index 8be94900f42..00000000000 --- a/benchmark/pinot/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Pinot", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "It successfully loaded only 94465149 out of 99997497 records. Some queries returned NullPointerException. The loading process is painful - splitting to 100 pieces required. It does not correctly report errors on data loading, the results may be incorrect.", - - "tags": ["Java", "column-oriented"], - - "load_time": 2032, - "data_size": null, - - "result": [ -[0.002, 0.001, 0.001], -[0.186, 0.186, 0.185], -[0.251, 0.276, 0.258], -[0.475, 0.281, 0.238], -[3.907, 3.655, 3.633], -[30.471, 14.687, 14.93], -[null, null, null], -[0.135, 0.134, 0.148], -[3.039, 2.902, 2.938], -[3.159, 3.212, 3.225], -[4.217, 4.197, 4.384], -[4.145, 4.124, 4.121], -[2.989, 3.145, 3.18], -[6.402, 6.886, 6.374], -[3.245, 3.35, 3.129], -[5.112, 5.027, 5.141], -[5.509, 5.279, 5.257], -[0.865, 0.856, 0.829], -[null, null, null], -[0.017, 0.015, 0.015], -[54.348, 19.562, 19.128], -[null, null, null], -[76.596, 74.719, 14.228], -[7.441, 5.77, 5.87], -[0.376, 0.327, 0.286], -[7.689, 0.395, 1.281], -[3.434, 0.499, 0.5], -[27.679, 2.378, 2.393], -[null, null, null], -[2.221, 2.227, 2.167], -[4.941, 4.639, 4.565], -[5.641, 5.37, 5.007], -[5.295, 5.006, 5.357], -[5.28, 5.21, 5.105], -[6.231, 6.238, 6.385], -[5.918, 5.933, 5.934], -[0.26, 0.202, 0.21], -[0.364, 0.072, 0.069], -[0.042, 0.034, 0.035], -[1.483, 0.686, 0.651], -[0.113, 0.071, 0.079], -[0.042, 0.051, 0.037], -[null, null, null] -] -} diff --git a/benchmark/pinot/run.sh b/benchmark/pinot/run.sh deleted file mode 100755 index 9434c6ee8a7..00000000000 --- a/benchmark/pinot/run.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -TRIES=3 -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - echo -n "[" - for i in $(seq 1 $TRIES); do - echo "{\"sql\":\"$query option(timeoutMs=300000)\"}"| tr -d ';' > query.json - RES=$(curl -s -XPOST -H'Content-Type: application/json' http://localhost:8000/query/sql/ -d @query.json | jq 'if .exceptions == [] then .timeUsedMs/1000 else "-" end' ) - [[ "$?" == "0" ]] && echo -n "${RES}" || echo -n "null" - [[ "$i" != $TRIES ]] && echo -n ", " - done - echo "]," -done diff --git a/benchmark/pinot/schema.json b/benchmark/pinot/schema.json deleted file mode 100644 index cc603fb45a4..00000000000 --- a/benchmark/pinot/schema.json +++ /dev/null @@ -1,437 +0,0 @@ -{ - "metricFieldSpecs": [ -{ - "name": "AdvEngineID", - "dataType": "LONG" - }, - { - "name": "ClientIP", - "dataType": "LONG" - }, - { - "name": "RegionID", - "dataType": "LONG" - }, - { - "name": "IsRefresh", - "dataType": "LONG" - }, - { - "name": "ResolutionWidth", - "dataType": "LONG" - }, - { - "name": "ResolutionHeight", - "dataType": "LONG" - }, - { - "name": "WindowClientWidth", - "dataType": "LONG" - }, - { - "name": "WindowClientHeight", - "dataType": "LONG" - }, - { - "name": "URLHash", - "dataType": "LONG" - }, - { - "name": "JavaEnable", - "dataType": "LONG" - }, - { - "name": "GoodEvent", - "dataType": "LONG" - }, - { - "name": "CounterClass", - "dataType": "LONG" - }, - { - "name": "OS", - "dataType": "LONG" - }, - { - "name": "UserAgent", - "dataType": "LONG" - }, - { - "name": "RefererCategoryID", - "dataType": "LONG" - }, - { - "name": "RefererRegionID", - "dataType": "LONG" - }, - { - "name": "URLCategoryID", - "dataType": "LONG" - }, - { - "name": "URLRegionID", - "dataType": "LONG" - }, - { - "name": "ResolutionDepth", - "dataType": "LONG" - }, - { - "name": "FlashMajor", - "dataType": "LONG" - }, - { - "name": "FlashMinor", - "dataType": "LONG" - }, - { - "name": "NetMajor", - "dataType": "LONG" - }, - { - "name": "NetMinor", - "dataType": "LONG" - }, - { - "name": "UserAgentMajor", - "dataType": "LONG" - }, - { - "name": "CookieEnable", - "dataType": "LONG" - }, - { - "name": "JavascriptEnable", - "dataType": "LONG" - }, - { - "name": "IsMobile", - "dataType": "LONG" - }, - { - "name": "MobilePhone", - "dataType": "LONG" - }, - { - "name": "IPNetworkID", - "dataType": "LONG" - }, - { - "name": "TraficSourceID", - "dataType": "LONG" - }, - { - "name": "SearchEngineID", - "dataType": "LONG" - }, - { - "name": "IsArtifical", - "dataType": "LONG" - }, - { - "name": "ClientTimeZone", - "dataType": "LONG" - }, - { - "name": "SilverlightVersion1", - "dataType": "LONG" - }, - { - "name": "SilverlightVersion2", - "dataType": "LONG" - }, - { - "name": "SilverlightVersion3", - "dataType": "LONG" - }, - { - "name": "SilverlightVersion4", - "dataType": "LONG" - }, - { - "name": "CodeVersion", - "dataType": "LONG" - }, - { - "name": "IsLink", - "dataType": "LONG" - }, - { - "name": "IsDownload", - "dataType": "LONG" - }, - { - "name": "IsNotBounce", - "dataType": "LONG" - }, - { - "name": "FUniqID", - "dataType": "LONG" - }, - { - "name": "HID", - "dataType": "LONG" - }, - { - "name": "IsOldCounter", - "dataType": "LONG" - }, - { - "name": "IsEvent", - "dataType": "LONG" - }, - { - "name": "IsParameter", - "dataType": "LONG" - }, - { - "name": "DontCountHits", - "dataType": "LONG" - }, - { - "name": "WithHash", - "dataType": "LONG" - }, - { - "name": "Age", - "dataType": "LONG" - }, - { - "name": "Sex", - "dataType": "LONG" - }, - { - "name": "Income", - "dataType": "LONG" - }, - { - "name": "Interests", - "dataType": "LONG" - }, - { - "name": "Robotness", - "dataType": "LONG" - }, - { - "name": "RemoteIP", - "dataType": "LONG" - }, - { - "name": "WindowName", - "dataType": "LONG" - }, - { - "name": "OpenerName", - "dataType": "LONG" - }, - { - "name": "HistoryLength", - "dataType": "LONG" - }, - { - "name": "HTTPError", - "dataType": "LONG" - }, - { - "name": "SendTiming", - "dataType": "LONG" - }, - { - "name": "DNSTiming", - "dataType": "LONG" - }, - { - "name": "ConnectTiming", - "dataType": "LONG" - }, - { - "name": "ResponseStartTiming", - "dataType": "LONG" - }, - { - "name": "ResponseEndTiming", - "dataType": "LONG" - }, - { - "name": "FetchTiming", - "dataType": "LONG" - }, - { - "name": "SocialSourceNetworkID", - "dataType": "LONG" - }, - { - "name": "ParamPrice", - "dataType": "LONG" - }, - { - "name": "ParamCurrencyID", - "dataType": "LONG" - }, - { - "name": "HasGCLID", - "dataType": "LONG" - }, - { - "name": "RefererHash", - "dataType": "LONG" - }, - { - "name": "CLID", - "dataType": "LONG" - } - - ], - "dimensionFieldSpecs": [ - { - "name": "CounterID", - "dataType": "LONG" - }, - { - "name": "UserID", - "dataType": "LONG" - }, - { - "name": "URL", - "dataType": "STRING" - }, - { - "name": "OpenstatServiceName", - "dataType": "STRING" - }, - { - "name": "OpenstatCampaignID", - "dataType": "STRING" - }, - { - "name": "OpenstatAdID", - "dataType": "STRING" - }, - { - "name": "UserAgentMinor", - "dataType": "STRING" - }, - { - "name": "OpenstatSourceID", - "dataType": "STRING" - }, - { - "name": "MobilePhoneModel", - "dataType": "STRING" - }, - { - "name": "Params", - "dataType": "STRING" - }, - { - "name": "UTMSource", - "dataType": "STRING" - }, - { - "name": "PageCharset", - "dataType": "STRING" - }, - { - "name": "SearchPhrase", - "dataType": "STRING" - }, - { - "name": "UTMMedium", - "dataType": "STRING" - }, - { - "name": "OriginalURL", - "dataType": "STRING" - }, - { - "name": "UTMCampaign", - "dataType": "STRING" - }, - { - "name": "ClientEventTime", - "dataType": "STRING" - }, - { - "name": "ParamOrderID", - "dataType": "STRING" - }, - { - "name": "ParamCurrency", - "dataType": "STRING" - }, - { - "name": "UTMContent", - "dataType": "STRING" - }, - { - "name": "UTMTerm", - "dataType": "STRING" - }, - { - "name": "FlashMinor2", - "dataType": "STRING" - }, - { - "name": "FromTag", - "dataType": "STRING" - }, - - { - "name": "Referer", - "dataType": "STRING" - }, - { - "name": "Title", - "dataType": "STRING" - }, - { - "name": "HitColor", - "dataType": "STRING" - }, - { - "name": "LocalEventTime", - "dataType": "STRING" - }, - { - "name": "BrowserLanguage", - "dataType": "STRING" - }, - { - "name": "SocialSourcePage", - "dataType": "STRING" - }, - { - "name": "BrowserCountry", - "dataType": "STRING" - }, - { - "name": "SocialNetwork", - "dataType": "STRING" - }, - { - "name": "SocialAction", - "dataType": "STRING" - }, - - { - "name": "WatchID", - "dataType": "LONG" - } - - ], - "dateTimeFieldSpecs": [ - { - "name": "EventTime", - "dataType": "STRING", - "format": "1:SECONDS:SIMPLE_DATE_FORMAT:yyyy-MM-dd HH:mm:ss", - "granularity": "1:SECONDS" - }, - { - "name": "EventDate", - "dataType": "STRING", - "format": "1:DAYS:SIMPLE_DATE_FORMAT:yyyy-MM-dd", - "granularity": "1:DAYS" - } - ], - "schemaName": "hitsSchema" -} diff --git a/benchmark/pinot/splitted.yaml b/benchmark/pinot/splitted.yaml deleted file mode 100644 index 4f2b1eec62c..00000000000 --- a/benchmark/pinot/splitted.yaml +++ /dev/null @@ -1,37 +0,0 @@ -executionFrameworkSpec: - name: 'standalone' - segmentGenerationJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentGenerationJobRunner' - segmentTarPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentTarPushJobRunner' - segmentUriPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentUriPushJobRunner' - segmentMetadataPushJobRunnerClassName: 'org.apache.pinot.plugin.ingestion.batch.standalone.SegmentMetadataPushJobRunner' -jobType: SegmentCreationAndTarPush -inputDirURI: 'PWD_DIR_PLACEHOLDER' -includeFileNamePattern: 'glob:PWD_DIR_PLACEHOLDER/parts??.tsv' -outputDirURI: 'batch/hits/segments' -overwriteOutput: true -segmentCreationJobParallelism: 16 -pinotFSSpecs: - - scheme: file - className: org.apache.pinot.spi.filesystem.LocalPinotFS -recordReaderSpec: - dataFormat: 'csv' - className: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReader' - configClassName: 'org.apache.pinot.plugin.inputformat.csv.CSVRecordReaderConfig' - configs: - fileFormat: 'default' - delimiter: ' ' - multiValueDelimiter: '' - header: 'WatchID JavaEnable Title GoodEvent EventTime EventDate CounterID ClientIP RegionID UserID CounterClass OS UserAgent URL Referer IsRefresh RefererCategoryID RefererRegionID URLCategoryID URLRegionID ResolutionWidth ResolutionHeight ResolutionDepth FlashMajor FlashMinor FlashMinor2 NetMajor NetMinor UserAgentMajor UserAgentMinor CookieEnable JavascriptEnable IsMobile MobilePhone MobilePhoneModel Params IPNetworkID TraficSourceID SearchEngineID SearchPhrase AdvEngineID IsArtifical WindowClientWidth WindowClientHeight ClientTimeZone ClientEventTime SilverlightVersion1 SilverlightVersion2 SilverlightVersion3 SilverlightVersion4 PageCharset CodeVersion IsLink IsDownload IsNotBounce FUniqID OriginalURL HID IsOldCounter IsEvent IsParameter DontCountHits WithHash HitColor LocalEventTime Age Sex Income Interests Robotness RemoteIP WindowName OpenerName HistoryLength BrowserLanguage BrowserCountry SocialNetwork SocialAction HTTPError SendTiming DNSTiming ConnectTiming ResponseStartTiming ResponseEndTiming FetchTiming SocialSourceNetworkID SocialSourcePage ParamPrice ParamOrderID ParamCurrency ParamCurrencyID OpenstatServiceName OpenstatCampaignID OpenstatAdID OpenstatSourceID UTMSource UTMMedium UTMCampaign UTMContent UTMTerm FromTag HasGCLID RefererHash URLHash' - - -tableSpec: - tableName: 'hits' - schemaURI: 'http://localhost:9000/tables/hits/schema' - tableConfigURI: 'http://localhost:9000/tables/hits' - -pinotClusterSpecs: - - controllerURI: 'http://localhost:9000' - -pushJobSpec: - pushAttempts: 10 - pushRetryIntervalMillis: 1000 diff --git a/benchmark/postgresql/benchmark.sh b/benchmark/postgresql/benchmark.sh deleted file mode 100755 index 13459abfe8a..00000000000 --- a/benchmark/postgresql/benchmark.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt-get install -y postgresql-common -sudo apt-get install -y postgresql-14 - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz -chmod 777 ~ hits.tsv - -sudo -u postgres psql -t -c 'CREATE DATABASE test' -sudo -u postgres psql test -t < create.sql -sudo -u postgres psql test -t -c '\timing' -c "\\copy hits FROM 'hits.tsv'" - -# COPY 99997497 -# Time: 2341543.463 ms (39:01.543) - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/postgresql/14/main/ - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/postgresql/create.sql b/benchmark/postgresql/create.sql deleted file mode 100644 index 1850bffedce..00000000000 --- a/benchmark/postgresql/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/postgresql/queries.sql b/benchmark/postgresql/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/postgresql/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/postgresql/results/c6a.4xlarge.json b/benchmark/postgresql/results/c6a.4xlarge.json deleted file mode 100644 index 0a0a6731021..00000000000 --- a/benchmark/postgresql/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "PostgreSQL", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "row-oriented", "PostgreSQL compatible"], - - "load_time": 2342, - "data_size": 77797067741, - - "result": [ -[439.753,309.785,282.017], -[317.874,254.238,254.941], -[262.883,263.072,263.090], -[32.421,5.310,5.060], -[57.134,42.648,42.334], -[358.423,356.315,358.342], -[31.524,5.350,4.994], -[263.145,263.193,263.165], -[323.659,322.858,321.918], -[327.395,326.170,326.231], -[265.983,265.681,265.912], -[269.984,265.336,265.379], -[284.096,284.560,282.234], -[277.250,279.455,280.035], -[285.660,286.200,283.611], -[66.605,32.023,38.282], -[312.452,304.431,305.391], -[289.209,290.449,287.578], -[331.706,327.485,334.428], -[24.646,2.543,2.263], -[267.561,267.496,267.524], -[267.729,267.690,268.184], -[263.074,263.120,267.040], -[267.602,267.488,267.494], -[263.141,263.859,263.137], -[262.923,263.102,263.113], -[262.885,263.088,263.114], -[267.864,269.127,268.204], -[303.376,306.925,308.664], -[263.221,263.119,263.148], -[270.814,270.575,270.294], -[278.342,275.925,276.224], -[584.599,576.932,591.502], -[462.576,446.962,439.779], -[429.930,417.696,416.704], -[296.875,297.283,295.140], -[3.461,0.842,0.794], -[2.179,0.564,0.558], -[2.258,0.566,0.416], -[2.805,1.311,1.317], -[2.936,0.820,0.615], -[2.197,0.736,0.535], -[1.983,0.320,0.312] -] -} diff --git a/benchmark/postgresql/run.sh b/benchmark/postgresql/run.sh deleted file mode 100755 index ad95151ab55..00000000000 --- a/benchmark/postgresql/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - sudo -u postgres psql test -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/questdb/benchmark.sh b/benchmark/questdb/benchmark.sh deleted file mode 100755 index dce8569e927..00000000000 --- a/benchmark/questdb/benchmark.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/bin/bash - -# Install - -wget https://github.com/questdb/questdb/releases/download/6.4.1/questdb-6.4.1-rt-linux-amd64.tar.gz -tar xf questdb*.tar.gz -questdb-6.4.1-rt-linux-amd64/bin/questdb.sh start - -# Import the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -curl -G --data-urlencode "query=$(cat create.sql)" 'http://localhost:9000/exec?timings=true' -time curl -F data=@hits.csv 'http://localhost:9000/imp?name=hits' - -# 27m 47.546s - -sed -i 's/query.timeout.sec=60/query.timeout.sec=6000/' .questdb/conf/server.conf -questdb-6.4.1-rt-linux-amd64/bin/questdb.sh stop -questdb-6.4.1-rt-linux-amd64/bin/questdb.sh start - -./run.sh 2>&1 | tee log.txt - -du -bcs .questdb/db/hits - -cat log.txt | grep -P '"timings"|"error"|null' | sed -r -e 's/^.*"error".*$/null/; s/^.*"compiler":([0-9]*),"execute":([0-9]*),.*$/\1 \2/' | - awk '{ print ($1 + $2) / 1000000000 }' | sed -r -e 's/^0$/null/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/questdb/create.sql b/benchmark/questdb/create.sql deleted file mode 100644 index 5c0f777a8b2..00000000000 --- a/benchmark/questdb/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID long, - JavaEnable int, - Title string, - GoodEvent int, - EventTime timestamp, - Eventdate date, - CounterID int, - ClientIP int, - RegionID int, - UserID long, - CounterClass int, - OS int, - UserAgent int, - URL string, - Referer string, - IsRefresh int, - RefererCategoryID int, - RefererRegionID int, - URLCategoryID int, - URLRegionID int, - ResolutionWidth int, - ResolutionHeight int, - ResolutionDepth int, - FlashMajor int, - FlashMinor int, - FlashMinor2 string, - NetMajor int, - NetMinor int, - UserAgentMajor int, - UserAgentMinor string, - CookieEnable int, - JavascriptEnable int, - IsMobile int, - MobilePhone int, - MobilePhoneModel string, - Params string, - IPNetworkID int, - TraficSourceID int, - SearchEngineID int, - SearchPhrase string, - AdvEngineID int, - IsArtifical int, - WindowClientWidth int, - WindowClientHeight int, - ClientTimeZone int, - ClientEventTime timestamp, - SilverlightVersion1 int, - SilverlightVersion2 int, - SilverlightVersion3 int, - SilverlightVersion4 int, - PageCharset string, - CodeVersion int, - IsLink int, - IsDownload int, - IsNotBounce int, - FUniqID long, - OriginalURL string, - HID int, - IsOldCounter int, - IsEvent int, - IsParameter int, - DontCountHits int, - WithHash int, - HitColor string, - LocalEventTime timestamp, - Age int, - Sex int, - Income int, - Interests int, - Robotness int, - RemoteIP int, - WindowName int, - OpenerName int, - HistoryLength int, - BrowserLanguage string, - BrowserCountry string, - SocialNetwork string, - SocialAction string, - HTTPError int, - SendTiming int, - DNSTiming int, - ConnectTiming int, - ResponseStartTiming int, - ResponseEndTiming int, - FetchTiming int, - SocialSourceNetworkID int, - SocialSourcePage string, - ParamPrice long, - ParamOrderID string, - ParamCurrency string, - ParamCurrencyID int, - OpenstatServiceName string, - OpenstatCampaignID string, - OpenstatAdID string, - OpenstatSourceID string, - UTMSource string, - UTMMedium string, - UTMCampaign string, - UTMContent string, - UTMTerm string, - FromTag string, - HasGCLID int, - RefererHash long, - URLHash long, - CLID int -); diff --git a/benchmark/questdb/queries.sql b/benchmark/questdb/queries.sql deleted file mode 100644 index 5ddf9fa318c..00000000000 --- a/benchmark/questdb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT count_distinct(CAST(UserID AS string)) FROM hits; -SELECT count_distinct(SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) AS c FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY c DESC; -SELECT RegionID, count_distinct(CAST(UserID AS string)) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), count_distinct(CAST(UserID AS string)) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, count_distinct(CAST(UserID AS string)) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, count_distinct(CAST(UserID AS string)) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, count_distinct(CAST(UserID AS string)) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) AS c FROM hits GROUP BY UserID ORDER BY c DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) AS c FROM hits GROUP BY UserID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) AS c FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, count_distinct(CAST(UserID AS string)) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000, 10; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000, 10; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100, 10; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-01T00:00:00Z' AND EventTime <= '2013-07-31T23:59:59Z' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 100000, 10; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventTime >= '2013-07-14T00:00:00Z' AND EventTime <= '2013-07-15T23:59:59Z' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY M LIMIT 1000, 10; diff --git a/benchmark/questdb/results/c6a.4xlarge.json b/benchmark/questdb/results/c6a.4xlarge.json deleted file mode 100644 index 95b9c382464..00000000000 --- a/benchmark/questdb/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "QuestDB", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Many queries cannot run. It also crashes and hangs.", - - "tags": ["Java", "time-series"], - - "load_time": 1667.5, - "data_size": 126680518680, - - "result": [ -[0.0155878,0.000283657,0.000328587], -[0.586734,0.0486348,0.0485346], -[6.41881,5.57954,6.25638], -[6.8554,3.11997,3.11813], -[47.8723,54.5425,53.1198], -[null,null,null], -[5.63439,3.70619,3.92043], -[0.699764,0.130462,0.126272], -[55.7179,62.8433,62.0045], -[null,59.6416,64.7753], -[63.7134,null,39.8928], -[46.759,46.8544,null], -[16.7181,10.9064,10.0735], -[79.9885,null,52.5617], -[16.1015,12.1835,13.1469], -[17.0225,11.7342,10.6194], -[24.1167,15.6749,16.8193], -[23.0848,15.3149,17.375], -[42.0965,26.2421,25.7593], -[0.0356335,0.027459,0.0289404], -[70.7679,71.5825,71.4573], -[null,null,null], -[null,null,null], -[77.0625,13.2257,12.7578], -[11.0775,2.90421,2.33398], -[3.31611,3.38837,3.35419], -[3.13233,5.2785,3.07075], -[null,null,null], -[null,null,null], -[186.032,185.627,180.963], -[26.7279,16.4799,18.8758], -[63.8785,32.1097,32.1561], -[64.4635,31.4538,35.654], -[118.897,null,119.015], -[null,null,null], -[19.6853,17.4427,16.7998], -[1.50985,0.877967,0.885536], -[0.805639,0.577352,0.534731], -[0.509284,0.448942,0.467679], -[1.37609,0.966942,0.912858], -[0.567887,0.394619,0.438952], -[0.508977,0.441015,0.40528], -[null,null,null] -] -} diff --git a/benchmark/questdb/run.sh b/benchmark/questdb/run.sh deleted file mode 100755 index a9c27c952c6..00000000000 --- a/benchmark/questdb/run.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -TRIES=3 - -questdb-6.4.1-rt-linux-amd64/bin/questdb.sh stop -questdb-6.4.1-rt-linux-amd64/bin/questdb.sh start -sleep 5 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - curl -sS --max-time 6000 -G --data-urlencode "query=${query}" 'http://localhost:9000/exec?timings=true' 2>&1 | grep '"timings"' || - (questdb-6.4.1-rt-linux-amd64/bin/questdb.sh stop >/dev/null 2>&1; questdb-6.4.1-rt-linux-amd64/bin/questdb.sh start >/dev/null 2>&1; sleep 5; echo 'null') - echo - done; -done; diff --git a/benchmark/redshift-serverless/README.md b/benchmark/redshift-serverless/README.md deleted file mode 100644 index 59aec7496cd..00000000000 --- a/benchmark/redshift-serverless/README.md +++ /dev/null @@ -1,55 +0,0 @@ -This benchmark is not automated. - -Go to AWS Redshift service. -Try Redshift Serverless. Use the default configuration. -The cluster will take a few minutes to start. -Go to "Query Editor". Establishing a connection takes around 10 seconds. - -Run the CREATE TABLE statement you find in `create.sql`. - -Note: Redshift prefers VARCHAR(MAX) instead of TEXT. - -Then press on the "Load data". -This will generate a statement: - -``` -COPY dev.public.hits FROM 's3://clickhouse-public-datasets/hits_compatible/hits.csv.gz' GZIP -IAM_ROLE 'arn:aws:iam::...:role/service-role/AmazonRedshift-CommandsAccessRole-...' -FORMAT AS CSV DELIMITER ',' QUOTE '"' -REGION AS 'eu-central-1' -``` - -> Elapsed time: 32m 13.7s - -It also have run 2380 "queries" for this task. - -Namespace configuration, -General Information, Storage used: - -30.3 GB - -Change admin user password: -dev, fGH4{dbas7 - -It's very difficult to find how to connect to it: -https://docs.aws.amazon.com/redshift/latest/mgmt/serverless-connecting.html - -We will run the queries from another server with `psql` client. - -``` -sudo apt-get install -y postgresql-client - -echo "*:*:*:*:your_password" > .pgpass -chmod 400 .pgpass - -psql -h default.111111111111.eu-central-1.redshift-serverless.amazonaws.com -U dev -d dev -p 5439 -``` - -Then run the benchmark: -``` -export HOST=... -./run.sh 2>&1 | tee log.txt - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms|ERROR' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if ($1 == "ERROR") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : ($1 / 1000); if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' -``` diff --git a/benchmark/redshift-serverless/create.sql b/benchmark/redshift-serverless/create.sql deleted file mode 100644 index 94a8fb0958a..00000000000 --- a/benchmark/redshift-serverless/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title VARCHAR(MAX) NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL VARCHAR(MAX) NOT NULL, - Referer VARCHAR(MAX) NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 VARCHAR(MAX) NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(MAX) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel VARCHAR(MAX) NOT NULL, - Params VARCHAR(MAX) NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase VARCHAR(MAX) NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset VARCHAR(MAX) NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL VARCHAR(MAX) NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor VARCHAR(MAX) NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage VARCHAR(MAX) NOT NULL, - BrowserCountry VARCHAR(MAX) NOT NULL, - SocialNetwork VARCHAR(MAX) NOT NULL, - SocialAction VARCHAR(MAX) NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage VARCHAR(MAX) NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID VARCHAR(MAX) NOT NULL, - ParamCurrency VARCHAR(MAX) NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName VARCHAR(MAX) NOT NULL, - OpenstatCampaignID VARCHAR(MAX) NOT NULL, - OpenstatAdID VARCHAR(MAX) NOT NULL, - OpenstatSourceID VARCHAR(MAX) NOT NULL, - UTMSource VARCHAR(MAX) NOT NULL, - UTMMedium VARCHAR(MAX) NOT NULL, - UTMCampaign VARCHAR(MAX) NOT NULL, - UTMContent VARCHAR(MAX) NOT NULL, - UTMTerm VARCHAR(MAX) NOT NULL, - FromTag VARCHAR(MAX) NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/redshift-serverless/queries.sql b/benchmark/redshift-serverless/queries.sql deleted file mode 100644 index 3a5a4145464..00000000000 --- a/benchmark/redshift-serverless/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(www\.)?([^/]+)/.*$', '\2') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/redshift-serverless/results/serverless.json b/benchmark/redshift-serverless/results/serverless.json deleted file mode 100644 index 3ba94e542dc..00000000000 --- a/benchmark/redshift-serverless/results/serverless.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Redshift", - "date": "2022-07-01", - "machine": "serverless", - "cluster_size": "serverless", - "comment": "One query did not run due to overflow check and another due to missing regexp function.", - - "tags": ["managed", "column-oriented"], - - "load_time": 1933, - "data_size": 30300000000, - - "result": [ -[0.436955,0.390225,0.387823], -[2.74529,0.280571,0.198074], -[7.24569,0.295612,0.386568], -[null,null,null], -[3.98825,0.488446,0.325645], -[4.24601,0.538891,0.690124], -[7.25492,0.288505,0.207515], -[5.99594,0.212732,0.281278], -[6.17534,0.681868,0.586073], -[6.37779,1.37865,1.31614], -[6.41405,0.549735,0.421345], -[6.64688,0.445678,0.477323], -[4.2294,0.361772,0.520471], -[4.73701,0.788258,0.743465], -[4.49038,0.670446,0.52727], -[6.19886,0.663381,0.583588], -[5.9867,0.679607,0.672772], -[2.76661,0.567555,0.51494], -[6.20219,0.816422,0.760568], -[2.51526,0.053,0.025066], -[2.96003,0.282737,0.226979], -[6.79648,0.57495,0.400798], -[6.37854,0.92746,1.05793], -[25.8462,0.902664,0.905365], -[5.00521,0.247895,0.308836], -[6.38373,0.308781,0.244082], -[4.25427,0.229966,0.247201], -[5.98382,0.398218,0.455249], -[null,null,null], -[6.52367,1.35877,1.30562], -[7.36935,0.536226,0.582304], -[7.05948,0.621982,0.639653], -[4.62901,0.954522,0.908651], -[4.95273,1.03062,1.10289], -[4.71404,1.06378,1.04157], -[4.8201,0.499996,0.575546], -[5.32757,0.566517,0.562058], -[5.37681,0.626458,0.658628], -[5.23137,0.470622,0.540079], -[6.09326,0.561312,0.574978], -[4.86561,0.595546,0.534209], -[4.34256,0.433804,0.414541], -[4.19814,0.288269,0.415328] -] -} diff --git a/benchmark/redshift-serverless/run.sh b/benchmark/redshift-serverless/run.sh deleted file mode 100755 index 07a47eda1ab..00000000000 --- a/benchmark/redshift-serverless/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -h "${HOST}" -U awsuser -d dev -p 5439 -t -c 'SET enable_result_cache_for_session = off' -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/redshift/README.md b/benchmark/redshift/README.md deleted file mode 100644 index e8d56840ae2..00000000000 --- a/benchmark/redshift/README.md +++ /dev/null @@ -1,56 +0,0 @@ -This benchmark is not automated. - -Go to AWS Redshift service. -Create a cluster. Note: this is a classic Redshift, not "serverless". - -Choose the node type and cluster size. -I've selected 4 nodes of ra3.xplus 4vCPU to get 16vCPU in total. - -Set up some password for the admin user. -The cluster will take a few minutes to start. - -We need to perform two modifications: -1. Allow inbound access. Go to VPC and edit the security group. Modify inbound rules. Allow connections from any IPv4 to port 5439. -2. Add IAM role. Just create something by default. - -To create a table, you can go to the Query Editor v2. -Open the "dev" database. -Run the CREATE TABLE statement you find in `create.sql`. - -Note: Redshift prefers VARCHAR(MAX) instead of TEXT. - -Then press on the "Load data". -This will generate a statement: - -``` -COPY dev.public.hits FROM 's3://clickhouse-public-datasets/hits_compatible/hits.csv.gz' GZIP -IAM_ROLE 'arn:aws:iam::...:role/service-role/AmazonRedshift-CommandsAccessRole-...' -FORMAT AS CSV DELIMITER ',' QUOTE '"' -REGION AS 'eu-central-1' -``` - -> Elapsed time: 35m 35.9s - -We will run the queries from another server with `psql` client. - -``` -sudo apt-get install -y postgresql-client - -echo "*:*:*:*:your_password" > .pgpass -chmod 400 .pgpass - -psql -h redshift-cluster-1.chedgchbam32.eu-central-1.redshift.amazonaws.com -U awsuser -d dev -p 5439 -``` - -Then run the benchmark: -``` -export HOST=... -./run.sh 2>&1 | tee log.txt - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms|ERROR' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if ($1 == "ERROR") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : ($1 / 1000); if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' -``` - -`SELECT sum(used * 1048576) FROM stv_node_storage_capacity` - -> 30 794 579 968 diff --git a/benchmark/redshift/create.sql b/benchmark/redshift/create.sql deleted file mode 100644 index 94a8fb0958a..00000000000 --- a/benchmark/redshift/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title VARCHAR(MAX) NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL VARCHAR(MAX) NOT NULL, - Referer VARCHAR(MAX) NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 VARCHAR(MAX) NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(MAX) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel VARCHAR(MAX) NOT NULL, - Params VARCHAR(MAX) NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase VARCHAR(MAX) NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset VARCHAR(MAX) NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL VARCHAR(MAX) NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor VARCHAR(MAX) NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage VARCHAR(MAX) NOT NULL, - BrowserCountry VARCHAR(MAX) NOT NULL, - SocialNetwork VARCHAR(MAX) NOT NULL, - SocialAction VARCHAR(MAX) NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage VARCHAR(MAX) NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID VARCHAR(MAX) NOT NULL, - ParamCurrency VARCHAR(MAX) NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName VARCHAR(MAX) NOT NULL, - OpenstatCampaignID VARCHAR(MAX) NOT NULL, - OpenstatAdID VARCHAR(MAX) NOT NULL, - OpenstatSourceID VARCHAR(MAX) NOT NULL, - UTMSource VARCHAR(MAX) NOT NULL, - UTMMedium VARCHAR(MAX) NOT NULL, - UTMCampaign VARCHAR(MAX) NOT NULL, - UTMContent VARCHAR(MAX) NOT NULL, - UTMTerm VARCHAR(MAX) NOT NULL, - FromTag VARCHAR(MAX) NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/redshift/queries.sql b/benchmark/redshift/queries.sql deleted file mode 100644 index 3a5a4145464..00000000000 --- a/benchmark/redshift/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(www\.)?([^/]+)/.*$', '\2') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/redshift/results/4x.ra3.xplus.json b/benchmark/redshift/results/4x.ra3.xplus.json deleted file mode 100644 index fecc515e475..00000000000 --- a/benchmark/redshift/results/4x.ra3.xplus.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Redshift", - "date": "2022-07-01", - "machine": "ra3.xplus", - "cluster_size": 4, - "comment": "One query did not run due to overflow check and another due to missing regexp function.", - - "tags": ["managed", "column-oriented"], - - "load_time": 2136, - "data_size": 30794579968, - - "result": [ -[0.081437,0.022376,0.022491], -[2.54868,0.024112,0.024072], -[2.62053,0.049948,0.049768], -[null,null,null], -[0.62714,0.447655,0.43262], -[1.14153,1.09479,1.09441], -[0.153399,0.053941,0.048224], -[3.76153,0.032235,0.030949], -[4.43439,0.746808,0.723719], -[6.13424,2.20458,2.20031], -[4.35338,0.220122,0.228684], -[5.15139,0.231856,0.230993], -[5.00006,1.10841,1.12871], -[5.80125,1.86531,1.90209], -[5.16246,1.21239,1.14848], -[0.579428,0.535859,0.546178], -[6.05764,2.17455,2.18286], -[5.86612,2.52634,2.48672], -[7.80075,3.35512,3.41153], -[2.54112,0.036378,0.035944], -[6.47189,2.26909,2.2673], -[6.95344,2.61929,2.65637], -[12.9508,6.85457,6.99], -[25.3022,11.8857,11.8493], -[4.37592,0.452737,0.452867], -[4.17199,0.469457,0.476302], -[6.24746,0.470935,0.482502], -[6.00065,2.08332,2.08059], -[null,null,null], -[22.2567,18.5376,18.3441], -[5.37492,0.868068,0.849486], -[7.29067,1.06155,1.11209], -[7.7832,4.07132,4.00384], -[8.95385,5.15488,5.21863], -[9.2232,5.32052,5.73207], -[4.99205,0.664347,0.618918], -[6.82279,1.89738,1.89398], -[7.09077,2.19008,2.26612], -[5.29731,0.19626,0.204603], -[6.07138,0.276315,0.267161], -[4.03108,0.054134,0.058568], -[4.35647,0.061157,0.053367], -[5.23605,0.037217,0.036335] -] -} diff --git a/benchmark/redshift/run.sh b/benchmark/redshift/run.sh deleted file mode 100755 index 07a47eda1ab..00000000000 --- a/benchmark/redshift/run.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - echo "$query"; - for i in $(seq 1 $TRIES); do - psql -h "${HOST}" -U awsuser -d dev -p 5439 -t -c 'SET enable_result_cache_for_session = off' -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/singlestore/benchmark.sh b/benchmark/singlestore/benchmark.sh deleted file mode 100755 index 492a7aa1e86..00000000000 --- a/benchmark/singlestore/benchmark.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash - -# Install - -sudo apt-get update -sudo apt-get install -y docker.io - -export LICENSE_KEY="..." -export ROOT_PASSWORD="..." - -sudo docker run -i --init \ - --name memsql-ciab \ - -e LICENSE_KEY="${LICENSE_KEY}" \ - -e ROOT_PASSWORD="${ROOT_PASSWORD}" \ - -p 3306:3306 -p 8080:8080 \ - memsql/cluster-in-a-box - -sudo docker start memsql-ciab - -sudo docker exec -it memsql-ciab memsql -p"${ROOT_PASSWORD}" - -# Load the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz -sudo docker cp hits.tsv memsql-ciab:/ - -sudo docker exec -it memsql-ciab memsql -p"${ROOT_PASSWORD}" -e "CREATE DATABASE test" -sudo docker exec memsql-ciab memsql -p"${ROOT_PASSWORD}" --database=test -e "USE test; $(cat create.sql)" -time sudo docker exec -it memsql-ciab memsql -vvv -p"${ROOT_PASSWORD}" --database=test -e "LOAD DATA INFILE '/hits.tsv' INTO TABLE test.hits" - -# Query OK, 99997497 rows affected (11 min 30.11 sec) - -./run.sh 2>&1 | tee log.txt - -sudo docker exec memsql-ciab du -bcs /var/lib/memsql - -# 29836263469 bytes - -cat log.txt | - grep -P 'rows? in set|Empty set|^ERROR' | - sed -r -e 's/^ERROR.*$/null/; s/^.*?\((([0-9.]+) min )?([0-9.]+) sec\).*?$/\2 \3/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/singlestore/create.sql b/benchmark/singlestore/create.sql deleted file mode 100644 index 84dc14ea22f..00000000000 --- a/benchmark/singlestore/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - SORT KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/singlestore/queries.sql b/benchmark/singlestore/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/singlestore/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/singlestore/results/c6a.4xlarge.json b/benchmark/singlestore/results/c6a.4xlarge.json deleted file mode 100644 index d434ddafdb1..00000000000 --- a/benchmark/singlestore/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "SingleStore", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "Previous name: MemSQL. Some queries did not run due to memory limits", - - "tags": ["MySQL compatible", "column-oriented"], - - "load_time": 690, - "data_size": 29836263469, - - "result": [ -[0.09,0.00,0.00], -[0.23,0.04,0.01], -[0.47,0.15,0.15], -[0.63,0.09,0.08], -[1.65,1.23,1.20], -[7.96,2.79,2.63], -[0.10,0.00,0.00], -[0.17,0.02,0.02], -[1.90,1.40,1.39], -[4.79,3.52,3.48], -[0.94,0.22,0.23], -[0.89,0.24,0.23], -[5.82,2.26,2.25], -[6.97,4.62,4.66], -[3.05,2.28,2.31], -[3.92,2.70,2.28], -[5.83,4.48,4.42], -[4.76,4.13,4.12], -[14.03,null,null], -[0.57,0.05,0.04], -[18.18,1.74,1.94], -[20.85,2.17,0.98], -[31.98,3.12,1.22], -[78.96,3.35,108.85], -[2.82,0.39,0.32], -[1.83,0.44,0.35], -[2.81,0.33,0.32], -[18.33,2.57,1.15], -[null,null,null], -[3.56,2.40,2.40], -[3.83,1.11,1.11], -[7.35,1.73,1.70], -[null,null,null], -[null,null,null], -[null,null,null], -[2.53,1.92,1.84], -[0.92,0.23,0.19], -[0.84,0.15,0.08], -[0.70,0.05,0.05], -[3.12,0.38,0.36], -[0.29,0.03,0.03], -[0.22,0.06,0.02], -[0.27,0.11,0.12] -] -} diff --git a/benchmark/singlestore/run.sh b/benchmark/singlestore/run.sh deleted file mode 100755 index 16cc104e712..00000000000 --- a/benchmark/singlestore/run.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - for i in $(seq 1 $TRIES); do - sudo docker exec memsql-ciab memsql -vvv -p"${ROOT_PASSWORD}" --database=test -e "USE test; ${query}" - done; -done; diff --git a/benchmark/snowflake/NOTES.md b/benchmark/snowflake/NOTES.md deleted file mode 100644 index c7159dc0c2b..00000000000 --- a/benchmark/snowflake/NOTES.md +++ /dev/null @@ -1,67 +0,0 @@ -The choice of a warehouse size is unclear. Let's choose X-Large by default. -It is using "credits" for pricing. - -Storage cost: $23 USD per compressed TB per month -One credit is: $2.016/hour -X-Large: 16 credits/hour = $32/hour - -It is very expensive, so let's touch it with a ten-foot pole and run away as quickly as possible. - -Set up SnowSQL. - -``` -curl -O https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-1.2.22-linux_x86_64.bash -bash snowsql-1.2.22-linux_x86_64.bash -source .profile -``` - -``` -snowsql -a HA12345 -u USER -``` - -It does not connect after typing the password. - -``` -250001 (08001): Failed to connect to DB. Verify the account name is correct: HA12345.snowflakecomputing.com:443. 000403: 403: HTTP 403: Forbidden -If the error message is unclear, enable logging using -o log_level=DEBUG and see the log to find out the cause. Contact support for further help. -Goodbye! -``` - -It said "Goodbye!" in active-aggressive tone. - -To know the account name, we have to go to the "classic console" and look at the URL in the browser. - -> https://{this}.eu-central-1.snowflakecomputing.com/console/login?disableDirectLogin=true - -But it does not help. - -It works if I specify the region in the command line. -Although `snowsql --help` saying that it is DEPRECATED. - -``` -snowsql -a nn12345 -u USER --region eu-central-1 --schemaname PUBLIC --dbname TEST --warehouse TEST -``` - -Notes: SnowSQL is using autocomplete using well known Python library. -Autocomplete is not context-aware. - -Upload the data: - -``` -put file:///home/ubuntu/hits.csv @test.public.%hits -``` - -The syntax is strange (all these @%#). -The query hung and did nothing. - -Actually it is not hung. The snowsql is using 100% to parse CSV in Python for hours. - -Let's try a different upload method. - -``` -COPY INTO test.public.hits2 FROM 's3://clickhouse-public-datasets/hits_compatible/hits.csv.gz' FILE_FORMAT = (TYPE = CSV, COMPRESSION = GZIP, FIELD_OPTIONALLY_ENCLOSED_BY = '"') -``` - -For some reason, it has selected X-Small warehouse, will need to change to X-Large. - -42 min 4 sec. diff --git a/benchmark/snowflake/README.md b/benchmark/snowflake/README.md deleted file mode 100644 index c2de02dded7..00000000000 --- a/benchmark/snowflake/README.md +++ /dev/null @@ -1,56 +0,0 @@ -Snowflake recently removed the DeWitt Clause, so we are allowed to make benchmarks. - -> Customer may conduct benchmark tests of the Service (each a “Test”). Other than with respect to Tests involving Previews, which may not be disclosed externally, Customer may externally disclose a Test or otherwise cause the results of a Test to be externally disclosed if it includes as part of the disclosure all information necessary to replicate the Test. - -https://www.snowflake.com/legal/acceptable-use-policy/ - -Account setup took only 3 seconds. - -Data -> Databases -> + Database -Database 'test' created. -Press on "public" schema. - -Create table "standard". -Paste "create.sql". -Press on "create table" again. - -Press on "admin", "warehouses", + Warehouse -The choice of a warehouse size is unclear. Let's choose X-Large by default. -It is using "credits" for pricing. - -Set up SnowSQL. - -``` -curl -O https://sfc-repo.snowflakecomputing.com/snowsql/bootstrap/1.2/linux_x86_64/snowsql-1.2.22-linux_x86_64.bash -bash snowsql-1.2.22-linux_x86_64.bash -source .profile -``` - -Upload the data: - -``` -COPY INTO test.public.hits2 FROM 's3://clickhouse-public-datasets/hits_compatible/hits.csv.gz' FILE_FORMAT = (TYPE = CSV, COMPRESSION = GZIP, FIELD_OPTIONALLY_ENCLOSED_BY = '"') -``` - -42 min 4 sec. - -``` -export SNOWSQL_PWD='...' SNOWSQL_ACCOUNT='...' SNOWSQL_USER='myuser' - -snowsql --region eu-central-1 --schemaname PUBLIC --dbname HITS --warehouse TEST --query "SELECT 1" -``` - -Before the benchmark: -``` -ALTER USER myuser SET USE_CACHED_RESULT = false; -``` - -Run the benchmark: -``` -./run.sh 2>&1 | tee log.txt - -cat log.txt | - grep -P 'Time Elapsed|^\d+ \(\w+\):' | - sed -r -e 's/^[0-9]+ \([0-9A-Za-z]+\):.*$/null/; s/^.*Time Elapsed:\s*([0-9.]+)s$/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' -``` diff --git a/benchmark/snowflake/create.sql b/benchmark/snowflake/create.sql deleted file mode 100644 index 62800d2e1b2..00000000000 --- a/benchmark/snowflake/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits2 -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/snowflake/queries.sql b/benchmark/snowflake/queries.sql deleted file mode 100644 index d188fd36e25..00000000000 --- a/benchmark/snowflake/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM test.public.hits2; -SELECT COUNT(*) FROM test.public.hits2 WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM test.public.hits2; -SELECT AVG(UserID) FROM test.public.hits2; -SELECT COUNT(DISTINCT UserID) FROM test.public.hits2; -SELECT COUNT(DISTINCT SearchPhrase) FROM test.public.hits2; -SELECT MIN(EventDate), MAX(EventDate) FROM test.public.hits2; -SELECT AdvEngineID, COUNT(*) FROM test.public.hits2 WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM test.public.hits2 GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM test.public.hits2 GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM test.public.hits2 WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM test.public.hits2 WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM test.public.hits2 WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM test.public.hits2 WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM test.public.hits2 WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM test.public.hits2 GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM test.public.hits2 GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM test.public.hits2 GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM test.public.hits2 GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM test.public.hits2 WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM test.public.hits2 WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM test.public.hits2 WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM test.public.hits2 WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM test.public.hits2 WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM test.public.hits2 WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM test.public.hits2 WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM test.public.hits2 WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM test.public.hits2 WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(www\.)?([^/]+)/.*$', '\2') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM test.public.hits2 WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM test.public.hits2; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.public.hits2 WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.public.hits2 WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM test.public.hits2 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM test.public.hits2 GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM test.public.hits2 GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM test.public.hits2 GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM test.public.hits2 WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/snowflake/results/2xl.json b/benchmark/snowflake/results/2xl.json deleted file mode 100644 index 294eb66e65e..00000000000 --- a/benchmark/snowflake/results/2xl.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "2XL", - "cluster_size": 32, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.177,0.052,0.090], -[0.903,0.324,0.782], -[0.458,2.909,0.275], -[0.881,0.316,0.166], -[0.404,0.257,0.256], -[0.481,0.325,0.339], -[0.056,0.062,0.060], -[0.183,0.324,0.280], -[0.444,0.314,0.320], -[0.408,0.426,0.417], -[0.345,0.241,0.253], -[0.406,0.245,0.235], -[0.521,1.259,0.326], -[0.466,0.493,0.526], -[0.447,0.349,0.362], -[0.327,0.322,0.302], -[0.462,0.508,0.461], -[0.489,0.481,0.455], -[0.731,0.659,0.674], -[0.151,0.156,0.160], -[0.832,0.339,0.312], -[0.289,0.292,0.340], -[0.591,0.484,0.419], -[2.661,0.716,0.696], -[0.190,0.199,0.377], -[0.181,0.182,0.194], -[0.220,0.209,0.195], -[0.368,0.330,0.347], -[0.677,0.645,0.646], -[0.877,0.886,0.871], -[0.415,0.282,0.301], -[1.265,0.404,0.550], -[0.786,0.750,0.757], -[0.905,0.835,0.841], -[0.864,0.865,0.823], -[0.352,0.360,0.364], -[0.201,0.186,0.200], -[0.143,0.137,0.276], -[0.290,0.132,0.146], -[0.310,0.301,0.304], -[0.212,0.197,0.170], -[0.223,0.174,0.177], -[0.172,0.172,0.172] -] -} diff --git a/benchmark/snowflake/results/3xl.json b/benchmark/snowflake/results/3xl.json deleted file mode 100644 index 841d570b249..00000000000 --- a/benchmark/snowflake/results/3xl.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "3XL", - "cluster_size": 64, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.165,0.061,0.049], -[1.356,1.252,1.502], -[1.287,0.470,0.325], -[0.627,0.331,0.181], -[0.265,0.265,0.483], -[0.887,0.450,0.298], -[0.054,0.066,0.062], -[0.182,0.222,0.194], -[0.408,0.438,0.319], -[0.434,0.462,0.411], -[1.357,0.247,0.412], -[0.343,0.250,0.517], -[0.273,0.289,0.600], -[0.404,0.405,0.392], -[0.379,0.304,0.455], -[0.275,0.271,0.264], -[0.418,0.386,0.388], -[0.417,0.434,0.567], -[0.753,0.759,0.506], -[0.291,0.307,0.400], -[0.954,0.423,0.280], -[0.568,0.562,0.248], -[0.568,0.477,0.496], -[1.458,0.492,0.514], -[0.179,0.300,0.184], -[0.165,0.169,0.176], -[0.197,0.186,0.190], -[0.289,0.547,0.397], -[0.513,0.544,0.632], -[0.766,0.754,0.775], -[0.389,0.374,0.383], -[0.484,0.297,0.286], -[0.505,0.505,0.734], -[0.656,0.598,0.621], -[0.634,0.646,0.609], -[0.309,0.298,0.370], -[0.192,0.219,0.212], -[0.840,0.174,0.139], -[0.172,0.163,0.151], -[0.323,0.296,0.347], -[0.200,0.154,0.144], -[0.191,0.121,0.125], -[0.137,0.233,0.148] -] -} diff --git a/benchmark/snowflake/results/4xl.json b/benchmark/snowflake/results/4xl.json deleted file mode 100644 index 48c671277d4..00000000000 --- a/benchmark/snowflake/results/4xl.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "4XL", - "cluster_size": 128, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.164,0.061,0.078], -[2.471,2.436,1.927], -[1.656,0.222,0.639], -[0.336,1.244,0.206], -[0.435,0.414,0.373], -[0.520,0.495,0.326], -[0.052,0.051,0.057], -[0.244,0.515,0.358], -[0.473,0.477,0.659], -[0.706,0.523,0.499], -[0.619,0.361,0.303], -[0.463,0.367,0.290], -[0.385,0.319,0.376], -[0.661,0.436,0.452], -[0.446,0.317,0.336], -[0.504,0.283,0.395], -[0.478,1.395,0.350], -[0.555,1.041,0.993], -[0.565,0.558,1.473], -[0.310,0.684,0.278], -[0.637,1.202,0.249], -[0.467,0.294,0.436], -[0.671,0.478,0.611], -[6.262,0.449,0.425], -[0.476,0.213,0.184], -[0.552,0.241,0.201], -[0.458,0.415,0.402], -[0.339,0.357,0.322], -[0.732,0.549,0.483], -[0.914,0.757,0.743], -[0.718,0.310,0.606], -[0.605,0.363,0.425], -[0.468,0.860,0.784], -[0.868,1.787,0.713], -[0.807,0.691,0.544], -[0.485,0.338,0.672], -[0.263,0.221,0.230], -[0.147,0.145,0.139], -[0.135,0.239,0.136], -[0.322,0.378,0.348], -[0.236,0.138,0.132], -[0.193,0.124,0.139], -[0.146,0.145,0.139] -] -} diff --git a/benchmark/snowflake/results/l.json b/benchmark/snowflake/results/l.json deleted file mode 100644 index 05fd7793646..00000000000 --- a/benchmark/snowflake/results/l.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "L", - "cluster_size": 8, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.067,0.054,0.051], -[1.158,1.409,0.190], -[1.470,0.566,0.715], -[0.892,0.534,0.264], -[0.474,0.499,0.489], -[0.950,0.650,0.699], -[0.069,0.055,0.110], -[0.317,0.307,0.321], -[0.949,0.593,0.654], -[0.713,1.099,0.860], -[0.622,0.370,0.404], -[0.457,0.369,0.414], -[0.610,0.566,0.653], -[0.970,1.006,0.976], -[1.517,0.636,0.603], -[0.532,0.541,0.533], -[1.018,1.001,1.022], -[0.942,0.996,0.940], -[2.246,1.596,1.560], -[0.181,0.184,0.200], -[1.135,0.788,0.609], -[0.669,0.528,0.524], -[1.164,0.827,0.882], -[3.545,2.214,2.107], -[0.559,0.431,0.426], -[0.340,0.296,0.383], -[0.695,0.314,0.368], -[0.628,0.658,0.637], -[1.511,1.385,1,440], -[1.390,1.418,1.322], -[1.107,0.687,0.537], -[1.026,0.737,0.659], -[1.712,1.681,1.728], -[2.141,2.130,2.225], -[2.163,2.157,2.110], -[0.650,0.619,0.627], -[0.204,0.195,0.225], -[0.159,0.152,0.156], -[0.146,0.136,0.150], -[0.359,0.290,0.364], -[0.196,0.129,0.227], -[0.201,0.128,0.143], -[0.176,0.129,0.146] -] -} diff --git a/benchmark/snowflake/results/m.json b/benchmark/snowflake/results/m.json deleted file mode 100644 index fb98b643c9b..00000000000 --- a/benchmark/snowflake/results/m.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "M", - "cluster_size": 4, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.054,0.062,0.064], -[0.698,0.563,0.407], -[0.586,0.412,0.266], -[1.386,0.363,0.301], -[0.814,0.825,0.984], -[1.303,1.024,1.048], -[0.069,0.066,0.062], -[0.334,0.254,0.270], -[1.043,0.952,0.993], -[1.210,1.209,1.171], -[0.667,0.483,0.456], -[0.543,0.495,0.500], -[1.005,0.889,0.888], -[1.644,1.646,1.652], -[1.054,1.044,0.966], -[0.893,0.874,0.907], -[1.737,1.779,1.837], -[1.518,1.539,1.526], -[3.082,2.818,2.842], -[0.309,0.286,0.256], -[1.594,1.017,0.993], -[0.781,0.853,0.735], -[1.461,1.226,1.080], -[5.308,2.974,2.642], -[0.511,0.625,0.467], -[0.405,0.382,0.439], -[0.601,0.535,0.471], -[0.947,1.624,1.192], -[2.631,2.486,2.490], -[1.938,1.960,1.954], -[1.930,0.830,0.835], -[1.359,1.140,1.062], -[3.599,3.623,3.621], -[3.619,3.741,3.663], -[3.725,3.614,3.786], -[1.149,1.126,1.055], -[0.202,0.207,0.196], -[0.152,0.139,0.145], -[0.149,0.144,0.148], -[0.383,0.287,0.294], -[0.203,0.137,0.119], -[0.200,0.312,0.137], -[0.149,0.130,0.214] -] -} diff --git a/benchmark/snowflake/results/s.json b/benchmark/snowflake/results/s.json deleted file mode 100644 index 7a686b95204..00000000000 --- a/benchmark/snowflake/results/s.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "S", - "cluster_size": 2, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.186,0.060,0.062], -[0.980,0.574,0.311], -[0.977,0.554,0.426], -[0.686,0.573,0.404], -[1.386,1.384,1.349], -[1.871,1.697,1.704], -[0.052,0.059,0.227], -[0.309,0.536,0.508], -[1.768,1.631,1.635], -[2.039,2.219,1.908], -[0.807,0.647,0.587], -[0.763,0.690,0.631], -[1.403,1.586,1.404], -[2.593,2.584,2.554], -[1.670,1.538,1.653], -[1.659,1.509,1.514], -[2.875,2.990,2.998], -[2.605,2.549,2.598], -[6.120,5.894,5.766], -[0.320,0.431,0.416], -[2.406,1.703,1.609], -[1.189,1.186,1.163], -[2.104,1.441,1.370], -[7.144,5.174,4.139], -[0.839,0.659,0.641], -[0.527,0.518,0.509], -[0.633,0.621,0.695], -[1.491,1.509,1.514], -[4.848,4.485,4.571], -[3.067,3.106,3.098], -[1.521,1.224,1.236], -[1.839,1.690,1.497], -[5.692,5.751,6.087], -[6.733,6.755,6.712], -[6.722,6.709,6.676], -[1.704,1.686,1.676], -[0.203,0.231,0.218], -[0.151,0.134,0.214], -[0.140,0.156,0.163], -[0.317,0.328,0.319], -[0.166,0.133,0.141], -[0.166,0.120,0.140], -[0.120,0.119,0.126] -] -} diff --git a/benchmark/snowflake/results/xl.json b/benchmark/snowflake/results/xl.json deleted file mode 100644 index 9b417b24a42..00000000000 --- a/benchmark/snowflake/results/xl.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "XL", - "cluster_size": 16, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.071,0.053,0.057], -[0.998,0.610,0.240], -[0.420,1.138,1.051], -[0.653,0.264,0.178], -[0.352,0.312,0.349], -[1.126,0.431,0.420], -[0.067,0.057,0.054], -[0.225,0.217,0.200], -[0.617,0.366,0.371], -[1.006,0.541,0.498], -[0.463,0.425,0.293], -[0.431,0.360,0.339], -[0.392,0.371,0.386], -[0.588,0.581,0.590], -[0.634,0.414,0.400], -[0.368,0.410,0.388], -[0.594,0.639,0.663], -[0.616,0.581,0.569], -[1.092,0.933,0.901], -[0.493,0.213,0.160], -[0.886,0.480,0.442], -[0.448,0.337,0.399], -[0.840,0.572,0.505], -[2.251,1.230,0.959], -[0.295,0.253,0.241], -[0.214,0.239,0.278], -[0.261,0.232,0.314], -[0.422,0.429,0.403], -[0.892,0.934,0.883], -[1.041,1.017,1.009], -[0.715,0.442,0.363], -[0.845,0.413,0.461], -[1.101,1.085,1.102], -[1.294,1.272,1.339], -[1.839,1.327,1.241], -[0.439,0.399,0.393], -[0.199,0.211,0.190], -[0.157,0.143,0.140], -[0.145,0.157,0.141], -[0.331,0.291,0.333], -[0.173,0.214,0.138], -[0.189,0.150,0.159], -[0.135,0.149,0.138] -] -} diff --git a/benchmark/snowflake/results/xs.json b/benchmark/snowflake/results/xs.json deleted file mode 100644 index 32fbfeb0dff..00000000000 --- a/benchmark/snowflake/results/xs.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "Snowflake", - "date": "2022-07-01", - "machine": "XS", - "cluster_size": 1, - "comment": "", - - "tags": ["managed", "column-oriented"], - - "load_time": 2524, - "data_size": 12300000000, - - "result": [ -[0.169,0.055,0.056], -[1.184,0.582,0.386], -[1.350,0.560,0.568], -[1.270,0.554,0.538], -[2.516,2.564,2.506], -[2.935,2.649,2.670], -[0.052,0.050,0.064], -[0.383,0.387,0.397], -[3.249,2.993,3.014], -[3.589,3.627,3.887], -[1.243,0.986,0.966], -[1.325,1.080,1.073], -[2.038,2.046,2.035], -[3.738,3.626,3.718], -[2.318,2.159,2.176], -[2.733,2.637,2.668], -[5.607,5.683,5.667], -[3.978,3.923,3.879], -[10.085,9.871,9.844], -[0.450,0.375,0.469], -[5.474,3.103,3.060], -[2.012,1.982,1.971], -[3.365,2.471,2.501], -[11.960,10.619,9.518], -[1.074,1.059,1.026], -[0.856,0.846,0.879], -[1.100,1.085,1.083], -[3.057,3.228,3.117], -[9.406,9.019,9.158], -[6.196,6.243,6.911], -[2.906,2.343,2.017], -[2.954,2.666,2.565], -[9.459,9.565,9.557], -[9.555,9.529,9.368], -[9.409,9.185,9.294], -[2.796,2.880,2.685], -[0.299,0.249,0.262], -[0.156,0.145,0.180], -[0.147,0.146,0.160], -[0.371,0.357,0.356], -[0.166,0.133,0.155], -[0.218,0.140,0.135], -[0.140,0.152,0.158] -] -} diff --git a/benchmark/snowflake/run.sh b/benchmark/snowflake/run.sh deleted file mode 100755 index 35739a4d7f3..00000000000 --- a/benchmark/snowflake/run.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -sed -r -e 's/^(.*)$/\1 \1 \1/' queries.sql | snowsql --region eu-central-1 --schemaname PUBLIC --dbname HITS --warehouse TEST diff --git a/benchmark/sqlite/benchmark.sh b/benchmark/sqlite/benchmark.sh deleted file mode 100755 index a144acf680b..00000000000 --- a/benchmark/sqlite/benchmark.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt-get install -y sqlite3 - -sqlite3 mydb < create.sql - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.csv.gz' -gzip -d hits.csv.gz - -time sqlite3 mydb '.import --csv hits.csv hits' -wc -c mydb - -./run.sh 2>&1 | tee log.txt - -cat log.txt | - grep -P '^real|^Error' | - sed -r -e 's/^Error.*$/null/; s/^real\s*([0-9.]+)m([0-9.]+)s$/\1 \2/' | - awk '{ if ($2) { print $1 * 60 + $2 } else { print $1 } }' | - awk '{ if ($1 == "null") { skip = 1 } else { if (i % 3 == 0) { printf "[" }; printf skip ? "null" : $1; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; skip = 0; } }' diff --git a/benchmark/sqlite/create.sql b/benchmark/sqlite/create.sql deleted file mode 100644 index 1850bffedce..00000000000 --- a/benchmark/sqlite/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, UserID, EventTime, WatchID) -); diff --git a/benchmark/sqlite/queries.sql b/benchmark/sqlite/queries.sql deleted file mode 100644 index ed185494351..00000000000 --- a/benchmark/sqlite/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, strftime('%M', EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT strftime('%M', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY strftime('%M', EventTime) ORDER BY strftime('%M', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/sqlite/results/c6a.4xlarge.json b/benchmark/sqlite/results/c6a.4xlarge.json deleted file mode 100644 index e442dcadc80..00000000000 --- a/benchmark/sqlite/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "SQLite", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "embedded", "row-oriented"], - - "load_time": 2608, - "data_size": 75776589824, - - "result": [ -[752.739,2.003,1.2], -[304.302,291.521,286.965], -[293.964,287.619,287.219], -[758.302,5.879,5.65], -[836.393,48.593,48.452], -[362.605,344.884,356.245], -[763.993,11.602,10.795], -[296.348,286.879,287.557], -[365.816,360.339,354.126], -[374.403,365.196,362.261], -[302.989,293.888,298.432], -[303.64,291.729,295.347], -[316.824,298.18,301.006], -[320.665,301.15,305.227], -[313.593,301.021,301.626], -[794.881,47,47.225], -[355.346,344.615,342.442], -[316.499,305.971,305.007], -[398.177,380.383,385.571], -[751.82,5.082,4.913], -[295.522,286.573,287.368], -[298.58,287.182,288.303], -[296.474,288.747,288.638], -[296.579,287.127,287.361], -[304.709,286.865,287.56], -[300.391,290.633,288.587], -[294.605,286.91,287.799], -[305.986,312.111,305.626], -[null,null,null], -[411.225,397.614,394.253], -[307.711,295.181,300.266], -[312.472,299.079,298.19], -[386.674,378.347,376.963], -[409.742,409.554,420.273], -[468.73,453.709,458.149], -[366.015,347.446,346.728], -[2.911,0.781,0.757], -[1.599,0.609,0.587], -[1.288,0.256,0.238], -[2.469,1.582,1.52], -[1.274,0.303,0.283], -[1.322,0.317,0.314], -[1.498,0.602,0.613] -] -} diff --git a/benchmark/sqlite/run.sh b/benchmark/sqlite/run.sh deleted file mode 100755 index e3eccc1cc0b..00000000000 --- a/benchmark/sqlite/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - time sqlite3 mydb <<< "${query}" - done; -done; diff --git a/benchmark/starrocks/README.md b/benchmark/starrocks/README.md deleted file mode 100644 index a04fe13890d..00000000000 --- a/benchmark/starrocks/README.md +++ /dev/null @@ -1,3 +0,0 @@ -It requires providing an email for downloading. -But then the button on the website does not work and it does not download. -I cannot find the packages anywhere. diff --git a/benchmark/starrocks/benchmark.sh b/benchmark/starrocks/benchmark.sh deleted file mode 100755 index 2418d847397..00000000000 --- a/benchmark/starrocks/benchmark.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash - -# Install diff --git a/benchmark/timescaledb-compressed/benchmark.sh b/benchmark/timescaledb-compressed/benchmark.sh deleted file mode 100755 index 68600be3486..00000000000 --- a/benchmark/timescaledb-compressed/benchmark.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/bin/bash - -# Install -export DEBIAN_FRONTEND=noninteractive -sudo apt-get update -sudo apt-get install -y gnupg postgresql-common apt-transport-https lsb-release wget -sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -sudo bash -c 'echo "deb https://packagecloud.io/timescale/timescaledb/ubuntu/ $(lsb_release -c -s) main" > /etc/apt/sources.list.d/timescaledb.list' -wget --quiet -O - https://packagecloud.io/timescale/timescaledb/gpgkey | sudo apt-key add - -sudo apt-get update -sudo apt-get install -y timescaledb-2-postgresql-14 -sudo bash -c "echo \"shared_preload_libraries = 'timescaledb'\" >> /etc/postgresql/14/main/postgresql.conf" -sudo systemctl restart postgresql - -sudo -u postgres psql -c "CREATE DATABASE test" -sudo -u postgres psql test -c "CREATE EXTENSION IF NOT EXISTS timescaledb" - -# Import the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz -sudo chmod og+rX ~ -chmod 777 hits.tsv - -sudo -u postgres psql test < create.sql -sudo -u postgres psql test -c "SELECT create_hypertable('hits', 'eventtime')" -sudo -u postgres psql test -c "CREATE INDEX ix_counterid ON hits (counterid)" -sudo -u postgres psql test -c "ALTER TABLE hits SET (timescaledb.compress, timescaledb.compress_orderby = 'counterid, eventdate, userid, eventtime')" -sudo -u postgres psql test -c "SELECT add_compression_policy('hits', INTERVAL '1s')" - -sudo -u postgres psql test -t -c '\timing' -c "\\copy hits FROM 'hits.tsv'" - -# 1619875.288 ms (26:59.875) - -# See https://github.com/timescale/timescaledb/issues/4473#issuecomment-1167095245 -# https://docs.timescale.com/timescaledb/latest/how-to-guides/compression/manually-compress-chunks/#compress-chunks-manually -# TimescaleDB benchmark wihout compression is available in timescaledb directory - -time sudo -u postgres psql test -c "SELECT compress_chunk(i, if_not_compressed => true) FROM show_chunks('hits') i" - -# 49m45.120s - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/postgresql/14/main/ - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/timescaledb-compressed/create.sql b/benchmark/timescaledb-compressed/create.sql deleted file mode 100644 index 41c961c00fc..00000000000 --- a/benchmark/timescaledb-compressed/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/timescaledb-compressed/queries.sql b/benchmark/timescaledb-compressed/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/timescaledb-compressed/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/timescaledb-compressed/results/c6a.4xlarge.json b/benchmark/timescaledb-compressed/results/c6a.4xlarge.json deleted file mode 100644 index 457038eb591..00000000000 --- a/benchmark/timescaledb-compressed/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "TimescaleDB (compression)", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "PostgreSQL compatible", "column-oriented", "time-series"], - - "load_time": 4605, - "data_size": 20333747165, - - "result": [ -[2.28686,1.63642,1.64263], -[32.6848,1.63476,1.40052], -[60.8633,3.70484,3.59342], -[36.4029,2.87091,2.80739], -[110.391,38.9688,38.0549], -[147.379,66.2513,65.6379], -[33.0294,2.92031,2.84375], -[33.0221,1.2984,1.19227], -[115.694,47.4651,47.0125], -[156.347,51.577,51.2694], -[68.3301,4.75521,4.68007], -[77.4356,5.55128,5.56577], -[49.7741,11.2911,11.3265], -[81.1014,14.9111,14.9541], -[82.9569,14.6156,14.6331], -[62.0338,26.399,26.3351], -[103.259,36.4122,36.6076], -[92.8828,26.2395,25.8991], -[144.281,63.5102,63.7661], -[7.00679,0.573073,0.536283], -[75.0203,7.86344,7.90495], -[81.2825,9.15868,9.01775], -[104.084,13.9528,13.8435], -[132.531,81.522,82.1561], -[80.6965,3.28231,3.16574], -[39.7693,2.51443,2.43849], -[80.4245,3.26941,3.13916], -[104.015,13.7044,13.5313], -[307.26,253.127,252.147], -[42.8549,22.4187,22.0325], -[137.601,14.9592,14.6804], -[136.767,22.8007,22.131], -[263.005,168.551,163.355], -[156.919,92.6308,91.702], -[160.842,96.0512,97.1773], -[62.8357,28.0336,28.7397], -[1.75869,0.561604,0.541215], -[0.46607,0.191863,0.19021], -[0.303671,0.137579,0.136615], -[2.32031,1.49223,1.52369], -[0.563764,0.14192,0.138234], -[0.372428,0.122989,0.123709], -[0.448574,0.159092,0.154687] -] -} diff --git a/benchmark/timescaledb-compressed/run.sh b/benchmark/timescaledb-compressed/run.sh deleted file mode 100755 index 198ab546163..00000000000 --- a/benchmark/timescaledb-compressed/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - psql test -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/timescaledb/benchmark.sh b/benchmark/timescaledb/benchmark.sh deleted file mode 100755 index e43509ba915..00000000000 --- a/benchmark/timescaledb/benchmark.sh +++ /dev/null @@ -1,41 +0,0 @@ -#!/bin/bash - -# Install - -export DEBIAN_FRONTEND=noninteractive -sudo apt-get update -sudo apt-get install -y gnupg postgresql-common apt-transport-https lsb-release wget -sudo /usr/share/postgresql-common/pgdg/apt.postgresql.org.sh -sudo bash -c 'echo "deb https://packagecloud.io/timescale/timescaledb/ubuntu/ $(lsb_release -c -s) main" > /etc/apt/sources.list.d/timescaledb.list' -wget --quiet -O - https://packagecloud.io/timescale/timescaledb/gpgkey | sudo apt-key add - -sudo apt-get update -sudo apt-get install -y timescaledb-2-postgresql-14 -sudo bash -c "echo \"shared_preload_libraries = 'timescaledb'\" >> /etc/postgresql/14/main/postgresql.conf" -sudo systemctl restart postgresql - -sudo -u postgres psql -c "CREATE DATABASE uncompressed" -sudo -u postgres psql uncompressed -c "CREATE EXTENSION IF NOT EXISTS timescaledb" - -# Import the data - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz -sudo chmod og+rX ~ -chmod 777 hits.tsv - -sudo -u postgres psql uncompressed < create.sql -sudo -u postgres psql uncompressed -c "SELECT create_hypertable('hits', 'eventtime')" -sudo -u postgres psql uncompressed -c "CREATE INDEX ix_counterid ON hits (counterid)" -sudo -u postgres psql uncompressed -c "ALTER TABLE hits SET (timescaledb.compress, timescaledb.compress_orderby = 'counterid, eventdate, userid, eventtime')" -sudo -u postgres psql uncompressed -c "SELECT add_compression_policy('hits', INTERVAL '1s')" - -sudo -u postgres psql uncompressed -t -c '\timing' -c "\\copy hits FROM 'hits.tsv'" - -# 1619875.288 ms (26:59.875) - -./run.sh 2>&1 | tee log.txt - -sudo du -bcs /var/lib/postgresql/14/main/ - -cat log.txt | grep -oP 'Time: \d+\.\d+ ms' | sed -r -e 's/Time: ([0-9]+\.[0-9]+) ms/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/timescaledb/create.sql b/benchmark/timescaledb/create.sql deleted file mode 100644 index 41c961c00fc..00000000000 --- a/benchmark/timescaledb/create.sql +++ /dev/null @@ -1,108 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title TEXT NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL TEXT NOT NULL, - Referer TEXT NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 TEXT NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel TEXT NOT NULL, - Params TEXT NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase TEXT NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset TEXT NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL TEXT NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage TEXT NOT NULL, - BrowserCountry TEXT NOT NULL, - SocialNetwork TEXT NOT NULL, - SocialAction TEXT NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage TEXT NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID TEXT NOT NULL, - ParamCurrency TEXT NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName TEXT NOT NULL, - OpenstatCampaignID TEXT NOT NULL, - OpenstatAdID TEXT NOT NULL, - OpenstatSourceID TEXT NOT NULL, - UTMSource TEXT NOT NULL, - UTMMedium TEXT NOT NULL, - UTMCampaign TEXT NOT NULL, - UTMContent TEXT NOT NULL, - UTMTerm TEXT NOT NULL, - FromTag TEXT NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -); diff --git a/benchmark/timescaledb/queries.sql b/benchmark/timescaledb/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/timescaledb/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/timescaledb/results/c6a.4xlarge.json b/benchmark/timescaledb/results/c6a.4xlarge.json deleted file mode 100644 index 63d57a74975..00000000000 --- a/benchmark/timescaledb/results/c6a.4xlarge.json +++ /dev/null @@ -1,58 +0,0 @@ -{ - "system": "TimescaleDB", - "date": "2022-07-01", - "machine": "c6a.4xlarge, 500gb gp2", - "cluster_size": 1, - "comment": "", - - "tags": ["C", "PostgreSQL compatible", "row-oriented", "time-series"], - - "load_time": 1620, - "data_size": 72882392030, - - "result": [ -[437.700,215.793,176.420], -[327.026,259.568,244.578], -[262.978,263.090,263.083], -[262.807,263.046,266.847], -[337.497,334.964,330.852], -[355.689,356.801,362.894], -[262.762,263.012,262.968], -[263.055,263.016,263.028], -[319.928,319.388,320.704], -[323.584,322.224,322.488], -[265.979,265.465,265.375], -[266.019,265.543,265.462], -[277.018,276.300,276.595], -[280.352,279.251,279.572], -[279.915,279.896,279.674], -[296.377,298.506,297.659], -[314.448,314.605,312.570], -[302.668,302.672,303.039], -[325.810,324.061,324.376], -[262.447,262.698,262.704], -[267.581,267.467,267.482], -[268.085,267.466,267.696], -[263.391,263.097,263.126], -[38.291,0.435,0.335], -[0.127,0.005,0.005], -[263.138,263.100,263.092], -[0.889,0.341,0.339], -[267.586,267.498,267.491], -[289.086,290.012,290.093], -[263.220,263.071,263.109], -[274.780,273.995,273.998], -[282.217,281.390,281.470], -[429.273,426.588,439.431], -[448.808,418.724,418.207], -[455.196,422.750,423.142], -[299.263,296.937,297.261], -[18.693,1.552,1.481], -[18.125,0.945,0.937], -[18.528,1.062,0.902], -[18.268,1.779,1.770], -[19.615,1.965,1.966], -[18.970,1.435,1.430], -[18.330,1.153,0.952] -] -} diff --git a/benchmark/timescaledb/run.sh b/benchmark/timescaledb/run.sh deleted file mode 100755 index 84edae1f3d3..00000000000 --- a/benchmark/timescaledb/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - sudo -u postgres psql uncompressed -t -c '\timing' -c "$query" | grep 'Time' - done; -done; diff --git a/benchmark/trino/README.md b/benchmark/trino/README.md deleted file mode 100644 index 82b0b2ff959..00000000000 --- a/benchmark/trino/README.md +++ /dev/null @@ -1 +0,0 @@ -Incomplete. diff --git a/benchmark/trino/benchmark.sh b/benchmark/trino/benchmark.sh deleted file mode 100755 index 71fab8922f0..00000000000 --- a/benchmark/trino/benchmark.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt-get install -y docker.io -sudo docker run --network host -p 8080:8080 --name trino trinodb/trino - -sudo docker exec -it trino trino - -CREATE SCHEMA memory.test; -USE memory.test; diff --git a/benchmark/trino/create_partitioned.sql b/benchmark/trino/create_partitioned.sql deleted file mode 100644 index b0f9e5d0be5..00000000000 --- a/benchmark/trino/create_partitioned.sql +++ /dev/null @@ -1,112 +0,0 @@ -CREATE EXTERNAL TABLE IF NOT EXISTS `test`.`hits` ( -`watchid` bigint, -`javaenable` smallint, -`title` string, -`goodevent` smallint, -`eventtime` timestamp, -`eventdate` date, -`counterid` int, -`clientip` int, -`regionid` int, -`userid` bigint, -`counterclass` smallint, -`os` smallint, -`useragent` smallint, -`url` string, -`referer` string, -`isrefresh` smallint, -`referercategoryid` smallint, -`refererregionid` int, -`urlcategoryid` smallint, -`urlregionid` int, -`resolutionwidth` smallint, -`resolutionheight` smallint, -`resolutiondepth` smallint, -`flashmajor` smallint, -`flashminor` smallint, -`flashminor2` string, -`netmajor` smallint, -`netminor` smallint, -`useragentmajor` smallint, -`useragentminor` string, -`cookieenable` smallint, -`javascriptenable` smallint, -`ismobile` smallint, -`mobilephone` smallint, -`mobilephonemodel` string, -`params` string, -`ipnetworkid` int, -`traficsourceid` smallint, -`searchengineid` smallint, -`searchphrase` string, -`advengineid` smallint, -`isartifical` smallint, -`windowclientwidth` smallint, -`windowclientheight` smallint, -`clienttimezone` smallint, -`clienteventtime` timestamp, -`silverlightversion1` smallint, -`silverlightversion2` smallint, -`silverlightversion3` int, -`silverlightversion4` smallint, -`pagecharset` string, -`codeversion` int, -`islink` smallint, -`isdownload` smallint, -`isnotbounce` smallint, -`funiqid` bigint, -`originalurl` string, -`hid` int, -`isoldcounter` smallint, -`isevent` smallint, -`isparameter` smallint, -`dontcounthits` smallint, -`withhash` smallint, -`hitcolor` string, -`localeventtime` timestamp, -`age` smallint, -`sex` smallint, -`income` smallint, -`interests` smallint, -`robotness` smallint, -`remoteip` int, -`windowname` int, -`openername` int, -`historylength` smallint, -`browserlanguage` string, -`browsercountry` string, -`socialnetwork` string, -`socialaction` string, -`httperror` smallint, -`sendtiming` int, -`dnstiming` int, -`connecttiming` int, -`responsestarttiming` int, -`responseendtiming` int, -`fetchtiming` int, -`socialsourcenetworkid` smallint, -`socialsourcepage` string, -`paramprice` bigint, -`paramorderid` string, -`paramcurrency` string, -`paramcurrencyid` smallint, -`openstatservicename` string, -`openstatcampaignid` string, -`openstatadid` string, -`openstatsourceid` string, -`utmsource` string, -`utmmedium` string, -`utmcampaign` string, -`utmcontent` string, -`utmterm` string, -`fromtag` string, -`hasgclid` smallint, -`refererhash` bigint, -`urlhash` bigint, -`clid` int -) -ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe' -WITH SERDEPROPERTIES ( -'serialization.format' = '1' -) LOCATION 's3://clickhouse-public-datasets/hits_compatible/athena_partitioned' -TBLPROPERTIES ('has_encrypted_data'='false'); diff --git a/benchmark/trino/create_single.sql b/benchmark/trino/create_single.sql deleted file mode 100644 index 4a24f246618..00000000000 --- a/benchmark/trino/create_single.sql +++ /dev/null @@ -1,107 +0,0 @@ -CREATE TABLE IF NOT EXISTS "memory"."test"."hits" ( -"watchid" bigint, -"javaenable" smallint, -"title" varchar(65535), -"goodevent" smallint, -"eventtime" timestamp, -"eventdate" date, -"counterid" int, -"clientip" int, -"regionid" int, -"userid" bigint, -"counterclass" smallint, -"os" smallint, -"useragent" smallint, -"url" varchar(65535), -"referer" varchar(65535), -"isrefresh" smallint, -"referercategoryid" smallint, -"refererregionid" int, -"urlcategoryid" smallint, -"urlregionid" int, -"resolutionwidth" smallint, -"resolutionheight" smallint, -"resolutiondepth" smallint, -"flashmajor" smallint, -"flashminor" smallint, -"flashminor2" varchar(65535), -"netmajor" smallint, -"netminor" smallint, -"useragentmajor" smallint, -"useragentminor" varchar(65535), -"cookieenable" smallint, -"javascriptenable" smallint, -"ismobile" smallint, -"mobilephone" smallint, -"mobilephonemodel" varchar(65535), -"params" varchar(65535), -"ipnetworkid" int, -"traficsourceid" smallint, -"searchengineid" smallint, -"searchphrase" varchar(65535), -"advengineid" smallint, -"isartifical" smallint, -"windowclientwidth" smallint, -"windowclientheight" smallint, -"clienttimezone" smallint, -"clienteventtime" timestamp, -"silverlightversion1" smallint, -"silverlightversion2" smallint, -"silverlightversion3" int, -"silverlightversion4" smallint, -"pagecharset" varchar(65535), -"codeversion" int, -"islink" smallint, -"isdownload" smallint, -"isnotbounce" smallint, -"funiqid" bigint, -"originalurl" varchar(65535), -"hid" int, -"isoldcounter" smallint, -"isevent" smallint, -"isparameter" smallint, -"dontcounthits" smallint, -"withhash" smallint, -"hitcolor" varchar(65535), -"localeventtime" timestamp, -"age" smallint, -"sex" smallint, -"income" smallint, -"interests" smallint, -"robotness" smallint, -"remoteip" int, -"windowname" int, -"openername" int, -"historylength" smallint, -"browserlanguage" varchar(65535), -"browsercountry" varchar(65535), -"socialnetwork" varchar(65535), -"socialaction" varchar(65535), -"httperror" smallint, -"sendtiming" int, -"dnstiming" int, -"connecttiming" int, -"responsestarttiming" int, -"responseendtiming" int, -"fetchtiming" int, -"socialsourcenetworkid" smallint, -"socialsourcepage" varchar(65535), -"paramprice" bigint, -"paramorderid" varchar(65535), -"paramcurrency" varchar(65535), -"paramcurrencyid" smallint, -"openstatservicename" varchar(65535), -"openstatcampaignid" varchar(65535), -"openstatadid" varchar(65535), -"openstatsourceid" varchar(65535), -"utmsource" varchar(65535), -"utmmedium" varchar(65535), -"utmcampaign" varchar(65535), -"utmcontent" varchar(65535), -"utmterm" varchar(65535), -"fromtag" varchar(65535), -"hasgclid" smallint, -"refererhash" bigint, -"urlhash" bigint, -"clid" int -); diff --git a/benchmark/trino/queries.sql b/benchmark/trino/queries.sql deleted file mode 100644 index ef8c727ead6..00000000000 --- a/benchmark/trino/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, extract(minute FROM EventTime), SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END, URL ORDER BY PageViews DESC OFFSET 1000 LIMIT 10; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC OFFSET 100 LIMIT 10; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-01' AND EventDate <= DATE '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC OFFSET 10000 LIMIT 10; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= DATE '2013-07-14' AND EventDate <= DATE '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) OFFSET 1000 LIMIT 10; diff --git a/benchmark/vertica/.gitignore b/benchmark/vertica/.gitignore deleted file mode 100644 index 1a06816d838..00000000000 --- a/benchmark/vertica/.gitignore +++ /dev/null @@ -1 +0,0 @@ -results diff --git a/benchmark/vertica/README.md b/benchmark/vertica/README.md deleted file mode 100644 index 0adab4f41f9..00000000000 --- a/benchmark/vertica/README.md +++ /dev/null @@ -1,5 +0,0 @@ -Although Vertica EULA does not prevent doing benchmarks, it restricts from disclosing the results: - -> You may not disclose to any third-party performance information or analysis (including, without limitation, benchmarks and performance tests) from any source relating to the Software. - -https://www.vertica.com/end-user-license-agreement-ce-version/ diff --git a/benchmark/vertica/benchmark.sh b/benchmark/vertica/benchmark.sh deleted file mode 100755 index 86312a3a438..00000000000 --- a/benchmark/vertica/benchmark.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash - -sudo apt-get update -sudo apt-get install -y docker.io - -sudo docker run -p 5433:5433 -p 5444:5444 --volume $(pwd):/workdir --mount type=volume,source=vertica-data,target=/data --name vertica_ce vertica/vertica-ce - -sudo docker exec vertica_ce /opt/vertica/bin/vsql -U dbadmin -c "$(cat create.sql)" - -wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' -gzip -d hits.tsv.gz - -time sudo docker exec vertica_ce /opt/vertica/bin/vsql -U dbadmin -c "COPY hits FROM LOCAL '/workdir/hits.tsv' DELIMITER E'\\t' NULL E'\\001' DIRECT" - -sudo docker exec vertica_ce du -bcs /data/vertica/VMart - -./run.sh 2>&1 | tee log.txt - -# If you run the script on your own, you may get numbers like this: -# 200m00.000s -# 25000000000 - -# Note: the real numbers cannot be published. - -grep -F 'All rows formatted' logs.txt | sed -r -e 's/^.* ([0-9.]+) ms$/\1/' | - awk '{ if (i % 3 == 0) { printf "[" }; printf $1 / 1000; if (i % 3 != 2) { printf "," } else { print "]," }; ++i; }' diff --git a/benchmark/vertica/create.sql b/benchmark/vertica/create.sql deleted file mode 100644 index a6316c5e227..00000000000 --- a/benchmark/vertica/create.sql +++ /dev/null @@ -1,109 +0,0 @@ -CREATE TABLE hits -( - WatchID BIGINT NOT NULL, - JavaEnable SMALLINT NOT NULL, - Title VARCHAR(65000) NOT NULL, - GoodEvent SMALLINT NOT NULL, - EventTime TIMESTAMP NOT NULL, - EventDate Date NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - RegionID INTEGER NOT NULL, - UserID BIGINT NOT NULL, - CounterClass SMALLINT NOT NULL, - OS SMALLINT NOT NULL, - UserAgent SMALLINT NOT NULL, - URL VARCHAR(65000) NOT NULL, - Referer VARCHAR(65000) NOT NULL, - IsRefresh SMALLINT NOT NULL, - RefererCategoryID SMALLINT NOT NULL, - RefererRegionID INTEGER NOT NULL, - URLCategoryID SMALLINT NOT NULL, - URLRegionID INTEGER NOT NULL, - ResolutionWidth SMALLINT NOT NULL, - ResolutionHeight SMALLINT NOT NULL, - ResolutionDepth SMALLINT NOT NULL, - FlashMajor SMALLINT NOT NULL, - FlashMinor SMALLINT NOT NULL, - FlashMinor2 VARCHAR(65000) NOT NULL, - NetMajor SMALLINT NOT NULL, - NetMinor SMALLINT NOT NULL, - UserAgentMajor SMALLINT NOT NULL, - UserAgentMinor VARCHAR(255) NOT NULL, - CookieEnable SMALLINT NOT NULL, - JavascriptEnable SMALLINT NOT NULL, - IsMobile SMALLINT NOT NULL, - MobilePhone SMALLINT NOT NULL, - MobilePhoneModel VARCHAR(65000) NOT NULL, - Params VARCHAR(65000) NOT NULL, - IPNetworkID INTEGER NOT NULL, - TraficSourceID SMALLINT NOT NULL, - SearchEngineID SMALLINT NOT NULL, - SearchPhrase VARCHAR(65000) NOT NULL, - AdvEngineID SMALLINT NOT NULL, - IsArtifical SMALLINT NOT NULL, - WindowClientWidth SMALLINT NOT NULL, - WindowClientHeight SMALLINT NOT NULL, - ClientTimeZone SMALLINT NOT NULL, - ClientEventTime TIMESTAMP NOT NULL, - SilverlightVersion1 SMALLINT NOT NULL, - SilverlightVersion2 SMALLINT NOT NULL, - SilverlightVersion3 INTEGER NOT NULL, - SilverlightVersion4 SMALLINT NOT NULL, - PageCharset VARCHAR(65000) NOT NULL, - CodeVersion INTEGER NOT NULL, - IsLink SMALLINT NOT NULL, - IsDownload SMALLINT NOT NULL, - IsNotBounce SMALLINT NOT NULL, - FUniqID BIGINT NOT NULL, - OriginalURL VARCHAR(65000) NOT NULL, - HID INTEGER NOT NULL, - IsOldCounter SMALLINT NOT NULL, - IsEvent SMALLINT NOT NULL, - IsParameter SMALLINT NOT NULL, - DontCountHits SMALLINT NOT NULL, - WithHash SMALLINT NOT NULL, - HitColor CHAR NOT NULL, - LocalEventTime TIMESTAMP NOT NULL, - Age SMALLINT NOT NULL, - Sex SMALLINT NOT NULL, - Income SMALLINT NOT NULL, - Interests SMALLINT NOT NULL, - Robotness SMALLINT NOT NULL, - RemoteIP INTEGER NOT NULL, - WindowName INTEGER NOT NULL, - OpenerName INTEGER NOT NULL, - HistoryLength SMALLINT NOT NULL, - BrowserLanguage VARCHAR(65000) NOT NULL, - BrowserCountry VARCHAR(65000) NOT NULL, - SocialNetwork VARCHAR(65000) NOT NULL, - SocialAction VARCHAR(65000) NOT NULL, - HTTPError SMALLINT NOT NULL, - SendTiming INTEGER NOT NULL, - DNSTiming INTEGER NOT NULL, - ConnectTiming INTEGER NOT NULL, - ResponseStartTiming INTEGER NOT NULL, - ResponseEndTiming INTEGER NOT NULL, - FetchTiming INTEGER NOT NULL, - SocialSourceNetworkID SMALLINT NOT NULL, - SocialSourcePage VARCHAR(65000) NOT NULL, - ParamPrice BIGINT NOT NULL, - ParamOrderID VARCHAR(65000) NOT NULL, - ParamCurrency VARCHAR(65000) NOT NULL, - ParamCurrencyID SMALLINT NOT NULL, - OpenstatServiceName VARCHAR(65000) NOT NULL, - OpenstatCampaignID VARCHAR(65000) NOT NULL, - OpenstatAdID VARCHAR(65000) NOT NULL, - OpenstatSourceID VARCHAR(65000) NOT NULL, - UTMSource VARCHAR(65000) NOT NULL, - UTMMedium VARCHAR(65000) NOT NULL, - UTMCampaign VARCHAR(65000) NOT NULL, - UTMContent VARCHAR(65000) NOT NULL, - UTMTerm VARCHAR(65000) NOT NULL, - FromTag VARCHAR(65000) NOT NULL, - HasGCLID SMALLINT NOT NULL, - RefererHash BIGINT NOT NULL, - URLHash BIGINT NOT NULL, - CLID INTEGER NOT NULL -) -ORDER BY CounterID, EventDate, UserID, EventTime, WatchID; diff --git a/benchmark/vertica/queries.sql b/benchmark/vertica/queries.sql deleted file mode 100644 index 31f65fc898d..00000000000 --- a/benchmark/vertica/queries.sql +++ /dev/null @@ -1,43 +0,0 @@ -SELECT COUNT(*) FROM hits; -SELECT COUNT(*) FROM hits WHERE AdvEngineID <> 0; -SELECT SUM(AdvEngineID), COUNT(*), AVG(ResolutionWidth) FROM hits; -SELECT AVG(UserID) FROM hits; -SELECT COUNT(DISTINCT UserID) FROM hits; -SELECT COUNT(DISTINCT SearchPhrase) FROM hits; -SELECT MIN(EventDate), MAX(EventDate) FROM hits; -SELECT AdvEngineID, COUNT(*) FROM hits WHERE AdvEngineID <> 0 GROUP BY AdvEngineID ORDER BY COUNT(*) DESC; -SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10; -SELECT RegionID, SUM(AdvEngineID), COUNT(*) AS c, AVG(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10; -SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE MobilePhoneModel <> '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10; -SELECT SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10; -SELECT SearchEngineID, SearchPhrase, COUNT(*) AS c FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT UserID, COUNT(*) FROM hits GROUP BY UserID ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10; -SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, COUNT(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY COUNT(*) DESC LIMIT 10; -SELECT UserID FROM hits WHERE UserID = 435090932899640449; -SELECT COUNT(*) FROM hits WHERE URL LIKE '%google%'; -SELECT SearchPhrase, MIN(URL), COUNT(*) AS c FROM hits WHERE URL LIKE '%google%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10; -SELECT * FROM hits WHERE URL LIKE '%google%' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY SearchPhrase LIMIT 10; -SELECT SearchPhrase FROM hits WHERE SearchPhrase <> '' ORDER BY EventTime, SearchPhrase LIMIT 10; -SELECT CounterID, AVG(length(URL)) AS l, COUNT(*) AS c FROM hits WHERE URL <> '' GROUP BY CounterID HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS k, AVG(length(Referer)) AS l, COUNT(*) AS c, MIN(Referer) FROM hits WHERE Referer <> '' GROUP BY k HAVING COUNT(*) > 100000 ORDER BY l DESC LIMIT 25; -SELECT SUM(ResolutionWidth), SUM(ResolutionWidth + 1), SUM(ResolutionWidth + 2), SUM(ResolutionWidth + 3), SUM(ResolutionWidth + 4), SUM(ResolutionWidth + 5), SUM(ResolutionWidth + 6), SUM(ResolutionWidth + 7), SUM(ResolutionWidth + 8), SUM(ResolutionWidth + 9), SUM(ResolutionWidth + 10), SUM(ResolutionWidth + 11), SUM(ResolutionWidth + 12), SUM(ResolutionWidth + 13), SUM(ResolutionWidth + 14), SUM(ResolutionWidth + 15), SUM(ResolutionWidth + 16), SUM(ResolutionWidth + 17), SUM(ResolutionWidth + 18), SUM(ResolutionWidth + 19), SUM(ResolutionWidth + 20), SUM(ResolutionWidth + 21), SUM(ResolutionWidth + 22), SUM(ResolutionWidth + 23), SUM(ResolutionWidth + 24), SUM(ResolutionWidth + 25), SUM(ResolutionWidth + 26), SUM(ResolutionWidth + 27), SUM(ResolutionWidth + 28), SUM(ResolutionWidth + 29), SUM(ResolutionWidth + 30), SUM(ResolutionWidth + 31), SUM(ResolutionWidth + 32), SUM(ResolutionWidth + 33), SUM(ResolutionWidth + 34), SUM(ResolutionWidth + 35), SUM(ResolutionWidth + 36), SUM(ResolutionWidth + 37), SUM(ResolutionWidth + 38), SUM(ResolutionWidth + 39), SUM(ResolutionWidth + 40), SUM(ResolutionWidth + 41), SUM(ResolutionWidth + 42), SUM(ResolutionWidth + 43), SUM(ResolutionWidth + 44), SUM(ResolutionWidth + 45), SUM(ResolutionWidth + 46), SUM(ResolutionWidth + 47), SUM(ResolutionWidth + 48), SUM(ResolutionWidth + 49), SUM(ResolutionWidth + 50), SUM(ResolutionWidth + 51), SUM(ResolutionWidth + 52), SUM(ResolutionWidth + 53), SUM(ResolutionWidth + 54), SUM(ResolutionWidth + 55), SUM(ResolutionWidth + 56), SUM(ResolutionWidth + 57), SUM(ResolutionWidth + 58), SUM(ResolutionWidth + 59), SUM(ResolutionWidth + 60), SUM(ResolutionWidth + 61), SUM(ResolutionWidth + 62), SUM(ResolutionWidth + 63), SUM(ResolutionWidth + 64), SUM(ResolutionWidth + 65), SUM(ResolutionWidth + 66), SUM(ResolutionWidth + 67), SUM(ResolutionWidth + 68), SUM(ResolutionWidth + 69), SUM(ResolutionWidth + 70), SUM(ResolutionWidth + 71), SUM(ResolutionWidth + 72), SUM(ResolutionWidth + 73), SUM(ResolutionWidth + 74), SUM(ResolutionWidth + 75), SUM(ResolutionWidth + 76), SUM(ResolutionWidth + 77), SUM(ResolutionWidth + 78), SUM(ResolutionWidth + 79), SUM(ResolutionWidth + 80), SUM(ResolutionWidth + 81), SUM(ResolutionWidth + 82), SUM(ResolutionWidth + 83), SUM(ResolutionWidth + 84), SUM(ResolutionWidth + 85), SUM(ResolutionWidth + 86), SUM(ResolutionWidth + 87), SUM(ResolutionWidth + 88), SUM(ResolutionWidth + 89) FROM hits; -SELECT SearchEngineID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits WHERE SearchPhrase <> '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT WatchID, ClientIP, COUNT(*) AS c, SUM(IsRefresh), AVG(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10; -SELECT 1, URL, COUNT(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10; -SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, COUNT(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND URL <> '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10; -SELECT Title, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND IsRefresh = 0 AND Title <> '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10; -SELECT URL, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND IsLink <> 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 10 OFFSET 1000; -SELECT URLHash, EventDate, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 3594120000172545465 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 10 OFFSET 100; -SELECT WindowClientWidth, WindowClientHeight, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND IsRefresh = 0 AND DontCountHits = 0 AND URLHash = 2868770270353813622 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10 OFFSET 10000; -SELECT DATE_TRUNC('minute', EventTime) AS M, COUNT(*) AS PageViews FROM hits WHERE CounterID = 62 AND EventDate >= '2013-07-14' AND EventDate <= '2013-07-15' AND IsRefresh = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime) LIMIT 10 OFFSET 1000; diff --git a/benchmark/vertica/run.sh b/benchmark/vertica/run.sh deleted file mode 100755 index 7638dbb0299..00000000000 --- a/benchmark/vertica/run.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/bash - -TRIES=3 - -cat queries.sql | while read query; do - sync - echo 3 | sudo tee /proc/sys/vm/drop_caches - - echo "$query"; - for i in $(seq 1 $TRIES); do - sudo docker exec vertica_ce /opt/vertica/bin/vsql -U dbadmin -c '\timing' -c "$query" - done; -done;