From 5f0fa2c2fb01d0c790b14a162cacab0f2933ffc6 Mon Sep 17 00:00:00 2001 From: BayoNet Date: Sun, 11 Feb 2018 11:18:20 +0300 Subject: [PATCH] Document tree and project settings are prepared for site generation. Final step of NO-RST company. --- docs/build.sh | 1 + docs/create_contents.py | 2 +- docs/en/agg_functions/index.md | 11 -- docs/en/data_types/index.md | 10 +- .../nested_data_structures/index.md | 8 - .../en/data_types/special_data_types/index.md | 7 - docs/en/development/index.md | 7 - docs/en/dicts/index.md | 8 - docs/en/formats/index.md | 8 - docs/en/functions/index.md | 7 - docs/en/getting_started/index.md | 10 +- docs/en/index.md | 141 ++++++++++++++--- docs/en/interfaces/index.md | 10 +- docs/en/introduction/index.md | 12 -- docs/en/introduction/what_is_clickhouse.md | 123 --------------- docs/en/operations/index.md | 9 -- docs/en/operations/server_settings/index.md | 8 - docs/en/operations/settings/index.md | 7 - docs/en/query_language/index.md | 8 - docs/en/query_language/queries.md | 3 + docs/en/system_tables/index.md | 7 - docs/en/table_engines/index.md | 7 - docs/en/table_engines/materializedview.md | 2 +- docs/en/table_functions/index.md | 7 - .../assets/images/Copy (1) favicon.ico | Bin 1150 -> 0 bytes .../assets/images/Copy (1) favicon.png | Bin 521 -> 0 bytes .../assets/images/favicon.ico | Bin 1150 -> 171 bytes .../assets/images/favicon.png | Bin 521 -> 0 bytes .../assets/images/logo.ico | Bin 16958 -> 0 bytes .../assets/images/logo.svg | 12 -- .../assets/images/logo_ch.ico | Bin 4286 -> 0 bytes .../partials/header.html | 31 ++++ docs/mkdocs_en.yml | 9 +- docs/mkdocs_ru.yml | 7 +- docs/ru/agg_functions/index.md | 10 -- docs/ru/data_types/index.md | 9 +- .../nested_data_structures/index.md | 9 +- .../ru/data_types/special_data_types/index.md | 7 - docs/ru/development/index.md | 7 - docs/ru/dicts/index.md | 8 - docs/ru/formats/index.md | 7 - docs/ru/functions/index.md | 7 - docs/ru/getting_started/index.md | 9 +- docs/ru/index.md | 143 
+++++++++++++++--- docs/ru/interfaces/index.md | 7 - docs/ru/introduction/index.md | 12 -- docs/ru/introduction/what_is_clickhouse.md | 122 --------------- docs/ru/operations/index.md | 8 - docs/ru/operations/server_settings/index.md | 7 - docs/ru/operations/settings/index.md | 7 - docs/ru/query_language/index.md | 7 - docs/ru/query_language/queries.md | 3 + docs/ru/system_tables/index.md | 7 - docs/ru/table_engines/dictionary.md | 2 +- docs/ru/table_engines/index.md | 7 - docs/ru/table_engines/materializedview.md | 2 +- docs/ru/table_functions/index.md | 7 - 57 files changed, 298 insertions(+), 598 deletions(-) delete mode 100644 docs/en/introduction/what_is_clickhouse.md delete mode 100644 docs/mkdocs-material-theme/assets/images/Copy (1) favicon.ico delete mode 100644 docs/mkdocs-material-theme/assets/images/Copy (1) favicon.png delete mode 100644 docs/mkdocs-material-theme/assets/images/favicon.png delete mode 100644 docs/mkdocs-material-theme/assets/images/logo.ico delete mode 100644 docs/mkdocs-material-theme/assets/images/logo.svg delete mode 100644 docs/mkdocs-material-theme/assets/images/logo_ch.ico delete mode 100644 docs/ru/introduction/what_is_clickhouse.md diff --git a/docs/build.sh b/docs/build.sh index cc6aa915b23..5fbff59a744 100644 --- a/docs/build.sh +++ b/docs/build.sh @@ -9,6 +9,7 @@ fi for lang in $LANGS; do echo -e "\n\nLANG=$lang. Creating single page source" mkdir $lang'_single_page' + cp -r $lang/images $lang'_single_page' ./concatenate.py $lang echo -e "\n\nLANG=$lang. Building multipage..." 
mkdocs build -f mkdocs_$lang.yml diff --git a/docs/create_contents.py b/docs/create_contents.py index 2847eef8385..46b822d6c51 100644 --- a/docs/create_contents.py +++ b/docs/create_contents.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -SOURCES_TREE = 'en' +SOURCES_TREE = 'ru' from os import walk def get_header(filepath): diff --git a/docs/en/agg_functions/index.md b/docs/en/agg_functions/index.md index 1ca9ca3283d..e87bf4ff833 100644 --- a/docs/en/agg_functions/index.md +++ b/docs/en/agg_functions/index.md @@ -8,14 +8,3 @@ ClickHouse also supports: - [Parametric aggregate functions](parametric_functions.md#aggregate_functions_parametric), which accept other parameters in addition to columns. - [Combinators](combinators.md#aggregate_functions_combinators), which change the behavior of aggregate functions. - -**Table of Contents** - -```eval_rst -.. toctree:: - - reference - parametric_functions - combinators -``` - diff --git a/docs/en/data_types/index.md b/docs/en/data_types/index.md index 4957f9c6c8c..c17b51c08a2 100644 --- a/docs/en/data_types/index.md +++ b/docs/en/data_types/index.md @@ -2,12 +2,6 @@ # Data types -```eval_rst -.. toctree:: - :glob: - - * - */index - -``` +ClickHouse table fields can contain data of different types. +This section describes the supported data types and, where applicable, the specifics of their usage and implementation. \ No newline at end of file diff --git a/docs/en/data_types/nested_data_structures/index.md b/docs/en/data_types/nested_data_structures/index.md index 16317c4fb46..06f95b4a1c1 100644 --- a/docs/en/data_types/nested_data_structures/index.md +++ b/docs/en/data_types/nested_data_structures/index.md @@ -1,9 +1 @@ # Nested data structures - -```eval_rst -.. 
toctree:: - :glob: - - * -``` - diff --git a/docs/en/data_types/special_data_types/index.md b/docs/en/data_types/special_data_types/index.md index 8c575a42ad2..fcaf099fd17 100644 --- a/docs/en/data_types/special_data_types/index.md +++ b/docs/en/data_types/special_data_types/index.md @@ -2,10 +2,3 @@ Special data type values can't be saved to a table or output in results, but are used as the intermediate result of running a query. -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/development/index.md b/docs/en/development/index.md index 44759f17870..a7b046fd22d 100644 --- a/docs/en/development/index.md +++ b/docs/en/development/index.md @@ -1,9 +1,2 @@ # ClickHouse Development -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/dicts/index.md b/docs/en/dicts/index.md index fd055d85703..de89ef197f7 100644 --- a/docs/en/dicts/index.md +++ b/docs/en/dicts/index.md @@ -5,11 +5,3 @@ You can think of this as a more convenient and efficient type of JOIN with dimen There are built-in (internal) and add-on (external) dictionaries. -```eval_rst -.. toctree:: - - external_dicts - internal_dicts - -``` - diff --git a/docs/en/formats/index.md b/docs/en/formats/index.md index ad2b192d99f..815a2d060cb 100644 --- a/docs/en/formats/index.md +++ b/docs/en/formats/index.md @@ -3,11 +3,3 @@ # Formats The format determines how data is returned to you after SELECTs (how it is written and formatted by the server), and how it is accepted for INSERTs (how it is read and parsed by the server). - -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/functions/index.md b/docs/en/functions/index.md index 5250025757a..9f92d009113 100644 --- a/docs/en/functions/index.md +++ b/docs/en/functions/index.md @@ -6,13 +6,6 @@ In this section we discuss regular functions. 
For aggregate functions, see the s \* - There is a third type of function that the 'arrayJoin' function belongs to; table functions can also be mentioned separately.\* -```eval_rst -.. toctree:: - :glob: - - * -``` - ## Strong typing In contrast to standard SQL, ClickHouse has strong typing. In other words, it doesn't make implicit conversions between types. Each function works for a specific set of types. This means that sometimes you need to use type conversion functions. diff --git a/docs/en/getting_started/index.md b/docs/en/getting_started/index.md index 6bcafc4c2c5..5b60367d3ab 100644 --- a/docs/en/getting_started/index.md +++ b/docs/en/getting_started/index.md @@ -132,12 +132,4 @@ SELECT 1 **Congratulations, the system works!** -To continue experimenting, you can try to download from the test data sets: - -```eval_rst -.. toctree:: - :glob: - - example_datasets/* -``` - +To continue experimenting, you can try to download from the test data sets. \ No newline at end of file diff --git a/docs/en/index.md b/docs/en/index.md index 7a550b3d1fd..72efa70802b 100644 --- a/docs/en/index.md +++ b/docs/en/index.md @@ -1,24 +1,123 @@ -# Documentation +# What is ClickHouse? -```eval_rst -.. toctree:: - :maxdepth: 6 - - introduction/index - getting_started/index - interfaces/index - query_language/index - table_engines/index - system_tables/index - table_functions/index - formats/index - data_types/index - operators/index - functions/index - agg_functions/index - dicts/index - operations/index - development/index - roadmap +ClickHouse is a columnar DBMS for OLAP. 
+ +In a "normal" row-oriented DBMS, data is stored in this order: + +```text +5123456789123456789 1 Eurobasket - Greece - Bosnia and Herzegovina - example.com 1 2011-09-01 01:03:02 6274717 1294101174 11409 612345678912345678 0 33 6 http://www.example.com/basketball/team/123/match/456789.html http://www.example.com/basketball/team/123/match/987654.html 0 1366 768 32 10 3183 0 0 13 0\0 1 1 0 0 2011142 -1 0 0 01321 613 660 2011-09-01 08:01:17 0 0 0 0 utf-8 1466 0 0 0 5678901234567890123 277789954 0 0 0 0 0 +5234985259563631958 0 Consulting, Tax assessment, Accounting, Law 1 2011-09-01 01:03:02 6320881 2111222333 213 6458937489576391093 0 3 2 http://www.example.ru/ 0 800 600 16 10 2 153.1 0 0 10 63 1 1 0 0 2111678 000 0 588 368 240 2011-09-01 01:03:17 4 0 60310 0 windows-1251 1466 0 000 778899001 0 0 0 0 0 +... ``` +In other words, all the values related to a row are stored next to each other. +Examples of a row-oriented DBMS are MySQL, Postgres, MS SQL Server, and others. + +In a column-oriented DBMS, data is stored like this: + +```text +WatchID: 5385521489354350662 5385521490329509958 5385521489953706054 5385521490476781638 5385521490583269446 5385521490218868806 5385521491437850694 5385521491090174022 5385521490792669254 5385521490420695110 5385521491532181574 5385521491559694406 5385521491459625030 5385521492275175494 5385521492781318214 5385521492710027334 5385521492955615302 5385521493708759110 5385521494506434630 5385521493104611398 +JavaEnable: 1 0 1 0 0 0 1 0 1 1 1 1 1 1 0 1 0 0 1 1 +Title: Yandex Announcements - Investor Relations - Yandex Yandex — Contact us — Moscow Yandex — Mission Ru Yandex — History — History of Yandex Yandex Financial Releases - Investor Relations - Yandex Yandex — Locations Yandex Board of Directors - Corporate Governance - Yandex Yandex — Technologies +GoodEvent: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 +EventTime: 2016-05-18 05:19:20 2016-05-18 08:10:20 2016-05-18 07:38:00 2016-05-18 01:13:08 2016-05-18 00:04:06 2016-05-18 04:21:30 
2016-05-18 00:34:16 2016-05-18 07:35:49 2016-05-18 11:41:59 2016-05-18 01:13:32 +``` + +These examples only show the order that data is arranged in. +The values from different columns are stored separately, and data from the same column is stored together. + +Examples of column-oriented DBMSs: `Vertica`, `Paraccel (Actian Matrix) (Amazon Redshift)`, `Sybase IQ`, `Exasol`, `Infobright`, `InfiniDB`, `MonetDB (VectorWise) (Actian Vector)`, `LucidDB`, `SAP HANA`, `Google Dremel`, `Google PowerDrill`, `Druid`, `kdb+`, and so on. + +Different orders for storing data are better suited to different scenarios. +The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on. + +The higher the load on the system, the more important it is to customize the system to the scenario, and the more specific this customization becomes. There is no system that is equally well-suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, under a high load, the system will handle all the scenarios equally poorly, or will work well for just one of the scenarios. + +We'll say that the following is true for the OLAP (online analytical processing) scenario: + +- The vast majority of requests are for read access. +- Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. +- Data is added to the DB but is not modified. +- For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. +- Tables are "wide," meaning they contain a large number of columns. 
+- Queries are relatively rare (usually hundreds of queries per server or less per second). +- For simple queries, latencies around 50 ms are allowed. +- Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). +- Requires high throughput when processing a single query (up to billions of rows per second per server). +- There are no transactions. +- Low requirements for data consistency. +- There is one large table per query. All tables are small, except for one. +- A query result is significantly smaller than the source data. In other words, data is filtered or aggregated. The result fits in a single server's RAM. + +It is easy to see that the OLAP scenario is very different from other popular scenarios (such as OLTP or Key-Value access). So it doesn't make sense to try to use OLTP or a Key-Value DB for processing analytical queries if you want to get decent performance. For example, if you try to use MongoDB or Elliptics for analytics, you will get very poor performance compared to OLAP databases. + +Columnar-oriented databases are better suited to OLAP scenarios (at least 100 times better in processing speed for most queries), for the following reasons: + +1. For I/O. +2. For an analytical query, only a small number of table columns need to be read. In a column-oriented database, you can read just the data you need. For example, if you need 5 columns out of 100, you can expect a 20-fold reduction in I/O. +3. Since data is read in packets, it is easier to compress. Data in columns is also easier to compress. This further reduces the I/O volume. +4. Due to the reduced I/O, more data fits in the system cache. + +For example, the query "count the number of records for each advertising platform" requires reading one "advertising platform ID" column, which takes up 1 byte uncompressed. If most of the traffic was not from advertising platforms, you can expect at least 10-fold compression of this column. 
When using a quick compression algorithm, data decompression is possible at a speed of at least several gigabytes of uncompressed data per second. In other words, this query can be processed at a speed of approximately several billion rows per second on a single server. This speed is actually achieved in practice. + +Example: + +```bash +milovidov@hostname:~$ clickhouse-client +ClickHouse client version 0.0.52053. +Connecting to localhost:9000. +Connected to ClickHouse server version 0.0.52053. + +:) SELECT CounterID, count() FROM hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 + +SELECT + CounterID, + count() +FROM hits +GROUP BY CounterID +ORDER BY count() DESC +LIMIT 20 + +┌─CounterID─┬──count()─┐ +│ 114208 │ 56057344 │ +│ 115080 │ 51619590 │ +│ 3228 │ 44658301 │ +│ 38230 │ 42045932 │ +│ 145263 │ 42042158 │ +│ 91244 │ 38297270 │ +│ 154139 │ 26647572 │ +│ 150748 │ 24112755 │ +│ 242232 │ 21302571 │ +│ 338158 │ 13507087 │ +│ 62180 │ 12229491 │ +│ 82264 │ 12187441 │ +│ 232261 │ 12148031 │ +│ 146272 │ 11438516 │ +│ 168777 │ 11403636 │ +│ 4120072 │ 11227824 │ +│ 10938808 │ 10519739 │ +│ 74088 │ 9047015 │ +│ 115079 │ 8837972 │ +│ 337234 │ 8205961 │ +└───────────┴──────────┘ + +20 rows in set. Elapsed: 0.153 sec. Processed 1.00 billion rows, 4.00 GB (6.53 billion rows/s., 26.10 GB/s.) + +:) +``` + +2. For CPU. + +Since executing a query requires processing a large number of rows, it helps to dispatch all operations for entire vectors instead of for separate rows, or to implement the query engine so that there is almost no dispatching cost. If you don't do this, with any half-decent disk subsystem, the query interpreter inevitably stalls the CPU. +It makes sense to both store data in columns and process it, when possible, by columns. + +There are two ways to do this: + +1. A vector engine. All operations are written for vectors, instead of for separate values. This means you don't need to call operations very often, and dispatching costs are negligible. 
Operation code contains an optimized internal cycle. + +2. Code generation. The code generated for the query has all the indirect calls in it. + +This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) + +Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. + diff --git a/docs/en/interfaces/index.md b/docs/en/interfaces/index.md index 12f2f1ee50f..3e3e3df4853 100644 --- a/docs/en/interfaces/index.md +++ b/docs/en/interfaces/index.md @@ -2,12 +2,4 @@ # Interfaces -To explore the system's capabilities, download data to tables, or make manual queries, use the clickhouse-client program. - -```eval_rst -.. toctree:: - :glob: - - * -``` - +To explore the system's capabilities, download data to tables, or make manual queries, use the clickhouse-client program. \ No newline at end of file diff --git a/docs/en/introduction/index.md b/docs/en/introduction/index.md index 68f765a3dd2..e10b99d0138 100644 --- a/docs/en/introduction/index.md +++ b/docs/en/introduction/index.md @@ -1,13 +1 @@ # Introduction - -```eval_rst -.. toctree:: - - what_is_clickhouse - distinctive_features - features_considered_disadvantages - ya_metrika_task - possible_silly_questions - performance -``` - diff --git a/docs/en/introduction/what_is_clickhouse.md b/docs/en/introduction/what_is_clickhouse.md deleted file mode 100644 index 72efa70802b..00000000000 --- a/docs/en/introduction/what_is_clickhouse.md +++ /dev/null @@ -1,123 +0,0 @@ -# What is ClickHouse? - -ClickHouse is a columnar DBMS for OLAP. 
- -In a "normal" row-oriented DBMS, data is stored in this order: - -```text -5123456789123456789 1 Eurobasket - Greece - Bosnia and Herzegovina - example.com 1 2011-09-01 01:03:02 6274717 1294101174 11409 612345678912345678 0 33 6 http://www.example.com/basketball/team/123/match/456789.html http://www.example.com/basketball/team/123/match/987654.html 0 1366 768 32 10 3183 0 0 13 0\0 1 1 0 0 2011142 -1 0 0 01321 613 660 2011-09-01 08:01:17 0 0 0 0 utf-8 1466 0 0 0 5678901234567890123 277789954 0 0 0 0 0 -5234985259563631958 0 Consulting, Tax assessment, Accounting, Law 1 2011-09-01 01:03:02 6320881 2111222333 213 6458937489576391093 0 3 2 http://www.example.ru/ 0 800 600 16 10 2 153.1 0 0 10 63 1 1 0 0 2111678 000 0 588 368 240 2011-09-01 01:03:17 4 0 60310 0 windows-1251 1466 0 000 778899001 0 0 0 0 0 -... -``` - -In order words, all the values related to a row are stored next to each other. -Examples of a row-oriented DBMS are MySQL, Postgres, MS SQL Server, and others. - -In a column-oriented DBMS, data is stored like this: - -```text -WatchID: 5385521489354350662 5385521490329509958 5385521489953706054 5385521490476781638 5385521490583269446 5385521490218868806 5385521491437850694 5385521491090174022 5385521490792669254 5385521490420695110 5385521491532181574 5385521491559694406 5385521491459625030 5385521492275175494 5385521492781318214 5385521492710027334 5385521492955615302 5385521493708759110 5385521494506434630 5385521493104611398 -JavaEnable: 1 0 1 0 0 0 1 0 1 1 1 1 1 1 0 1 0 0 1 1 -Title: Yandex Announcements - Investor Relations - Yandex Yandex — Contact us — Moscow Yandex — Mission Ru Yandex — History — History of Yandex Yandex Financial Releases - Investor Relations - Yandex Yandex — Locations Yandex Board of Directors - Corporate Governance - Yandex Yandex — Technologies -GoodEvent: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 -EventTime: 2016-05-18 05:19:20 2016-05-18 08:10:20 2016-05-18 07:38:00 2016-05-18 01:13:08 2016-05-18 00:04:06 2016-05-18 
04:21:30 2016-05-18 00:34:16 2016-05-18 07:35:49 2016-05-18 11:41:59 2016-05-18 01:13:32 -``` - -These examples only show the order that data is arranged in. -The values from different columns are stored separately, and data from the same column is stored together. - -Examples of column-oriented DBMSs: `Vertica`, `Paraccel (Actian Matrix) (Amazon Redshift)`, `Sybase IQ`, `Exasol`, `Infobright`, `InfiniDB`, `MonetDB (VectorWise) (Actian Vector)`, `LucidDB`, `SAP HANA`, `Google Dremel`, `Google PowerDrill`, `Druid`, `kdb+`, and so on. - -Different orders for storing data are better suited to different scenarios. -The data access scenario refers to what queries are made, how often, and in what proportion; how much data is read for each type of query – rows, columns, and bytes; the relationship between reading and updating data; the working size of the data and how locally it is used; whether transactions are used, and how isolated they are; requirements for data replication and logical integrity; requirements for latency and throughput for each type of query, and so on. - -The higher the load on the system, the more important it is to customize the system to the scenario, and the more specific this customization becomes. There is no system that is equally well-suited to significantly different scenarios. If a system is adaptable to a wide set of scenarios, under a high load, the system will handle all the scenarios equally poorly, or will work well for just one of the scenarios. - -We'll say that the following is true for the OLAP (online analytical processing) scenario: - -- The vast majority of requests are for read access. -- Data is updated in fairly large batches (> 1000 rows), not by single rows; or it is not updated at all. -- Data is added to the DB but is not modified. -- For reads, quite a large number of rows are extracted from the DB, but only a small subset of columns. -- Tables are "wide," meaning they contain a large number of columns. 
-- Queries are relatively rare (usually hundreds of queries per server or less per second). -- For simple queries, latencies around 50 ms are allowed. -- Column values are fairly small: numbers and short strings (for example, 60 bytes per URL). -- Requires high throughput when processing a single query (up to billions of rows per second per server). -- There are no transactions. -- Low requirements for data consistency. -- There is one large table per query. All tables are small, except for one. -- A query result is significantly smaller than the source data. In other words, data is filtered or aggregated. The result fits in a single server's RAM. - -It is easy to see that the OLAP scenario is very different from other popular scenarios (such as OLTP or Key-Value access). So it doesn't make sense to try to use OLTP or a Key-Value DB for processing analytical queries if you want to get decent performance. For example, if you try to use MongoDB or Elliptics for analytics, you will get very poor performance compared to OLAP databases. - -Columnar-oriented databases are better suited to OLAP scenarios (at least 100 times better in processing speed for most queries), for the following reasons: - -1. For I/O. -2. For an analytical query, only a small number of table columns need to be read. In a column-oriented database, you can read just the data you need. For example, if you need 5 columns out of 100, you can expect a 20-fold reduction in I/O. -3. Since data is read in packets, it is easier to compress. Data in columns is also easier to compress. This further reduces the I/O volume. -4. Due to the reduced I/O, more data fits in the system cache. - -For example, the query "count the number of records for each advertising platform" requires reading one "advertising platform ID" column, which takes up 1 byte uncompressed. If most of the traffic was not from advertising platforms, you can expect at least 10-fold compression of this column. 
When using a quick compression algorithm, data decompression is possible at a speed of at least several gigabytes of uncompressed data per second. In other words, this query can be processed at a speed of approximately several billion rows per second on a single server. This speed is actually achieved in practice. - -Example: - -```bash -milovidov@hostname:~$ clickhouse-client -ClickHouse client version 0.0.52053. -Connecting to localhost:9000. -Connected to ClickHouse server version 0.0.52053. - -:) SELECT CounterID, count() FROM hits GROUP BY CounterID ORDER BY count() DESC LIMIT 20 - -SELECT - CounterID, - count() -FROM hits -GROUP BY CounterID -ORDER BY count() DESC -LIMIT 20 - -┌─CounterID─┬──count()─┐ -│ 114208 │ 56057344 │ -│ 115080 │ 51619590 │ -│ 3228 │ 44658301 │ -│ 38230 │ 42045932 │ -│ 145263 │ 42042158 │ -│ 91244 │ 38297270 │ -│ 154139 │ 26647572 │ -│ 150748 │ 24112755 │ -│ 242232 │ 21302571 │ -│ 338158 │ 13507087 │ -│ 62180 │ 12229491 │ -│ 82264 │ 12187441 │ -│ 232261 │ 12148031 │ -│ 146272 │ 11438516 │ -│ 168777 │ 11403636 │ -│ 4120072 │ 11227824 │ -│ 10938808 │ 10519739 │ -│ 74088 │ 9047015 │ -│ 115079 │ 8837972 │ -│ 337234 │ 8205961 │ -└───────────┴──────────┘ - -20 rows in set. Elapsed: 0.153 sec. Processed 1.00 billion rows, 4.00 GB (6.53 billion rows/s., 26.10 GB/s.) - -:) -``` - -2. For CPU. - -Since executing a query requires processing a large number of rows, it helps to dispatch all operations for entire vectors instead of for separate rows, or to implement the query engine so that there is almost no dispatching cost. If you don't do this, with any half-decent disk subsystem, the query interpreter inevitably stalls the CPU. -It makes sense to both store data in columns and process it, when possible, by columns. - -There are two ways to do this: - -1. A vector engine. All operations are written for vectors, instead of for separate values. This means you don't need to call operations very often, and dispatching costs are negligible. 
Operation code contains an optimized internal cycle. - -2. Code generation. The code generated for the query has all the indirect calls in it. - -This is not done in "normal" databases, because it doesn't make sense when running simple queries. However, there are exceptions. For example, MemSQL uses code generation to reduce latency when processing SQL queries. (For comparison, analytical DBMSs require optimization of throughput, not latency.) - -Note that for CPU efficiency, the query language must be declarative (SQL or MDX), or at least a vector (J, K). The query should only contain implicit loops, allowing for optimization. - diff --git a/docs/en/operations/index.md b/docs/en/operations/index.md index d7621689769..eb90f937cff 100644 --- a/docs/en/operations/index.md +++ b/docs/en/operations/index.md @@ -1,10 +1 @@ # Operation - -```eval_rst -.. toctree:: - :glob: - - * - */index -``` - diff --git a/docs/en/operations/server_settings/index.md b/docs/en/operations/server_settings/index.md index 365a23a7022..2293e86f5c7 100644 --- a/docs/en/operations/server_settings/index.md +++ b/docs/en/operations/server_settings/index.md @@ -9,11 +9,3 @@ These settings are stored in the ` config.xml` file on the ClickHouse server. Other settings are described in the "[Settings](../settings/index.md#settings)" section. Before studying the settings, read the [Configuration files](../configuration_files.md#configuration_files) section and note the use of substitutions (the `incl` and `optional` attributes). - -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md index 70c660bbb9e..0e967a4c081 100644 --- a/docs/en/operations/settings/index.md +++ b/docs/en/operations/settings/index.md @@ -22,10 +22,3 @@ Similarly, you can use ClickHouse sessions in the HTTP protocol. To do this, you Settings that can only be made in the server config file are not covered in this section. -```eval_rst -.. 
toctree:: - :glob: - - * -``` - diff --git a/docs/en/query_language/index.md b/docs/en/query_language/index.md index 27acbf61e50..247d76fc6ed 100644 --- a/docs/en/query_language/index.md +++ b/docs/en/query_language/index.md @@ -1,9 +1 @@ # Query language - -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/query_language/queries.md b/docs/en/query_language/queries.md index b669b9ff1c1..6e5a9ba88e0 100644 --- a/docs/en/query_language/queries.md +++ b/docs/en/query_language/queries.md @@ -11,6 +11,9 @@ CREATE DATABASE [IF NOT EXISTS] db_name `A database` is just a directory for tables. If `IF NOT EXISTS` is included, the query won't return an error if the database already exists. + + + ## CREATE TABLE The `CREATE TABLE` query can have several forms. diff --git a/docs/en/system_tables/index.md b/docs/en/system_tables/index.md index 71c4c058184..614ce4020ec 100644 --- a/docs/en/system_tables/index.md +++ b/docs/en/system_tables/index.md @@ -6,10 +6,3 @@ System tables don't have files with data on the disk or files with metadata. The System tables are read-only. System tables are located in the 'system' database. -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/table_engines/index.md b/docs/en/table_engines/index.md index f8454e5e155..bb5e01e7903 100644 --- a/docs/en/table_engines/index.md +++ b/docs/en/table_engines/index.md @@ -12,10 +12,3 @@ The table engine (type of table) determines: Note that for most serious tasks, you should use engines from the MergeTree family. -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/en/table_engines/materializedview.md b/docs/en/table_engines/materializedview.md index 3707b27078e..00f70bd72bd 100644 --- a/docs/en/table_engines/materializedview.md +++ b/docs/en/table_engines/materializedview.md @@ -1,4 +1,4 @@ # MaterializedView -Used for implementing materialized views (for more information, see `CREATE MATERIALIZED VIEW`). 
For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine. +Used for implementing materialized views (for more information, see [CREATE TABLE](../query_language/queries.md#query_language-queries-create_table)). For storing data, it uses a different engine that was specified when creating the view. When reading from a table, it just uses this engine. diff --git a/docs/en/table_functions/index.md b/docs/en/table_functions/index.md index 6a0139f7866..fd479dd3faf 100644 --- a/docs/en/table_functions/index.md +++ b/docs/en/table_functions/index.md @@ -4,10 +4,3 @@ Table functions can be specified in the FROM clause instead of the database and Table functions can only be used if 'readonly' is not set. Table functions aren't related to other functions. -```eval_rst -.. toctree:: - :glob: - - * -``` - diff --git a/docs/mkdocs-material-theme/assets/images/Copy (1) favicon.ico b/docs/mkdocs-material-theme/assets/images/Copy (1) favicon.ico deleted file mode 100644 index e85006a3ce1c6fd81faa6d5a13095519c4a6fc96..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1150 zcmd6lF-yZh9L1kl>(HSEK`2y^4yB6->f+$wD)=oNY!UheIt03Q=;qj=;8*Bap_4*& za8yAl;wmmx5Yyi^7dXN-WYdJ-{qNqpcez|5t#Fr0qTSYcPTG`I2PBk8r$~4kg^0zN zCJe(rhix3do!L$bZ+IuZ{i08x=JR3=e+M4pv0KsKA??{u_*EFfo|`p&t`Vf=jn{)F z1fKk9hWsmYwqWAP^JO*5u*R;*L&dX3H$%S7oB$f0{ISh{QVXuncnzN67WQH2`lip7 zhX+VI$6x$1+$8gMjh4+1l0N#8_0Fh=N#EwpKk{SeE!)SHFB@xQFX3y+8sF#_@!bDW eIdI-IC`$c%>bk?KbPeN9RHtL<1^)v~#xMt8oB^@` diff --git a/docs/mkdocs-material-theme/assets/images/Copy (1) favicon.png b/docs/mkdocs-material-theme/assets/images/Copy (1) favicon.png deleted file mode 100644 index 76d17f57ad903c3ea2f1b564cafb95bf9af84ee3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 521 zcmV+k0`~ohP)kdg0005dNkl2WptjAn6@db&Pvy?U$ zv>P|<&rCZfZF0jmq0opf8)91(A<*iIVPPJJT((+JiF~>9KAA3%heFdnI;SaK+~|aU 
zQ~!x`%y{jX1<~SK2RxN7Db8`yWBbf6p7&07{VXfaam*cUs&eu*Zu(xaIL8rP){;a< zS~$}^Td32Rw+W1TqTd|L{#~jJet4!qwKsb5hq%YXiiUV!yH=ltu0>s|FLsT+Iy7K~ z!6*Z0a@vQ;AiZo!=s{{fqR+ct6YQPzbk+j}*qe7vtu39I7 zrOtZqU}=NnLchJxsU9iY+}3TYDl|BvPsX%E@dlyLgdV%q$UP|Y?DfcGb`}K&$;drd z+hL;zy7UTccUYU+h`ONIU|d=%`(0$=KW4%tVWXj~AE+T zHpJ=jR)_#456C)vcj>riCG|l7tj10Dz@Y@A4SW(HJ_Ex)TmFhl-Phqjdl)=j{an^L HB{Ts54eBv) literal 1150 zcmd6lF-yZh9L1kl>(HSEK`2y^4yB6->f+$wD)=oNY!UheIt03Q=;qj=;8*Bap_4*& za8yAl;wmmx5Yyi^7dXN-WYdJ-{qNqpcez|5t#Fr0qTSYcPTG`I2PBk8r$~4kg^0zN zCJe(rhix3do!L$bZ+IuZ{i08x=JR3=e+M4pv0KsKA??{u_*EFfo|`p&t`Vf=jn{)F z1fKk9hWsmYwqWAP^JO*5u*R;*L&dX3H$%S7oB$f0{ISh{QVXuncnzN67WQH2`lip7 zhX+VI$6x$1+$8gMjh4+1l0N#8_0Fh=N#EwpKk{SeE!)SHFB@xQFX3y+8sF#_@!bDW eIdI-IC`$c%>bk?KbPeN9RHtL<1^)v~#xMt8oB^@` diff --git a/docs/mkdocs-material-theme/assets/images/favicon.png b/docs/mkdocs-material-theme/assets/images/favicon.png deleted file mode 100644 index 76d17f57ad903c3ea2f1b564cafb95bf9af84ee3..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 521 zcmV+k0`~ohP)kdg0005dNkl2WptjAn6@db&Pvy?U$ zv>P|<&rCZfZF0jmq0opf8)91(A<*iIVPPJJT((+JiF~>9KAA3%heFdnI;SaK+~|aU zQ~!x`%y{jX1<~SK2RxN7Db8`yWBbf6p7&07{VXfaam*cUs&eu*Zu(xaIL8rP){;a< zS~$}^Td32Rw+W1TqTd|L{#~jJet4!qwKsb5hq%YXiiUV!yH=ltu0>s|FLsT+Iy7K~ z!6*Z0a@vQ;AiZo!=s{{fqR+ct6YQPzbk+j}*qe7vtu39I7 zrOtZqU}=NnLchJxsU9iY+}3TYDl|BvPsX%E@dlyLgdV%q$UP|Y?DfcGb`}K&$;drd z+hL;zy7UTccUYU+h`ONIU|d=%`(0$=KW4%tVWXj~AEWNJc&GyY~s-tP1w5O(4IWRjPhok+j$|(?lz@Q1N zR_@iC`^RU%P%Wi@qW<2j2MrA4Ay5#3MeRP;OyF!Dj2hiYzeIiar+1}KpF{gnF@pjD z2=t#ot-NnX?#CCw7Q2#uiuyX!iVL&H(2oBH4%86nJAqnreS2^}se&bTD;@Lvx}L|l z@TL6E6rk@t&;3Kdz1B0nT`JGV+-}PA$I1V5t++6Y{!jjAgpIqx2_WzX8 z=U4%OL|~!)KjUnl@2Eqw|2Gli41sb8H1F@Lj^FuowU=W(mOuak5P$##hDRX%4*=(3 G68Hs8^JrE8 diff --git a/docs/mkdocs-material-theme/assets/images/logo.svg b/docs/mkdocs-material-theme/assets/images/logo.svg deleted file mode 100644 index 70662da887e..00000000000 --- a/docs/mkdocs-material-theme/assets/images/logo.svg +++ 
/dev/null @@ -1,12 +0,0 @@ - - - - - - - - - diff --git a/docs/mkdocs-material-theme/assets/images/logo_ch.ico b/docs/mkdocs-material-theme/assets/images/logo_ch.ico deleted file mode 100644 index 8fb9595b72ff7980de29b67356b9bfaec211d95d..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4286 zcmeHK!3o1K3{(dw^woD>rr-?WF+4}M@Fv~IhK}F?{0*p*!-ahMwV_DiBqJFM4q0-D zZt^V)siyaX=t)E`fw(9L)Zay7pZkHs(KU^Bs0{{Ua}| + + + + + +