mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-26 17:41:59 +00:00
Initial copy of doc-preview
This commit is contained in:
parent
29296cb004
commit
a4a5a8a7d3
@ -4,5 +4,4 @@ collapsible: true
|
||||
collapsed: true
|
||||
link:
|
||||
type: generated-index
|
||||
title: Database & Table Engines
|
||||
slug: /en/engines
|
||||
slug: en/engines
|
||||
|
@ -1,9 +1,10 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/cell-towers
|
||||
sidebar_label: Cell Towers
|
||||
sidebar_label: Geo Data
|
||||
sidebar_position: 3
|
||||
title: "Cell Towers"
|
||||
title: "Geo Data using the Cell Tower Dataset"
|
||||
---
|
||||
|
||||
import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http.mdx';
|
||||
|
||||
import Tabs from '@theme/Tabs';
|
||||
|
@ -3,14 +3,56 @@ slug: /en/getting-started/example-datasets/criteo
|
||||
sidebar_label: Terabyte Click Logs from Criteo
|
||||
---
|
||||
|
||||
# Terabyte of Click Logs from Criteo
|
||||
# Terabyte of Click Logs from Criteo
|
||||
|
||||
Download the data from http://labs.criteo.com/downloads/download-terabyte-click-logs/
|
||||
|
||||
Create a table to import the log to:
|
||||
|
||||
``` sql
|
||||
CREATE TABLE criteo_log (date Date, clicked UInt8, int1 Int32, int2 Int32, int3 Int32, int4 Int32, int5 Int32, int6 Int32, int7 Int32, int8 Int32, int9 Int32, int10 Int32, int11 Int32, int12 Int32, int13 Int32, cat1 String, cat2 String, cat3 String, cat4 String, cat5 String, cat6 String, cat7 String, cat8 String, cat9 String, cat10 String, cat11 String, cat12 String, cat13 String, cat14 String, cat15 String, cat16 String, cat17 String, cat18 String, cat19 String, cat20 String, cat21 String, cat22 String, cat23 String, cat24 String, cat25 String, cat26 String) ENGINE = Log
|
||||
CREATE TABLE criteo_log (
|
||||
date Date,
|
||||
clicked UInt8,
|
||||
int1 Int32,
|
||||
int2 Int32,
|
||||
int3 Int32,
|
||||
int4 Int32,
|
||||
int5 Int32,
|
||||
int6 Int32,
|
||||
int7 Int32,
|
||||
int8 Int32,
|
||||
int9 Int32,
|
||||
int10 Int32,
|
||||
int11 Int32,
|
||||
int12 Int32,
|
||||
int13 Int32,
|
||||
cat1 String,
|
||||
cat2 String,
|
||||
cat3 String,
|
||||
cat4 String,
|
||||
cat5 String,
|
||||
cat6 String,
|
||||
cat7 String,
|
||||
cat8 String,
|
||||
cat9 String,
|
||||
cat10 String,
|
||||
cat11 String,
|
||||
cat12 String,
|
||||
cat13 String,
|
||||
cat14 String,
|
||||
cat15 String,
|
||||
cat16 String,
|
||||
cat17 String,
|
||||
cat18 String,
|
||||
cat19 String,
|
||||
cat20 String,
|
||||
cat21 String,
|
||||
cat22 String,
|
||||
cat23 String,
|
||||
cat24 String,
|
||||
cat25 String,
|
||||
cat26 String
|
||||
) ENGINE = Log;
|
||||
```
|
||||
|
||||
Download the data:
|
||||
@ -73,7 +115,52 @@ ORDER BY (date, icat1)
|
||||
Transform data from the raw log and put it in the second table:
|
||||
|
||||
``` sql
|
||||
INSERT INTO criteo SELECT date, clicked, int1, int2, int3, int4, int5, int6, int7, int8, int9, int10, int11, int12, int13, reinterpretAsUInt32(unhex(cat1)) AS icat1, reinterpretAsUInt32(unhex(cat2)) AS icat2, reinterpretAsUInt32(unhex(cat3)) AS icat3, reinterpretAsUInt32(unhex(cat4)) AS icat4, reinterpretAsUInt32(unhex(cat5)) AS icat5, reinterpretAsUInt32(unhex(cat6)) AS icat6, reinterpretAsUInt32(unhex(cat7)) AS icat7, reinterpretAsUInt32(unhex(cat8)) AS icat8, reinterpretAsUInt32(unhex(cat9)) AS icat9, reinterpretAsUInt32(unhex(cat10)) AS icat10, reinterpretAsUInt32(unhex(cat11)) AS icat11, reinterpretAsUInt32(unhex(cat12)) AS icat12, reinterpretAsUInt32(unhex(cat13)) AS icat13, reinterpretAsUInt32(unhex(cat14)) AS icat14, reinterpretAsUInt32(unhex(cat15)) AS icat15, reinterpretAsUInt32(unhex(cat16)) AS icat16, reinterpretAsUInt32(unhex(cat17)) AS icat17, reinterpretAsUInt32(unhex(cat18)) AS icat18, reinterpretAsUInt32(unhex(cat19)) AS icat19, reinterpretAsUInt32(unhex(cat20)) AS icat20, reinterpretAsUInt32(unhex(cat21)) AS icat21, reinterpretAsUInt32(unhex(cat22)) AS icat22, reinterpretAsUInt32(unhex(cat23)) AS icat23, reinterpretAsUInt32(unhex(cat24)) AS icat24, reinterpretAsUInt32(unhex(cat25)) AS icat25, reinterpretAsUInt32(unhex(cat26)) AS icat26 FROM criteo_log;
|
||||
INSERT INTO
|
||||
criteo
|
||||
SELECT
|
||||
date,
|
||||
clicked,
|
||||
int1,
|
||||
int2,
|
||||
int3,
|
||||
int4,
|
||||
int5,
|
||||
int6,
|
||||
int7,
|
||||
int8,
|
||||
int9,
|
||||
int10,
|
||||
int11,
|
||||
int12,
|
||||
int13,
|
||||
reinterpretAsUInt32(unhex(cat1)) AS icat1,
|
||||
reinterpretAsUInt32(unhex(cat2)) AS icat2,
|
||||
reinterpretAsUInt32(unhex(cat3)) AS icat3,
|
||||
reinterpretAsUInt32(unhex(cat4)) AS icat4,
|
||||
reinterpretAsUInt32(unhex(cat5)) AS icat5,
|
||||
reinterpretAsUInt32(unhex(cat6)) AS icat6,
|
||||
reinterpretAsUInt32(unhex(cat7)) AS icat7,
|
||||
reinterpretAsUInt32(unhex(cat8)) AS icat8,
|
||||
reinterpretAsUInt32(unhex(cat9)) AS icat9,
|
||||
reinterpretAsUInt32(unhex(cat10)) AS icat10,
|
||||
reinterpretAsUInt32(unhex(cat11)) AS icat11,
|
||||
reinterpretAsUInt32(unhex(cat12)) AS icat12,
|
||||
reinterpretAsUInt32(unhex(cat13)) AS icat13,
|
||||
reinterpretAsUInt32(unhex(cat14)) AS icat14,
|
||||
reinterpretAsUInt32(unhex(cat15)) AS icat15,
|
||||
reinterpretAsUInt32(unhex(cat16)) AS icat16,
|
||||
reinterpretAsUInt32(unhex(cat17)) AS icat17,
|
||||
reinterpretAsUInt32(unhex(cat18)) AS icat18,
|
||||
reinterpretAsUInt32(unhex(cat19)) AS icat19,
|
||||
reinterpretAsUInt32(unhex(cat20)) AS icat20,
|
||||
reinterpretAsUInt32(unhex(cat21)) AS icat21,
|
||||
reinterpretAsUInt32(unhex(cat22)) AS icat22,
|
||||
reinterpretAsUInt32(unhex(cat23)) AS icat23,
|
||||
reinterpretAsUInt32(unhex(cat24)) AS icat24,
|
||||
reinterpretAsUInt32(unhex(cat25)) AS icat25,
|
||||
reinterpretAsUInt32(unhex(cat26)) AS icat26
|
||||
FROM
|
||||
criteo_log;
|
||||
|
||||
DROP TABLE criteo_log;
|
||||
```
|
||||
|
@ -1,12 +1,13 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/github
|
||||
sidebar_label: GitHub Repo Analysis
|
||||
sidebar_label: Writing Queries
|
||||
sidebar_position: 1
|
||||
description: Analyze the ClickHouse GitHub repo or any repository of your choosing
|
||||
---
|
||||
|
||||
# ClickHouse GitHub data
|
||||
# Writing Queries in ClickHouse using GitHub Data
|
||||
|
||||
This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse.
|
||||
This dataset contains all of the commits and changes for the ClickHouse repository. It can be generated using the native `git-import` tool distributed with ClickHouse.
|
||||
|
||||
The generated data provides a `tsv` file for each of the following tables:
|
||||
|
||||
@ -323,7 +324,7 @@ Note a more complex variant of this query exists where we find the [line-by-line
|
||||
|
||||
## Find the current active files
|
||||
|
||||
This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named).
|
||||
This is important for later analysis when we only want to consider the current files in the repository. We estimate this set as the files which haven't been renamed or deleted (and then re-added/re-named).
|
||||
|
||||
**Note there appears to have been a broken commit history in relation to files under the `dbms`, `libs`, `tests/testflows/` directories during their renames. We also thus exclude these.**
|
||||
|
||||
@ -417,7 +418,7 @@ git ls-files | grep -v -E 'generated\.cpp|^(contrib|docs?|website|libs/(libcityh
|
||||
|
||||
The difference here is caused by a few factors:
|
||||
|
||||
- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained.
|
||||
- A rename can occur alongside other modifications to the file. These are listed as separate events in file_changes but with the same time. The `argMax` function has no way of distinguishing these - it picks the first value. The natural ordering of the inserts (the only means of knowing the correct order) is not maintained across the union so modified events can be selected. For example, below the `src/Functions/geometryFromColumn.h` file has several modifications before being renamed to `src/Functions/geometryConverters.h`. Our current solution may pick a Modify event as the latest change causing `src/Functions/geometryFromColumn.h` to be retained.
|
||||
|
||||
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICAgIGNoYW5nZV90eXBlLAogICAgICBwYXRoLAogICAgICBvbGRfcGF0aCwKICAgICAgdGltZSwKICAgICAgY29tbWl0X2hhc2gKICBGUk9NIGdpdF9jbGlja2hvdXNlLmZpbGVfY2hhbmdlcwogIFdIRVJFIChwYXRoID0gJ3NyYy9GdW5jdGlvbnMvZ2VvbWV0cnlGcm9tQ29sdW1uLmgnKSBPUiAob2xkX3BhdGggPSAnc3JjL0Z1bmN0aW9ucy9nZW9tZXRyeUZyb21Db2x1bW4uaCcpCg==)
|
||||
|
||||
@ -1386,7 +1387,7 @@ LIMIT 1 BY day_of_week
|
||||
7 rows in set. Elapsed: 0.004 sec. Processed 21.82 thousand rows, 140.02 KB (4.88 million rows/s., 31.29 MB/s.)
|
||||
```
|
||||
|
||||
This is still a little simple and doesn't reflect people's work.
|
||||
This is still a little simple and doesn't reflect people's work.
|
||||
|
||||
A better metric might be who is the top contributor each day as a fraction of the total work performed in the last year. Note that we treat deleting and adding code equally.
|
||||
|
||||
@ -1952,7 +1953,7 @@ SELECT
|
||||
|
||||
Most contributors write more code than tests, as you'd expect.
|
||||
|
||||
What about who adds the most comments when contributing code?
|
||||
What about who adds the most comments when contributing code?
|
||||
|
||||
[play](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhdXRob3IsCiAgICBhdmcocmF0aW9fY29tbWVudHMpIEFTIGF2Z19yYXRpb19jb21tZW50cywKICAgIHN1bShjb2RlKSBBUyBjb2RlCkZST00KKAogICAgU0VMRUNUCiAgICAgICAgYXV0aG9yLAogICAgICAgIGNvbW1pdF9oYXNoLAogICAgICAgIGNvdW50SWYobGluZV90eXBlID0gJ0NvbW1lbnQnKSBBUyBjb21tZW50cywKICAgICAgICBjb3VudElmKGxpbmVfdHlwZSA9ICdDb2RlJykgQVMgY29kZSwKICAgICAgICBpZihjb21tZW50cyA+IDAsIGNvbW1lbnRzIC8gKGNvbW1lbnRzICsgY29kZSksIDApIEFTIHJhdGlvX2NvbW1lbnRzCiAgICBGUk9NIGdpdF9jbGlja2hvdXNlLmxpbmVfY2hhbmdlcwogICAgR1JPVVAgQlkKICAgICAgICBhdXRob3IsCiAgICAgICAgY29tbWl0X2hhc2gKKQpHUk9VUCBCWSBhdXRob3IKT1JERVIgQlkgY29kZSBERVNDCkxJTUlUIDEwCg==)
|
||||
|
||||
@ -2393,7 +2394,7 @@ WHERE (path = 'src/Storages/StorageReplicatedMergeTree.cpp') AND (change_type =
|
||||
|
||||
This makes viewing the full history of a file challenging since we don't have a single value connecting all line or file changes.
|
||||
|
||||
To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly.
|
||||
To address this, we can use User Defined Functions (UDFs). These cannot, currently, be recursive, so to identify the history of a file we must define a series of UDFs which call each other explicitly.
|
||||
|
||||
This means we can only track renames to a maximum depth - the below example is 5 deep. It is unlikely a file will be renamed more times than this, so for now, this is sufficient.
|
||||
|
||||
|
@ -1,17 +1,17 @@
|
||||
---
|
||||
slug: /en/getting-started/example-datasets/uk-price-paid
|
||||
sidebar_label: UK Property Price Paid
|
||||
sidebar_label: Defining Projections
|
||||
sidebar_position: 1
|
||||
title: "UK Property Price Paid"
|
||||
---
|
||||
|
||||
The dataset contains data about prices paid for real-estate property in England and Wales. The data is available since year 1995.
|
||||
The size of the dataset in uncompressed form is about 4 GiB and it will take about 278 MiB in ClickHouse.
|
||||
# Improving Performance using Projections
|
||||
|
||||
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
|
||||
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
|
||||
Projections are a great way to improve the performance of queries that you run frequently. We will demonstrate the power of projections
|
||||
using the UK property dataset, which contains data about prices paid for real-estate property in England and Wales. The data has been available since 1995, and the size of the dataset in uncompressed form is about 4 GiB (which will only take about 278 MiB in ClickHouse).
|
||||
|
||||
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
|
||||
- Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
|
||||
- Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
|
||||
- Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
|
||||
|
||||
## Create the Table {#create-table}
|
||||
|
||||
|
@ -0,0 +1,4 @@
|
||||
:::tip
|
||||
If you are using a dictionary with ClickHouse Cloud, please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
|
||||
Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/cloud/reference/cloud-compatibility.md).
|
||||
:::
|
File diff suppressed because it is too large
Load Diff
23
docs/en/sql-reference/index.md
Normal file
23
docs/en/sql-reference/index.md
Normal file
@ -0,0 +1,23 @@
|
||||
---
|
||||
slug: /en/sql-reference
|
||||
keywords: [clickhouse, docs, sql reference, sql statements, sql, syntax]
|
||||
title: SQL Reference
|
||||
---
|
||||
|
||||
import { TwoColumnList } from '/src/components/two_column_list'
|
||||
import { ClickableSquare } from '/src/components/clickable_square'
|
||||
import { HorizontalDivide } from '/src/components/horizontal_divide'
|
||||
import { ViewAllLink } from '/src/components/view_all_link'
|
||||
import { VideoContainer } from '/src/components/video_container'
|
||||
|
||||
import LinksDeployment from './sql-reference-links.json'
|
||||
|
||||
# ClickHouse SQL Reference
|
||||
|
||||
ClickHouse supports a declarative query language based on SQL that is identical to the ANSI SQL standard in many cases.
|
||||
|
||||
Supported queries include GROUP BY, ORDER BY, subqueries in FROM, the JOIN clause, the IN operator, window functions, and scalar subqueries.
|
||||
|
||||
<HorizontalDivide />
|
||||
|
||||
<TwoColumnList items={LinksDeployment} />
|
12
docs/en/sql-reference/sql-reference-links.json
Normal file
12
docs/en/sql-reference/sql-reference-links.json
Normal file
@ -0,0 +1,12 @@
|
||||
[
|
||||
{
|
||||
"title": "Statements",
|
||||
"description": "A list of available SQL statements in ClickHouse",
|
||||
"url": "/docs/en/sql-reference/statements/"
|
||||
},
|
||||
{
|
||||
"title": "Database and Table Engines",
|
||||
"description": "Engines determine where and how your data is stored",
|
||||
"url": "/docs/en/engines"
|
||||
}
|
||||
]
|
@ -1,6 +1,6 @@
|
||||
---
|
||||
slug: /en/sql-reference/statements/select/join
|
||||
sidebar_label: JOIN
|
||||
sidebar_label: Joining Tables
|
||||
---
|
||||
|
||||
# JOIN Clause
|
||||
|
Loading…
Reference in New Issue
Block a user