│
+└─────────────────┴──────────────────────┴───────────────────────────────────────────────────────────────┘
+20 rows in set. Elapsed: 0.547 sec. Processed 7.88 million rows, 679.20 MB (14.42 million rows/s., 1.24 GB/s.)
+```
+
+We welcome exact and improved solutions here.
+
+
diff --git a/docs/en/getting-started/example-datasets/images/superset-add-dataset.png b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png
new file mode 100644
index 00000000000..aaa976d76ce
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add-dataset.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-add.png b/docs/en/getting-started/example-datasets/images/superset-add.png
new file mode 100644
index 00000000000..54bbf11a014
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-add.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-authors-matrix.png b/docs/en/getting-started/example-datasets/images/superset-authors-matrix.png
new file mode 100644
index 00000000000..bdfc6b6f304
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-authors-matrix.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-authors-matrix_v2.png b/docs/en/getting-started/example-datasets/images/superset-authors-matrix_v2.png
new file mode 100644
index 00000000000..aad98b5b077
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-authors-matrix_v2.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png
new file mode 100644
index 00000000000..8197ea223c2
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-cell-tower-dashboard.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png
new file mode 100644
index 00000000000..40c71e0a053
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-choose-a-database.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-commits-authors.png b/docs/en/getting-started/example-datasets/images/superset-commits-authors.png
new file mode 100644
index 00000000000..7be831467cf
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-commits-authors.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png
new file mode 100644
index 00000000000..f67d0663063
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-connect-a-database.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-create-map.png b/docs/en/getting-started/example-datasets/images/superset-create-map.png
new file mode 100644
index 00000000000..5ad4395eb13
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-create-map.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-github-lines-added-deleted.png b/docs/en/getting-started/example-datasets/images/superset-github-lines-added-deleted.png
new file mode 100644
index 00000000000..48dbad1934d
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-github-lines-added-deleted.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-lon-lat.png b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png
new file mode 100644
index 00000000000..f07fb899e72
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-lon-lat.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-mcc-204.png b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png
new file mode 100644
index 00000000000..a561c539b58
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-mcc-204.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-radio-umts.png b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png
new file mode 100644
index 00000000000..b0b31b6dbc0
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-radio-umts.png differ
diff --git a/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png
new file mode 100644
index 00000000000..5cb887cb5c1
Binary files /dev/null and b/docs/en/getting-started/example-datasets/images/superset-umts-netherlands.png differ
diff --git a/docs/en/getting-started/example-datasets/nyc-taxi.md b/docs/en/getting-started/example-datasets/nyc-taxi.md
index 11621cfa5f5..69098f63037 100644
--- a/docs/en/getting-started/example-datasets/nyc-taxi.md
+++ b/docs/en/getting-started/example-datasets/nyc-taxi.md
@@ -5,285 +5,195 @@ sidebar_position: 2
description: Data for billions of taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009
---
-# New York Taxi Data
+# New York Taxi Data
-This dataset can be obtained in two ways:
+The New York taxi data consists of 3+ billion taxi and for-hire vehicle (Uber, Lyft, etc.) trips originating in New York City since 2009. The dataset can be obtained in a couple of ways:
-- import from raw data
-- download of prepared partitions
+- insert the data directly into ClickHouse Cloud from S3
+- download prepared partitions
-## How to Import the Raw Data {#how-to-import-the-raw-data}
+## Retrieve the Data from S3
+
+Let's grab a small subset of the data to get familiar with it. The data lives in TSV files in AWS S3 and can easily be streamed into
+ClickHouse Cloud using the `s3` table function. Start by creating a table for the taxi rides:
+
+```sql
+CREATE TABLE trips (
+ trip_id UInt32,
+ pickup_datetime DateTime,
+ dropoff_datetime DateTime,
+ pickup_longitude Nullable(Float64),
+ pickup_latitude Nullable(Float64),
+ dropoff_longitude Nullable(Float64),
+ dropoff_latitude Nullable(Float64),
+ passenger_count UInt8,
+ trip_distance Float32,
+ fare_amount Float32,
+ extra Float32,
+ tip_amount Float32,
+ tolls_amount Float32,
+ total_amount Float32,
+ payment_type Enum('CSH' = 1, 'CRE' = 2, 'NOC' = 3, 'DIS' = 4, 'UNK' = 5),
+ pickup_ntaname LowCardinality(String),
+ dropoff_ntaname LowCardinality(String)
+)
+ENGINE = MergeTree
+PRIMARY KEY (pickup_datetime, dropoff_datetime)
+```
+
+The following command streams three files from an S3 bucket into the `trips` table (the `{0..2}` syntax is a wildcard for the values 0, 1, and 2):
+
+```sql
+INSERT INTO trips
+SELECT
+ trip_id,
+ pickup_datetime,
+ dropoff_datetime,
+ pickup_longitude,
+ pickup_latitude,
+ dropoff_longitude,
+ dropoff_latitude,
+ passenger_count,
+ trip_distance,
+ fare_amount,
+ extra,
+ tip_amount,
+ tolls_amount,
+ total_amount,
+ payment_type,
+ pickup_ntaname,
+ dropoff_ntaname
+FROM s3(
+ 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_{0..2}.gz',
+ 'TabSeparatedWithNames'
+)
+```
+
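+Incidentally, the `s3` function can also be queried in place. A direct `SELECT` against one of the files is a handy way to preview data before inserting it (a quick sketch using the first file from the set above):
+
+```sql
+SELECT trip_id, pickup_datetime, trip_distance, total_amount
+FROM s3(
+    'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_0.gz',
+    'TabSeparatedWithNames'
+)
+LIMIT 5
+```
+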
+Let's see how many rows were inserted:
+
+```sql
+SELECT count()
+FROM trips
+```
+
+Each TSV file has about 1M rows, and the three files contain 3,000,317 rows in total. Let's look at a few of them:
+
+```sql
+SELECT *
+FROM trips
+LIMIT 10
+```
+
+Notice there are columns for the pickup and dropoff dates, geo coordinates, fare details, New York neighborhoods, and more:
+
+```response
+┌────trip_id─┬─────pickup_datetime─┬────dropoff_datetime─┬───pickup_longitude─┬────pickup_latitude─┬──dropoff_longitude─┬───dropoff_latitude─┬─passenger_count─┬─trip_distance─┬─fare_amount─┬─extra─┬─tip_amount─┬─tolls_amount─┬─total_amount─┬─payment_type─┬─pickup_ntaname─────────────────────────────┬─dropoff_ntaname────────────────────────────┐
+│ 1200864931 │ 2015-07-01 00:00:13 │ 2015-07-01 00:14:41 │ -73.99046325683594 │ 40.746116638183594 │ -73.97918701171875 │ 40.78467559814453 │ 5 │ 3.54 │ 13.5 │ 0.5 │ 1 │ 0 │ 15.8 │ CSH │ Midtown-Midtown South │ Upper West Side │
+│ 1200018648 │ 2015-07-01 00:00:16 │ 2015-07-01 00:02:57 │ -73.78358459472656 │ 40.648677825927734 │ -73.80242919921875 │ 40.64767837524414 │ 1 │ 1.45 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ Airport │ Airport │
+│ 1201452450 │ 2015-07-01 00:00:20 │ 2015-07-01 00:11:07 │ -73.98579406738281 │ 40.72777557373047 │ -74.00482177734375 │ 40.73748779296875 │ 5 │ 1.56 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ East Village │ West Village │
+│ 1202368372 │ 2015-07-01 00:00:40 │ 2015-07-01 00:05:46 │ -74.00206756591797 │ 40.73833084106445 │ -74.00658416748047 │ 40.74875259399414 │ 2 │ 1 │ 6 │ 0.5 │ 0 │ 0 │ 7.3 │ CRE │ West Village │ Hudson Yards-Chelsea-Flatiron-Union Square │
+│ 1200831168 │ 2015-07-01 00:01:06 │ 2015-07-01 00:09:23 │ -73.98748016357422 │ 40.74344253540039 │ -74.00575256347656 │ 40.716793060302734 │ 1 │ 2.3 │ 9 │ 0.5 │ 2 │ 0 │ 12.3 │ CSH │ Hudson Yards-Chelsea-Flatiron-Union Square │ SoHo-TriBeCa-Civic Center-Little Italy │
+│ 1201362116 │ 2015-07-01 00:01:07 │ 2015-07-01 00:03:31 │ -73.9926986694336 │ 40.75826644897461 │ -73.98628997802734 │ 40.76075744628906 │ 1 │ 0.6 │ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Clinton │ Midtown-Midtown South │
+│ 1200639419 │ 2015-07-01 00:01:13 │ 2015-07-01 00:03:56 │ -74.00382995605469 │ 40.741981506347656 │ -73.99711608886719 │ 40.742271423339844 │ 1 │ 0.49 │ 4 │ 0.5 │ 0 │ 0 │ 5.3 │ CRE │ Hudson Yards-Chelsea-Flatiron-Union Square │ Hudson Yards-Chelsea-Flatiron-Union Square │
+│ 1201181622 │ 2015-07-01 00:01:17 │ 2015-07-01 00:05:12 │ -73.9512710571289 │ 40.78261947631836 │ -73.95230865478516 │ 40.77476119995117 │ 4 │ 0.97 │ 5 │ 0.5 │ 1 │ 0 │ 7.3 │ CSH │ Upper East Side-Carnegie Hill │ Yorkville │
+│ 1200978273 │ 2015-07-01 00:01:28 │ 2015-07-01 00:09:46 │ -74.00822448730469 │ 40.72113037109375 │ -74.00422668457031 │ 40.70782470703125 │ 1 │ 1.71 │ 8.5 │ 0.5 │ 1.96 │ 0 │ 11.76 │ CSH │ SoHo-TriBeCa-Civic Center-Little Italy │ Battery Park City-Lower Manhattan │
+│ 1203283366 │ 2015-07-01 00:01:47 │ 2015-07-01 00:24:26 │ -73.98199462890625 │ 40.77289962768555 │ -73.91968536376953 │ 40.766082763671875 │ 3 │ 5.26 │ 19.5 │ 0.5 │ 5.2 │ 0 │ 26 │ CSH │ Lincoln Square │ Astoria │
+└────────────┴─────────────────────┴─────────────────────┴────────────────────┴────────────────────┴────────────────────┴────────────────────┴─────────────────┴───────────────┴─────────────┴───────┴────────────┴──────────────┴──────────────┴──────────────┴────────────────────────────────────────────┴────────────────────────────────────────────┘
+```
+
+Let's run a few queries. This query shows the top 10 neighborhoods with the most pickups:
+
+```sql
+SELECT
+ pickup_ntaname,
+ count(*) AS count
+FROM trips
+GROUP BY pickup_ntaname
+ORDER BY count DESC
+LIMIT 10
+```
+
+The result is:
+
+```response
+┌─pickup_ntaname─────────────────────────────┬──count─┐
+│ Midtown-Midtown South │ 526864 │
+│ Hudson Yards-Chelsea-Flatiron-Union Square │ 288797 │
+│ West Village │ 210436 │
+│ Turtle Bay-East Midtown │ 197111 │
+│ Upper East Side-Carnegie Hill │ 184327 │
+│ Airport │ 151343 │
+│ SoHo-TriBeCa-Civic Center-Little Italy │ 144967 │
+│ Murray Hill-Kips Bay │ 138599 │
+│ Upper West Side │ 135469 │
+│ Clinton │ 130002 │
+└────────────────────────────────────────────┴────────┘
+```
+
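+The same query shape works for dropoffs - swap in `dropoff_ntaname` to see where trips most often end:
+
+```sql
+SELECT
+    dropoff_ntaname,
+    count(*) AS count
+FROM trips
+GROUP BY dropoff_ntaname
+ORDER BY count DESC
+LIMIT 10
+```
+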
+This query shows the average fare based on the number of passengers:
+
+```sql
+SELECT
+ passenger_count,
+ avg(total_amount)
+FROM trips
+GROUP BY passenger_count
+```
+
+```response
+┌─passenger_count─┬──avg(total_amount)─┐
+│ 0 │ 25.226335263065018 │
+│ 1 │ 15.961279340656672 │
+│ 2 │ 17.146174183960667 │
+│ 3 │ 17.65380033178517 │
+│ 4 │ 17.248804201047456 │
+│ 5 │ 16.353501285179135 │
+│ 6 │ 15.995094439202836 │
+│ 7 │ 62.077143805367605 │
+│ 8 │ 26.120000791549682 │
+│ 9 │ 10.300000190734863 │
+└─────────────────┴────────────────────┘
+```
+
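+The averages for seven or more passengers look suspicious; adding a `count()` to the same aggregation shows how many trips each average is based on (a quick variant of the query above):
+
+```sql
+SELECT
+    passenger_count,
+    avg(total_amount) AS average_fare,
+    count() AS trips
+FROM trips
+GROUP BY passenger_count
+ORDER BY passenger_count
+```
+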
+This query looks at the relationship between the number of passengers and the trip distance, broken down by year:
+
+```sql
+SELECT
+ passenger_count,
+ toYear(pickup_datetime) AS year,
+ round(trip_distance) AS distance,
+ count(*)
+FROM trips
+GROUP BY passenger_count, year, distance
+ORDER BY year, count(*) DESC
+```
+
+The first part of the result is:
+
+```response
+┌─passenger_count─┬─year─┬─distance─┬─count()─┐
+│ 1 │ 2015 │ 1 │ 748644 │
+│ 1 │ 2015 │ 2 │ 521602 │
+│ 1 │ 2015 │ 3 │ 225077 │
+│ 2 │ 2015 │ 1 │ 144990 │
+│ 1 │ 2015 │ 4 │ 134782 │
+│ 1 │ 2015 │ 0 │ 127284 │
+│ 2 │ 2015 │ 2 │ 106411 │
+│ 1 │ 2015 │ 5 │ 72725 │
+│ 5 │ 2015 │ 1 │ 59343 │
+│ 1 │ 2015 │ 6 │ 53447 │
+│ 2 │ 2015 │ 3 │ 48019 │
+│ 3 │ 2015 │ 1 │ 44865 │
+│ 6 │ 2015 │ 1 │ 39409 │
+```
+
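+The breakdown above hints at a relationship, but for a single number you can ask ClickHouse for the Pearson correlation coefficient directly with the `corr` aggregate function (a quick sketch):
+
+```sql
+SELECT corr(toFloat64(passenger_count), toFloat64(trip_distance))
+FROM trips
+```
+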
+## Download of Prepared Partitions {#download-of-prepared-partitions}
See https://github.com/toddwschneider/nyc-taxi-data and http://tech.marksblogg.com/billion-nyc-taxi-rides-redshift.html for a description of the dataset and instructions for downloading it.
Downloading will result in about 227 GB of uncompressed data in CSV files. The download takes about an hour over a 1 Gbit connection (parallel downloading from s3.amazonaws.com recovers at least half of a 1 Gbit channel).
Some of the files might not download fully. Check the file sizes and re-download any that seem doubtful.
-Some of the files might contain invalid rows. You can fix them as follows:
-
-``` bash
-sed -E '/(.*,){18,}/d' data/yellow_tripdata_2010-02.csv > data/yellow_tripdata_2010-02.csv_
-sed -E '/(.*,){18,}/d' data/yellow_tripdata_2010-03.csv > data/yellow_tripdata_2010-03.csv_
-mv data/yellow_tripdata_2010-02.csv_ data/yellow_tripdata_2010-02.csv
-mv data/yellow_tripdata_2010-03.csv_ data/yellow_tripdata_2010-03.csv
-```
-
-Then the data must be pre-processed in PostgreSQL. This will create selections of points in the polygons (to match points on the map with the boroughs of New York City) and combine all the data into a single denormalized flat table by using a JOIN. To do this, you will need to install PostgreSQL with PostGIS support.
-
-Be careful when running `initialize_database.sh` and manually re-check that all the tables were created correctly.
-
-It takes about 20-30 minutes to process each month’s worth of data in PostgreSQL, for a total of about 48 hours.
-
-You can check the number of downloaded rows as follows:
-
-``` bash
-$ time psql nyc-taxi-data -c "SELECT count(*) FROM trips;"
-## Count
- 1298979494
-(1 row)
-
-real 7m9.164s
-```
-
-(This is slightly more than 1.1 billion rows reported by Mark Litwintschik in a series of blog posts.)
-
-The data in PostgreSQL uses 370 GB of space.
-
-Exporting the data from PostgreSQL:
-
-``` sql
-COPY
-(
- SELECT trips.id,
- trips.vendor_id,
- trips.pickup_datetime,
- trips.dropoff_datetime,
- trips.store_and_fwd_flag,
- trips.rate_code_id,
- trips.pickup_longitude,
- trips.pickup_latitude,
- trips.dropoff_longitude,
- trips.dropoff_latitude,
- trips.passenger_count,
- trips.trip_distance,
- trips.fare_amount,
- trips.extra,
- trips.mta_tax,
- trips.tip_amount,
- trips.tolls_amount,
- trips.ehail_fee,
- trips.improvement_surcharge,
- trips.total_amount,
- trips.payment_type,
- trips.trip_type,
- trips.pickup,
- trips.dropoff,
-
- cab_types.type cab_type,
-
- weather.precipitation_tenths_of_mm rain,
- weather.snow_depth_mm,
- weather.snowfall_mm,
- weather.max_temperature_tenths_degrees_celsius max_temp,
- weather.min_temperature_tenths_degrees_celsius min_temp,
- weather.average_wind_speed_tenths_of_meters_per_second wind,
-
- pick_up.gid pickup_nyct2010_gid,
- pick_up.ctlabel pickup_ctlabel,
- pick_up.borocode pickup_borocode,
- pick_up.boroname pickup_boroname,
- pick_up.ct2010 pickup_ct2010,
- pick_up.boroct2010 pickup_boroct2010,
- pick_up.cdeligibil pickup_cdeligibil,
- pick_up.ntacode pickup_ntacode,
- pick_up.ntaname pickup_ntaname,
- pick_up.puma pickup_puma,
-
- drop_off.gid dropoff_nyct2010_gid,
- drop_off.ctlabel dropoff_ctlabel,
- drop_off.borocode dropoff_borocode,
- drop_off.boroname dropoff_boroname,
- drop_off.ct2010 dropoff_ct2010,
- drop_off.boroct2010 dropoff_boroct2010,
- drop_off.cdeligibil dropoff_cdeligibil,
- drop_off.ntacode dropoff_ntacode,
- drop_off.ntaname dropoff_ntaname,
- drop_off.puma dropoff_puma
- FROM trips
- LEFT JOIN cab_types
- ON trips.cab_type_id = cab_types.id
- LEFT JOIN central_park_weather_observations_raw weather
- ON weather.date = trips.pickup_datetime::date
- LEFT JOIN nyct2010 pick_up
- ON pick_up.gid = trips.pickup_nyct2010_gid
- LEFT JOIN nyct2010 drop_off
- ON drop_off.gid = trips.dropoff_nyct2010_gid
-) TO '/opt/milovidov/nyc-taxi-data/trips.tsv';
-```
-
-The data snapshot is created at a speed of about 50 MB per second. While creating the snapshot, PostgreSQL reads from the disk at a speed of about 28 MB per second.
-This takes about 5 hours. The resulting TSV file is 590612904969 bytes.
-
-Create a temporary table in ClickHouse:
-
-``` sql
-CREATE TABLE trips
-(
-trip_id UInt32,
-vendor_id String,
-pickup_datetime DateTime,
-dropoff_datetime Nullable(DateTime),
-store_and_fwd_flag Nullable(FixedString(1)),
-rate_code_id Nullable(UInt8),
-pickup_longitude Nullable(Float64),
-pickup_latitude Nullable(Float64),
-dropoff_longitude Nullable(Float64),
-dropoff_latitude Nullable(Float64),
-passenger_count Nullable(UInt8),
-trip_distance Nullable(Float64),
-fare_amount Nullable(Float32),
-extra Nullable(Float32),
-mta_tax Nullable(Float32),
-tip_amount Nullable(Float32),
-tolls_amount Nullable(Float32),
-ehail_fee Nullable(Float32),
-improvement_surcharge Nullable(Float32),
-total_amount Nullable(Float32),
-payment_type Nullable(String),
-trip_type Nullable(UInt8),
-pickup Nullable(String),
-dropoff Nullable(String),
-cab_type Nullable(String),
-precipitation Nullable(UInt8),
-snow_depth Nullable(UInt8),
-snowfall Nullable(UInt8),
-max_temperature Nullable(UInt8),
-min_temperature Nullable(UInt8),
-average_wind_speed Nullable(UInt8),
-pickup_nyct2010_gid Nullable(UInt8),
-pickup_ctlabel Nullable(String),
-pickup_borocode Nullable(UInt8),
-pickup_boroname Nullable(String),
-pickup_ct2010 Nullable(String),
-pickup_boroct2010 Nullable(String),
-pickup_cdeligibil Nullable(FixedString(1)),
-pickup_ntacode Nullable(String),
-pickup_ntaname Nullable(String),
-pickup_puma Nullable(String),
-dropoff_nyct2010_gid Nullable(UInt8),
-dropoff_ctlabel Nullable(String),
-dropoff_borocode Nullable(UInt8),
-dropoff_boroname Nullable(String),
-dropoff_ct2010 Nullable(String),
-dropoff_boroct2010 Nullable(String),
-dropoff_cdeligibil Nullable(String),
-dropoff_ntacode Nullable(String),
-dropoff_ntaname Nullable(String),
-dropoff_puma Nullable(String)
-) ENGINE = Log;
-```
-
-It is needed for converting fields to more correct data types and, if possible, to eliminate NULLs.
-
-``` bash
-$ time clickhouse-client --query="INSERT INTO trips FORMAT TabSeparated" < trips.tsv
-
-real 75m56.214s
-```
-
-Data is read at a speed of 112-140 Mb/second.
-Loading data into a Log type table in one stream took 76 minutes.
-The data in this table uses 142 GB.
-
-(Importing data directly from Postgres is also possible using `COPY ... TO PROGRAM`.)
-
-Unfortunately, all the fields associated with the weather (precipitation…average_wind_speed) were filled with NULL. Because of this, we will remove them from the final data set.
-
-To start, we’ll create a table on a single server. Later we will make the table distributed.
-
-Create and populate a summary table:
-
-``` sql
-CREATE TABLE trips_mergetree
-ENGINE = MergeTree(pickup_date, pickup_datetime, 8192)
-AS SELECT
-
-trip_id,
-CAST(vendor_id AS Enum8('1' = 1, '2' = 2, 'CMT' = 3, 'VTS' = 4, 'DDS' = 5, 'B02512' = 10, 'B02598' = 11, 'B02617' = 12, 'B02682' = 13, 'B02764' = 14)) AS vendor_id,
-toDate(pickup_datetime) AS pickup_date,
-ifNull(pickup_datetime, toDateTime(0)) AS pickup_datetime,
-toDate(dropoff_datetime) AS dropoff_date,
-ifNull(dropoff_datetime, toDateTime(0)) AS dropoff_datetime,
-assumeNotNull(store_and_fwd_flag) IN ('Y', '1', '2') AS store_and_fwd_flag,
-assumeNotNull(rate_code_id) AS rate_code_id,
-assumeNotNull(pickup_longitude) AS pickup_longitude,
-assumeNotNull(pickup_latitude) AS pickup_latitude,
-assumeNotNull(dropoff_longitude) AS dropoff_longitude,
-assumeNotNull(dropoff_latitude) AS dropoff_latitude,
-assumeNotNull(passenger_count) AS passenger_count,
-assumeNotNull(trip_distance) AS trip_distance,
-assumeNotNull(fare_amount) AS fare_amount,
-assumeNotNull(extra) AS extra,
-assumeNotNull(mta_tax) AS mta_tax,
-assumeNotNull(tip_amount) AS tip_amount,
-assumeNotNull(tolls_amount) AS tolls_amount,
-assumeNotNull(ehail_fee) AS ehail_fee,
-assumeNotNull(improvement_surcharge) AS improvement_surcharge,
-assumeNotNull(total_amount) AS total_amount,
-CAST((assumeNotNull(payment_type) AS pt) IN ('CSH', 'CASH', 'Cash', 'CAS', 'Cas', '1') ? 'CSH' : (pt IN ('CRD', 'Credit', 'Cre', 'CRE', 'CREDIT', '2') ? 'CRE' : (pt IN ('NOC', 'No Charge', 'No', '3') ? 'NOC' : (pt IN ('DIS', 'Dispute', 'Dis', '4') ? 'DIS' : 'UNK'))) AS Enum8('CSH' = 1, 'CRE' = 2, 'UNK' = 0, 'NOC' = 3, 'DIS' = 4)) AS payment_type_,
-assumeNotNull(trip_type) AS trip_type,
-ifNull(toFixedString(unhex(pickup), 25), toFixedString('', 25)) AS pickup,
-ifNull(toFixedString(unhex(dropoff), 25), toFixedString('', 25)) AS dropoff,
-CAST(assumeNotNull(cab_type) AS Enum8('yellow' = 1, 'green' = 2, 'uber' = 3)) AS cab_type,
-
-assumeNotNull(pickup_nyct2010_gid) AS pickup_nyct2010_gid,
-toFloat32(ifNull(pickup_ctlabel, '0')) AS pickup_ctlabel,
-assumeNotNull(pickup_borocode) AS pickup_borocode,
-CAST(assumeNotNull(pickup_boroname) AS Enum8('Manhattan' = 1, 'Queens' = 4, 'Brooklyn' = 3, '' = 0, 'Bronx' = 2, 'Staten Island' = 5)) AS pickup_boroname,
-toFixedString(ifNull(pickup_ct2010, '000000'), 6) AS pickup_ct2010,
-toFixedString(ifNull(pickup_boroct2010, '0000000'), 7) AS pickup_boroct2010,
-CAST(assumeNotNull(ifNull(pickup_cdeligibil, ' ')) AS Enum8(' ' = 0, 'E' = 1, 'I' = 2)) AS pickup_cdeligibil,
-toFixedString(ifNull(pickup_ntacode, '0000'), 4) AS pickup_ntacode,
-
-CAST(assumeNotNull(pickup_ntaname) AS Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195)) AS pickup_ntaname,
-
-toUInt16(ifNull(pickup_puma, '0')) AS pickup_puma,
-
-assumeNotNull(dropoff_nyct2010_gid) AS dropoff_nyct2010_gid,
-toFloat32(ifNull(dropoff_ctlabel, '0')) AS dropoff_ctlabel,
-assumeNotNull(dropoff_borocode) AS dropoff_borocode,
-CAST(assumeNotNull(dropoff_boroname) AS Enum8('Manhattan' = 1, 'Queens' = 4, 'Brooklyn' = 3, '' = 0, 'Bronx' = 2, 'Staten Island' = 5)) AS dropoff_boroname,
-toFixedString(ifNull(dropoff_ct2010, '000000'), 6) AS dropoff_ct2010,
-toFixedString(ifNull(dropoff_boroct2010, '0000000'), 7) AS dropoff_boroct2010,
-CAST(assumeNotNull(ifNull(dropoff_cdeligibil, ' ')) AS Enum8(' ' = 0, 'E' = 1, 'I' = 2)) AS dropoff_cdeligibil,
-toFixedString(ifNull(dropoff_ntacode, '0000'), 4) AS dropoff_ntacode,
-
-CAST(assumeNotNull(dropoff_ntaname) AS Enum16('' = 0, 'Airport' = 1, 'Allerton-Pelham Gardens' = 2, 'Annadale-Huguenot-Prince\'s Bay-Eltingville' = 3, 'Arden Heights' = 4, 'Astoria' = 5, 'Auburndale' = 6, 'Baisley Park' = 7, 'Bath Beach' = 8, 'Battery Park City-Lower Manhattan' = 9, 'Bay Ridge' = 10, 'Bayside-Bayside Hills' = 11, 'Bedford' = 12, 'Bedford Park-Fordham North' = 13, 'Bellerose' = 14, 'Belmont' = 15, 'Bensonhurst East' = 16, 'Bensonhurst West' = 17, 'Borough Park' = 18, 'Breezy Point-Belle Harbor-Rockaway Park-Broad Channel' = 19, 'Briarwood-Jamaica Hills' = 20, 'Brighton Beach' = 21, 'Bronxdale' = 22, 'Brooklyn Heights-Cobble Hill' = 23, 'Brownsville' = 24, 'Bushwick North' = 25, 'Bushwick South' = 26, 'Cambria Heights' = 27, 'Canarsie' = 28, 'Carroll Gardens-Columbia Street-Red Hook' = 29, 'Central Harlem North-Polo Grounds' = 30, 'Central Harlem South' = 31, 'Charleston-Richmond Valley-Tottenville' = 32, 'Chinatown' = 33, 'Claremont-Bathgate' = 34, 'Clinton' = 35, 'Clinton Hill' = 36, 'Co-op City' = 37, 'College Point' = 38, 'Corona' = 39, 'Crotona Park East' = 40, 'Crown Heights North' = 41, 'Crown Heights South' = 42, 'Cypress Hills-City Line' = 43, 'DUMBO-Vinegar Hill-Downtown Brooklyn-Boerum Hill' = 44, 'Douglas Manor-Douglaston-Little Neck' = 45, 'Dyker Heights' = 46, 'East Concourse-Concourse Village' = 47, 'East Elmhurst' = 48, 'East Flatbush-Farragut' = 49, 'East Flushing' = 50, 'East Harlem North' = 51, 'East Harlem South' = 52, 'East New York' = 53, 'East New York (Pennsylvania Ave)' = 54, 'East Tremont' = 55, 'East Village' = 56, 'East Williamsburg' = 57, 'Eastchester-Edenwald-Baychester' = 58, 'Elmhurst' = 59, 'Elmhurst-Maspeth' = 60, 'Erasmus' = 61, 'Far Rockaway-Bayswater' = 62, 'Flatbush' = 63, 'Flatlands' = 64, 'Flushing' = 65, 'Fordham South' = 66, 'Forest Hills' = 67, 'Fort Greene' = 68, 'Fresh Meadows-Utopia' = 69, 'Ft. Totten-Bay Terrace-Clearview' = 70, 'Georgetown-Marine Park-Bergen Beach-Mill Basin' = 71, 'Glen Oaks-Floral Park-New Hyde Park' = 72, 'Glendale' = 73, 'Gramercy' = 74, 'Grasmere-Arrochar-Ft. Wadsworth' = 75, 'Gravesend' = 76, 'Great Kills' = 77, 'Greenpoint' = 78, 'Grymes Hill-Clifton-Fox Hills' = 79, 'Hamilton Heights' = 80, 'Hammels-Arverne-Edgemere' = 81, 'Highbridge' = 82, 'Hollis' = 83, 'Homecrest' = 84, 'Hudson Yards-Chelsea-Flatiron-Union Square' = 85, 'Hunters Point-Sunnyside-West Maspeth' = 86, 'Hunts Point' = 87, 'Jackson Heights' = 88, 'Jamaica' = 89, 'Jamaica Estates-Holliswood' = 90, 'Kensington-Ocean Parkway' = 91, 'Kew Gardens' = 92, 'Kew Gardens Hills' = 93, 'Kingsbridge Heights' = 94, 'Laurelton' = 95, 'Lenox Hill-Roosevelt Island' = 96, 'Lincoln Square' = 97, 'Lindenwood-Howard Beach' = 98, 'Longwood' = 99, 'Lower East Side' = 100, 'Madison' = 101, 'Manhattanville' = 102, 'Marble Hill-Inwood' = 103, 'Mariner\'s Harbor-Arlington-Port Ivory-Graniteville' = 104, 'Maspeth' = 105, 'Melrose South-Mott Haven North' = 106, 'Middle Village' = 107, 'Midtown-Midtown South' = 108, 'Midwood' = 109, 'Morningside Heights' = 110, 'Morrisania-Melrose' = 111, 'Mott Haven-Port Morris' = 112, 'Mount Hope' = 113, 'Murray Hill' = 114, 'Murray Hill-Kips Bay' = 115, 'New Brighton-Silver Lake' = 116, 'New Dorp-Midland Beach' = 117, 'New Springville-Bloomfield-Travis' = 118, 'North Corona' = 119, 'North Riverdale-Fieldston-Riverdale' = 120, 'North Side-South Side' = 121, 'Norwood' = 122, 'Oakland Gardens' = 123, 'Oakwood-Oakwood Beach' = 124, 'Ocean Hill' = 125, 'Ocean Parkway South' = 126, 'Old Astoria' = 127, 'Old Town-Dongan Hills-South Beach' = 128, 'Ozone Park' = 129, 'Park Slope-Gowanus' = 130, 'Parkchester' = 131, 'Pelham Bay-Country Club-City Island' = 132, 'Pelham Parkway' = 133, 'Pomonok-Flushing Heights-Hillcrest' = 134, 'Port Richmond' = 135, 'Prospect Heights' = 136, 'Prospect Lefferts Gardens-Wingate' = 137, 'Queens Village' = 138, 'Queensboro Hill' = 139, 'Queensbridge-Ravenswood-Long Island City' = 140, 'Rego Park' = 141, 'Richmond Hill' = 142, 'Ridgewood' = 143, 'Rikers Island' = 144, 'Rosedale' = 145, 'Rossville-Woodrow' = 146, 'Rugby-Remsen Village' = 147, 'Schuylerville-Throgs Neck-Edgewater Park' = 148, 'Seagate-Coney Island' = 149, 'Sheepshead Bay-Gerritsen Beach-Manhattan Beach' = 150, 'SoHo-TriBeCa-Civic Center-Little Italy' = 151, 'Soundview-Bruckner' = 152, 'Soundview-Castle Hill-Clason Point-Harding Park' = 153, 'South Jamaica' = 154, 'South Ozone Park' = 155, 'Springfield Gardens North' = 156, 'Springfield Gardens South-Brookville' = 157, 'Spuyten Duyvil-Kingsbridge' = 158, 'St. Albans' = 159, 'Stapleton-Rosebank' = 160, 'Starrett City' = 161, 'Steinway' = 162, 'Stuyvesant Heights' = 163, 'Stuyvesant Town-Cooper Village' = 164, 'Sunset Park East' = 165, 'Sunset Park West' = 166, 'Todt Hill-Emerson Hill-Heartland Village-Lighthouse Hill' = 167, 'Turtle Bay-East Midtown' = 168, 'University Heights-Morris Heights' = 169, 'Upper East Side-Carnegie Hill' = 170, 'Upper West Side' = 171, 'Van Cortlandt Village' = 172, 'Van Nest-Morris Park-Westchester Square' = 173, 'Washington Heights North' = 174, 'Washington Heights South' = 175, 'West Brighton' = 176, 'West Concourse' = 177, 'West Farms-Bronx River' = 178, 'West New Brighton-New Brighton-St. George' = 179, 'West Village' = 180, 'Westchester-Unionport' = 181, 'Westerleigh' = 182, 'Whitestone' = 183, 'Williamsbridge-Olinville' = 184, 'Williamsburg' = 185, 'Windsor Terrace' = 186, 'Woodhaven' = 187, 'Woodlawn-Wakefield' = 188, 'Woodside' = 189, 'Yorkville' = 190, 'park-cemetery-etc-Bronx' = 191, 'park-cemetery-etc-Brooklyn' = 192, 'park-cemetery-etc-Manhattan' = 193, 'park-cemetery-etc-Queens' = 194, 'park-cemetery-etc-Staten Island' = 195)) AS dropoff_ntaname,
-
-toUInt16(ifNull(dropoff_puma, '0')) AS dropoff_puma
-
-FROM trips
-```
-
-This takes 3030 seconds at a speed of about 428,000 rows per second.
-To load it faster, you can create the table with the `Log` engine instead of `MergeTree`. In this case, the download works faster than 200 seconds.
-
-The table uses 126 GB of disk space.
-
-``` sql
-SELECT formatReadableSize(sum(bytes)) FROM system.parts WHERE table = 'trips_mergetree' AND active
-```
-
-``` text
-┌─formatReadableSize(sum(bytes))─┐
-│ 126.18 GiB │
-└────────────────────────────────┘
-```
-
-Among other things, you can run the OPTIMIZE query on MergeTree. But it’s not required since everything will be fine without it.
-
-## Download of Prepared Partitions {#download-of-prepared-partitions}
-
``` bash
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
@@ -292,7 +202,7 @@ $ sudo service clickhouse-server restart
$ clickhouse-client --query "select count(*) from datasets.trips_mergetree"
```
-:::info
+:::info
If you run the queries described below, you must use the full table name, `datasets.trips_mergetree`.
:::
@@ -390,5 +300,3 @@ We ran queries using a client located in a different datacenter than where the c
| 1, AWS c5n.9xlarge | 0.130 | 0.584 | 0.777 | 1.811 |
| 3, AWS c5n.9xlarge | 0.057 | 0.231 | 0.285 | 0.641 |
| 140, E5-2650v2 | 0.028 | 0.043 | 0.051 | 0.072 |
-
-[Original article](https://clickhouse.com/docs/en/getting_started/example_datasets/nyc_taxi/)
diff --git a/docs/en/getting-started/example-datasets/recipes.md b/docs/en/getting-started/example-datasets/recipes.md
index cc059f6bd26..6a003571f6e 100644
--- a/docs/en/getting-started/example-datasets/recipes.md
+++ b/docs/en/getting-started/example-datasets/recipes.md
@@ -4,7 +4,7 @@ sidebar_label: Recipes Dataset
title: "Recipes Dataset"
---
-RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
+The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes. The size is slightly less than 1 GB.
## Download and Unpack the Dataset
diff --git a/docs/en/getting-started/example-datasets/uk-price-paid.md b/docs/en/getting-started/example-datasets/uk-price-paid.md
index bb9046397a0..2a89bfda2e7 100644
--- a/docs/en/getting-started/example-datasets/uk-price-paid.md
+++ b/docs/en/getting-started/example-datasets/uk-price-paid.md
@@ -13,16 +13,6 @@ Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
-## Download the Dataset {#download-dataset}
-
-Run the command:
-
-```bash
-wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
-```
-
-Download will take about 2 minutes with good internet connection.
-
## Create the Table {#create-table}
```sql
@@ -41,31 +31,49 @@ CREATE TABLE uk_price_paid
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
- county LowCardinality(String),
- category UInt8
-) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
+ county LowCardinality(String)
+)
+ENGINE = MergeTree
+ORDER BY (postcode1, postcode2, addr1, addr2);
```
-## Preprocess and Import Data {#preprocess-import-data}
+## Preprocess and Insert the Data {#preprocess-import-data}
-We will use `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
+We will use the `url` function to stream the data into ClickHouse. We need to preprocess some of the incoming data first, which includes:
+- splitting the `postcode` into two different columns - `postcode1` and `postcode2`, which is better for storage and queries
+- converting the `time` field to a date, since the time component is always 00:00
+- ignoring the [UUID](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis
+- transforming `type` and `duration` to more readable `Enum` fields using the [transform](../../sql-reference/functions/other-functions.md#transform) function
+- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 or 1
+- dropping the last two columns, since they both always contain the same value (0)
-In this example, we define the structure of source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
+The `url` function streams the data from the web server into your ClickHouse table. The following command inserts the entire dataset - about 27.5 million rows - into the `uk_price_paid` table:
-The preprocessing is:
-- splitting the postcode to two different columns `postcode1` and `postcode2` that is better for storage and queries;
-- coverting the `time` field to date as it only contains 00:00 time;
-- ignoring the [UUid](../../sql-reference/data-types/uuid.md) field because we don't need it for analysis;
-- transforming `type` and `duration` to more readable Enum fields with function [transform](../../sql-reference/functions/other-functions.md#transform);
-- transforming `is_new` and `category` fields from single-character string (`Y`/`N` and `A`/`B`) to [UInt8](../../sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with 0 and 1.
-
-Preprocessed data is piped directly to `clickhouse-client` to be inserted into ClickHouse table in streaming fashion.
-
-```bash
-clickhouse-local --input-format CSV --structure '
- uuid String,
- price UInt32,
- time DateTime,
+```sql
+INSERT INTO uk_price_paid
+WITH
+ splitByChar(' ', postcode) AS p
+SELECT
+ toUInt32(price_string) AS price,
+ parseDateTimeBestEffortUS(time) AS date,
+ p[1] AS postcode1,
+ p[2] AS postcode2,
+ transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
+ b = 'Y' AS is_new,
+ transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
+ addr1,
+ addr2,
+ street,
+ locality,
+ town,
+ district,
+ county
+FROM url(
+ 'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
+ 'CSV',
+ 'uuid_string String,
+ price_string String,
+ time String,
postcode String,
a String,
b String,
@@ -78,154 +86,136 @@ clickhouse-local --input-format CSV --structure '
district String,
county String,
d String,
- e String
-' --query "
- WITH splitByChar(' ', postcode) AS p
- SELECT
- price,
- toDate(time) AS date,
- p[1] AS postcode1,
- p[2] AS postcode2,
- transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
- b = 'Y' AS is_new,
- transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
- addr1,
- addr2,
- street,
- locality,
- town,
- district,
- county,
- d = 'B' AS category
- FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
+ e String'
+) SETTINGS max_http_get_redirects=10;
```
-It will take about 40 seconds.
+Wait for the data to finish inserting; it will take a minute or two depending on network speed.
## Validate the Data {#validate-data}
-Query:
+Let's verify it worked by seeing how many rows were inserted:
```sql
-SELECT count() FROM uk_price_paid;
+SELECT count()
+FROM uk_price_paid
```
-Result:
-
-```text
-┌──count()─┐
-│ 26321785 │
-└──────────┘
-```
-
-The size of dataset in ClickHouse is just 278 MiB, check it.
-
-Query:
+At the time this query was run, the dataset had 27,450,499 rows. Let's also check how much storage the table uses in ClickHouse:
```sql
-SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid';
+SELECT formatReadableSize(total_bytes)
+FROM system.tables
+WHERE name = 'uk_price_paid'
```
-Result:
-
-```text
-┌─formatReadableSize(total_bytes)─┐
-│ 278.80 MiB │
-└─────────────────────────────────┘
-```
+Notice the size of the table is just 221.43 MiB!
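+
+For a per-column view of where that space goes, the `system.columns` table exposes compressed and uncompressed sizes (a sketch; these are standard system-table columns, but the exact numbers will vary with your data and version):
+
+```sql
+SELECT
+    name,
+    formatReadableSize(data_compressed_bytes) AS compressed,
+    formatReadableSize(data_uncompressed_bytes) AS uncompressed
+FROM system.columns
+WHERE table = 'uk_price_paid'
+ORDER BY data_compressed_bytes DESC
+```
+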
## Run Some Queries {#run-queries}
+Let's run some queries to analyze the data:
+
### Query 1. Average Price Per Year {#average-price}
-Query:
-
```sql
-SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year;
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+    bar(price, 0, 1000000, 80)
+FROM uk_price_paid
+GROUP BY year
+ORDER BY year
```
-Result:
+The result looks like:
-```text
+```response
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
-│ 1995 │ 67932 │ █████▍ │
-│ 1996 │ 71505 │ █████▋ │
-│ 1997 │ 78532 │ ██████▎ │
-│ 1998 │ 85436 │ ██████▋ │
-│ 1999 │ 96037 │ ███████▋ │
-│ 2000 │ 107479 │ ████████▌ │
-│ 2001 │ 118885 │ █████████▌ │
-│ 2002 │ 137941 │ ███████████ │
-│ 2003 │ 155889 │ ████████████▍ │
-│ 2004 │ 178885 │ ██████████████▎ │
-│ 2005 │ 189351 │ ███████████████▏ │
-│ 2006 │ 203528 │ ████████████████▎ │
-│ 2007 │ 219378 │ █████████████████▌ │
+│ 1995 │ 67934 │ █████▍ │
+│ 1996 │ 71508 │ █████▋ │
+│ 1997 │ 78536 │ ██████▎ │
+│ 1998 │ 85441 │ ██████▋ │
+│ 1999 │ 96038 │ ███████▋ │
+│ 2000 │ 107487 │ ████████▌ │
+│ 2001 │ 118888 │ █████████▌ │
+│ 2002 │ 137948 │ ███████████ │
+│ 2003 │ 155893 │ ████████████▍ │
+│ 2004 │ 178888 │ ██████████████▎ │
+│ 2005 │ 189359 │ ███████████████▏ │
+│ 2006 │ 203532 │ ████████████████▎ │
+│ 2007 │ 219375 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
-│ 2010 │ 236109 │ ██████████████████▊ │
+│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232805 │ ██████████████████▌ │
-│ 2012 │ 238367 │ ███████████████████ │
-│ 2013 │ 256931 │ ████████████████████▌ │
-│ 2014 │ 279915 │ ██████████████████████▍ │
-│ 2015 │ 297266 │ ███████████████████████▋ │
-│ 2016 │ 313201 │ █████████████████████████ │
-│ 2017 │ 346097 │ ███████████████████████████▋ │
-│ 2018 │ 350116 │ ████████████████████████████ │
-│ 2019 │ 351013 │ ████████████████████████████ │
-│ 2020 │ 369420 │ █████████████████████████████▌ │
-│ 2021 │ 386903 │ ██████████████████████████████▊ │
+│ 2012 │ 238381 │ ███████████████████ │
+│ 2013 │ 256927 │ ████████████████████▌ │
+│ 2014 │ 280008 │ ██████████████████████▍ │
+│ 2015 │ 297263 │ ███████████████████████▋ │
+│ 2016 │ 313518 │ █████████████████████████ │
+│ 2017 │ 346371 │ ███████████████████████████▋ │
+│ 2018 │ 350556 │ ████████████████████████████ │
+│ 2019 │ 352184 │ ████████████████████████████▏ │
+│ 2020 │ 375808 │ ██████████████████████████████ │
+│ 2021 │ 381105 │ ██████████████████████████████▍ │
+│ 2022 │ 362572 │ █████████████████████████████ │
└──────┴────────┴────────────────────────────────────────┘
```
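+
+To quantify how quickly prices grew, a window function such as `lagInFrame` can turn the same aggregation into a year-over-year change (a sketch; it assumes a ClickHouse version with window function support):
+
+```sql
+SELECT
+    year,
+    price,
+    round(100 * (price - prev) / prev, 1) AS yoy_pct
+FROM
+(
+    SELECT
+        year,
+        price,
+        lagInFrame(price) OVER (ORDER BY year ASC) AS prev
+    FROM
+    (
+        SELECT
+            toYear(date) AS year,
+            round(avg(price)) AS price
+        FROM uk_price_paid
+        GROUP BY year
+    )
+)
+WHERE prev > 0
+ORDER BY year
+```
+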
### Query 2. Average Price per Year in London {#average-price-london}
-Query:
-
```sql
-SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year;
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+    bar(price, 0, 2000000, 100)
+FROM uk_price_paid
+WHERE town = 'LONDON'
+GROUP BY year
+ORDER BY year
```
-Result:
+The result looks like:
-```text
+```response
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
-│ 1995 │ 109116 │ █████▍ │
-│ 1996 │ 118667 │ █████▊ │
-│ 1997 │ 136518 │ ██████▋ │
-│ 1998 │ 152983 │ ███████▋ │
-│ 1999 │ 180637 │ █████████ │
-│ 2000 │ 215838 │ ██████████▋ │
-│ 2001 │ 232994 │ ███████████▋ │
-│ 2002 │ 263670 │ █████████████▏ │
-│ 2003 │ 278394 │ █████████████▊ │
-│ 2004 │ 304666 │ ███████████████▏ │
-│ 2005 │ 322875 │ ████████████████▏ │
-│ 2006 │ 356191 │ █████████████████▋ │
-│ 2007 │ 404054 │ ████████████████████▏ │
+│ 1995 │ 109110 │ █████▍ │
+│ 1996 │ 118659 │ █████▊ │
+│ 1997 │ 136526 │ ██████▋ │
+│ 1998 │ 153002 │ ███████▋ │
+│ 1999 │ 180633 │ █████████ │
+│ 2000 │ 215849 │ ██████████▋ │
+│ 2001 │ 232987 │ ███████████▋ │
+│ 2002 │ 263668 │ █████████████▏ │
+│ 2003 │ 278424 │ █████████████▊ │
+│ 2004 │ 304664 │ ███████████████▏ │
+│ 2005 │ 322887 │ ████████████████▏ │
+│ 2006 │ 356195 │ █████████████████▋ │
+│ 2007 │ 404062 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
-│ 2009 │ 427753 │ █████████████████████▍ │
-│ 2010 │ 480306 │ ████████████████████████ │
-│ 2011 │ 496274 │ ████████████████████████▋ │
-│ 2012 │ 519442 │ █████████████████████████▊ │
-│ 2013 │ 616212 │ ██████████████████████████████▋ │
-│ 2014 │ 724154 │ ████████████████████████████████████▏ │
-│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
-│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
-│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
-│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
-│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
-│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
-│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
+│ 2009 │ 427754 │ █████████████████████▍ │
+│ 2010 │ 480322 │ ████████████████████████ │
+│ 2011 │ 496278 │ ████████████████████████▋ │
+│ 2012 │ 519482 │ █████████████████████████▊ │
+│ 2013 │ 616195 │ ██████████████████████████████▋ │
+│ 2014 │ 724121 │ ████████████████████████████████████▏ │
+│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
+│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
+│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
+│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
+│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
+│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
+│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
+│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
```
-Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
+Something happened to home prices in 2020! But that is probably not a surprise...
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
-Query:
-
```sql
SELECT
town,
@@ -240,124 +230,123 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
-LIMIT 100;
+LIMIT 100
```
-Result:
+The result looks like:
-```text
-
-┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
-│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
-│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
-│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
-│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
-│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
-│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
-│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
-│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
-│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
-│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
-│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
-│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
-│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
-│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
-│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
-│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
-│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
-│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
-│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
-│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
-│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
-│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
-│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
-│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
-│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
-│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
-│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
-│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
-│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
-│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
-│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
-│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
-│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
-│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
-│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
-│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
-│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
-│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
-│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
-│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
-│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
-│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
-│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
-│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
-│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
-│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
-│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
-│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
-│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
-│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
-│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
-│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
-│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
-│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
-│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
-│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
-│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
-│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
-│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
-│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
-│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
-│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
-│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
-│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
-│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
-│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
-│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
-│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
-│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
-│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
-│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
-│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
-│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
-│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
-│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
-│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
-│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
-│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
-│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
-│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
-│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
-│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
-│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
-│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
-│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
-│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
-│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
-│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
-│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
-│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
-│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
-│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
-│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
-│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
-│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
-│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
-│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
-│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
-│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
-│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
-└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
+```response
+┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
+│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
+│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
+│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
+│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
+│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
+│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
+│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
+│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
+│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
+│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
+│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
+│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
+│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
+│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
+│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
+│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
+│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
+│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
+│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
+│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
+│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
+│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
+│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
+│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
+│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
+│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
+│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
+│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
+│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
+│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
+│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
+│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
+│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
+│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
+│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
+│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
+│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
+│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
+│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
+│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
+│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
+│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
+│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
+│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
+│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
+│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
+│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
+│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
+│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
+│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
+│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
+│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
+│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
+│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
+│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
+│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
+│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
+│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
+│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
+│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
+│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
+│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
+│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
+│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
+│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
+│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
+│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
+│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
+│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
+│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
+│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
+│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
+│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
+│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
+│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
+│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
+│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
+│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
+│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
+│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
+│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
+│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
+│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
+│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
+│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
+│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
+│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
+│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
+│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
+│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
+│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
+│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
+│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
+│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
+│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
+│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
+│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
+│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
+│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
+│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
+└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
```
## Let's Speed Up Queries Using Projections {#speedup-with-projections}
-[Projections](../../sql-reference/statements/alter/projection.md) allow to improve queries speed by storing pre-aggregated data.
+[Projections](../../sql-reference/statements/alter/projection.md) allow you to improve query speed by storing pre-aggregated data in whatever format you want. In this example, we create a projection that tracks the average price, total price, and count of properties, grouped by year, district, and town. At query time, ClickHouse uses your projection if it thinks the projection can improve the performance of the query (you don't have to do anything special to use the projection - ClickHouse decides for you when the projection will be useful).
### Build a Projection {#build-projection}
-Create an aggregate projection by dimensions `toYear(date)`, `district`, `town`:
+Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`:
```sql
ALTER TABLE uk_price_paid
@@ -374,25 +363,23 @@ ALTER TABLE uk_price_paid
toYear(date),
district,
town
- );
+ )
```
-Populate the projection for existing data (without it projection will be created for only newly inserted data):
+Populate the projection for existing data (without materializing it, the projection would be created only for newly inserted data):
```sql
ALTER TABLE uk_price_paid
MATERIALIZE PROJECTION projection_by_year_district_town
-SETTINGS mutations_sync = 1;
+SETTINGS mutations_sync = 1
```
## Test Performance {#test-performance}
-Let's run the same 3 queries.
+Let's run the same 3 queries again:
### Query 1. Average Price Per Year {#average-price-projections}
-Query:
-
```sql
SELECT
toYear(date) AS year,
@@ -400,47 +387,18 @@ SELECT
bar(price, 0, 1000000, 80)
FROM uk_price_paid
GROUP BY year
-ORDER BY year ASC;
+ORDER BY year ASC
```
-Result:
-
-```text
-┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
-│ 1995 │ 67932 │ █████▍ │
-│ 1996 │ 71505 │ █████▋ │
-│ 1997 │ 78532 │ ██████▎ │
-│ 1998 │ 85436 │ ██████▋ │
-│ 1999 │ 96037 │ ███████▋ │
-│ 2000 │ 107479 │ ████████▌ │
-│ 2001 │ 118885 │ █████████▌ │
-│ 2002 │ 137941 │ ███████████ │
-│ 2003 │ 155889 │ ████████████▍ │
-│ 2004 │ 178885 │ ██████████████▎ │
-│ 2005 │ 189351 │ ███████████████▏ │
-│ 2006 │ 203528 │ ████████████████▎ │
-│ 2007 │ 219378 │ █████████████████▌ │
-│ 2008 │ 217056 │ █████████████████▎ │
-│ 2009 │ 213419 │ █████████████████ │
-│ 2010 │ 236109 │ ██████████████████▊ │
-│ 2011 │ 232805 │ ██████████████████▌ │
-│ 2012 │ 238367 │ ███████████████████ │
-│ 2013 │ 256931 │ ████████████████████▌ │
-│ 2014 │ 279915 │ ██████████████████████▍ │
-│ 2015 │ 297266 │ ███████████████████████▋ │
-│ 2016 │ 313201 │ █████████████████████████ │
-│ 2017 │ 346097 │ ███████████████████████████▋ │
-│ 2018 │ 350116 │ ████████████████████████████ │
-│ 2019 │ 351013 │ ████████████████████████████ │
-│ 2020 │ 369420 │ █████████████████████████████▌ │
-│ 2021 │ 386903 │ ██████████████████████████████▊ │
-└──────┴────────┴────────────────────────────────────────┘
+The result is the same, but the performance is better!
+```response
+No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
+With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
```
+
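+If you want to confirm that a projection was actually used, one place to look is the `projections` column of the `system.query_log` table. A minimal sketch (this assumes query logging is enabled, which it is by default):
+
+```sql
+-- List recent queries against uk_price_paid and any projections they used.
+-- The projections column is empty when no projection was applied.
+SELECT
+    event_time,
+    projections,
+    query
+FROM system.query_log
+WHERE type = 'QueryFinish' AND query ILIKE '%uk_price_paid%'
+ORDER BY event_time DESC
+LIMIT 5
+```
+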
### Query 2. Average Price Per Year in London {#average-price-london-projections}
-Query:
-
```sql
SELECT
toYear(date) AS year,
@@ -449,48 +407,19 @@ SELECT
FROM uk_price_paid
WHERE town = 'LONDON'
GROUP BY year
-ORDER BY year ASC;
+ORDER BY year ASC
```
-Result:
+Same result, but notice the improvement in query performance:
-```text
-┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
-│ 1995 │ 109116 │ █████▍ │
-│ 1996 │ 118667 │ █████▊ │
-│ 1997 │ 136518 │ ██████▋ │
-│ 1998 │ 152983 │ ███████▋ │
-│ 1999 │ 180637 │ █████████ │
-│ 2000 │ 215838 │ ██████████▋ │
-│ 2001 │ 232994 │ ███████████▋ │
-│ 2002 │ 263670 │ █████████████▏ │
-│ 2003 │ 278394 │ █████████████▊ │
-│ 2004 │ 304666 │ ███████████████▏ │
-│ 2005 │ 322875 │ ████████████████▏ │
-│ 2006 │ 356191 │ █████████████████▋ │
-│ 2007 │ 404054 │ ████████████████████▏ │
-│ 2008 │ 420741 │ █████████████████████ │
-│ 2009 │ 427753 │ █████████████████████▍ │
-│ 2010 │ 480306 │ ████████████████████████ │
-│ 2011 │ 496274 │ ████████████████████████▋ │
-│ 2012 │ 519442 │ █████████████████████████▊ │
-│ 2013 │ 616212 │ ██████████████████████████████▋ │
-│ 2014 │ 724154 │ ████████████████████████████████████▏ │
-│ 2015 │ 792129 │ ███████████████████████████████████████▌ │
-│ 2016 │ 843655 │ ██████████████████████████████████████████▏ │
-│ 2017 │ 982642 │ █████████████████████████████████████████████████▏ │
-│ 2018 │ 1016835 │ ██████████████████████████████████████████████████▋ │
-│ 2019 │ 1042849 │ ████████████████████████████████████████████████████▏ │
-│ 2020 │ 1011889 │ ██████████████████████████████████████████████████▌ │
-│ 2021 │ 960343 │ ████████████████████████████████████████████████ │
-└──────┴─────────┴───────────────────────────────────────────────────────┘
+```response
+No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
+With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
```
### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
-The condition (date >= '2020-01-01') needs to be modified to match projection dimension (toYear(date) >= 2020).
-
-Query:
+The condition (`date >= '2020-01-01'`) needs to be modified so that it matches the projection dimension (`toYear(date) >= 2020`):
```sql
SELECT
@@ -506,138 +435,16 @@ GROUP BY
district
HAVING c >= 100
ORDER BY price DESC
-LIMIT 100;
+LIMIT 100
```
-Result:
+Again, the result is the same, but notice the improvement in query performance:
-```text
-┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
-│ LONDON │ CITY OF WESTMINSTER │ 3606 │ 3280239 │ █████████████████████████████████████████████████████████████████▌ │
-│ LONDON │ CITY OF LONDON │ 274 │ 3160502 │ ███████████████████████████████████████████████████████████████▏ │
-│ LONDON │ KENSINGTON AND CHELSEA │ 2550 │ 2308478 │ ██████████████████████████████████████████████▏ │
-│ LEATHERHEAD │ ELMBRIDGE │ 114 │ 1897407 │ █████████████████████████████████████▊ │
-│ LONDON │ CAMDEN │ 3033 │ 1805404 │ ████████████████████████████████████ │
-│ VIRGINIA WATER │ RUNNYMEDE │ 156 │ 1753247 │ ███████████████████████████████████ │
-│ WINDLESHAM │ SURREY HEATH │ 108 │ 1677613 │ █████████████████████████████████▌ │
-│ THORNTON HEATH │ CROYDON │ 546 │ 1671721 │ █████████████████████████████████▍ │
-│ BARNET │ ENFIELD │ 124 │ 1505840 │ ██████████████████████████████ │
-│ COBHAM │ ELMBRIDGE │ 387 │ 1237250 │ ████████████████████████▋ │
-│ LONDON │ ISLINGTON │ 2668 │ 1236980 │ ████████████████████████▋ │
-│ OXFORD │ SOUTH OXFORDSHIRE │ 321 │ 1220907 │ ████████████████████████▍ │
-│ LONDON │ RICHMOND UPON THAMES │ 704 │ 1215551 │ ████████████████████████▎ │
-│ LONDON │ HOUNSLOW │ 671 │ 1207493 │ ████████████████████████▏ │
-│ ASCOT │ WINDSOR AND MAIDENHEAD │ 407 │ 1183299 │ ███████████████████████▋ │
-│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 330 │ 1175615 │ ███████████████████████▌ │
-│ RICHMOND │ RICHMOND UPON THAMES │ 874 │ 1110444 │ ██████████████████████▏ │
-│ LONDON │ HAMMERSMITH AND FULHAM │ 3086 │ 1053983 │ █████████████████████ │
-│ SURBITON │ ELMBRIDGE │ 100 │ 1011800 │ ████████████████████▏ │
-│ RADLETT │ HERTSMERE │ 283 │ 1011712 │ ████████████████████▏ │
-│ SALCOMBE │ SOUTH HAMS │ 127 │ 1011624 │ ████████████████████▏ │
-│ WEYBRIDGE │ ELMBRIDGE │ 655 │ 1007265 │ ████████████████████▏ │
-│ ESHER │ ELMBRIDGE │ 485 │ 986581 │ ███████████████████▋ │
-│ LEATHERHEAD │ GUILDFORD │ 202 │ 977320 │ ███████████████████▌ │
-│ BURFORD │ WEST OXFORDSHIRE │ 111 │ 966893 │ ███████████████████▎ │
-│ BROCKENHURST │ NEW FOREST │ 129 │ 956675 │ ███████████████████▏ │
-│ HINDHEAD │ WAVERLEY │ 137 │ 953753 │ ███████████████████ │
-│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 419 │ 951121 │ ███████████████████ │
-│ EAST MOLESEY │ ELMBRIDGE │ 192 │ 936769 │ ██████████████████▋ │
-│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 146 │ 925515 │ ██████████████████▌ │
-│ LONDON │ TOWER HAMLETS │ 4388 │ 918304 │ ██████████████████▎ │
-│ OLNEY │ MILTON KEYNES │ 235 │ 910646 │ ██████████████████▏ │
-│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 540 │ 902418 │ ██████████████████ │
-│ LONDON │ SOUTHWARK │ 3885 │ 892997 │ █████████████████▋ │
-│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 960 │ 885969 │ █████████████████▋ │
-│ LONDON │ EALING │ 2658 │ 871755 │ █████████████████▍ │
-│ CRANBROOK │ TUNBRIDGE WELLS │ 431 │ 862348 │ █████████████████▏ │
-│ LONDON │ MERTON │ 2099 │ 859118 │ █████████████████▏ │
-│ BELVEDERE │ BEXLEY │ 346 │ 842423 │ ████████████████▋ │
-│ GUILDFORD │ WAVERLEY │ 143 │ 841277 │ ████████████████▋ │
-│ HARPENDEN │ ST ALBANS │ 657 │ 841216 │ ████████████████▋ │
-│ LONDON │ HACKNEY │ 3307 │ 837090 │ ████████████████▋ │
-│ LONDON │ WANDSWORTH │ 6566 │ 832663 │ ████████████████▋ │
-│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 123 │ 824299 │ ████████████████▍ │
-│ KINGS LANGLEY │ DACORUM │ 145 │ 821331 │ ████████████████▍ │
-│ BERKHAMSTED │ DACORUM │ 543 │ 818415 │ ████████████████▎ │
-│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 226 │ 802807 │ ████████████████ │
-│ BILLINGSHURST │ CHICHESTER │ 144 │ 797829 │ ███████████████▊ │
-│ WOKING │ GUILDFORD │ 176 │ 793494 │ ███████████████▋ │
-│ STOCKBRIDGE │ TEST VALLEY │ 178 │ 793269 │ ███████████████▋ │
-│ EPSOM │ REIGATE AND BANSTEAD │ 172 │ 791862 │ ███████████████▋ │
-│ TONBRIDGE │ TUNBRIDGE WELLS │ 360 │ 787876 │ ███████████████▋ │
-│ TEDDINGTON │ RICHMOND UPON THAMES │ 595 │ 786492 │ ███████████████▋ │
-│ TWICKENHAM │ RICHMOND UPON THAMES │ 1155 │ 786193 │ ███████████████▋ │
-│ LYNDHURST │ NEW FOREST │ 102 │ 785593 │ ███████████████▋ │
-│ LONDON │ LAMBETH │ 5228 │ 774574 │ ███████████████▍ │
-│ LONDON │ BARNET │ 3955 │ 773259 │ ███████████████▍ │
-│ OXFORD │ VALE OF WHITE HORSE │ 353 │ 772088 │ ███████████████▍ │
-│ TONBRIDGE │ MAIDSTONE │ 305 │ 770740 │ ███████████████▍ │
-│ LUTTERWORTH │ HARBOROUGH │ 538 │ 768634 │ ███████████████▎ │
-│ WOODSTOCK │ WEST OXFORDSHIRE │ 140 │ 766037 │ ███████████████▎ │
-│ MIDHURST │ CHICHESTER │ 257 │ 764815 │ ███████████████▎ │
-│ MARLOW │ BUCKINGHAMSHIRE │ 327 │ 761876 │ ███████████████▏ │
-│ LONDON │ NEWHAM │ 3237 │ 761784 │ ███████████████▏ │
-│ ALDERLEY EDGE │ CHESHIRE EAST │ 178 │ 757318 │ ███████████████▏ │
-│ LUTON │ CENTRAL BEDFORDSHIRE │ 212 │ 754283 │ ███████████████ │
-│ PETWORTH │ CHICHESTER │ 154 │ 754220 │ ███████████████ │
-│ ALRESFORD │ WINCHESTER │ 219 │ 752718 │ ███████████████ │
-│ POTTERS BAR │ WELWYN HATFIELD │ 174 │ 748465 │ ██████████████▊ │
-│ HASLEMERE │ CHICHESTER │ 128 │ 746907 │ ██████████████▊ │
-│ TADWORTH │ REIGATE AND BANSTEAD │ 502 │ 743252 │ ██████████████▋ │
-│ THAMES DITTON │ ELMBRIDGE │ 244 │ 741913 │ ██████████████▋ │
-│ REIGATE │ REIGATE AND BANSTEAD │ 581 │ 738198 │ ██████████████▋ │
-│ BOURNE END │ BUCKINGHAMSHIRE │ 138 │ 735190 │ ██████████████▋ │
-│ SEVENOAKS │ SEVENOAKS │ 1156 │ 730018 │ ██████████████▌ │
-│ OXTED │ TANDRIDGE │ 336 │ 729123 │ ██████████████▌ │
-│ INGATESTONE │ BRENTWOOD │ 166 │ 728103 │ ██████████████▌ │
-│ LONDON │ BRENT │ 2079 │ 720605 │ ██████████████▍ │
-│ LONDON │ HARINGEY │ 3216 │ 717780 │ ██████████████▎ │
-│ PURLEY │ CROYDON │ 575 │ 716108 │ ██████████████▎ │
-│ WELWYN │ WELWYN HATFIELD │ 222 │ 710603 │ ██████████████▏ │
-│ RICKMANSWORTH │ THREE RIVERS │ 798 │ 704571 │ ██████████████ │
-│ BANSTEAD │ REIGATE AND BANSTEAD │ 401 │ 701293 │ ██████████████ │
-│ CHIGWELL │ EPPING FOREST │ 261 │ 701203 │ ██████████████ │
-│ PINNER │ HARROW │ 528 │ 698885 │ █████████████▊ │
-│ HASLEMERE │ WAVERLEY │ 280 │ 696659 │ █████████████▊ │
-│ SLOUGH │ BUCKINGHAMSHIRE │ 396 │ 694917 │ █████████████▊ │
-│ WALTON-ON-THAMES │ ELMBRIDGE │ 946 │ 692395 │ █████████████▋ │
-│ READING │ SOUTH OXFORDSHIRE │ 318 │ 691988 │ █████████████▋ │
-│ NORTHWOOD │ HILLINGDON │ 271 │ 690643 │ █████████████▋ │
-│ FELTHAM │ HOUNSLOW │ 763 │ 688595 │ █████████████▋ │
-│ ASHTEAD │ MOLE VALLEY │ 303 │ 687923 │ █████████████▋ │
-│ BARNET │ BARNET │ 975 │ 686980 │ █████████████▋ │
-│ WOKING │ SURREY HEATH │ 283 │ 686669 │ █████████████▋ │
-│ MALMESBURY │ WILTSHIRE │ 323 │ 683324 │ █████████████▋ │
-│ AMERSHAM │ BUCKINGHAMSHIRE │ 496 │ 680962 │ █████████████▌ │
-│ CHISLEHURST │ BROMLEY │ 430 │ 680209 │ █████████████▌ │
-│ HYTHE │ FOLKESTONE AND HYTHE │ 490 │ 676908 │ █████████████▌ │
-│ MAYFIELD │ WEALDEN │ 101 │ 676210 │ █████████████▌ │
-│ ASCOT │ BRACKNELL FOREST │ 168 │ 676004 │ █████████████▌ │
-└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
+```response
+No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
+With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
```
-### Summary {#summary}
-
-All 3 queries work much faster and read fewer rows.
-
-```text
-Query 1
-
-no projection: 27 rows in set. Elapsed: 0.158 sec. Processed 26.32 million rows, 157.93 MB (166.57 million rows/s., 999.39 MB/s.)
- projection: 27 rows in set. Elapsed: 0.007 sec. Processed 105.96 thousand rows, 3.33 MB (14.58 million rows/s., 458.13 MB/s.)
-
-
-Query 2
-
-no projection: 27 rows in set. Elapsed: 0.163 sec. Processed 26.32 million rows, 80.01 MB (161.75 million rows/s., 491.64 MB/s.)
- projection: 27 rows in set. Elapsed: 0.008 sec. Processed 105.96 thousand rows, 3.67 MB (13.29 million rows/s., 459.89 MB/s.)
-
-Query 3
-
-no projection: 100 rows in set. Elapsed: 0.069 sec. Processed 26.32 million rows, 62.47 MB (382.13 million rows/s., 906.93 MB/s.)
- projection: 100 rows in set. Elapsed: 0.029 sec. Processed 8.08 thousand rows, 511.08 KB (276.06 thousand rows/s., 17.47 MB/s.)
-```
-
-### Test It in Playground {#playground}
+### Test it in the Playground {#playground}
The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
diff --git a/docs/en/getting-started/index.md b/docs/en/getting-started/index.md
new file mode 100644
index 00000000000..0bb3ae1ca71
--- /dev/null
+++ b/docs/en/getting-started/index.md
@@ -0,0 +1,26 @@
+---
+slug: /en/getting-started/example-datasets/
+sidebar_position: 0
+sidebar_label: Overview
+keywords: [clickhouse, install, tutorial, sample, datasets]
+pagination_next: 'en/tutorial'
+---
+
+# Tutorials and Example Datasets
+
+We have many resources to help you get started and learn how ClickHouse works:
+
+- If you need to get ClickHouse up and running, check out our [Quick Start](../quick-start.mdx)
+- The [ClickHouse Tutorial](../tutorial.md) analyzes a dataset of New York City taxi rides
+
+In addition, the sample datasets provide a great way to gain experience working with ClickHouse,
+learn important techniques and tricks, and see how to take advantage of the many powerful
+functions in ClickHouse. The sample datasets include:
+
+- The [UK Property Price Paid dataset](../getting-started/example-datasets/uk-price-paid.md) is a good starting point with some interesting SQL queries
+- The [New York Taxi Data](../getting-started/example-datasets/nyc-taxi.md) has an example of how to insert data from S3 into ClickHouse
+- The [Cell Towers dataset](../getting-started/example-datasets/cell-towers.md) imports a CSV into ClickHouse
+- The [NYPD Complaint Data](../getting-started/example-datasets/nypd_complaint_data.md) demonstrates how to use data inference to simplify creating tables
+- The ["What's on the Menu?" dataset](../getting-started/example-datasets/menus.md) has an example of denormalizing data
+
+View the **Tutorials and Datasets** menu for a complete list of sample datasets.
\ No newline at end of file
diff --git a/docs/en/getting-started/install.md b/docs/en/getting-started/install.md
index 83561b07ade..e88e9e06a68 100644
--- a/docs/en/getting-started/install.md
+++ b/docs/en/getting-started/install.md
@@ -1,13 +1,34 @@
---
-sidebar_label: Installation
-sidebar_position: 1
-keywords: [clickhouse, install, installation, docs]
-description: ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
-slug: /en/getting-started/install
-title: Installation
+sidebar_label: Install
+keywords: [clickhouse, install, getting started, quick start]
+slug: /en/install
---
-## System Requirements {#system-requirements}
+# Installing ClickHouse
+
+You have two options for getting up and running with ClickHouse:
+
+- **[ClickHouse Cloud](https://clickhouse.cloud/):** the official ClickHouse as a service, built by, maintained by, and supported by the creators of ClickHouse
+- **Self-managed ClickHouse:** ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture
+
+## ClickHouse Cloud
+
+The quickest and easiest way to get up and running with ClickHouse is to create a new service in [ClickHouse Cloud](https://clickhouse.cloud/):
+
+
+
+![Create a ClickHouse Cloud service](@site/docs/en/_snippets/images/createservice1.png)
+
+
+Once your Cloud service is provisioned, you will be able to [connect to it](/docs/en/integrations/connect-a-client.md) and start [inserting data](/docs/en/integrations/data-ingestion.md).
+
+:::note
+The [Quick Start](/docs/en/quick-start.mdx) walks through the steps of getting a ClickHouse Cloud service up and running, connecting to it, and inserting data.
+:::
+
+## Self-Managed Requirements
+
+### CPU Architecture
ClickHouse can run on any Linux, FreeBSD, or Mac OS X with x86_64, AArch64, or PowerPC64LE CPU architecture.
@@ -19,6 +40,55 @@ $ grep -q sse4_2 /proc/cpuinfo && echo "SSE 4.2 supported" || echo "SSE 4.2 not
To run ClickHouse on processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should [build ClickHouse from sources](#from-sources) with proper configuration adjustments.
+ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
+
+It is recommended to use **Turbo Boost** and **hyper-threading** technologies. They significantly improve performance with a typical workload.
+
+### RAM {#ram}
+
+We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
+
+The required volume of RAM depends on:
+
+- The complexity of queries.
+- The amount of data that is processed in queries.
+
+To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](/docs/en/sql-reference/statements/select/distinct.md#select-distinct), [JOIN](/docs/en/sql-reference/statements/select/join.md#select-join) and other operations you use.
+
+ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](/docs/en/sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
+
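+As a practical check, you can inspect how much memory your representative queries actually used. A minimal sketch using the query log (this assumes query logging is enabled, which it is by default):
+
+```sql
+-- Show the ten most memory-hungry recent queries.
+SELECT
+    formatReadableSize(memory_usage) AS memory,
+    query
+FROM system.query_log
+WHERE type = 'QueryFinish'
+ORDER BY memory_usage DESC
+LIMIT 10
+```
+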
+### Swap File {#swap-file}
+
+Disable the swap file for production environments.
+
+### Storage Subsystem {#storage-subsystem}
+
+You need to have 2GB of free disk space to install ClickHouse.
+
+The volume of storage required for your data should be calculated separately. Assessment should include:
+
+- Estimation of the data volume.
+
+ You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
+
+- The data compression coefficient.
+
+    To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table as stored (see the sketch after this list for one way to measure both numbers). For example, clickstream data is usually compressed by 6-10 times.
+
+To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
+
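+Both the average row size and the compression coefficient can be read off `system.parts` once a sample is loaded. A minimal sketch, assuming the sample was inserted into a hypothetical table named `sample_data`:
+
+```sql
+-- Average uncompressed row size and compression ratio for the loaded sample.
+SELECT
+    formatReadableSize(sum(data_uncompressed_bytes) / sum(rows)) AS avg_row_size,
+    round(sum(data_uncompressed_bytes) / sum(data_compressed_bytes), 2) AS compression_ratio
+FROM system.parts
+WHERE active AND (table = 'sample_data')
+```
+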
+### Network {#network}
+
+If possible, use a 10G or higher network.
+
+Network bandwidth is critical for processing distributed queries with a large amount of intermediate data. In addition, network speed affects replication.
+
+### Software {#software}
+
+ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
+
+## Self-Managed Install
+
## Available Installation Options {#available-installation-options}
### From DEB Packages {#install-from-deb-packages}
@@ -58,9 +128,27 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
-You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
+
+**Migration Method for installing the deb-packages**
-You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/stable).
+```bash
+sudo apt-key del E0C56BD4
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
+echo "deb https://packages.clickhouse.com/deb stable main" | sudo tee \
+ /etc/apt/sources.list.d/clickhouse.list
+sudo apt-get update
+
+sudo apt-get install -y clickhouse-server clickhouse-client
+
+sudo service clickhouse-server start
+clickhouse-client # or "clickhouse-client --password" if you set up a password.
+```
+
+
+
+You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
+
+You can also download and install packages manually from [here](https://packages.clickhouse.com/deb/pool/main/c/).
#### Packages {#packages}
@@ -105,7 +193,7 @@ clickhouse-client # or "clickhouse-client --password" if you set up a password.
-You can replace `stable` with `lts` to use different [release kinds](../faq/operations/production.md) based on your needs.
+You can replace `stable` with `lts` to use different [release kinds](/docs/en/faq/operations/production.md) based on your needs.
Then run these commands to install packages:
@@ -226,7 +314,7 @@ Use the `clickhouse client` to connect to the server, or `clickhouse local` to p
### From Sources {#from-sources}
-To manually compile ClickHouse, follow the instructions for [Linux](../development/build.md) or [Mac OS X](../development/build-osx.md).
+To manually compile ClickHouse, follow the instructions for [Linux](/docs/en/development/build.md) or [Mac OS X](/docs/en/development/build-osx.md).
You can compile packages and install them or use programs without installing packages. Also by building manually you can disable SSE 4.2 requirement or build for AArch64 CPUs.
@@ -281,7 +369,7 @@ If the configuration file is in the current directory, you do not need to specif
ClickHouse supports access restriction settings. They are located in the `users.xml` file (next to `config.xml`).
By default, access is allowed from anywhere for the `default` user, without a password. See `user/default/networks`.
-For more information, see the section [“Configuration Files”](../operations/configuration-files.md).
+For more information, see the section [“Configuration Files”](/docs/en/operations/configuration-files.md).
After launching server, you can use the command-line client to connect to it:
@@ -292,7 +380,7 @@ $ clickhouse-client
By default, it connects to `localhost:9000` on behalf of the user `default` without a password. It can also be used to connect to a remote server using `--host` argument.
The terminal must use UTF-8 encoding.
-For more information, see the section [“Command-line client”](../interfaces/cli.md).
+For more information, see the section [“Command-line client”](/docs/en/interfaces/cli.md).
Example:
@@ -317,6 +405,5 @@ SELECT 1
**Congratulations, the system works!**
-To continue experimenting, you can download one of the test data sets or go through [tutorial](./../tutorial.md).
+To continue experimenting, you can download one of the test data sets or go through [tutorial](/docs/en/tutorial.md).
-[Original article](https://clickhouse.com/docs/en/getting_started/install/)
diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md
index 1f45d1fa411..4f07f99fb26 100644
--- a/docs/en/interfaces/cli.md
+++ b/docs/en/interfaces/cli.md
@@ -3,6 +3,7 @@ slug: /en/interfaces/cli
sidebar_position: 17
sidebar_label: Command-Line Client
---
+import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_native.md';
# Command-line Client
@@ -24,26 +25,76 @@ Connected to ClickHouse server version 20.13.1 revision 54442.
Different client and server versions are compatible with one another, but some features may not be available in older clients. We recommend using the same version of the client as the server. When you try to use a client that is older than the server, `clickhouse-client` displays the message:
```response
-ClickHouse client version is older than ClickHouse server. It may lack support for new features.
+ClickHouse client version is older than ClickHouse server.
+It may lack support for new features.
```
## Usage {#cli_usage}
-The client can be used in interactive and non-interactive (batch) mode. To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
+The client can be used in interactive and non-interactive (batch) mode.
-Example of using the client to insert data:
+### Gather your connection details
+
+
+### Interactive
+
+To connect interactively to your ClickHouse Cloud service, or to any ClickHouse server using TLS and passwords, use `--secure`, port 9440, and provide your username and password:
+
+```bash
+clickhouse-client --host <HOSTNAME.clickhouse.cloud> \
+                  --secure \
+                  --port 9440 \
+                  --user <USERNAME> \
+                  --password <PASSWORD>
+```
+
+To connect to a self-managed ClickHouse server, you will need the details for that server. Whether or not TLS is used, port numbers, and passwords are all configurable. Use the above example for ClickHouse Cloud as a starting point.
+
+
+### Batch
+
+To use batch mode, specify the ‘query’ parameter, or send data to ‘stdin’ (it verifies that ‘stdin’ is not a terminal), or both. Similar to the HTTP interface, when using the ‘query’ parameter and sending data to ‘stdin’, the request is a concatenation of the ‘query’ parameter, a line feed, and the data in ‘stdin’. This is convenient for large INSERT queries.
+
+Examples of using the client to insert data:
+
+#### Inserting a CSV file into a remote ClickHouse service
+
+This example is appropriate for ClickHouse Cloud, or any ClickHouse server using TLS and a password. In this example, a sample dataset CSV file, `cell_towers.csv`, is inserted into an existing table `cell_towers` in the `default` database:
+
+```bash
+clickhouse-client --host HOSTNAME.clickhouse.cloud \
+ --secure \
+ --port 9440 \
+ --user default \
+ --password PASSWORD \
+ --query "INSERT INTO cell_towers FORMAT CSVWithNames" \
+ < cell_towers.csv
+```
+
+:::note
+To concentrate on the query syntax, the rest of the examples leave off the connection details (`--host`, `--port`, etc.). Add them in when you try the commands.
+:::
+
+#### Three different ways of inserting data
``` bash
-$ echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
+echo -ne "1, 'some text', '2016-08-14 00:00:00'\n2, 'some more text', '2016-08-14 00:00:01'" | \
+ clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
+```
-$ cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
+```bash
+cat <<_EOF | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
3, 'some text', '2016-08-14 00:00:00'
4, 'some more text', '2016-08-14 00:00:01'
_EOF
-
-$ cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
```
+```bash
+cat file.csv | clickhouse-client --database=test --query="INSERT INTO test FORMAT CSV";
+```
+
+### Notes
+
In batch mode, the default data format is TabSeparated. You can set the format in the FORMAT clause of the query.
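+
+For example, a query can name its output format explicitly. A small illustrative sketch:
+
+```sql
+-- Override the default TabSeparated output with CSV.
+SELECT number, number * 2 AS doubled
+FROM system.numbers
+LIMIT 3
+FORMAT CSV
+```
+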
By default, you can only process a single query in batch mode. To make multiple queries from a “script,” use the `--multiquery` parameter. This works for all queries except INSERT. Query results are output consecutively without additional separators. Similarly, to process a large number of queries, you can run ‘clickhouse-client’ for each query. Note that it may take tens of milliseconds to launch the ‘clickhouse-client’ program.
diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md
index 9b8354f23a2..56708def497 100644
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@@ -5,7 +5,7 @@ sidebar_label: Input and Output Formats
title: Formats for Input and Output Data
---
-ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read an external dictionary. A format supported for output can be used to arrange the
+ClickHouse can accept and return data in various formats. A format supported for input can be used to parse the data provided to `INSERT`s, to perform `SELECT`s from a file-backed table such as File, URL or HDFS, or to read a dictionary. A format supported for output can be used to arrange the
results of a `SELECT`, and to perform `INSERT`s into a file-backed table.
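+
+For example, reading a file through the `file` table function names the format explicitly. A small sketch using a hypothetical file `data.csv`:
+
+```sql
+-- Parse a CSV file with an explicit structure.
+SELECT * FROM file('data.csv', CSV, 'id UInt32, name String')
+```
+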
The supported formats are:
@@ -1020,6 +1020,62 @@ Example:
}
```
+To use an object name as a column value, you can use the special setting [format_json_object_each_row_column_for_object_name](../operations/settings/settings.md#format_json_object_each_row_column_for_object_name). The value of this setting is the name of the column that is used as the JSON key for each row in the resulting object.
+Examples:
+
+For output:
+
+Let's say we have table `test` with two columns:
+```response
+┌─object_name─┬─number─┐
+│ first_obj │ 1 │
+│ second_obj │ 2 │
+│ third_obj │ 3 │
+└─────────────┴────────┘
+```
+Let's output it in `JSONObjectEachRow` format and use `format_json_object_each_row_column_for_object_name` setting:
+
+```sql
+SELECT * FROM test SETTINGS format_json_object_each_row_column_for_object_name='object_name'
+```
+
+The output:
+```json
+{
+ "first_obj": {"number": 1},
+ "second_obj": {"number": 2},
+ "third_obj": {"number": 3}
+}
+```
+
+For input:
+
+Let's say we stored the output from the previous example in a file named `data.json`:
+```sql
+SELECT * FROM file('data.json', JSONObjectEachRow, 'object_name String, number UInt64') SETTINGS format_json_object_each_row_column_for_object_name='object_name'
+```
+
+```response
+┌─object_name─┬─number─┐
+│ first_obj │ 1 │
+│ second_obj │ 2 │
+│ third_obj │ 3 │
+└─────────────┴────────┘
+```
+
+It also works in schema inference:
+
+```sql
+DESC file('data.json', JSONObjectEachRow) SETTINGS format_json_object_each_row_column_for_object_name='object_name'
+```
+
+```response
+┌─name────────┬─type────────────┐
+│ object_name │ String │
+│ number │ Nullable(Int64) │
+└─────────────┴─────────────────┘
+```
+
### Inserting Data {#json-inserting-data}
diff --git a/docs/en/interfaces/third-party/client-libraries.md b/docs/en/interfaces/third-party/client-libraries.md
index e085566aa7e..c26532c98cb 100644
--- a/docs/en/interfaces/third-party/client-libraries.md
+++ b/docs/en/interfaces/third-party/client-libraries.md
@@ -41,6 +41,7 @@ ClickHouse Inc does **not** maintain the libraries listed below and hasn’t don
- [node-clickhouse](https://github.com/apla/node-clickhouse)
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
+ - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
diff --git a/docs/en/operations/backup.md b/docs/en/operations/_backup.md
similarity index 61%
rename from docs/en/operations/backup.md
rename to docs/en/operations/_backup.md
index d26d8f27820..d694c51cee6 100644
--- a/docs/en/operations/backup.md
+++ b/docs/en/operations/_backup.md
@@ -1,9 +1,12 @@
----
-slug: /en/operations/backup
-sidebar_position: 49
-sidebar_label: Data backup and restore
-title: Data backup and restore
----
+
+[//]: # (This file is included in Manage > Backups)
+
+- [Backup to a local disk](#backup-to-a-local-disk)
+- [Configuring backup/restore to use an S3 endpoint](#configuring-backuprestore-to-use-an-s3-endpoint)
+- [Backup/restore using an S3 disk](#backuprestore-using-an-s3-disk)
+- [Alternatives](#alternatives)
+
+## Background
While [replication](../engines/table-engines/mergetree-family/replication.md) provides protection from hardware failures, it does not protect against human errors: accidental deletion of data, deletion of the wrong table or a table on the wrong cluster, and software bugs that result in incorrect data processing or data corruption. In many cases mistakes like these will affect all replicas. ClickHouse has built-in safeguards to prevent some types of mistakes — for example, by default [you can’t just drop tables with a MergeTree-like engine containing more than 50 Gb of data](server-configuration-parameters/settings.md#max-table-size-to-drop). However, these safeguards do not cover all possible cases and can be circumvented.
@@ -15,7 +18,9 @@ Each company has different resources available and business requirements, so the
Keep in mind that if you backed something up and never tried to restore it, chances are that restore will not work properly when you actually need it (or at least it will take longer than business can tolerate). So whatever backup approach you choose, make sure to automate the restore process as well, and practice it on a spare ClickHouse cluster regularly.
:::
-## Configure a backup destination
+## Backup to a local disk
+
+### Configure a backup destination
In the examples below you will see the backup destination specified like `Disk('backups', '1.zip')`. To prepare the destination add a file to `/etc/clickhouse-server/config.d/backup_disk.xml` specifying the backup destination. For example, this file defines disk named `backups` and then adds that disk to the **backups > allowed_disk** list:
@@ -39,7 +44,7 @@ In the examples below you will see the backup destination specified like `Disk('
```
-## Parameters
+### Parameters
Backups can be either full or incremental, and can include tables (including materialized views, projections, and dictionaries), and databases. Backups can be synchronous (default) or asynchronous. They can be compressed. Backups can be password protected.
@@ -52,7 +57,7 @@ The BACKUP and RESTORE statements take a list of DATABASE and TABLE names, a des
- `password` for the file on disk
- `base_backup`: the destination of the previous backup of this source. For example, `Disk('backups', '1.zip')`
-## Usage examples
+### Usage examples
Backup and then restore a table:
```
@@ -81,7 +86,7 @@ RESTORE TABLE test.table AS test.table2 FROM Disk('backups', '1.zip')
BACKUP TABLE test.table3 AS test.table4 TO Disk('backups', '2.zip')
```
-## Incremental backups
+### Incremental backups
Incremental backups can be taken by specifying the `base_backup`.
:::note
@@ -100,7 +105,7 @@ RESTORE TABLE test.table AS test.table2
FROM Disk('backups', 'incremental-a.zip');
```
-## Assign a password to the backup
+### Assign a password to the backup
Backups written to disk can have a password applied to the file:
```
@@ -116,7 +121,7 @@ RESTORE TABLE test.table
SETTINGS password='qwerty'
```
-## Compression settings
+### Compression settings
If you would like to specify the compression method or level:
```
@@ -125,14 +130,14 @@ BACKUP TABLE test.table
SETTINGS compression_method='lzma', compression_level=3
```
-## Restore specific partitions
+### Restore specific partitions
If specific partitions associated with a table need to be restored these can be specified. To restore partitions 1 and 4 from backup:
```
RESTORE TABLE test.table PARTITIONS '2', '3'
FROM Disk('backups', 'filename.zip')
```
-## Check the status of backups
+### Check the status of backups
The backup command returns an `id` and `status`, and that `id` can be used to get the status of the backup. This is very useful to check the progress of long ASYNC backups. The example below shows a failure that happened when trying to overwrite an existing backup file:
```sql
@@ -171,6 +176,160 @@ end_time: 2022-08-30 09:21:46
1 row in set. Elapsed: 0.002 sec.
```
+## Configuring BACKUP/RESTORE to use an S3 Endpoint
+
+To write backups to an S3 bucket you need three pieces of information:
+- S3 endpoint,
+ for example `https://mars-doc-test.s3.amazonaws.com/backup-S3/`
+- Access key ID,
+ for example `ABC123`
+- Secret access key,
+ for example `Abc+123`
+
+:::note
+Creating an S3 bucket is covered in [Use S3 Object Storage as a ClickHouse disk](/docs/en/integrations/data-ingestion/s3/configuring-s3-for-clickhouse-use.md). Come back to this doc after saving the policy; there is no need to configure ClickHouse to use the S3 bucket.
+:::
+
+The destination for a backup will be specified like this:
+```
+S3('<S3 endpoint>/<directory>', '<Access key ID>', '<Secret access key>')
+```
+
+```sql
+CREATE TABLE data
+(
+ `key` Int,
+ `value` String,
+ `array` Array(String)
+)
+ENGINE = MergeTree
+ORDER BY tuple()
+```
+
+```sql
+INSERT INTO data SELECT *
+FROM generateRandom('key Int, value String, array Array(String)')
+LIMIT 1000
+```
+
+### Create a base (initial) backup
+
+Incremental backups require a _base_ backup to start from; this example will be used
+later as the base backup. The first parameter of the S3 destination is the S3 endpoint, followed by the directory within the bucket to use for this backup. In this example the directory is named `my_backup`.
+
+```sql
+BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ de442b75-a66c-4a3c-a193-f76f278c70f3 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+
+### Add more data
+
+Incremental backups are populated with the difference between the base backup and the current content of the table being backed up. Add more data before taking the incremental backup:
+
+```sql
+INSERT INTO data SELECT *
+FROM generateRandom('key Int, value String, array Array(String)')
+LIMIT 100
+```
+### Take an incremental backup
+
+This backup command is similar to the base backup, but adds `SETTINGS base_backup` and the location of the base backup. Note that the destination for the incremental backup is not the same directory as the base; it is the same endpoint with a different target directory within the bucket. The base backup is in `my_backup`, and the incremental will be written to `my_incremental`:
+```sql
+BACKUP TABLE data TO S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123') SETTINGS base_backup = S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_backup', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status─────────┐
+│ f6cd3900-850f-41c9-94f1-0c4df33ea528 │ BACKUP_CREATED │
+└──────────────────────────────────────┴────────────────┘
+```
+### Restore from the incremental backup
+
+This command restores the incremental backup into a new table, `data3`. Note that when an incremental backup is restored, the base backup is also included. Specify only the incremental backup when restoring:
+```sql
+RESTORE TABLE data AS data3 FROM S3('https://mars-doc-test.s3.amazonaws.com/backup-S3/my_incremental', 'ABC123', 'Abc+123')
+```
+
+```response
+┌─id───────────────────────────────────┬─status───┐
+│ ff0c8c39-7dff-4324-a241-000796de11ca │ RESTORED │
+└──────────────────────────────────────┴──────────┘
+```
+
+### Verify the count
+
+There were two inserts into the original table `data`, one with 1,000 rows and one with 100 rows, for a total of 1,100. Verify that the restored table has 1,100 rows:
+```sql
+SELECT count()
+FROM data3
+```
+```response
+┌─count()─┐
+│ 1100 │
+└─────────┘
+```
+
+### Verify the content
+This compares the content of the original table, `data`, with the restored table, `data3`:
+```sql
+SELECT throwIf((
+ SELECT groupArray(tuple(*))
+ FROM data
+ ) != (
+ SELECT groupArray(tuple(*))
+ FROM data3
+ ), 'Data does not match after BACKUP/RESTORE')
+```
+## BACKUP/RESTORE Using an S3 Disk
+
+It is also possible to `BACKUP`/`RESTORE` to S3 by configuring an S3 disk in the ClickHouse storage configuration. Configure the disk like this by adding a file to `/etc/clickhouse-server/config.d`:
+
+```xml
+<clickhouse>
+    <storage_configuration>
+        <disks>
+            <s3_plain>
+                <type>s3_plain</type>
+                <endpoint></endpoint> <!-- fill in your S3 endpoint -->
+                <access_key_id></access_key_id>
+                <secret_access_key></secret_access_key>
+            </s3_plain>
+        </disks>
+        <policies>
+            <s3>
+                <volumes>
+                    <main>
+                        <disk>s3_plain</disk>
+                    </main>
+                </volumes>
+            </s3>
+        </policies>
+    </storage_configuration>
+
+    <backups>
+        <allowed_disk>s3_plain</allowed_disk>
+    </backups>
+</clickhouse>
+```
+
+And then `BACKUP`/`RESTORE` as usual:
+
+```sql
+BACKUP TABLE data TO Disk('s3_plain', 'cloud_backup');
+RESTORE TABLE data AS data_restored FROM Disk('s3_plain', 'cloud_backup');
+```
+
+:::note
+Keep in mind that:
+- This disk should not be used for `MergeTree` data itself, only for `BACKUP`/`RESTORE`
+- It results in an excessive number of API calls
+:::
+
## Alternatives
ClickHouse stores data on disk, and there are many ways to backup disks. These are some alternatives that have been used in the past, and that may fit in well in your environment.
diff --git a/docs/en/operations/troubleshooting.md b/docs/en/operations/_troubleshooting.md
similarity index 77%
rename from docs/en/operations/troubleshooting.md
rename to docs/en/operations/_troubleshooting.md
index 93bd56087a2..aed63ec4d0f 100644
--- a/docs/en/operations/troubleshooting.md
+++ b/docs/en/operations/_troubleshooting.md
@@ -1,9 +1,5 @@
----
-slug: /en/operations/troubleshooting
-sidebar_position: 46
-sidebar_label: Troubleshooting
-title: Troubleshooting
----
+
+[//]: # (This file is included in FAQ > Troubleshooting)
- [Installation](#troubleshooting-installation-errors)
- [Connecting to the server](#troubleshooting-accepts-no-connections)
@@ -17,6 +13,49 @@ title: Troubleshooting
- Check firewall settings.
- If you cannot access the repository for any reason, download packages as described in the [install guide](../getting-started/install.md) article and install them manually using the `sudo dpkg -i <packages>` command. You will also need the `tzdata` package.
+### You Cannot Update Deb Packages from ClickHouse Repository with Apt-get {#you-cannot-update-deb-packages-from-clickhouse-repository-with-apt-get}
+
+- The issue may happen when the GPG key is changed.
+
+Please use the following commands to resolve the issue:
+
+```bash
+sudo apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754
+sudo apt-get update
+```
+
+### You Get Different Warnings with `apt-get update` {#you-get-different-warnings-with-apt-get-update}
+
+- The complete warning messages look like one of the following:
+
+```
+N: Skipping acquire of configured file 'main/binary-i386/Packages' as repository 'https://packages.clickhouse.com/deb stable InRelease' doesn't support architecture 'i386'
+```
+
+```
+E: Failed to fetch https://packages.clickhouse.com/deb/dists/stable/main/binary-amd64/Packages.gz File has unexpected size (30451 != 28154). Mirror sync in progress?
+```
+
+```
+E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Origin' value from 'Artifactory' to 'ClickHouse'
+E: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Label' value from 'Artifactory' to 'ClickHouse'
+N: Repository 'https://packages.clickhouse.com/deb stable InRelease' changed its 'Suite' value from 'stable' to ''
+N: This must be accepted explicitly before updates for this repository can be applied. See apt-secure(8) manpage for details.
+```
+
+```
+Err:11 https://packages.clickhouse.com/deb stable InRelease
+ 400 Bad Request [IP: 172.66.40.249 443]
+```
+
+To resolve the above issue, please use the following commands:
+
+```bash
+sudo rm /var/lib/apt/lists/packages.clickhouse.com_* /var/lib/dpkg/arch /var/lib/apt/lists/partial/packages.clickhouse.com_*
+sudo apt-get clean
+sudo apt-get autoclean
+```
+
## Connecting to the Server {#troubleshooting-accepts-no-connections}
Possible issues:
diff --git a/docs/en/operations/update.md b/docs/en/operations/_update.md
similarity index 88%
rename from docs/en/operations/update.md
rename to docs/en/operations/_update.md
index 24f7efecc7b..86981da2be6 100644
--- a/docs/en/operations/update.md
+++ b/docs/en/operations/_update.md
@@ -1,10 +1,7 @@
----
-slug: /en/operations/update
-sidebar_position: 47
-sidebar_label: ClickHouse Upgrade
----
-# ClickHouse Upgrade
+[//]: # (This file is included in Manage > Updates)
+
+## Self-managed ClickHouse Upgrade
If ClickHouse was installed from `deb` packages, execute the following commands on the server:
diff --git a/docs/en/operations/clickhouse-keeper.md b/docs/en/operations/clickhouse-keeper.md
index 8bf64bca28f..0324f742988 100644
--- a/docs/en/operations/clickhouse-keeper.md
+++ b/docs/en/operations/clickhouse-keeper.md
@@ -5,6 +5,9 @@ sidebar_label: ClickHouse Keeper
---
# ClickHouse Keeper
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
+
+
ClickHouse Keeper provides the coordination system for data [replication](../engines/table-engines/mergetree-family/replication.md) and [distributed DDL](../sql-reference/distributed-ddl.md) query execution. ClickHouse Keeper is compatible with ZooKeeper.
@@ -123,7 +126,7 @@ clickhouse keeper --config /etc/your_path_to_config/config.xml
ClickHouse Keeper also provides 4lw commands that are almost the same as in ZooKeeper. Each command is composed of four letters, such as `mntr`, `stat`, etc. There are some more interesting commands: `stat` gives some general information about the server and connected clients, while `srvr` and `cons` give extended details on the server and connections respectively.
-The 4lw commands has a white list configuration `four_letter_word_white_list` which has default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchc,wchs,dirs,mntr,isro`.
+The 4lw commands have a whitelist configuration, `four_letter_word_white_list`, which has the default value `conf,cons,crst,envi,ruok,srst,srvr,stat,wchs,dirs,mntr,isro,rcvr,apiv,csnp,lgif`.
You can issue the commands to ClickHouse Keeper via telnet or nc, at the client port.
@@ -306,7 +309,26 @@ Sessions with Ephemerals (1):
/clickhouse/task_queue/ddl
```
-## [experimental] Migration from ZooKeeper {#migration-from-zookeeper}
+- `csnp`: Schedule a snapshot creation task. Returns the last committed log index of the scheduled snapshot on success, or `Failed to schedule snapshot creation task.` on failure. Note that the `lgif` command can help you determine whether the snapshot is done.
+
+```
+100
+```
+
+- `lgif`: Keeper log information. `first_log_idx`: the first log index in the log store; `first_log_term`: the first log term; `last_log_idx`: the last log index in the log store; `last_log_term`: the last log term; `last_committed_log_idx`: the last committed log index in the state machine; `leader_committed_log_idx`: the leader's committed log index from this node's perspective; `target_committed_log_idx`: the target log index that should be committed to; `last_snapshot_idx`: the largest committed log index in the last snapshot.
+
+```
+first_log_idx 1
+first_log_term 1
+last_log_idx 101
+last_log_term 1
+last_committed_log_idx 100
+leader_committed_log_idx 101
+target_committed_log_idx 101
+last_snapshot_idx 50
+```
+
+## Migration from ZooKeeper {#migration-from-zookeeper}
Seamless migration from ZooKeeper to ClickHouse Keeper is impossible: you have to stop your ZooKeeper cluster, convert the data, and start ClickHouse Keeper. The `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to a ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md
index 8a95f13e6f8..1d9ce829e79 100644
--- a/docs/en/operations/external-authenticators/index.md
+++ b/docs/en/operations/external-authenticators/index.md
@@ -3,7 +3,11 @@ slug: /en/operations/external-authenticators/
sidebar_position: 48
sidebar_label: External User Authenticators and Directories
title: "External User Authenticators and Directories"
+pagination_next: 'en/operations/external-authenticators/kerberos'
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+
ClickHouse supports authenticating and managing users using external services.
diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md
index 689c3f66e04..c1360e880ad 100644
--- a/docs/en/operations/external-authenticators/kerberos.md
+++ b/docs/en/operations/external-authenticators/kerberos.md
@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/kerberos
---
# Kerberos
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+
Existing and properly configured ClickHouse users can be authenticated via the Kerberos authentication protocol.
diff --git a/docs/en/operations/external-authenticators/ldap.md b/docs/en/operations/external-authenticators/ldap.md
index 5a250a8f60f..0493f5a539f 100644
--- a/docs/en/operations/external-authenticators/ldap.md
+++ b/docs/en/operations/external-authenticators/ldap.md
@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/ldap
title: "LDAP"
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+
An LDAP server can be used to authenticate ClickHouse users. There are two different approaches for doing this:
diff --git a/docs/en/operations/external-authenticators/ssl-x509.md b/docs/en/operations/external-authenticators/ssl-x509.md
index a6287bef45b..109913c2b18 100644
--- a/docs/en/operations/external-authenticators/ssl-x509.md
+++ b/docs/en/operations/external-authenticators/ssl-x509.md
@@ -2,6 +2,9 @@
slug: /en/operations/external-authenticators/ssl-x509
title: "SSL X.509 certificate authentication"
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+<SelfManaged />
[SSL 'strict' option](../server-configuration-parameters/settings.md#server_configuration_parameters-openssl) enables mandatory certificate validation for incoming connections. In this case, only connections with trusted certificates can be established; connections with untrusted certificates are rejected. Certificate validation thus allows an incoming connection to be uniquely authenticated. The `Common Name` field of the certificate is used to identify the connected user, which makes it possible to associate multiple certificates with the same user. Additionally, reissuing and revoking certificates does not affect the ClickHouse configuration.
diff --git a/docs/en/operations/monitoring.md b/docs/en/operations/monitoring.md
index 8c08080e331..0b47450db61 100644
--- a/docs/en/operations/monitoring.md
+++ b/docs/en/operations/monitoring.md
@@ -5,6 +5,9 @@ sidebar_label: Monitoring
---
# Monitoring
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
+
+<SelfManaged />
You can monitor:
diff --git a/docs/en/operations/named-collections.md b/docs/en/operations/named-collections.md
index f605045a0ad..cbb8d0a4c02 100644
--- a/docs/en/operations/named-collections.md
+++ b/docs/en/operations/named-collections.md
@@ -130,7 +130,7 @@ SHOW TABLES FROM mydatabase;
└────────┘
```
-### Example of using named collections with an external dictionary with source MySQL
+### Example of using named collections with a dictionary with a MySQL source
```sql
CREATE DICTIONARY dict (A Int64, B String)
@@ -213,7 +213,7 @@ SHOW TABLES FROM mydatabase
└──────┘
```
-### Example of using named collections with an external dictionary with source POSTGRESQL
+### Example of using named collections with a dictionary with a PostgreSQL source
```sql
CREATE DICTIONARY dict (a Int64, b String)
@@ -270,7 +270,7 @@ SELECT * FROM remote(remote1, database = default, table = test);
└───┴───┘
```
-### Example of using named collections with an external dictionary with source ClickHouse
+### Example of using named collections with a dictionary with a ClickHouse source
```sql
CREATE DICTIONARY dict(a Int64, b String)
diff --git a/docs/en/operations/optimizing-performance/sampling-query-profiler.md b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
index 0178d5bcfa9..7c63d4a9174 100644
--- a/docs/en/operations/optimizing-performance/sampling-query-profiler.md
+++ b/docs/en/operations/optimizing-performance/sampling-query-profiler.md
@@ -3,9 +3,12 @@ slug: /en/operations/optimizing-performance/sampling-query-profiler
sidebar_position: 54
sidebar_label: Query Profiling
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
# Sampling Query Profiler
+
+<SelfManaged />
ClickHouse runs a sampling profiler that allows analyzing query execution. Using the profiler you can find source code routines that are used most frequently during query execution. You can trace CPU time and wall-clock time spent, including idle time.
To use profiler:
diff --git a/docs/en/operations/performance-test.md b/docs/en/operations/performance-test.md
index 6e185c121de..ec0cf007ff4 100644
--- a/docs/en/operations/performance-test.md
+++ b/docs/en/operations/performance-test.md
@@ -5,6 +5,10 @@ sidebar_label: Testing Hardware
title: "How to Test Your Hardware with ClickHouse"
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
+
+<SelfManaged />
+
You can run a basic ClickHouse performance test on any server without installation of ClickHouse packages.
diff --git a/docs/en/operations/requirements.md b/docs/en/operations/requirements.md
deleted file mode 100644
index dc05a7b4896..00000000000
--- a/docs/en/operations/requirements.md
+++ /dev/null
@@ -1,60 +0,0 @@
----
-slug: /en/operations/requirements
-sidebar_position: 44
-sidebar_label: Requirements
----
-
-# Requirements
-
-## CPU
-
-For installation from prebuilt deb packages, use a CPU with x86_64 architecture and support for SSE 4.2 instructions. To run ClickHouse with processors that do not support SSE 4.2 or have AArch64 or PowerPC64LE architecture, you should build ClickHouse from sources.
-
-ClickHouse implements parallel data processing and uses all the hardware resources available. When choosing a processor, take into account that ClickHouse works more efficiently at configurations with a large number of cores but a lower clock rate than at configurations with fewer cores and a higher clock rate. For example, 16 cores with 2600 MHz is preferable to 8 cores with 3600 MHz.
-
-It is recommended to use **Turbo Boost** and **hyper-threading** technologies. It significantly improves performance with a typical workload.
-
-## RAM {#ram}
-
-We recommend using a minimum of 4GB of RAM to perform non-trivial queries. The ClickHouse server can run with a much smaller amount of RAM, but it requires memory for processing queries.
-
-The required volume of RAM depends on:
-
-- The complexity of queries.
-- The amount of data that is processed in queries.
-
-To calculate the required volume of RAM, you should estimate the size of temporary data for [GROUP BY](../sql-reference/statements/select/group-by.md#select-group-by-clause), [DISTINCT](../sql-reference/statements/select/distinct.md#select-distinct), [JOIN](../sql-reference/statements/select/join.md#select-join) and other operations you use.
-
-ClickHouse can use external memory for temporary data. See [GROUP BY in External Memory](../sql-reference/statements/select/group-by.md#select-group-by-in-external-memory) for details.
-
-## Swap File {#swap-file}
-
-Disable the swap file for production environments.
-
-## Storage Subsystem {#storage-subsystem}
-
-You need to have 2GB of free disk space to install ClickHouse.
-
-The volume of storage required for your data should be calculated separately. Assessment should include:
-
-- Estimation of the data volume.
-
- You can take a sample of the data and get the average size of a row from it. Then multiply the value by the number of rows you plan to store.
-
-- The data compression coefficient.
-
- To estimate the data compression coefficient, load a sample of your data into ClickHouse, and compare the actual size of the data with the size of the table stored. For example, clickstream data is usually compressed by 6-10 times.
-
-To calculate the final volume of data to be stored, apply the compression coefficient to the estimated data volume. If you plan to store data in several replicas, then multiply the estimated volume by the number of replicas.
-
-## Network {#network}
-
-If possible, use networks of 10G or higher class.
-
-The network bandwidth is critical for processing distributed queries with a large amount of intermediate data. Besides, network speed affects replication processes.
-
-## Software {#software}
-
-ClickHouse is developed primarily for the Linux family of operating systems. The recommended Linux distribution is Ubuntu. The `tzdata` package should be installed in the system.
-
-ClickHouse can also work in other operating system families. See details in the [install guide](../getting-started/install.md) section of the documentation.
diff --git a/docs/en/operations/server-configuration-parameters/index.md b/docs/en/operations/server-configuration-parameters/index.md
index 0a6b1953a62..27ade81ec55 100644
--- a/docs/en/operations/server-configuration-parameters/index.md
+++ b/docs/en/operations/server-configuration-parameters/index.md
@@ -2,6 +2,7 @@
slug: /en/operations/server-configuration-parameters/
sidebar_position: 54
sidebar_label: Server Configuration Parameters
+pagination_next: en/operations/server-configuration-parameters/settings
---
# Server Configuration Parameters
diff --git a/docs/en/operations/server-configuration-parameters/settings.md b/docs/en/operations/server-configuration-parameters/settings.md
index 05c42974b8d..5faf3819d7e 100644
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@@ -268,14 +268,14 @@ The path to the table in ZooKeeper.
## dictionaries_config {#server_configuration_parameters-dictionaries_config}
-The path to the config file for external dictionaries.
+The path to the config file for dictionaries.
Path:
- Specify the absolute path or the path relative to the server config file.
- The path can contain wildcards \* and ?.
-See also “[External dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”.
+See also “[Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)”.
**Example**
@@ -666,6 +666,7 @@ Keys:
- `http_proxy` - Configure HTTP proxy for sending crash reports.
- `debug` - Sets the Sentry client into debug mode.
- `tmp_path` - Filesystem path for temporary crash report state.
+- `environment` - An arbitrary name of an environment in which the ClickHouse server is running. It will be mentioned in each crash report. The default value is `test` or `prod` depending on the version of ClickHouse.
**Recommended way to use**
@@ -1501,6 +1502,21 @@ If not set, [tmp_path](#tmp-path) is used, otherwise it is ignored.
- Policy should have exactly one volume with local disks.
:::
+## max_temporary_data_on_disk_size {#max_temporary_data_on_disk_size}
+
+Limits the amount of disk space consumed by temporary files in `tmp_path` for the server.
+Queries that exceed this limit will fail with an exception.
+
+Default value: `0`.
+
+**See also**
+
+- [max_temporary_data_on_disk_size_for_user](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_user)
+- [max_temporary_data_on_disk_size_for_query](../../operations/settings/query-complexity.md#settings_max_temporary_data_on_disk_size_for_query)
+- [tmp_path](#tmp-path)
+- [tmp_policy](#tmp-policy)
+- [max_server_memory_usage](#max_server_memory_usage)
+
## uncompressed_cache_size {#server-settings-uncompressed_cache_size}
Cache size (in bytes) for uncompressed data used by table engines from the [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md).
diff --git a/docs/en/operations/settings/index.md b/docs/en/operations/settings/index.md
index 35fadd295cd..eee4058c230 100644
--- a/docs/en/operations/settings/index.md
+++ b/docs/en/operations/settings/index.md
@@ -2,6 +2,7 @@
sidebar_label: Settings
sidebar_position: 51
slug: /en/operations/settings/
+pagination_next: en/operations/settings/settings
---
# Settings Overview
@@ -25,7 +26,7 @@ Ways to configure settings, in order of priority:
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed.
+ - Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed.
Settings that can only be made in the server config file are not covered in this section.
diff --git a/docs/en/operations/settings/permissions-for-queries.md b/docs/en/operations/settings/permissions-for-queries.md
index 3ba62b78cfe..c565de9b21a 100644
--- a/docs/en/operations/settings/permissions-for-queries.md
+++ b/docs/en/operations/settings/permissions-for-queries.md
@@ -16,44 +16,54 @@ Queries in ClickHouse can be divided into several types:
The following settings regulate user permissions by the type of query:
-- [readonly](#settings_readonly) — Restricts permissions for all types of queries except DDL queries.
-- [allow_ddl](#settings_allow_ddl) — Restricts permissions for DDL queries.
+## readonly
+Restricts permissions for read data, write data, and change settings queries.
-`KILL QUERY` can be performed with any settings.
+When set to 1, allows:
-## readonly {#settings_readonly}
+- All types of read queries (like SELECT and equivalent queries).
+- Queries that modify only session context (like USE).
-Restricts permissions for reading data, write data and change settings queries.
+When set to 2, allows the above plus:
+- SET and CREATE TEMPORARY TABLE
-See how the queries are divided into types [above](#permissions_for_queries).
+ :::tip
+ Queries like EXISTS, DESCRIBE, EXPLAIN, SHOW PROCESSLIST, etc. are equivalent to SELECT, because they only select from system tables.
+ :::
Possible values:
-- 0 — All queries are allowed.
-- 1 — Only read data queries are allowed.
-- 2 — Read data and change settings queries are allowed.
+- 0 — Read, Write, and Change settings queries are allowed.
+- 1 — Only Read data queries are allowed.
+- 2 — Read data and Change settings queries are allowed.
+Default value: 0
+
+:::note
After setting `readonly = 1`, the user can’t change `readonly` and `allow_ddl` settings in the current session.
When using the `GET` method in the [HTTP interface](../../interfaces/http.md), `readonly = 1` is set automatically. To modify data, use the `POST` method.
-Setting `readonly = 1` prohibit the user from changing all the settings. There is a way to prohibit the user from changing only specific settings. Also there is a way to allow changing only specific settings under `readonly = 1` restrictions. For details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
+Setting `readonly = 1` prohibits the user from changing settings. There is a way to prohibit the user from changing only specific settings. Also there is a way to allow changing only specific settings under `readonly = 1` restrictions. For details see [constraints on settings](../../operations/settings/constraints-on-settings.md).
+:::
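+
+For example, a minimal sketch of the difference between the two read-only levels (run in a session where `readonly` is not constrained from the outside):
+
+```sql
+SET readonly = 2;
+SELECT 1;             -- allowed: read query
+SET max_threads = 8;  -- allowed: readonly = 2 permits changing settings
+SET readonly = 1;
+SELECT 1;             -- still allowed
+SET max_threads = 4;  -- fails: readonly = 1 forbids changing settings
+```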
-Default value: 0
## allow_ddl {#settings_allow_ddl}
Allows or denies [DDL](https://en.wikipedia.org/wiki/Data_definition_language) queries.
-See how the queries are divided into types [above](#permissions_for_queries).
-
Possible values:
- 0 — DDL queries are not allowed.
- 1 — DDL queries are allowed.
-You can’t execute `SET allow_ddl = 1` if `allow_ddl = 0` for the current session.
-
Default value: 1
-[Original article](https://clickhouse.com/docs/en/operations/settings/permissions_for_queries/)
+:::note
+You cannot run `SET allow_ddl = 1` if `allow_ddl = 0` for the current session.
+:::
+
+
+:::note KILL QUERY
+`KILL QUERY` can be performed with any combination of readonly and allow_ddl settings.
+:::
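+
+A short sketch of how these settings behave in practice (the table name and query ID are illustrative):
+
+```sql
+SET allow_ddl = 0;
+CREATE TABLE t (x UInt8) ENGINE = Memory;  -- fails: DDL queries are not allowed
+SET allow_ddl = 1;                         -- also fails: cannot be re-enabled in this session
+KILL QUERY WHERE query_id = 'some-id';     -- allowed with any readonly/allow_ddl combination
+```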
diff --git a/docs/en/operations/settings/query-complexity.md b/docs/en/operations/settings/query-complexity.md
index 597d524dd3f..ce374f0f1c8 100644
--- a/docs/en/operations/settings/query-complexity.md
+++ b/docs/en/operations/settings/query-complexity.md
@@ -313,4 +313,19 @@ When inserting data, ClickHouse calculates the number of partitions in the inser
> “Too many partitions for single INSERT block (more than” + toString(max_parts) + “). The limit is controlled by ‘max_partitions_per_insert_block’ setting. A large number of partitions is a common misconception. It will lead to severe negative performance impact, including slow server startup, slow INSERT queries and slow SELECT queries. Recommended total number of partitions for a table is under 1000..10000. Please note, that partitioning is not intended to speed up SELECT queries (ORDER BY key is sufficient to make range queries fast). Partitions are intended for data manipulation (DROP PARTITION, etc).”
+## max_temporary_data_on_disk_size_for_user {#settings_max_temporary_data_on_disk_size_for_user}
+
+The maximum amount of data, in bytes, consumed by temporary files on disk for all concurrently running user queries.
+Zero means unlimited.
+
+Default value: 0.
+
+
+## max_temporary_data_on_disk_size_for_query {#settings_max_temporary_data_on_disk_size_for_query}
+
+The maximum amount of data, in bytes, consumed by temporary files on disk for all concurrently running queries.
+Zero means unlimited.
+
+Default value: 0.
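+
+Both are regular query-level settings, so they can be applied per session or per query; a sketch (the one-gigabyte values are arbitrary):
+
+```sql
+SET max_temporary_data_on_disk_size_for_user = 1000000000;   -- ~1 GB across all of this user's queries
+SET max_temporary_data_on_disk_size_for_query = 1000000000;  -- ~1 GB per individual query
+```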
+
[Original article](https://clickhouse.com/docs/en/operations/settings/query_complexity/)
diff --git a/docs/en/operations/settings/settings-users.md b/docs/en/operations/settings/settings-users.md
index 753eef1fb42..c482d72ffca 100644
--- a/docs/en/operations/settings/settings-users.md
+++ b/docs/en/operations/settings/settings-users.md
@@ -35,7 +35,7 @@ Structure of the `users` section:
expression
-
+
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index dc74b607289..a15a6e9bf4a 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -70,7 +70,7 @@ Another use case of `prefer_global_in_and_join` is accessing tables created by
**See also:**
-- [Distributed subqueries](../../sql-reference/operators/in.md#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN`
+- [Distributed subqueries](../../sql-reference/operators/in.md/#select-distributed-subqueries) for more information on how to use `GLOBAL IN`/`GLOBAL JOIN`
## enable_optimize_predicate_expression {#enable-optimize-predicate-expression}
@@ -170,7 +170,7 @@ It makes sense to disable it if the server has millions of tiny tables that are
## function_range_max_elements_in_block {#settings-function_range_max_elements_in_block}
-Sets the safety threshold for data volume generated by function [range](../../sql-reference/functions/array-functions.md#range). Defines the maximum number of values generated by function per block of data (sum of array sizes for every row in a block).
+Sets the safety threshold for the data volume generated by the [range](../../sql-reference/functions/array-functions.md/#range) function. Defines the maximum number of values generated by the function per block of data (the sum of array sizes for every row in a block).
Possible values:
@@ -273,10 +273,10 @@ Default value: 0.
## insert_null_as_default {#insert_null_as_default}
-Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns with not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable) data type.
+Enables or disables the insertion of [default values](../../sql-reference/statements/create/table.md/#create-default-values) instead of [NULL](../../sql-reference/syntax.md/#null-literal) into columns with a non-[nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable) data type.
If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
-This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
+This setting is applicable to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md/#inserting-the-results-of-select) queries. Note that `SELECT` subqueries may be concatenated with `UNION ALL` clause.
Possible values:
@@ -287,7 +287,7 @@ Default value: `1`.
## join_default_strictness {#settings-join_default_strictness}
-Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md#select-join).
+Sets default strictness for [JOIN clauses](../../sql-reference/statements/select/join.md/#select-join).
Possible values:
@@ -322,7 +322,7 @@ When using `partial_merge` algorithm, ClickHouse sorts the data and dumps it to
- `direct` - can be applied when the right storage supports key-value requests.
-The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
+The `direct` algorithm performs a lookup in the right table using rows from the left table as keys. It's supported only by special storage such as [Dictionary](../../engines/table-engines/special/dictionary.md/#dictionary) or [EmbeddedRocksDB](../../engines/table-engines/integrations/embedded-rocksdb.md) and only the `LEFT` and `INNER` JOINs.
- `auto` — try `hash` join and switch on the fly to another algorithm if the memory limit is violated.
@@ -348,7 +348,7 @@ Default value: 0.
See also:
-- [JOIN clause](../../sql-reference/statements/select/join.md#select-join)
+- [JOIN clause](../../sql-reference/statements/select/join.md/#select-join)
- [Join table engine](../../engines/table-engines/special/join.md)
- [join_default_strictness](#settings-join_default_strictness)
@@ -359,7 +359,7 @@ Sets the type of [JOIN](../../sql-reference/statements/select/join.md) behaviour
Possible values:
- 0 — The empty cells are filled with the default value of the corresponding field type.
-- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md).
+- 1 — `JOIN` behaves the same way as in standard SQL. The type of the corresponding field is converted to [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable), and empty cells are filled with [NULL](../../sql-reference/syntax.md).
Default value: 0.
@@ -431,7 +431,7 @@ Default value: 0.
See also:
-- [JOIN strictness](../../sql-reference/statements/select/join.md#join-settings)
+- [JOIN strictness](../../sql-reference/statements/select/join.md/#join-settings)
## temporary_files_codec {#temporary_files_codec}
@@ -532,7 +532,7 @@ Default value: 8.
If ClickHouse should read more than `merge_tree_max_rows_to_use_cache` rows in one query, it does not use the cache of uncompressed blocks.
-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
+The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible values:
@@ -544,7 +544,7 @@ Default value: 128 ✕ 8192.
If ClickHouse should read more than `merge_tree_max_bytes_to_use_cache` bytes in one query, it does not use the cache of uncompressed blocks.
-The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from trashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
+The cache of uncompressed blocks stores data extracted for queries. ClickHouse uses this cache to speed up responses to repeated small queries. This setting protects the cache from thrashing by queries that read a large amount of data. The [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) server setting defines the size of the cache of uncompressed blocks.
Possible values:
@@ -594,7 +594,7 @@ Default value: `1`.
Setting up query logging.
-Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query-log) server configuration parameter.
+Queries sent to ClickHouse with this setup are logged according to the rules in the [query_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query-log) server configuration parameter.
Example:
@@ -639,7 +639,7 @@ log_queries_min_type='EXCEPTION_WHILE_PROCESSING'
Setting up query threads logging.
-Query threads log into [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting have effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_thread_log) server configuration parameter.
+Query threads log into the [system.query_thread_log](../../operations/system-tables/query_thread_log.md) table. This setting has an effect only when [log_queries](#settings-log-queries) is true. Queries’ threads run by ClickHouse with this setup are logged according to the rules in the [query_thread_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_thread_log) server configuration parameter.
Possible values:
@@ -658,7 +658,7 @@ log_query_threads=1
Setting up query views logging.
-When a query run by ClickHouse with this setup on has associated views (materialized or live views), they are logged in the [query_views_log](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-query_views_log) server configuration parameter.
+When a query run by ClickHouse with this setting enabled has associated views (materialized or live views), they are logged according to the [query_views_log](../../operations/server-configuration-parameters/settings.md/#server_configuration_parameters-query_views_log) server configuration parameter.
Example:
@@ -668,7 +668,7 @@ log_query_views=1
## log_formatted_queries {#settings-log-formatted-queries}
-Allows to log formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table.
+Allows logging formatted queries to the [system.query_log](../../operations/system-tables/query_log.md) system table (populates the `formatted_query` column of [system.query_log](../../operations/system-tables/query_log.md)).
Possible values:
@@ -884,7 +884,7 @@ Default value: `5`.
## max_replicated_fetches_network_bandwidth_for_server {#max_replicated_fetches_network_bandwidth_for_server}
-Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) fetches for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_fetches_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_fetches_network_bandwidth) setting.
+Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) fetches for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_fetches_network_bandwidth](../../operations/settings/merge-tree-settings.md/#max_replicated_fetches_network_bandwidth) setting.
The setting is not enforced with perfect accuracy.
@@ -905,7 +905,7 @@ Could be used for throttling speed when replicating the data to add or replace n
## max_replicated_sends_network_bandwidth_for_server {#max_replicated_sends_network_bandwidth_for_server}
-Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md#max_replicated_sends_network_bandwidth) setting.
+Limits the maximum speed of data exchange over the network in bytes per second for [replicated](../../engines/table-engines/mergetree-family/replication.md) sends for the server. Only has meaning at server startup. You can also limit the speed for a particular table with [max_replicated_sends_network_bandwidth](../../operations/settings/merge-tree-settings.md/#max_replicated_sends_network_bandwidth) setting.
The setting is not enforced with perfect accuracy.
@@ -955,7 +955,7 @@ For more information, see the section “Extreme values”.
## kafka_max_wait_ms {#kafka-max-wait-ms}
-The wait time in milliseconds for reading messages from [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) before retry.
+The wait time in milliseconds for reading messages from [Kafka](../../engines/table-engines/integrations/kafka.md/#kafka) before retry.
Possible values:
@@ -977,7 +977,7 @@ Default value: false.
## use_uncompressed_cache {#setting-use_uncompressed_cache}
Whether to use a cache of uncompressed blocks. Accepts 0 or 1. By default, 0 (disabled).
-Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
+Using the uncompressed cache (only for tables in the MergeTree family) can significantly reduce latency and increase throughput when working with a large number of short queries. Enable this setting for users who send frequent short requests. Also pay attention to the [uncompressed_cache_size](../../operations/server-configuration-parameters/settings.md/#server-settings-uncompressed_cache_size) configuration parameter (only set in the config file) – the size of uncompressed cache blocks. By default, it is 8 GiB. The uncompressed cache is filled in as needed and the least-used data is automatically deleted.
For queries that read at least a somewhat large volume of data (one million rows or more), the uncompressed cache is disabled automatically to save space for truly small queries. This means that you can keep the ‘use_uncompressed_cache’ setting always set to 1.
@@ -1124,7 +1124,7 @@ This setting is useful for replicated tables with a sampling key. A query may be
- The cluster latency distribution has a long tail, so that querying more servers increases the query overall latency.
:::warning
-This setting will produce incorrect results when joins or subqueries are involved, and all tables don't meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md#max_parallel_replica-subqueries) for more details.
+This setting will produce incorrect results when joins or subqueries are involved and not all tables meet certain requirements. See [Distributed Subqueries and max_parallel_replicas](../../sql-reference/operators/in.md/#max_parallel_replica-subqueries) for more details.
:::
## compile_expressions {#compile-expressions}
@@ -1261,7 +1261,7 @@ Possible values:
Default value: 1.
By default, blocks inserted into replicated tables by the `INSERT` statement are deduplicated (see [Data Replication](../../engines/table-engines/mergetree-family/replication.md)).
-For the replicated tables by default the only 100 of the most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
+For replicated tables, by default only the 100 most recent blocks for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
For non-replicated tables, see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
## deduplicate_blocks_in_dependent_materialized_views {#settings-deduplicate-blocks-in-dependent-materialized-views}
@@ -1296,7 +1296,7 @@ Default value: empty string (disabled)
`insert_deduplication_token` is used for deduplication _only_ when not empty.
-For the replicated tables by default the only 100 of the most recent inserts for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
+For replicated tables, by default only the 100 most recent inserts for each partition are deduplicated (see [replicated_deduplication_window](merge-tree-settings.md/#replicated-deduplication-window), [replicated_deduplication_window_seconds](merge-tree-settings.md/#replicated-deduplication-window-seconds)).
For non-replicated tables, see [non_replicated_deduplication_window](merge-tree-settings.md/#non-replicated-deduplication-window).
Example:
@@ -1373,15 +1373,15 @@ Default value: 0.
## count_distinct_implementation {#settings-count_distinct_implementation}
-Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) construction.
+Specifies which of the `uniq*` functions should be used to perform the [COUNT(DISTINCT …)](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) construction.
Possible values:
-- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md#agg_function-uniq)
-- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md#agg_function-uniqcombined)
-- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md#agg_function-uniqcombined64)
-- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md#agg_function-uniqhll12)
-- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md#agg_function-uniqexact)
+- [uniq](../../sql-reference/aggregate-functions/reference/uniq.md/#agg_function-uniq)
+- [uniqCombined](../../sql-reference/aggregate-functions/reference/uniqcombined.md/#agg_function-uniqcombined)
+- [uniqCombined64](../../sql-reference/aggregate-functions/reference/uniqcombined64.md/#agg_function-uniqcombined64)
+- [uniqHLL12](../../sql-reference/aggregate-functions/reference/uniqhll12.md/#agg_function-uniqhll12)
+- [uniqExact](../../sql-reference/aggregate-functions/reference/uniqexact.md/#agg_function-uniqexact)
Default value: `uniqExact`.
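
For example, to trade exactness for speed and memory on a large `COUNT(DISTINCT ...)` (a sketch using the built-in `numbers` table function):

```sql
SET count_distinct_implementation = 'uniqCombined';
SELECT count(DISTINCT number % 1000) FROM numbers(10000000);
```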
@@ -1599,7 +1599,7 @@ Right now it requires `optimize_skip_unused_shards` (the reason behind this is t
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
-Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didn’t perform a merge.
+Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/optimize.md) query didn’t perform a merge.
By default, `OPTIMIZE` returns successfully even if it didn’t do anything. This setting lets you differentiate these situations and get the reason in an exception message.
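
A sketch (assuming an existing `MergeTree` table `t` whose parts are already fully merged):

```sql
SET optimize_throw_if_noop = 1;
OPTIMIZE TABLE t;  -- now throws an exception explaining why no merge was performed
```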
@@ -1616,14 +1616,14 @@ Enables or disables optimization by transforming some functions to reading subco
These functions can be transformed:
-- [length](../../sql-reference/functions/array-functions.md#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
-- [empty](../../sql-reference/functions/array-functions.md#function-empty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
-- [notEmpty](../../sql-reference/functions/array-functions.md#function-notempty) to read the [size0](../../sql-reference/data-types/array.md#array-size) subcolumn.
-- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
-- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
-- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md#finding-null) subcolumn.
-- [mapKeys](../../sql-reference/functions/tuple-map-functions.md#mapkeys) to read the [keys](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
-- [mapValues](../../sql-reference/functions/tuple-map-functions.md#mapvalues) to read the [values](../../sql-reference/data-types/map.md#map-subcolumns) subcolumn.
+- [length](../../sql-reference/functions/array-functions.md/#array_functions-length) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [empty](../../sql-reference/functions/array-functions.md/#function-empty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [notEmpty](../../sql-reference/functions/array-functions.md/#function-notempty) to read the [size0](../../sql-reference/data-types/array.md/#array-size) subcolumn.
+- [isNull](../../sql-reference/operators/index.md#operator-is-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [isNotNull](../../sql-reference/operators/index.md#is-not-null) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [count](../../sql-reference/aggregate-functions/reference/count.md) to read the [null](../../sql-reference/data-types/nullable.md/#finding-null) subcolumn.
+- [mapKeys](../../sql-reference/functions/tuple-map-functions.md/#mapkeys) to read the [keys](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
+- [mapValues](../../sql-reference/functions/tuple-map-functions.md/#mapvalues) to read the [values](../../sql-reference/data-types/map.md/#map-subcolumns) subcolumn.
Possible values:
@@ -1782,7 +1782,7 @@ Default value: 1000000000 nanoseconds (once a second).
See also:
-- System table [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
## query_profiler_cpu_time_period_ns {#query_profiler_cpu_time_period_ns}
@@ -1805,7 +1805,7 @@ Default value: 1000000000 nanoseconds.
See also:
-- System table [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
## allow_introspection_functions {#settings-allow_introspection_functions}
@@ -1821,11 +1821,11 @@ Default value: 0.
**See Also**
- [Sampling Query Profiler](../../operations/optimizing-performance/sampling-query-profiler.md)
-- System table [trace_log](../../operations/system-tables/trace_log.md#system_tables-trace_log)
+- System table [trace_log](../../operations/system-tables/trace_log.md/#system_tables-trace_log)
## input_format_parallel_parsing {#input-format-parallel-parsing}
-Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.
+Enables or disables order-preserving parallel parsing of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
Possible values:
@@ -1836,7 +1836,7 @@ Default value: `1`.
## output_format_parallel_formatting {#output-format-parallel-formatting}
-Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md#tabseparated), [TKSV](../../interfaces/formats.md#tskv), [CSV](../../interfaces/formats.md#csv) and [JSONEachRow](../../interfaces/formats.md#jsoneachrow) formats.
+Enables or disables parallel formatting of data formats. Supported only for [TSV](../../interfaces/formats.md/#tabseparated), [TSKV](../../interfaces/formats.md/#tskv), [CSV](../../interfaces/formats.md/#csv) and [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) formats.
Possible values:
@@ -1878,7 +1878,7 @@ Default value: 0.
## insert_distributed_sync {#insert_distributed_sync}
-Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
+Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in asynchronous mode. When `insert_distributed_sync=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).
@@ -1891,12 +1891,12 @@ Default value: `0`.
**See Also**
-- [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed)
-- [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed)
+- [Distributed Table Engine](../../engines/table-engines/special/distributed.md/#distributed)
+- [Managing Distributed Tables](../../sql-reference/statements/system.md/#query-language-system-distributed)
## insert_shard_id {#insert_shard_id}
-If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md#distributed) table into which the data will be inserted synchronously.
+If not `0`, specifies the shard of [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table into which the data will be inserted synchronously.
If `insert_shard_id` value is incorrect, the server will throw an exception.
@@ -1909,7 +1909,7 @@ SELECT uniq(shard_num) FROM system.clusters WHERE cluster = 'requested_cluster';
Possible values:
- 0 — Disabled.
-- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
+- Any number from `1` to `shards_num` of corresponding [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table.
Default value: `0`.
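
A sketch (assuming `dist_table` is a `Distributed` table over a cluster with at least one shard):

```sql
SET insert_shard_id = 1;            -- write synchronously to shard 1 only
INSERT INTO dist_table VALUES (1);
```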
@@ -1969,7 +1969,7 @@ Default value: 16.
## background_move_pool_size {#background_move_pool_size}
-Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
+Sets the number of threads performing background moves of data parts for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-multiple-volumes)-engine tables. This setting is applied at the ClickHouse server start and can’t be changed in a user session.
Possible values:
@@ -1979,7 +1979,7 @@ Default value: 8.
## background_schedule_pool_size {#background_schedule_pool_size}
-Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can’t be changed in a user session.
+Sets the number of threads performing background tasks for [replicated](../../engines/table-engines/mergetree-family/replication.md) tables, [Kafka](../../engines/table-engines/integrations/kafka.md) streaming, [DNS cache updates](../../operations/server-configuration-parameters/settings.md/#server-settings-dns-cache-update-period). This setting is applied at ClickHouse server start and can’t be changed in a user session.
Possible values:
@@ -2036,8 +2036,8 @@ Default value: 16.
**See Also**
-- [Kafka](../../engines/table-engines/integrations/kafka.md#kafka) engine.
-- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md#rabbitmq-engine) engine.
+- [Kafka](../../engines/table-engines/integrations/kafka.md/#kafka) engine.
+- [RabbitMQ](../../engines/table-engines/integrations/rabbitmq.md/#rabbitmq-engine) engine.
## validate_polygons {#validate_polygons}
@@ -2052,7 +2052,7 @@ Default value: 1.
## transform_null_in {#transform_null_in}
-Enables equality of [NULL](../../sql-reference/syntax.md#null-literal) values for [IN](../../sql-reference/operators/in.md) operator.
+Enables equality of [NULL](../../sql-reference/syntax.md/#null-literal) values for the [IN](../../sql-reference/operators/in.md) operator.
By default, `NULL` values can’t be compared because `NULL` means an undefined value. Thus, the comparison `expr = NULL` must always return `false`. With this setting, `NULL = NULL` returns `true` for the `IN` operator.
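
A minimal sketch of the behavioural difference:

```sql
SELECT NULL IN (NULL);                                 -- 0: NULL never matches by default
SELECT NULL IN (NULL) SETTINGS transform_null_in = 1;  -- 1: NULL = NULL now holds for IN
```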
@@ -2106,7 +2106,7 @@ Result:
**See Also**
-- [NULL Processing in IN Operators](../../sql-reference/operators/in.md#in-null-processing)
+- [NULL Processing in IN Operators](../../sql-reference/operators/in.md/#in-null-processing)
## low_cardinality_max_dictionary_size {#low_cardinality_max_dictionary_size}
@@ -2133,7 +2133,7 @@ Default value: 0.
## low_cardinality_allow_in_native_format {#low_cardinality_allow_in_native_format}
-Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md#native) format.
+Allows or restricts using the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) data type with the [Native](../../interfaces/formats.md/#native) format.
If usage of `LowCardinality` is restricted, the ClickHouse server converts `LowCardinality` columns to ordinary ones for `SELECT` queries, and converts ordinary columns to `LowCardinality` columns for `INSERT` queries.
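
A sketch of restricting `LowCardinality` on the wire (the column is converted before being sent in `Native` format):

```sql
SET low_cardinality_allow_in_native_format = 0;
SELECT toLowCardinality('a') AS s FORMAT Native;  -- s is sent as a plain String column
```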
@@ -2197,7 +2197,7 @@ Default value: 268435456.
## optimize_read_in_order {#optimize_read_in_order}
-Enables [ORDER BY](../../sql-reference/statements/select/order-by.md#optimize_read_in_order) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for reading data from [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
+Enables [ORDER BY](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries for reading data from [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
Possible values:
@@ -2208,7 +2208,7 @@ Default value: `1`.
**See Also**
-- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md#optimize_read_in_order)
+- [ORDER BY Clause](../../sql-reference/statements/select/order-by.md/#optimize_read_in_order)
## optimize_aggregation_in_order {#optimize_aggregation_in_order}
@@ -2223,7 +2223,7 @@ Default value: `0`.
**See Also**
-- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md#aggregation-in-order)
+- [GROUP BY optimization](../../sql-reference/statements/select/group-by.md/#aggregation-in-order)
## mutations_sync {#mutations_sync}
@@ -2261,8 +2261,8 @@ Default value: `0`.
**See Also**
-- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (`merge_with_ttl_timeout` setting)
-- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl)
+- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-query-clauses) (`merge_with_ttl_timeout` setting)
+- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md/#mergetree-table-ttl)
## lock_acquire_timeout {#lock_acquire_timeout}
@@ -2279,7 +2279,7 @@ Default value: `120` seconds.
## cast_keep_nullable {#cast_keep_nullable}
-Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) operations.
+Enables or disables keeping of the `Nullable` data type in [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) operations.
When the setting is enabled and the argument of `CAST` function is `Nullable`, the result is also transformed to `Nullable` type. When the setting is disabled, the result always has the destination type exactly.
@@ -2324,7 +2324,7 @@ Result:
**See Also**
-- [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
+- [CAST](../../sql-reference/functions/type-conversion-functions.md/#type_conversion_function-cast) function
## system_events_show_zero_values {#system_events_show_zero_values}
@@ -2369,7 +2369,7 @@ Result
## persistent {#persistent}
-Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines.
+Disables persistency for the [Set](../../engines/table-engines/special/set.md/#set) and [Join](../../engines/table-engines/special/join.md/#join) table engines.
Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence.
@@ -2382,7 +2382,7 @@ Default value: `1`.
## allow_nullable_key {#allow-nullable-key}
-Allows using of the [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable)-typed values in a sorting and a primary key for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) tables.
+Allows using [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable)-typed values in the sorting and primary keys of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md/#table_engines-mergetree) tables.
Possible values:
@@ -2401,7 +2401,7 @@ Do not enable this feature in version `<= 21.8`. It's not properly implemented a
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
-Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
+Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md/#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
@@ -2448,7 +2448,7 @@ See examples in [UNION](../../sql-reference/statements/select/union.md).
## data_type_default_nullable {#data_type_default_nullable}
-Allows data types without explicit modifiers [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) in column definition will be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
+Allows data types without explicit [NULL or NOT NULL](../../sql-reference/statements/create/table.md/#null-modifiers) modifiers in a column definition to be [Nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).
Possible values:
@@ -2478,7 +2478,7 @@ It can be useful when merges are CPU bounded not IO bounded (performing heavy da
## max_final_threads {#max-final-threads}
-Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
+Sets the maximum number of parallel threads for the `SELECT` query data read phase with the [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
Possible values:
@@ -2551,7 +2551,7 @@ Result:
└─────────────┘
```
-Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour.
+Note that this setting influences [Materialized view](../../sql-reference/statements/create/view.md/#materialized) and [MaterializedMySQL](../../engines/database-engines/materialized-mysql.md) behaviour.
## engine_file_empty_if_not_exists {#engine-file-empty_if-not-exists}
@@ -2608,7 +2608,7 @@ Default value: `0`.
## allow_experimental_live_view {#allow-experimental-live-view}
-Allows creation of experimental [live views](../../sql-reference/statements/create/view.md#live-view).
+Allows creation of experimental [live views](../../sql-reference/statements/create/view.md/#live-view).
Possible values:
@@ -2619,25 +2619,19 @@ Default value: `0`.
## live_view_heartbeat_interval {#live-view-heartbeat-interval}
-Sets the heartbeat interval in seconds to indicate [live view](../../sql-reference/statements/create/view.md#live-view) is alive .
+Sets the heartbeat interval in seconds to indicate that a [live view](../../sql-reference/statements/create/view.md/#live-view) is alive.
Default value: `15`.
## max_live_view_insert_blocks_before_refresh {#max-live-view-insert-blocks-before-refresh}
-Sets the maximum number of inserted blocks after which mergeable blocks are dropped and query for [live view](../../sql-reference/statements/create/view.md#live-view) is re-executed.
+Sets the maximum number of inserted blocks after which mergeable blocks are dropped and the query for a [live view](../../sql-reference/statements/create/view.md/#live-view) is re-executed.
Default value: `64`.
-## temporary_live_view_timeout {#temporary-live-view-timeout}
-
-Sets the interval in seconds after which [live view](../../sql-reference/statements/create/view.md#live-view) with timeout is deleted.
-
-Default value: `5`.
-
## periodic_live_view_refresh {#periodic-live-view-refresh}
-Sets the interval in seconds after which periodically refreshed [live view](../../sql-reference/statements/create/view.md#live-view) is forced to refresh.
+Sets the interval in seconds after which a periodically refreshed [live view](../../sql-reference/statements/create/view.md/#live-view) is forced to refresh.
Default value: `60`.
@@ -2676,7 +2670,7 @@ Default value: 180.
## check_query_single_value_result {#check_query_single_value_result}
-Defines the level of detail for the [CHECK TABLE](../../sql-reference/statements/check-table.md#checking-mergetree-tables) query result for `MergeTree` family engines .
+Defines the level of detail of the [CHECK TABLE](../../sql-reference/statements/check-table.md/#checking-mergetree-tables) query result for `MergeTree` family engines.
Possible values:
@@ -2687,7 +2681,7 @@ Default value: `0`.
## prefer_column_name_to_alias {#prefer-column-name-to-alias}
-Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when alias is the same as the column name, see [Expression Aliases](../../sql-reference/syntax.md#notes-on-usage). Enable this setting to make aliases syntax rules in ClickHouse more compatible with most other database engines.
+Enables or disables using the original column names instead of aliases in query expressions and clauses. It especially matters when an alias is the same as a column name; see [Expression Aliases](../../sql-reference/syntax.md/#notes-on-usage). Enable this setting to make alias syntax rules in ClickHouse more compatible with most other database engines.
Possible values:
@@ -2731,7 +2725,7 @@ Result:
## limit {#limit}
-Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md#limit-clause) clause, so that the limit, specified in the query, cannot exceed the limit, set by this setting.
+Sets the maximum number of rows to get from the query result. It adjusts the value set by the [LIMIT](../../sql-reference/statements/select/limit.md/#limit-clause) clause, so that the limit specified in the query cannot exceed the limit set by this setting.
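+
+For example, the query-level `LIMIT` cannot exceed the setting:
+
+```sql
+SET limit = 5;
+
+-- The query asks for 10 rows, but the setting caps the result at 5 rows.
+SELECT * FROM numbers(100) LIMIT 10;
+```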
Possible values:
@@ -2742,7 +2736,7 @@ Default value: `0`.
## offset {#offset}
-Sets the number of rows to skip before starting to return rows from the query. It adjusts the offset set by the [OFFSET](../../sql-reference/statements/select/offset.md#offset-fetch) clause, so that these two values are summarized.
+Sets the number of rows to skip before starting to return rows from the query. It adjusts the offset set by the [OFFSET](../../sql-reference/statements/select/offset.md/#offset-fetch) clause, so that the two values are summed.
Possible values:
@@ -2779,7 +2773,7 @@ Result:
## optimize_syntax_fuse_functions {#optimize_syntax_fuse_functions}
-Enables to fuse aggregate functions with identical argument. It rewrites query contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md#agg_function-avg) with identical argument to [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md#agg_function-sumCount).
+Enables fusing aggregate functions with an identical argument. It rewrites a query that contains at least two aggregate functions from [sum](../../sql-reference/aggregate-functions/reference/sum.md/#agg_function-sum), [count](../../sql-reference/aggregate-functions/reference/count.md/#agg_function-count) or [avg](../../sql-reference/aggregate-functions/reference/avg.md/#agg_function-avg) with an identical argument to use [sumCount](../../sql-reference/aggregate-functions/reference/sumcount.md/#agg_function-sumCount).
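+
+A sketch of the rewrite (the table `fuse_tbl` and column `b` are hypothetical):
+
+```sql
+SET optimize_syntax_fuse_functions = 1;
+
+-- sum(b), count(b) and avg(b) share the argument b, so they can all be
+-- derived from a single sumCount(b) aggregation over one pass of the data.
+EXPLAIN SYNTAX SELECT sum(b), count(b), avg(b) FROM fuse_tbl;
+```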
Possible values:
@@ -2938,18 +2932,18 @@ If the setting is set to `0`, the table function does not make Nullable columns
## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}
-Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries.
+Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md/#projections) optimization when processing `SELECT` queries.
Possible values:
- 0 — Projection optimization disabled.
- 1 — Projection optimization enabled.
-Default value: `0`.
+Default value: `1`.
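+
+As a sketch (the table is hypothetical), a query whose `GROUP BY` matches a projection can be answered from the pre-aggregated projection data:
+
+```sql
+CREATE TABLE visits
+(
+    user_id UInt64,
+    url String,
+    PROJECTION by_user
+    (
+        SELECT user_id, count()
+        GROUP BY user_id
+    )
+)
+ENGINE = MergeTree
+ORDER BY url;
+
+-- With projection optimization enabled, this aggregation can be served
+-- from the projection instead of scanning the raw rows.
+SELECT user_id, count() FROM visits GROUP BY user_id;
+```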
## force_optimize_projection {#force-optimize-projection}
-Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
+Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md/#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).
Possible values:
@@ -2984,7 +2978,7 @@ Default value: `120` seconds.
## regexp_max_matches_per_row {#regexp-max-matches-per-row}
-Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using greedy regular expression in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md#extractallgroups-horizontal) function.
+Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using greedy regular expressions in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md/#extractallgroups-horizontal) function.
Possible values:
@@ -3016,7 +3010,7 @@ Default value: `1`.
## short_circuit_function_evaluation {#short-circuit-function-evaluation}
-Allows calculating the [if](../../sql-reference/functions/conditional-functions.md#if), [multiIf](../../sql-reference/functions/conditional-functions.md#multiif), [and](../../sql-reference/functions/logical-functions.md#logical-and-function), and [or](../../sql-reference/functions/logical-functions.md#logical-or-function) functions according to a [short scheme](https://en.wikipedia.org/wiki/Short-circuit_evaluation). This helps optimize the execution of complex expressions in these functions and prevent possible exceptions (such as division by zero when it is not expected).
+Allows calculating the [if](../../sql-reference/functions/conditional-functions.md/#if), [multiIf](../../sql-reference/functions/conditional-functions.md/#multiif), [and](../../sql-reference/functions/logical-functions.md/#logical-and-function), and [or](../../sql-reference/functions/logical-functions.md/#logical-or-function) functions according to a [short scheme](https://en.wikipedia.org/wiki/Short-circuit_evaluation). This helps optimize the execution of complex expressions in these functions and prevent possible exceptions (such as division by zero when it is not expected).
Possible values:
@@ -3028,7 +3022,7 @@ Default value: `enable`.
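+
+For example, short-circuit evaluation prevents the division by zero below, because `intDiv(42, number)` is computed only for rows where the condition is false:
+
+```sql
+SET short_circuit_function_evaluation = 'enable';
+
+-- No exception for number = 0: the else-branch is skipped for that row.
+SELECT if(number = 0, 0, intDiv(42, number)) FROM numbers(5);
+```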
## max_hyperscan_regexp_length {#max-hyperscan-regexp-length}
-Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
+Defines the maximum length for each regular expression in the [hyperscan multi-match functions](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn).
Possible values:
@@ -3071,7 +3065,7 @@ Exception: Regexp length too large.
## max_hyperscan_regexp_total_length {#max-hyperscan-regexp-total-length}
-Sets the maximum length total of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md#multimatchanyhaystack-pattern1-pattern2-patternn).
+Sets the maximum total length of all regular expressions in each [hyperscan multi-match function](../../sql-reference/functions/string-search-functions.md/#multimatchanyhaystack-pattern1-pattern2-patternn).
Possible values:
@@ -3148,8 +3142,8 @@ Result:
## enable_extended_results_for_datetime_functions {#enable-extended-results-for-datetime-functions}
Enables or disables returning results of type:
-- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md#tolastdayofmonth).
-- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md#timeslot).
+- `Date32` with extended range (compared to type `Date`) for functions [toStartOfYear](../../sql-reference/functions/date-time-functions.md/#tostartofyear), [toStartOfISOYear](../../sql-reference/functions/date-time-functions.md/#tostartofisoyear), [toStartOfQuarter](../../sql-reference/functions/date-time-functions.md/#tostartofquarter), [toStartOfMonth](../../sql-reference/functions/date-time-functions.md/#tostartofmonth), [toStartOfWeek](../../sql-reference/functions/date-time-functions.md/#tostartofweek), [toMonday](../../sql-reference/functions/date-time-functions.md/#tomonday) and [toLastDayOfMonth](../../sql-reference/functions/date-time-functions.md/#tolastdayofmonth).
+- `DateTime64` with extended range (compared to type `DateTime`) for functions [toStartOfDay](../../sql-reference/functions/date-time-functions.md/#tostartofday), [toStartOfHour](../../sql-reference/functions/date-time-functions.md/#tostartofhour), [toStartOfMinute](../../sql-reference/functions/date-time-functions.md/#tostartofminute), [toStartOfFiveMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffiveminutes), [toStartOfTenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoftenminutes), [toStartOfFifteenMinutes](../../sql-reference/functions/date-time-functions.md/#tostartoffifteenminutes) and [timeSlot](../../sql-reference/functions/date-time-functions.md/#timeslot).
Possible values:
@@ -3173,7 +3167,7 @@ Default value: `1`.
## optimize_move_to_prewhere_if_final {#optimize_move_to_prewhere_if_final}
-Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md#select-from-final) modifier.
+Enables or disables automatic [PREWHERE](../../sql-reference/statements/select/prewhere.md) optimization in [SELECT](../../sql-reference/statements/select/index.md) queries with [FINAL](../../sql-reference/statements/select/from.md/#select-from-final) modifier.
Works only for [*MergeTree](../../engines/table-engines/mergetree-family/index.md) tables.
@@ -3190,7 +3184,7 @@ Default value: `0`.
## describe_include_subcolumns {#describe_include_subcolumns}
-Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or an [Array](../../sql-reference/data-types/array.md#array-size) data type.
+Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md/#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md/#finding-null) or an [Array](../../sql-reference/data-types/array.md/#array-size) data type.
Possible values:
@@ -3289,7 +3283,7 @@ Default value: `0`.
## alter_partition_verbose_result {#alter-partition-verbose-result}
Enables or disables the display of information about the parts to which the manipulation operations with partitions and parts have been successfully applied.
-Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition).
+Applicable to [ATTACH PARTITION|PART](../../sql-reference/statements/alter/partition.md/#alter_attach-partition) and to [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition).
Possible values:
@@ -3405,6 +3399,17 @@ Use schema from cache for URL with last modification time validation (for urls w
Default value: `true`.
+## use_structure_from_insertion_table_in_table_functions {#use_structure_from_insertion_table_in_table_functions}
+
+Use the structure of the insertion table instead of inferring the schema from the data.
+
+Possible values:
+- 0 — Disabled.
+- 1 — Enabled.
+- 2 — Auto.
+
+Default value: 2.
+
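+A sketch (the table and file names are hypothetical):
+
+```sql
+SET use_structure_from_insertion_table_in_table_functions = 1;
+
+-- The columns and types of target_table are used for reading data.jsonl,
+-- so no schema inference over the file contents is needed.
+INSERT INTO target_table SELECT * FROM file('data.jsonl');
+```
+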
## compatibility {#compatibility}
This setting changes other settings according to provided ClickHouse version.
@@ -3424,11 +3429,11 @@ When writing data, ClickHouse throws an exception if input data contain columns
Supported formats:
-- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
-- [TSKV](../../interfaces/formats.md#tskv)
+- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow)
+- [TSKV](../../interfaces/formats.md/#tskv)
- All formats with suffixes WithNames/WithNamesAndTypes
-- [JSONColumns](../../interfaces/formats.md#jsoncolumns)
-- [MySQLDump](../../interfaces/formats.md#mysqldump)
+- [JSONColumns](../../interfaces/formats.md/#jsoncolumns)
+- [MySQLDump](../../interfaces/formats.md/#mysqldump)
Possible values:
@@ -3445,18 +3450,18 @@ To improve insert performance, we recommend disabling this check if you are sure
Supported formats:
-- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
-- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
-- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
-- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
-- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
-- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
-- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
-- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
-- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
-- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
-- [CustomSeparatedWithNames](../../interfaces/formats.md#customseparatedwithnames)
-- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
+- [CSVWithNames](../../interfaces/formats.md/#csvwithnames)
+- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes)
+- [TabSeparatedWithNames](../../interfaces/formats.md/#tabseparatedwithnames)
+- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes)
+- [JSONCompactEachRowWithNames](../../interfaces/formats.md/#jsoncompacteachrowwithnames)
+- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes)
+- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md/#jsoncompactstringseachrowwithnames)
+- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes)
+- [RowBinaryWithNames](../../interfaces/formats.md/#rowbinarywithnames)
+- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes)
+- [CustomSeparatedWithNames](../../interfaces/formats.md/#customseparatedwithnames)
+- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes)
Possible values:
@@ -3471,12 +3476,12 @@ Controls whether format parser should check if data types from the input data ma
Supported formats:
-- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
-- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
-- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
-- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
-- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes)
-- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md#customseparatedwithnamesandtypes)
+- [CSVWithNamesAndTypes](../../interfaces/formats.md/#csvwithnamesandtypes)
+- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md/#tabseparatedwithnamesandtypes)
+- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompacteachrowwithnamesandtypes)
+- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md/#jsoncompactstringseachrowwithnamesandtypes)
+- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md/#rowbinarywithnamesandtypes-rowbinarywithnamesandtypes)
+- [CustomSeparatedWithNamesAndTypes](../../interfaces/formats.md/#customseparatedwithnamesandtypes)
Possible values:
@@ -3487,7 +3492,7 @@ Default value: 1.
## input_format_defaults_for_omitted_fields {#input_format_defaults_for_omitted_fields}
-When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv), [TabSeparated](../../interfaces/formats.md#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes.
+When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow), [CSV](../../interfaces/formats.md/#csv), [TabSeparated](../../interfaces/formats.md/#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes.
:::note
When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.
@@ -3502,7 +3507,7 @@ Default value: 1.
## input_format_null_as_default {#input_format_null_as_default}
-Enables or disables the initialization of [NULL](../../sql-reference/syntax.md#null-literal) fields with [default values](../../sql-reference/statements/create/table.md#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
+Enables or disables the initialization of [NULL](../../sql-reference/syntax.md/#null-literal) fields with [default values](../../sql-reference/statements/create/table.md/#create-default-values), if data type of these fields is not [nullable](../../sql-reference/data-types/nullable.md/#data_type-nullable).
If column type is not nullable and this setting is disabled, then inserting `NULL` causes an exception. If column type is nullable, then `NULL` values are inserted as is, regardless of this setting.
This setting is applicable to [INSERT ... VALUES](../../sql-reference/statements/insert-into.md) queries for text input formats.
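+
+A sketch with a hypothetical table:
+
+```sql
+CREATE TABLE null_as_default_demo (x Int32 DEFAULT 42) ENGINE = Memory;
+
+SET input_format_null_as_default = 1;
+
+-- x is not Nullable, so NULL is replaced with the column default (42)
+-- instead of causing an exception.
+INSERT INTO null_as_default_demo VALUES (NULL);
+```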
@@ -3669,7 +3674,7 @@ Enabled by default
## insert_distributed_one_random_shard {#insert_distributed_one_random_shard}
-Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table when there is no distributed key.
+Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md/#distributed) table when there is no distributed key.
By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards.
@@ -3688,7 +3693,7 @@ Enables or disables the insertion of JSON data with nested objects.
Supported formats:
-- [JSONEachRow](../../interfaces/formats.md#jsoneachrow)
+- [JSONEachRow](../../interfaces/formats.md/#jsoneachrow)
Possible values:
@@ -3699,7 +3704,7 @@ Default value: 0.
See also:
-- [Usage of Nested Structures](../../interfaces/formats.md#jsoneachrow-nested) with the `JSONEachRow` format.
+- [Usage of Nested Structures](../../interfaces/formats.md/#jsoneachrow-nested) with the `JSONEachRow` format.
### input_format_json_read_bools_as_numbers {#input_format_json_read_bools_as_numbers}
@@ -3722,7 +3727,7 @@ Enabled by default.
### output_format_json_quote_64bit_integers {#output_format_json_quote_64bit_integers}
-Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format.
+Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md/#json) format.
Such integers are enclosed in quotes by default. This behavior is compatible with most JavaScript implementations.
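+
+For example (using `JSONEachRow`, which follows the same quoting rule):
+
+```sql
+-- Default (1): the 64-bit value is quoted, e.g. {"x":"1"}
+SELECT toUInt64(1) AS x FORMAT JSONEachRow;
+
+SET output_format_json_quote_64bit_integers = 0;
+
+-- Now the value is output unquoted: {"x":1}
+SELECT toUInt64(1) AS x FORMAT JSONEachRow;
+```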
Possible values:
@@ -3740,7 +3745,7 @@ Disabled by default.
### output_format_json_quote_denormals {#output_format_json_quote_denormals}
-Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md#json) output format.
+Enables `+nan`, `-nan`, `+inf`, `-inf` outputs in [JSON](../../interfaces/formats.md/#json) output format.
Possible values:
@@ -3857,7 +3862,7 @@ Disabled by default.
### output_format_json_array_of_rows {#output_format_json_array_of_rows}
-Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md#jsoneachrow) format.
+Enables the ability to output all rows as a JSON array in the [JSONEachRow](../../interfaces/formats.md/#jsoneachrow) format.
Possible values:
@@ -3908,6 +3913,13 @@ Controls validation of UTF-8 sequences in JSON output formats, doesn't impact fo
Disabled by default.
+### format_json_object_each_row_column_for_object_name {#format_json_object_each_row_column_for_object_name}
+
+The name of the column that will be used for storing/writing object names in the [JSONObjectEachRow](../../interfaces/formats.md/#jsonobjecteachrow) format.
+Column type should be String. If the value is empty, default names `row_{i}` will be used for object names.
+
+Default value: ''.
+
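+A sketch (the alias `name` is an arbitrary choice):
+
+```sql
+SELECT concat('obj_', toString(number)) AS name, number AS n
+FROM numbers(2)
+SETTINGS format_json_object_each_row_column_for_object_name = 'name'
+FORMAT JSONObjectEachRow;
+
+-- Objects are keyed "obj_0" and "obj_1" instead of the default "row_1", "row_2".
+```
+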
## TSV format settings {#tsv-format-settings}
### input_format_tsv_empty_as_default {#input_format_tsv_empty_as_default}
@@ -4004,7 +4016,7 @@ Disabled by default.
### format_tsv_null_representation {#format_tsv_null_representation}
-Defines the representation of `NULL` for [TSV](../../interfaces/formats.md#tabseparated) output and input formats. User can set any string as a value, for example, `My NULL`.
+Defines the representation of `NULL` for [TSV](../../interfaces/formats.md/#tabseparated) output and input formats. You can set any string as a value, for example, `My NULL`.
Default value: `\N`.
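+
+For example:
+
+```sql
+SET format_tsv_null_representation = 'My NULL';
+
+-- NULL is written as the configured string instead of \N.
+SELECT NULL AS x FORMAT TSV;
+```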
@@ -4158,7 +4170,7 @@ Default value: `0`.
### format_csv_null_representation {#format_csv_null_representation}
-Defines the representation of `NULL` for [CSV](../../interfaces/formats.md#csv) output and input formats. User can set any string as a value, for example, `My NULL`.
+Defines the representation of `NULL` for [CSV](../../interfaces/formats.md/#csv) output and input formats. You can set any string as a value, for example, `My NULL`.
Default value: `\N`.
@@ -4197,7 +4209,7 @@ My NULL
### input_format_values_interpret_expressions {#input_format_values_interpret_expressions}
-Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section.
+Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md/#data-format-values) format during data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section.
Possible values:
@@ -4247,7 +4259,7 @@ Ok.
### input_format_values_deduce_templates_of_expressions {#input_format_values_deduce_templates_of_expressions}
-Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows.
+Enables or disables template deduction for SQL expressions in [Values](../../interfaces/formats.md/#data-format-values) format. It allows parsing and interpreting expressions in `Values` much faster if expressions in consecutive rows have the same structure. ClickHouse tries to deduce the template of an expression, parse the following rows using this template and evaluate the expression on a batch of successfully parsed rows.
Possible values:
@@ -4292,7 +4304,7 @@ Default value: 1.
### input_format_arrow_import_nested {#input_format_arrow_import_nested}
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md/#data_types-matching-arrow) input format.
Possible values:
@@ -4321,7 +4333,7 @@ Disabled by default.
### output_format_arrow_low_cardinality_as_dictionary {#output_format_arrow_low_cardinality_as_dictionary}
-Allows to convert the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md#data-format-arrow) format for `SELECT` queries.
+Allows converting the [LowCardinality](../../sql-reference/data-types/lowcardinality.md) type to the `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format for `SELECT` queries.
Possible values:
@@ -4340,7 +4352,7 @@ Disabled by default.
### input_format_orc_import_nested {#input_format_orc_import_nested}
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md#data-format-orc) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md/#data-format-orc) input format.
Possible values:
@@ -4383,7 +4395,7 @@ Disabled by default.
## input_format_parquet_import_nested {#input_format_parquet_import_nested}
-Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md#data-format-parquet) input format.
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md/#data-format-parquet) input format.
Possible values:
@@ -4480,7 +4492,7 @@ Disabled by default.
### input_format_avro_allow_missing_fields {#input_format_avro_allow_missing_fields}
-Enables using fields that are not specified in [Avro](../../interfaces/formats.md#data-format-avro) or [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception.
+Enables using fields that are not specified in [Avro](../../interfaces/formats.md/#data-format-avro) or [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format schema. When a field is not found in the schema, ClickHouse uses the default value instead of throwing an exception.
Possible values:
@@ -4491,7 +4503,7 @@ Default value: 0.
### format_avro_schema_registry_url {#format_avro_schema_registry_url}
-Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md#data-format-avro-confluent) format.
+Sets [Confluent Schema Registry](https://docs.confluent.io/current/schema-registry/index.html) URL to use with [AvroConfluent](../../interfaces/formats.md/#data-format-avro-confluent) format.
Default value: `Empty`.
@@ -4548,7 +4560,7 @@ Default value: `250`.
### output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
-Limits the width of value displayed in [Pretty](../../interfaces/formats.md#pretty) formats. If the value width exceeds the limit, the value is cut.
+Limits the width of a value displayed in [Pretty](../../interfaces/formats.md/#pretty) formats. If the value width exceeds the limit, the value is truncated.
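+
+For example:
+
+```sql
+SET output_format_pretty_max_value_width = 10;
+
+-- The 50-character string is truncated to 10 characters in the output.
+SELECT repeat('a', 50) FORMAT PrettyCompact;
+```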
Possible values:
@@ -4624,7 +4636,7 @@ SELECT * FROM a;
### output_format_pretty_row_numbers {#output_format_pretty_row_numbers}
-Adds row numbers to output in the [Pretty](../../interfaces/formats.md#pretty) format.
+Adds row numbers to output in the [Pretty](../../interfaces/formats.md/#pretty) format.
Possible values:
@@ -4669,52 +4681,52 @@ Delimiter between rows (for Template format).
### format_custom_escaping_rule {#format_custom_escaping_rule}
-Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the field escaping rule for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Possible values:
-- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md#tabseparated).
-- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md#data-format-values).
-- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md#csv).
-- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
-- `'XML'` — Similarly to [XML](../../interfaces/formats.md#xml).
-- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
+- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated).
+- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values).
+- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv).
+- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow).
+- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml).
+- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw).
Default value: `'Escaped'`.
### format_custom_field_delimiter {#format_custom_field_delimiter}
-Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a delimiter between the fields for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `'\t'`.
### format_custom_row_before_delimiter {#format_custom_row_before_delimiter}
-Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a delimiter before the field of the first column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `''`.
### format_custom_row_after_delimiter {#format_custom_row_after_delimiter}
-Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a delimiter after the field of the last column for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `'\n'`.
### format_custom_row_between_delimiter {#format_custom_row_between_delimiter}
-Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a delimiter between the rows for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `''`.
### format_custom_result_before_delimiter {#format_custom_result_before_delimiter}
-Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a prefix before the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `''`.
### format_custom_result_after_delimiter {#format_custom_result_after_delimiter}
-Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md#format-customseparated) data format.
+Sets the character that is interpreted as a suffix after the result set for [CustomSeparated](../../interfaces/formats.md/#format-customseparated) data format.
Default value: `''`.
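+
+A sketch combining several of these settings for output (the column aliases are arbitrary):
+
+```sql
+-- Fields are escaped as in CSV and separated by semicolons.
+SELECT number AS n, concat('str_', toString(number)) AS s
+FROM numbers(2)
+SETTINGS format_custom_escaping_rule = 'CSV',
+         format_custom_field_delimiter = ';'
+FORMAT CustomSeparated;
+```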
@@ -4726,12 +4738,12 @@ Field escaping rule.
Possible values:
-- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md#tabseparated).
-- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md#data-format-values).
-- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md#csv).
-- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md#jsoneachrow).
-- `'XML'` — Similarly to [XML](../../interfaces/formats.md#xml).
-- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md#tabseparatedraw).
+- `'Escaped'` — Similarly to [TSV](../../interfaces/formats.md/#tabseparated).
+- `'Quoted'` — Similarly to [Values](../../interfaces/formats.md/#data-format-values).
+- `'CSV'` — Similarly to [CSV](../../interfaces/formats.md/#csv).
+- `'JSON'` — Similarly to [JSONEachRow](../../interfaces/formats.md/#jsoneachrow).
+- `'XML'` — Similarly to [XML](../../interfaces/formats.md/#xml).
+- `'Raw'` — Extracts subpatterns as a whole, no escaping rules, similarly to [TSVRaw](../../interfaces/formats.md/#tabseparatedraw).
Default value: `Raw`.
@@ -4745,7 +4757,7 @@ Disabled by default.
### format_capn_proto_enum_comparising_mode {#format_capn_proto_enum_comparising_mode}
-Determines how to map ClickHouse `Enum` data type and [CapnProto](../../interfaces/formats.md#capnproto) `Enum` data type from schema.
+Determines how to map the ClickHouse `Enum` data type to the [CapnProto](../../interfaces/formats.md/#capnproto) `Enum` data type from the schema.
Possible values:
diff --git a/docs/en/operations/ssl-zookeeper.md b/docs/en/operations/ssl-zookeeper.md
index a38e9f81b41..c0ac8d2903d 100644
--- a/docs/en/operations/ssl-zookeeper.md
+++ b/docs/en/operations/ssl-zookeeper.md
@@ -5,6 +5,9 @@ sidebar_label: Secured Communication with Zookeeper
---
# Optional secured communication between ClickHouse and Zookeeper
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
+
+<SelfManaged />
+
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` for communication with ClickHouse client over SSL. These options are available from Zookeeper version 3.5.2.
diff --git a/docs/en/operations/storing-data.md b/docs/en/operations/storing-data.md
index 43623577e66..203fe4e42d2 100644
--- a/docs/en/operations/storing-data.md
+++ b/docs/en/operations/storing-data.md
@@ -7,13 +7,13 @@ title: "External Disks for Storing Data"
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
-To work with data stored on `Amazon S3` disks use [S3](../engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](../engines/table-engines/integrations/hdfs.md) table engine.
+To work with data stored on `Amazon S3` disks, use the [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System, use the [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
## Configuring HDFS {#configuring-hdfs}
-[MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
+[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
Configuration markup:
@@ -53,7 +53,7 @@ Optional parameters:
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
-You can encrypt the data stored on [S3](../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
+You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3) or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on encryption, define a disk with the type `encrypted` in the configuration file and choose a disk on which the data will be saved. An `encrypted` disk encrypts all written files on the fly, and when you read files from an `encrypted` disk it decrypts them automatically. So you can work with an `encrypted` disk as you would with a normal one.
Example of disk configuration:
@@ -80,14 +80,14 @@ Required parameters:
- `type` — `encrypted`. Otherwise the encrypted disk is not created.
- `disk` — Type of disk for data storage.
-- `key` — The key for encryption and decryption. Type: [Uint64](../sql-reference/data-types/int-uint.md). You can use `key_hex` parameter to encrypt in hexadecimal form.
+- `key` — The key for encryption and decryption. Type: [UInt64](/docs/en/sql-reference/data-types/int-uint.md). You can use the `key_hex` parameter to specify the key in hexadecimal form.
You can specify multiple keys using the `id` attribute (see example above).
Optional parameters:
- `path` — Path to the location on the disk where the data will be saved. If not specified, the data will be saved in the root directory.
- `current_key_id` — The key used for encryption. All the specified keys can be used for decryption, and you can always switch to another key while maintaining access to previously encrypted data.
-- `algorithm` — [Algorithm](../sql-reference/statements/create/table.md#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes.
+- `algorithm` — [Algorithm](/docs/en/sql-reference/statements/create/table.md/#create-query-encryption-codecs) for encryption. Possible values: `AES_128_CTR`, `AES_192_CTR` or `AES_256_CTR`. Default value: `AES_128_CTR`. The key length depends on the algorithm: `AES_128_CTR` — 16 bytes, `AES_192_CTR` — 24 bytes, `AES_256_CTR` — 32 bytes.
Example of disk configuration:
@@ -265,9 +265,9 @@ Cache profile events:
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
-This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](../sql-reference/statements/create/table.md), [ALTER TABLE](../sql-reference/statements/alter/index.md), [RENAME TABLE](../sql-reference/statements/rename.md#misc_operations-rename_table), [DETACH TABLE](../sql-reference/statements/detach.md) and [TRUNCATE TABLE](../sql-reference/statements/truncate.md).
+This is a read-only disk. Its data is only read and never modified. A new table is loaded onto this disk via the `ATTACH TABLE` query (see the example below). The local disk is not actually used: each `SELECT` query results in an HTTP request to fetch the required data. Any modification of the table data results in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
-Web server storage is supported only for the [MergeTree](../engines/table-engines/mergetree-family/mergetree.md) and [Log](../engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](../engines/table-engines/mergetree-family/mergetree.md#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
+Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
A ready test case. You need to add this configuration to config:
@@ -451,7 +451,7 @@ Optional parameters:
- `remote_fs_read_backoff_threashold` — The maximum wait time when trying to read data for remote disk. Default value: `10000` seconds.
- `remote_fs_read_backoff_max_tries` — The maximum number of attempts to read with backoff. Default value: `5`.
-If a query fails with an exception `DB:Exception Unreachable URL`, then you can try to adjust the settings: [http_connection_timeout](../operations/settings/settings.md#http_connection_timeout), [http_receive_timeout](../operations/settings/settings.md#http_receive_timeout), [keep_alive_timeout](../operations/server-configuration-parameters/settings.md#keep-alive-timeout).
+If a query fails with an exception `DB:Exception Unreachable URL`, then you can try to adjust the settings: [http_connection_timeout](/docs/en/operations/settings/settings.md/#http_connection_timeout), [http_receive_timeout](/docs/en/operations/settings/settings.md/#http_receive_timeout), [keep_alive_timeout](/docs/en/operations/server-configuration-parameters/settings.md/#keep-alive-timeout).
To get files for upload run:
`clickhouse static-files-disk-uploader --metadata-path --output-dir ` (`--metadata-path` can be found in query `SELECT data_paths FROM system.tables WHERE name = 'table_name'`).
@@ -460,7 +460,7 @@ When loading files by `endpoint`, they must be loaded into `/store/` p
If URL is not reachable on disk load when the server is starting up tables, then all errors are caught. If in this case there were errors, tables can be reloaded (become visible) via `DETACH TABLE table_name` -> `ATTACH TABLE table_name`. If metadata was successfully loaded at server startup, then tables are available straight away.
-Use [http_max_single_read_retries](../operations/settings/settings.md#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
+Use [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
## Zero-copy Replication (not ready for production) {#zero-copy}
diff --git a/docs/en/operations/system-tables/columns.md b/docs/en/operations/system-tables/columns.md
index a2b26c3684c..8b633fbe2f0 100644
--- a/docs/en/operations/system-tables/columns.md
+++ b/docs/en/operations/system-tables/columns.md
@@ -5,7 +5,7 @@ slug: /en/operations/system-tables/columns
Contains information about columns in all the tables.
-You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for multiple tables at once.
+You can use this table to get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) query, but for multiple tables at once.
Columns from [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) are visible in the `system.columns` only in those session where they have been created. They are shown with the empty `database` field.
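+
+For example:
+
+```sql
+-- DESCRIBE-like information for every table in the current database:
+SELECT table, name, type, default_kind
+FROM system.columns
+WHERE database = currentDatabase()
+ORDER BY table, position;
+```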
diff --git a/docs/en/operations/system-tables/crash-log.md b/docs/en/operations/system-tables/crash-log.md
index 0c0a4cd967d..a44b0db8e9b 100644
--- a/docs/en/operations/system-tables/crash-log.md
+++ b/docs/en/operations/system-tables/crash-log.md
@@ -7,8 +7,8 @@ Contains information about stack traces for fatal errors. The table does not exi
Columns:
-- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date of the event.
-- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Time of the event.
+- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date of the event.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Time of the event.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Timestamp of the event with nanoseconds.
- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Signal number.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread ID.
diff --git a/docs/en/operations/system-tables/dictionaries.md b/docs/en/operations/system-tables/dictionaries.md
index 112e2cc2cdf..4b256f0de97 100644
--- a/docs/en/operations/system-tables/dictionaries.md
+++ b/docs/en/operations/system-tables/dictionaries.md
@@ -3,7 +3,7 @@ slug: /en/operations/system-tables/dictionaries
---
# dictionaries
-Contains information about [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+Contains information about [dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
Columns:
@@ -33,7 +33,7 @@ Columns:
- `lifetime_min` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Minimum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `lifetime_max` ([UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges)) — Maximum [lifetime](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md) of the dictionary in memory, after which ClickHouse tries to reload the dictionary (if `invalidate_query` is set, then only if it has changed). Set in seconds.
- `loading_start_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Start time for loading the dictionary.
-- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor some troubles with external sources and investigate causes.
+- `last_successful_update_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — End time for loading or updating the dictionary. Helps to monitor problems with dictionary sources and investigate the causes.
- `loading_duration` ([Float32](../../sql-reference/data-types/float.md)) — Duration of a dictionary loading.
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text of the error that occurs when creating or reloading the dictionary if the dictionary couldn’t be created.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Text of the comment to dictionary.
diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md
index 1106562da53..f4c71eb1cd2 100644
--- a/docs/en/operations/system-tables/disks.md
+++ b/docs/en/operations/system-tables/disks.md
@@ -11,6 +11,7 @@ Columns:
- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space on disk in bytes.
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk volume in bytes.
+- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space which is not taken by reservations (`free_space` minus the size of reservations taken by merges, inserts, and other disk write operations currently running).
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Amount of disk space that should stay free on disk in bytes. Defined in the `keep_free_space_bytes` parameter of disk configuration.
**Example**
diff --git a/docs/en/operations/system-tables/index.md b/docs/en/operations/system-tables/index.md
index e08a727a62a..5fc302cad34 100644
--- a/docs/en/operations/system-tables/index.md
+++ b/docs/en/operations/system-tables/index.md
@@ -1,7 +1,8 @@
---
slug: /en/operations/system-tables/
sidebar_position: 52
-sidebar_label: System Tables
+sidebar_label: Overview
+pagination_next: 'en/operations/system-tables/asynchronous_metric_log'
---
# System Tables
@@ -72,4 +73,3 @@ If procfs is supported and enabled on the system, ClickHouse server collects the
- `OSReadBytes`
- `OSWriteBytes`
-[Original article](https://clickhouse.com/docs/en/operations/system-tables/)
diff --git a/docs/en/operations/system-tables/information_schema.md b/docs/en/operations/system-tables/information_schema.md
index a573491282a..a8e516f02a3 100644
--- a/docs/en/operations/system-tables/information_schema.md
+++ b/docs/en/operations/system-tables/information_schema.md
@@ -178,7 +178,7 @@ Columns:
- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view.
- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking.
- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated.
-- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values:
+- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view.md/#materialized-view). Possible values:
- `NO` — The created view is not materialized.
- `YES` — The created view is materialized.
- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated.
diff --git a/docs/en/operations/system-tables/mutations.md b/docs/en/operations/system-tables/mutations.md
index 45447f3644e..d8fb91a63f5 100644
--- a/docs/en/operations/system-tables/mutations.md
+++ b/docs/en/operations/system-tables/mutations.md
@@ -3,31 +3,31 @@ slug: /en/operations/system-tables/mutations
---
# mutations
-The table contains information about [mutations](../../sql-reference/statements/alter/index.md#mutations) of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row.
+The table contains information about [mutations](/docs/en/sql-reference/statements/alter/index.md#mutations) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables and their progress. Each mutation command is represented by a single row.
Columns:
-- `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied.
+- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the database to which the mutation was applied.
-- `table` ([String](../../sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied.
+- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the table to which the mutation was applied.
-- `mutation_id` ([String](../../sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table.
+- `mutation_id` ([String](/docs/en/sql-reference/data-types/string.md)) — The ID of the mutation. For replicated tables these IDs correspond to znode names in the `/mutations/` directory in ClickHouse Keeper. For non-replicated tables the IDs correspond to file names in the data directory of the table.
-- `command` ([String](../../sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
+- `command` ([String](/docs/en/sql-reference/data-types/string.md)) — The mutation command string (the part of the query after `ALTER TABLE [db.]table`).
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution.
+- `create_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — Date and time when the mutation command was submitted for execution.
-- `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty.
+- `block_numbers.partition_id` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains the partitions' IDs (one record for each partition). For mutations of non-replicated tables the array is empty.
-- `block_numbers.number` ([Array](../../sql-reference/data-types/array.md)([Int64](../../sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.
+- `block_numbers.number` ([Array](/docs/en/sql-reference/data-types/array.md)([Int64](/docs/en/sql-reference/data-types/int-uint.md))) — For mutations of replicated tables, the array contains one record for each partition, with the block number that was acquired by the mutation. Only parts that contain blocks with numbers less than this number will be mutated in the partition.
In non-replicated tables, block numbers in all partitions form a single sequence. This means that for mutations of non-replicated tables, the column will contain one record with a single block number acquired by the mutation.
-- `parts_to_do_names` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete.
+- `parts_to_do_names` ([Array](/docs/en/sql-reference/data-types/array.md)([String](/docs/en/sql-reference/data-types/string.md))) — An array of names of data parts that need to be mutated for the mutation to complete.
-- `parts_to_do` ([Int64](../../sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete.
+- `parts_to_do` ([Int64](/docs/en/sql-reference/data-types/int-uint.md)) — The number of data parts that need to be mutated for the mutation to complete.
-- `is_done` ([UInt8](../../sql-reference/data-types/int-uint.md)) — The flag whether the mutation is done or not. Possible values:
+- `is_done` ([UInt8](/docs/en/sql-reference/data-types/int-uint.md)) — Flag indicating whether the mutation is done. Possible values:
- `1` if the mutation is completed,
- `0` if the mutation is still in process.
@@ -37,16 +37,16 @@ Even if `parts_to_do = 0` it is possible that a mutation of a replicated table i
If there were problems with mutating some data parts, the following columns contain additional information:
-- `latest_failed_part` ([String](../../sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated.
+- `latest_failed_part` ([String](/docs/en/sql-reference/data-types/string.md)) — The name of the most recent part that could not be mutated.
-- `latest_fail_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure.
+- `latest_fail_time` ([DateTime](/docs/en/sql-reference/data-types/datetime.md)) — The date and time of the most recent part mutation failure.
-- `latest_fail_reason` ([String](../../sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure.
+- `latest_fail_reason` ([String](/docs/en/sql-reference/data-types/string.md)) — The exception message that caused the most recent part mutation failure.
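+
+For example, a quick way to list mutations that are still in progress (a minimal sketch):
+
+``` sql
+SELECT database, table, mutation_id, command, parts_to_do, latest_fail_reason
+FROM system.mutations
+WHERE is_done = 0;
+```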
**See Also**
-- [Mutations](../../sql-reference/statements/alter/index.md#mutations)
-- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) table engine
-- [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family
+- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
+- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine
+- [ReplicatedMergeTree](/docs/en/engines/table-engines/mergetree-family/replication.md) family
[Original article](https://clickhouse.com/docs/en/operations/system-tables/mutations)
diff --git a/docs/en/operations/system-tables/parts.md b/docs/en/operations/system-tables/parts.md
index f1d60896a2e..bbd5385f44b 100644
--- a/docs/en/operations/system-tables/parts.md
+++ b/docs/en/operations/system-tables/parts.md
@@ -75,7 +75,7 @@ Columns:
- `primary_key_bytes_in_memory_allocated` ([UInt64](../../sql-reference/data-types/int-uint.md)) – The amount of memory (in bytes) reserved for primary key values.
-- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md#alter_freeze-partition)
+- `is_frozen` ([UInt8](../../sql-reference/data-types/int-uint.md)) – Flag that shows that a partition data backup exists. 1, the backup exists. 0, the backup does not exist. For more details, see [FREEZE PARTITION](../../sql-reference/statements/alter/partition.md/#alter_freeze-partition)
- `database` ([String](../../sql-reference/data-types/string.md)) – Name of the database.
@@ -87,25 +87,25 @@ Columns:
- `disk_name` ([String](../../sql-reference/data-types/string.md)) – Name of a disk that stores the data part.
-- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of compressed files.
+- `hash_of_all_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of compressed files.
-- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
+- `hash_of_uncompressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of uncompressed files (files with marks, index file etc.).
-- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
+- `uncompressed_hash_of_compressed_files` ([String](../../sql-reference/data-types/string.md)) – [sipHash128](../../sql-reference/functions/hash-functions.md/#hash_functions-siphash128) of data in the compressed files as if they were uncompressed.
-- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `delete_ttl_info_min` ([DateTime](../../sql-reference/data-types/datetime.md)) — The minimum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `delete_ttl_info_max` ([DateTime](../../sql-reference/data-types/datetime.md)) — The maximum value of the date and time key for [TTL DELETE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `move_ttl_info.expression` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — Array of expressions. Each expression defines a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
:::warning
The `move_ttl_info.expression` array is kept mostly for backward compatibility; the simplest way to check a `TTL MOVE` rule now is to use the `move_ttl_info.min` and `move_ttl_info.max` fields.
:::
-- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `move_ttl_info.min` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the minimum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
-- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).
+- `move_ttl_info.max` ([Array](../../sql-reference/data-types/array.md)([DateTime](../../sql-reference/data-types/datetime.md))) — Array of date and time values. Each element describes the maximum key value for a [TTL MOVE rule](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl).
- `bytes` ([UInt64](../../sql-reference/data-types/int-uint.md)) – Alias for `bytes_on_disk`.
@@ -166,6 +166,6 @@ move_ttl_info.max: []
**See Also**
- [MergeTree family](../../engines/table-engines/mergetree-family/mergetree.md)
-- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl)
+- [TTL for Columns and Tables](../../engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-ttl)
[Original article](https://clickhouse.com/docs/en/operations/system-tables/parts)
diff --git a/docs/en/operations/system-tables/replicated_fetches.md b/docs/en/operations/system-tables/replicated_fetches.md
index 3536bbaff4d..74888fd2f13 100644
--- a/docs/en/operations/system-tables/replicated_fetches.md
+++ b/docs/en/operations/system-tables/replicated_fetches.md
@@ -68,6 +68,5 @@ thread_id: 54
**See Also**
-- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system/#query-language-system-replicated)
+- [Managing ReplicatedMergeTree Tables](../../sql-reference/statements/system.md/#managing-replicatedmergetree-tables)
-[Original article](https://clickhouse.com/docs/en/operations/system_tables/replicated_fetches)
diff --git a/docs/en/operations/system-tables/replication_queue.md b/docs/en/operations/system-tables/replication_queue.md
index ced20b0048a..dff3bce246a 100644
--- a/docs/en/operations/system-tables/replication_queue.md
+++ b/docs/en/operations/system-tables/replication_queue.md
@@ -29,7 +29,7 @@ Columns:
- `MUTATE_PART` — Apply one or several mutations to the part.
- `ALTER_METADATA` — Apply alter modification according to global /metadata and /columns paths.
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task.
@@ -47,13 +47,13 @@ Columns:
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Text message about the last error that occurred (if any).
-- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.
+- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last attempted.
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks.
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason why the task was postponed.
-- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
+- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it's a mutation.
diff --git a/docs/en/operations/system-tables/session_log.md b/docs/en/operations/system-tables/session_log.md
index 79c8ea184ce..cdf86b57ef6 100644
--- a/docs/en/operations/system-tables/session_log.md
+++ b/docs/en/operations/system-tables/session_log.md
@@ -24,6 +24,7 @@ Columns:
- `DOUBLE_SHA1_PASSWORD`
- `LDAP`
- `KERBEROS`
+ - `SSL_CERTIFICATE`
- `profiles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of profiles set for all roles and/or users.
- `roles` ([Array](../../sql-reference/data-types/array.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md))) — The list of roles to which the profile is applied.
- `settings` ([Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([LowCardinality(String)](../../sql-reference/data-types/lowcardinality.md), [String](../../sql-reference/data-types/string.md)))) — Settings that were changed when the client logged in/out.
diff --git a/docs/en/operations/system-tables/users.md b/docs/en/operations/system-tables/users.md
index eaeabab131b..6ef9b7b18a4 100644
--- a/docs/en/operations/system-tables/users.md
+++ b/docs/en/operations/system-tables/users.md
@@ -12,7 +12,7 @@ Columns:
- `storage` ([String](../../sql-reference/data-types/string.md)) — Path to the storage of users. Configured in the `access_control_path` parameter.
-- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password or with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password.
+- `auth_type` ([Enum8](../../sql-reference/data-types/enum.md)('no_password' = 0,'plaintext_password' = 1, 'sha256_password' = 2, 'double_sha1_password' = 3, 'ldap' = 4, 'kerberos' = 5, 'ssl_certificate' = 6)) — Shows the authentication type. There are multiple ways of user identification: with no password, with plain text password, with [SHA256](https://ru.wikipedia.org/wiki/SHA-2)-encoded password, with [double SHA-1](https://ru.wikipedia.org/wiki/SHA-1)-encoded password, or via LDAP, Kerberos, or an SSL certificate.
- `auth_params` ([String](../../sql-reference/data-types/string.md)) — Authentication parameters in the JSON format depending on the `auth_type`.
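+
+For example, a minimal sketch of checking which authentication type each user has:
+
+``` sql
+SELECT name, auth_type FROM system.users;
+```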
diff --git a/docs/en/operations/tips.md b/docs/en/operations/tips.md
index facf78c85bf..827a7e33ea3 100644
--- a/docs/en/operations/tips.md
+++ b/docs/en/operations/tips.md
@@ -4,6 +4,9 @@ sidebar_position: 58
sidebar_label: Usage Recommendations
title: "Usage Recommendations"
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_automated.md';
+
+<SelfManaged />
## CPU Scaling Governor
diff --git a/docs/en/operations/utilities/clickhouse-benchmark.md b/docs/en/operations/utilities/clickhouse-benchmark.md
index 1a250ea5481..faa7ac75c74 100644
--- a/docs/en/operations/utilities/clickhouse-benchmark.md
+++ b/docs/en/operations/utilities/clickhouse-benchmark.md
@@ -109,56 +109,38 @@ In the report you can find:
`clickhouse-benchmark` can compare performances for two running ClickHouse servers.
-To use the comparison mode, specify endpoints of both servers by two pairs of `--host`, `--port` keys. Keys matched together by position in arguments list, the first `--host` is matched with the first `--port` and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query addressed to a randomly selected server. The results are shown for each server separately.
+To use the comparison mode, specify the endpoints of both servers by two pairs of `--host`, `--port` keys. Keys are matched together by position in the argument list: the first `--host` is matched with the first `--port`, and so on. `clickhouse-benchmark` establishes connections to both servers, then sends queries. Each query is addressed to a randomly selected server. The results are shown in a table.
## Example {#clickhouse-benchmark-example}
``` bash
-$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark -i 10
+$ echo "SELECT * FROM system.numbers LIMIT 10000000 OFFSET 10000000" | clickhouse-benchmark --host=localhost --port=9001 --host=localhost --port=9000 -i 10
```
``` text
Loaded 1 queries.
-Queries executed: 6.
+Queries executed: 5.
-localhost:9000, queries 6, QPS: 6.153, RPS: 123398340.957, MiB/s: 941.455, result RPS: 61532982.200, result MiB/s: 469.459.
+localhost:9001, queries 2, QPS: 3.764, RPS: 75446929.370, MiB/s: 575.614, result RPS: 37639659.982, result MiB/s: 287.168.
+localhost:9000, queries 3, QPS: 3.815, RPS: 76466659.385, MiB/s: 583.394, result RPS: 38148392.297, result MiB/s: 291.049.
-0.000% 0.159 sec.
-10.000% 0.159 sec.
-20.000% 0.159 sec.
-30.000% 0.160 sec.
-40.000% 0.160 sec.
-50.000% 0.162 sec.
-60.000% 0.164 sec.
-70.000% 0.165 sec.
-80.000% 0.166 sec.
-90.000% 0.166 sec.
-95.000% 0.167 sec.
-99.000% 0.167 sec.
-99.900% 0.167 sec.
-99.990% 0.167 sec.
+0.000% 0.258 sec. 0.250 sec.
+10.000% 0.258 sec. 0.250 sec.
+20.000% 0.258 sec. 0.250 sec.
+30.000% 0.258 sec. 0.267 sec.
+40.000% 0.258 sec. 0.267 sec.
+50.000% 0.273 sec. 0.267 sec.
+60.000% 0.273 sec. 0.267 sec.
+70.000% 0.273 sec. 0.267 sec.
+80.000% 0.273 sec. 0.269 sec.
+90.000% 0.273 sec. 0.269 sec.
+95.000% 0.273 sec. 0.269 sec.
+99.000% 0.273 sec. 0.269 sec.
+99.900% 0.273 sec. 0.269 sec.
+99.990% 0.273 sec. 0.269 sec.
-
-
-Queries executed: 10.
-
-localhost:9000, queries 10, QPS: 6.082, RPS: 121959604.568, MiB/s: 930.478, result RPS: 60815551.642, result MiB/s: 463.986.
-
-0.000% 0.159 sec.
-10.000% 0.159 sec.
-20.000% 0.160 sec.
-30.000% 0.163 sec.
-40.000% 0.164 sec.
-50.000% 0.165 sec.
-60.000% 0.166 sec.
-70.000% 0.166 sec.
-80.000% 0.167 sec.
-90.000% 0.167 sec.
-95.000% 0.170 sec.
-99.000% 0.172 sec.
-99.900% 0.172 sec.
-99.990% 0.172 sec.
+No difference proven at 99.5% confidence
```
[Original article](https://clickhouse.com/docs/en/operations/utilities/clickhouse-benchmark.md)
diff --git a/docs/en/operations/utilities/index.md b/docs/en/operations/utilities/index.md
index df4af30768c..9de68923ea4 100644
--- a/docs/en/operations/utilities/index.md
+++ b/docs/en/operations/utilities/index.md
@@ -1,10 +1,11 @@
---
slug: /en/operations/utilities/
sidebar_position: 56
-sidebar_label: Utilities
+sidebar_label: Overview
+pagination_next: 'en/operations/utilities/clickhouse-copier'
---
-# ClickHouse Utility
+# ClickHouse Utilities
- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
diff --git a/docs/en/sql-reference/data-types/date32.md b/docs/en/sql-reference/data-types/date32.md
index ff1a745785b..c8c7470d2cb 100644
--- a/docs/en/sql-reference/data-types/date32.md
+++ b/docs/en/sql-reference/data-types/date32.md
@@ -6,7 +6,7 @@ sidebar_label: Date32
# Date32
-A date. Supports the date range same with [Datetime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values till 2299-12-31.
+A date. Supports the same date range as [DateTime64](../../sql-reference/data-types/datetime64.md). Stored in four bytes as the number of days since 1900-01-01. Allows storing values up to 2299-12-31.
**Examples**
diff --git a/docs/en/sql-reference/data-types/datetime.md b/docs/en/sql-reference/data-types/datetime.md
index 85587882e01..7f7f21ded54 100644
--- a/docs/en/sql-reference/data-types/datetime.md
+++ b/docs/en/sql-reference/data-types/datetime.md
@@ -4,7 +4,7 @@ sidebar_position: 48
sidebar_label: DateTime
---
-# Datetime
+# DateTime
Allows storing an instant in time that can be expressed as a calendar date and a time of day.
diff --git a/docs/en/sql-reference/data-types/datetime64.md b/docs/en/sql-reference/data-types/datetime64.md
index c7372e4b064..fa3a1eecd46 100644
--- a/docs/en/sql-reference/data-types/datetime64.md
+++ b/docs/en/sql-reference/data-types/datetime64.md
@@ -4,7 +4,7 @@ sidebar_position: 49
sidebar_label: DateTime64
---
-# Datetime64
+# DateTime64
Allows storing an instant in time that can be expressed as a calendar date and a time of day, with defined sub-second precision
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml
index 1f98223c54c..af79ff9af23 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_category_.yml
@@ -1,8 +1,8 @@
position: 37
-label: 'External Dictionaries'
+label: 'Dictionaries'
collapsible: true
collapsed: true
link:
type: generated-index
- title: External Dictionaries
+ title: Dictionaries
slug: /en/sql-reference/dictionaries/external-dictionaries
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md
new file mode 100644
index 00000000000..e6a0dac7afb
--- /dev/null
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md
@@ -0,0 +1,4 @@
+:::tip
+If you are using a dictionary with ClickHouse Cloud, please use the DDL query option to create your dictionaries, and create your dictionary as user `default`.
+Also, verify the list of supported dictionary sources in the [Cloud Compatibility guide](/docs/en/whats-new/cloud-capabilities.md).
+:::
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
index 198ff12f1d6..aac0db208c6 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md
@@ -3,6 +3,7 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-l
sidebar_position: 41
sidebar_label: Storing Dictionaries in Memory
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Storing Dictionaries in Memory
@@ -22,7 +23,9 @@ ClickHouse generates an exception for errors with dictionaries. Examples of erro
- The dictionary being accessed could not be loaded.
- Error querying a `cached` dictionary.
-You can view the list of external dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
+You can view the list of dictionaries and their statuses in the [system.dictionaries](../../../operations/system-tables/dictionaries.md) table.
+
+<CloudDetails />
The configuration looks like this:
@@ -303,17 +306,25 @@ or
CREATE DICTIONARY somedict (
id UInt64,
first Date,
- last Date
+ last Date,
+ advertiser_id UInt64
)
PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'date_table'))
+LIFETIME(MIN 1 MAX 1000)
LAYOUT(RANGE_HASHED())
RANGE(MIN first MAX last)
```
-To work with these dictionaries, you need to pass an additional argument to the `dictGetT` function, for which a range is selected:
+To work with these dictionaries, you need to pass the `dictGet` function an additional argument, the value for which a range is selected:
``` sql
-dictGetT('dict_name', 'attr_name', id, date)
+dictGet('dict_name', 'attr_name', id, date)
+```
+Query example:
+
+``` sql
+SELECT dictGet('somedict', 'advertiser_id', 1, '2022-10-20 23:20:10.000'::DateTime64::UInt64);
```
This function returns the value for the specified `id`s and the date range that includes the passed date.
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
index 6e4c8c4b94e..e4edad4d9a1 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md
@@ -3,6 +3,7 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-l
sidebar_position: 42
sidebar_label: Dictionary Updates
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionary Updates
@@ -12,6 +13,8 @@ Dictionary updates (other than loading for first use) do not block queries. Duri
Example of settings:
+
+<CloudDetails />
``` xml
...
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
index 912af5b5bce..366d88e07c7 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-polygon.md
@@ -4,18 +4,23 @@ sidebar_position: 46
sidebar_label: Polygon Dictionaries With Grids
title: "Polygon dictionaries"
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
Polygon dictionaries allow you to efficiently search for the polygon containing specified points.
For example: defining a city area by geographical coordinates.
Example of a polygon dictionary configuration:
+
+<CloudDetails />
``` xml
-    <name>key</name>
-    <type>Array(Array(Array(Array(Float64))))</type>
+    <attribute>
+        <name>key</name>
+        <type>Array(Array(Array(Array(Float64))))</type>
+    </attribute>
@@ -76,7 +81,7 @@ To respond to the query, there is a corresponding cell, and the index for the po
- `POLYGON`. Synonym to `POLYGON_INDEX_CELL`.
-Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with external dictionaries.
+Dictionary queries are carried out using standard [functions](../../../sql-reference/functions/ext-dict-functions.md) for working with dictionaries.
An important difference is that here the keys will be the points for which you want to find the polygon containing them.
**Example**
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
index d457f327e7a..4eb96fe80a2 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@@ -1,12 +1,15 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources
sidebar_position: 43
-sidebar_label: Sources of External Dictionaries
+sidebar_label: Dictionary Sources
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-# Sources of External Dictionaries
+# Dictionary Sources
-An external dictionary can be connected to ClickHouse from many different sources.
+
+<CloudDetails />
+A dictionary can be connected to ClickHouse from many different sources.
If the dictionary is configured using an xml-file, the configuration looks like this:
@@ -65,13 +68,13 @@ Types of sources (`source_type`):
- [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool)
- [HTTP(s)](#dicts-external_dicts_dict_sources-http)
- DBMS
- - [ODBC](#dicts-external_dicts_dict_sources-odbc)
- - [MySQL](#dicts-external_dicts_dict_sources-mysql)
- - [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
- - [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
- - [Redis](#dicts-external_dicts_dict_sources-redis)
- - [Cassandra](#dicts-external_dicts_dict_sources-cassandra)
- - [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql)
+ - [ODBC](#odbc)
+ - [MySQL](#mysql)
+ - [ClickHouse](#clickhouse)
+ - [MongoDB](#mongodb)
+ - [Redis](#redis)
+ - [Cassandra](#cassandra)
+ - [PostgreSQL](#postgresql)
## Local File
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
index 895743c3b50..881630167e3 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md
@@ -3,9 +3,12 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-s
sidebar_position: 44
sidebar_label: Dictionary Key and Fields
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
# Dictionary Key and Fields
+
+<CloudDetails />
The `structure` clause describes the dictionary key and fields available for queries.
XML description:
@@ -171,5 +174,5 @@ Configuration fields:
**See Also**
-- [Functions for working with external dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
+- [Functions for working with dictionaries](../../../sql-reference/functions/ext-dict-functions.md).
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md
index 5c237eea8c7..76ca3ac978f 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md
@@ -1,10 +1,13 @@
---
slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict
sidebar_position: 40
-sidebar_label: Configuring an External Dictionary
+sidebar_label: Configuring a Dictionary
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-# Configuring an External Dictionary
+# Configuring a Dictionary
+
+<CloudDetails />
If a dictionary is configured using an xml file, then the dictionary configuration has the following structure:
diff --git a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md
index 095fb6360cd..06b5b8a6746 100644
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts.md
@@ -3,18 +3,21 @@ slug: /en/sql-reference/dictionaries/external-dictionaries/external-dicts
sidebar_position: 39
sidebar_label: General Description
---
+import CloudDetails from '@site/docs/en/sql-reference/dictionaries/external-dictionaries/_snippet_dictionary_in_cloud.md';
-# External Dictionaries
+# Dictionaries
-You can add your own dictionaries from various data sources. The data source for a dictionary can be a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Sources for external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”.
+
+<CloudDetails />
+You can add your own dictionaries from various data sources. The source for a dictionary can be a ClickHouse table, a local text or executable file, an HTTP(s) resource, or another DBMS. For more information, see “[Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)”.
ClickHouse:
- Fully or partially stores dictionaries in RAM.
- Periodically updates dictionaries and dynamically loads missing values. In other words, dictionaries can be loaded dynamically.
-- Allows to create external dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md).
+- Allows creating dictionaries with xml files or [DDL queries](../../../sql-reference/statements/create/dictionary.md).
-The configuration of external dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
+The configuration of dictionaries can be located in one or more xml-files. The path to the configuration is specified in the [dictionaries_config](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_config) parameter.
Dictionaries can be loaded at server startup or at first use, depending on the [dictionaries_lazy_load](../../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-dictionaries_lazy_load) setting.
@@ -24,6 +27,20 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl
- Configuration parameters.
- Metrics like amount of RAM allocated for the dictionary or a number of queries since the dictionary was successfully loaded.
+## Creating a dictionary with a DDL query
+
+Dictionaries can be created with [DDL queries](../../../sql-reference/statements/create/dictionary.md), and this is the recommended method (a minimal sketch follows this list) because with DDL-created dictionaries:
+- No additional records are added to server configuration files
+- The dictionaries can be worked with as first-class entities, like tables or views
+- Data can be read directly, using familiar SELECT rather than dictionary table functions
+- The dictionaries can be easily renamed
+
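+For example, a minimal sketch of a DDL-created dictionary (the dictionary, source table, and column names here are illustrative):
+
+``` sql
+CREATE DICTIONARY id_value_dict
+(
+    id UInt64,
+    value String
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(TABLE 'source_table'))
+LAYOUT(FLAT())
+LIFETIME(MIN 0 MAX 1000)
+```
+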
+## Creating a dictionary with a configuration file
+
+:::note
+Creating a dictionary with a configuration file is not applicable to ClickHouse Cloud. Please use DDL (see above), and create your dictionary as user `default`.
+:::
+
The dictionary configuration file has the following format:
``` xml
@@ -44,18 +61,17 @@ The dictionary configuration file has the following format:
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.
-[DDL queries for dictionaries](../../../sql-reference/statements/create/dictionary.md) does not require any additional records in server configuration. They allow to work with dictionaries as first-class entities, like tables or views.
:::note
-You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to external dictionaries.
+You can convert values for a small dictionary by describing it in a `SELECT` query (see the [transform](../../../sql-reference/functions/other-functions.md) function). This functionality is not related to dictionaries.
:::
## See Also
-- [Configuring an External Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md)
+- [Configuring a Dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md)
- [Storing Dictionaries in Memory](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md)
- [Dictionary Updates](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md)
-- [Sources of External Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)
+- [Dictionary Sources](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md)
- [Dictionary Key and Fields](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md)
-- [Functions for Working with External Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
+- [Functions for Working with Dictionaries](../../../sql-reference/functions/ext-dict-functions.md)
diff --git a/docs/en/sql-reference/dictionaries/index.md b/docs/en/sql-reference/dictionaries/index.md
index eccd1215e30..b6aa62bdb47 100644
--- a/docs/en/sql-reference/dictionaries/index.md
+++ b/docs/en/sql-reference/dictionaries/index.md
@@ -12,6 +12,6 @@ ClickHouse supports special functions for working with dictionaries that can be
ClickHouse supports:
-- [Built-in dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
-- [Plug-in (external) dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
+- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md#dicts-external-dicts) with a [set of functions](../../sql-reference/functions/ext-dict-functions.md).
+- [Embedded dictionaries](../../sql-reference/dictionaries/internal-dicts.md#internal_dicts) with a specific [set of functions](../../sql-reference/functions/ym-dict-functions.md).
diff --git a/docs/en/sql-reference/dictionaries/internal-dicts.md b/docs/en/sql-reference/dictionaries/internal-dicts.md
index dbc12a576f7..f26c60880a4 100644
--- a/docs/en/sql-reference/dictionaries/internal-dicts.md
+++ b/docs/en/sql-reference/dictionaries/internal-dicts.md
@@ -1,10 +1,13 @@
---
slug: /en/sql-reference/dictionaries/internal-dicts
sidebar_position: 39
-sidebar_label: Internal Dictionaries
+sidebar_label: Embedded Dictionaries
---
+import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md';
-# Internal Dictionaries
+# Embedded Dictionaries
+
+<SelfManaged />
ClickHouse contains a built-in feature for working with a geobase.
diff --git a/docs/en/sql-reference/functions/arithmetic-functions.md b/docs/en/sql-reference/functions/arithmetic-functions.md
index 9059facb0c6..ece50591ef9 100644
--- a/docs/en/sql-reference/functions/arithmetic-functions.md
+++ b/docs/en/sql-reference/functions/arithmetic-functions.md
@@ -65,6 +65,11 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
Differs from [modulo](#modulo) in that it returns zero when the divisor is zero.
+## positive_modulo(a, b)
+Calculates the remainder when dividing `a` by `b`. Similar to the `modulo` function, except that `positive_modulo` always returns a non-negative number.
+
+Note that `positive_modulo` is 4-5 times slower than `modulo`. Do not use `positive_modulo` unless you need a non-negative result and do not care too much about performance.
+
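+For example, a minimal sketch contrasting the two functions:
+
+``` sql
+SELECT modulo(-7, 3) AS m, positive_modulo(-7, 3) AS pm;
+```
+
+``` text
+┌──m─┬─pm─┐
+│ -1 │  2 │
+└────┴────┘
+```
+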
## negate(a), -a operator
Calculates a number with the reverse sign. The result is always signed.
diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md
index 76f66db924f..6156a823d58 100644
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@@ -271,11 +271,7 @@ Result:
The return type of `toStartOf*`, `toLastDayOfMonth`, `toMonday`, `timeSlot` functions described below is determined by the configuration parameter [enable_extended_results_for_datetime_functions](../../operations/settings/settings#enable-extended-results-for-datetime-functions) which is `0` by default.
Behavior for
-* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results. In case argument is out of normal range:
- * If the argument is smaller than 1970, the result will be calculated from the argument `1970-01-01 (00:00:00)` instead.
- * If the return type is `DateTime` and the argument is larger than `2106-02-07 08:28:15`, the result will be calculated from the argument `2106-02-07 08:28:15` instead.
- * If the return type is `Date` and the argument is larger than `2149-06-06`, the result will be calculated from the argument `2149-06-06` instead.
- * If `toLastDayOfMonth` is called with an argument greater then `2149-05-31`, the result will be calculated from the argument `2149-05-31` instead.
+* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Though these functions can take values of the extended types `Date32` and `DateTime64` as an argument, passing them a time outside the normal range (year 1970 to 2149 for `Date` / 2106 for `DateTime`) will produce wrong results.
* `enable_extended_results_for_datetime_functions = 1`:
* Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and they return `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
* Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and they return `DateTime64` if their argument is a `Date32` or `DateTime64`.
@@ -302,25 +298,22 @@ Returns the date.
Rounds down a date or date with time to the first day of the month.
Returns the date.
-## toLastDayOfMonth
-
-Rounds up a date or date with time to the last day of the month.
-Returns the date.
+:::note
+The behavior of parsing incorrect dates is implementation specific. ClickHouse may return a zero date, throw an exception, or perform a “natural” overflow.
+:::
If `toLastDayOfMonth` is called with an argument of type `Date` greater than 2149-05-31, the result will be calculated from the argument 2149-05-31 instead.
## toMonday
Rounds down a date or date with time to the nearest Monday.
-As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` return date `1970-01-01`.
Returns the date.
## toStartOfWeek(t\[,mode\])
Rounds down a date or date with time to the nearest Sunday or Monday by mode.
Returns the date.
-As a special case, date arguments `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` (and `1970-01-05` if `mode` is `1`) return date `1970-01-01`.
-The `mode` argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
+The mode argument works exactly like the mode argument to toWeek(). For the single-argument syntax, a mode value of 0 is used.
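+
+For example, a minimal sketch (2022-10-20 was a Thursday, so mode 0 rounds down to Sunday 2022-10-16 and mode 1 to Monday 2022-10-17):
+
+``` sql
+SELECT
+    toStartOfWeek(toDate('2022-10-20')) AS week_start_sunday,
+    toStartOfWeek(toDate('2022-10-20'), 1) AS week_start_monday;
+```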
## toStartOfDay
@@ -557,7 +550,7 @@ Alias: `dateTrunc`.
- Value, truncated to the specified part of date.
-Type: [Datetime](../../sql-reference/data-types/datetime.md).
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
@@ -671,9 +664,9 @@ Aliases: `dateDiff`, `DATE_DIFF`.
- `quarter`
- `year`
-- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `startdate` — The first time value to subtract (the subtrahend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
-- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `enddate` — The second time value to subtract from (the minuend). [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Timezone name](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it is applied to both `startdate` and `enddate`. If not specified, timezones of `startdate` and `enddate` are used. If they are not the same, the result is unspecified. [String](../../sql-reference/data-types/string.md).
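+
+For example, a minimal sketch:
+
+``` sql
+SELECT dateDiff('day', toDate('2022-01-01'), toDate('2022-01-05')) AS days;
+```
+
+``` text
+┌─days─┐
+│    4 │
+└──────┘
+```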
@@ -888,7 +881,7 @@ now([timezone])
- Current date and time.
-Type: [Datetime](../../sql-reference/data-types/datetime.md).
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
@@ -939,7 +932,7 @@ now64([scale], [timezone])
- Current date and time with sub-second precision.
-Type: [Datetime64](../../sql-reference/data-types/datetime64.md).
+Type: [DateTime64](../../sql-reference/data-types/datetime64.md).
**Example**
@@ -975,7 +968,7 @@ nowInBlock([timezone])
- Current date and time at the moment of processing of each block of data.
-Type: [Datetime](../../sql-reference/data-types/datetime.md).
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
@@ -1163,7 +1156,7 @@ dateName(date_part, date)
**Arguments**
- `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
-- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Timezone. Optional. [String](../../sql-reference/data-types/string.md).
**Returned value**
@@ -1251,7 +1244,7 @@ Result:
└──────────────────────────┘
```
-When there are two arguments: first is an [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md), second is a constant format string — it acts in the same way as [formatDateTime](#formatdatetime) and return [String](../../sql-reference/data-types/string.md#string) type.
+When there are two or three arguments, the first an [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second a constant format string and the third an optional constant time zone string, it acts in the same way as [formatDateTime](#formatdatetime) and returns a [String](../../sql-reference/data-types/string.md#string) type.
For example:
diff --git a/docs/en/sql-reference/functions/encoding-functions.md b/docs/en/sql-reference/functions/encoding-functions.md
index eb357df19db..4a6e46e1759 100644
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@@ -376,14 +376,6 @@ Result:
└─────┘
```
-## UUIDStringToNum(str)
-
-Accepts a string containing 36 characters in the format `123e4567-e89b-12d3-a456-426655440000`, and returns it as a set of bytes in a FixedString(16).
-
-## UUIDNumToString(str)
-
-Accepts a FixedString(16) value. Returns a string containing 36 characters in text format.
-
## bitmaskToList(num)
Accepts an integer. Returns a string containing the list of powers of two that total the source number when summed. They are comma-separated without spaces in text format, in ascending order.
diff --git a/docs/en/sql-reference/functions/encryption-functions.md b/docs/en/sql-reference/functions/encryption-functions.md
index 642c8643c16..1c123aa3db2 100644
--- a/docs/en/sql-reference/functions/encryption-functions.md
+++ b/docs/en/sql-reference/functions/encryption-functions.md
@@ -294,6 +294,53 @@ Result:
Notice how only a portion of the data was properly decrypted, and the rest is gibberish since either `mode`, `key`, or `iv` were different upon encryption.
+## tryDecrypt
+
+Similar to `decrypt`, but returns NULL if decryption fails, for example because the wrong key was used.
+
+**Examples**
+
+Let's create a table where `user_id` is the unique user id, `encrypted` is an encrypted string field, and `iv` is the initialization vector for decryption/encryption. Assume that users know their id and the key to decrypt the encrypted field:
+
+```sql
+CREATE TABLE decrypt_null (
+ dt DateTime,
+ user_id UInt32,
+ encrypted String,
+ iv String
+) ENGINE = Memory;
+```
+
+Insert some data:
+
+```sql
+INSERT INTO decrypt_null VALUES
+ ('2022-08-02 00:00:00', 1, encrypt('aes-256-gcm', 'value1', 'keykeykeykeykeykeykeykeykeykey01', 'iv1'), 'iv1'),
+ ('2022-09-02 00:00:00', 2, encrypt('aes-256-gcm', 'value2', 'keykeykeykeykeykeykeykeykeykey02', 'iv2'), 'iv2'),
+ ('2022-09-02 00:00:01', 3, encrypt('aes-256-gcm', 'value3', 'keykeykeykeykeykeykeykeykeykey03', 'iv3'), 'iv3');
+```
+
+Query:
+
+```sql
+SELECT
+ dt,
+ user_id,
+ tryDecrypt('aes-256-gcm', encrypted, 'keykeykeykeykeykeykeykeykeykey02', iv) AS value
+FROM decrypt_null
+ORDER BY user_id ASC
+```
+
+Result:
+
+```
+┌──────────────────dt─┬─user_id─┬─value──┐
+│ 2022-08-02 00:00:00 │ 1 │ ᴺᵁᴸᴸ │
+│ 2022-09-02 00:00:00 │ 2 │ value2 │
+│ 2022-09-02 00:00:01 │ 3 │ ᴺᵁᴸᴸ │
+└─────────────────────┴─────────┴────────┘
+```
+
## aes_decrypt_mysql
Compatible with mysql encryption and decrypts data encrypted with [AES_ENCRYPT](https://dev.mysql.com/doc/refman/8.0/en/encryption-functions.html#function_aes-encrypt) function.
diff --git a/docs/en/sql-reference/functions/ext-dict-functions.md b/docs/en/sql-reference/functions/ext-dict-functions.md
index 728e26d6958..1c33638da09 100644
--- a/docs/en/sql-reference/functions/ext-dict-functions.md
+++ b/docs/en/sql-reference/functions/ext-dict-functions.md
@@ -1,20 +1,20 @@
---
slug: /en/sql-reference/functions/ext-dict-functions
sidebar_position: 58
-sidebar_label: External Dictionaries
+sidebar_label: Dictionaries
---
+# Functions for Working with Dictionaries
+
:::note
For dictionaries created with [DDL queries](../../sql-reference/statements/create/dictionary.md), the `dict_name` parameter must be fully specified, like `<database>.<dict_name>`. Otherwise, the current database is used.
:::
-# Functions for Working with External Dictionaries
-
-For information on connecting and configuring external dictionaries, see [External dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+For information on connecting and configuring dictionaries, see [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
## dictGet, dictGetOrDefault, dictGetOrNull
-Retrieves values from an external dictionary.
+Retrieves values from a dictionary.
``` sql
dictGet('dict_name', attr_names, id_expr)
@@ -52,7 +52,7 @@ Create a text file `ext-dict-test.csv` containing the following:
The first column is `id`, the second column is `c1`.
-Configure the external dictionary:
+Configure the dictionary:
``` xml
@@ -112,7 +112,7 @@ Create a text file `ext-dict-mult.csv` containing the following:
The first column is `id`, the second is `c1`, the third is `c2`.
-Configure the external dictionary:
+Configure the dictionary:
``` xml
@@ -185,7 +185,7 @@ INSERT INTO range_key_dictionary_source_table VALUES(2, toDate('2019-05-20'), to
INSERT INTO range_key_dictionary_source_table VALUES(3, toDate('2019-05-20'), toDate('2019-05-20'), 'Third', 'Third');
```
-Create the external dictionary:
+Create the dictionary:
```sql
CREATE DICTIONARY range_key_dictionary
@@ -226,7 +226,7 @@ Result:
**See Also**
-- [External Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)
+- [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md)
## dictHas
diff --git a/docs/en/sql-reference/functions/geo/index.md b/docs/en/sql-reference/functions/geo/index.md
index 64e23094105..8d659236d4c 100644
--- a/docs/en/sql-reference/functions/geo/index.md
+++ b/docs/en/sql-reference/functions/geo/index.md
@@ -8,70 +8,69 @@ title: "Geo Functions"
## Geographical Coordinates Functions
-- [greatCircleDistance](./coordinates.md#greatCircleDistance)
-- [geoDistance](./coordinates.md#geoDistance)
-- [greatCircleAngle](./coordinates.md#greatCircleAngle)
-- [pointInEllipses](./coordinates.md#pointInEllipses)
-- [pointInPolygon](./coordinates.md#pointInPolygon)
+- [greatCircleDistance](./coordinates.md#greatcircledistance)
+- [geoDistance](./coordinates.md#geodistance)
+- [greatCircleAngle](./coordinates.md#greatcircleangle)
+- [pointInEllipses](./coordinates.md#pointinellipses)
+- [pointInPolygon](./coordinates.md#pointinpolygon)
## Geohash Functions
-- [geohashEncode](./geohash.md#geohashEncode)
-- [geohashDecode](./geohash.md#geohashDecode)
-- [geohashesInBox](./geohash.md#geohashesInBox)
+- [geohashEncode](./geohash.md#geohashencode)
+- [geohashDecode](./geohash.md#geohashdecode)
+- [geohashesInBox](./geohash.md#geohashesinbox)
## H3 Indexes Functions
-- [h3IsValid](./h3.md#h3IsValid)
-- [h3GetResolution](./h3.md#h3GetResolution)
-- [h3EdgeAngle](./h3.md#h3EdgeAngle)
-- [h3EdgeLengthM](./h3.md#h3EdgeLengthM)
-- [h3EdgeLengthKm](./h3.md#h3EdgeLengthKm)
-- [geoToH3](./h3.md#geoToH3)
-- [h3ToGeo](./h3.md#h3ToGeo)
-- [h3ToGeoBoundary](./h3.md#h3ToGeoBoundary)
-- [h3kRing](./h3.md#h3kRing)
-- [h3GetBaseCell](./h3.md#h3GetBaseCell)
-- [h3HexAreaM2](./h3.md#h3HexAreaM2)
-- [h3HexAreaKm2](./h3.md#h3HexAreaKm2)
-- [h3IndexesAreNeighbors](./h3.md#h3IndexesAreNeighbors)
-- [h3ToChildren](./h3.md#h3ToChildren)
-- [h3ToParent](./h3.md#h3ToParent)
-- [h3ToString](./h3.md#h3ToString)
-- [stringToH3](./h3.md#stringToH3)
-- [h3GetResolution](./h3.md#h3GetResolution)
-- [h3IsResClassIII](./h3.md#h3IsResClassIII)
-- [h3IsPentagon](./h3.md#h3IsPentagon)
-- [h3GetFaces](./h3.md#h3GetFaces)
-- [h3CellAreaM2](./h3.md#h3CellAreaM2)
-- [h3CellAreaRads2](./h3.md#h3CellAreaRads2)
-- [h3ToCenterChild](./h3.md#h3ToCenterChild)
-- [h3ExactEdgeLengthM](./h3.md#h3ExactEdgeLengthM)
-- [h3ExactEdgeLengthKm](./h3.md#h3ExactEdgeLengthKm)
-- [h3ExactEdgeLengthRads](./h3.md#h3ExactEdgeLengthRads)
-- [h3NumHexagons](./h3.md#h3NumHexagons)
-- [h3Line](./h3.md#h3Line)
-- [h3Distance](./h3.md#h3Distance)
-- [h3HexRing](./h3.md#h3HexRing)
-- [h3GetUnidirectionalEdge](./h3.md#h3GetUnidirectionalEdge)
-- [h3UnidirectionalEdgeIsValid](./h3.md#h3UnidirectionalEdgeIsValid)
-- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3GetOriginIndexFromUnidirectionalEdge)
-- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3GetDestinationIndexFromUnidirectionalEdge)
-- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3GetIndexesFromUnidirectionalEdge)
-- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3GetUnidirectionalEdgesFromHexagon)
-- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3GetUnidirectionalEdgeBoundary)
+- [h3IsValid](./h3.md#h3isvalid)
+- [h3GetResolution](./h3.md#h3getresolution)
+- [h3EdgeAngle](./h3.md#h3edgeangle)
+- [h3EdgeLengthM](./h3.md#h3edgelengthm)
+- [h3EdgeLengthKm](./h3.md#h3edgelengthkm)
+- [geoToH3](./h3.md#geotoh3)
+- [h3ToGeo](./h3.md#h3togeo)
+- [h3ToGeoBoundary](./h3.md#h3togeoboundary)
+- [h3kRing](./h3.md#h3kring)
+- [h3GetBaseCell](./h3.md#h3getbasecell)
+- [h3HexAreaM2](./h3.md#h3hexaream2)
+- [h3HexAreaKm2](./h3.md#h3hexareakm2)
+- [h3IndexesAreNeighbors](./h3.md#h3indexesareneighbors)
+- [h3ToChildren](./h3.md#h3tochildren)
+- [h3ToParent](./h3.md#h3toparent)
+- [h3ToString](./h3.md#h3tostring)
+- [stringToH3](./h3.md#stringtoh3)
+- [h3GetResolution](./h3.md#h3getresolution)
+- [h3IsResClassIII](./h3.md#h3isresclassiii)
+- [h3IsPentagon](./h3.md#h3ispentagon)
+- [h3GetFaces](./h3.md#h3getfaces)
+- [h3CellAreaM2](./h3.md#h3cellaream2)
+- [h3CellAreaRads2](./h3.md#h3cellarearads2)
+- [h3ToCenterChild](./h3.md#h3tocenterchild)
+- [h3ExactEdgeLengthM](./h3.md#h3exactedgelengthm)
+- [h3ExactEdgeLengthKm](./h3.md#h3exactedgelengthkm)
+- [h3ExactEdgeLengthRads](./h3.md#h3exactedgelengthrads)
+- [h3NumHexagons](./h3.md#h3numhexagons)
+- [h3Line](./h3.md#h3line)
+- [h3Distance](./h3.md#h3distance)
+- [h3HexRing](./h3.md#h3hexring)
+- [h3GetUnidirectionalEdge](./h3.md#h3getunidirectionaledge)
+- [h3UnidirectionalEdgeIsValid](./h3.md#h3unidirectionaledgeisvalid)
+- [h3GetOriginIndexFromUnidirectionalEdge](./h3.md#h3getoriginindexfromunidirectionaledge)
+- [h3GetDestinationIndexFromUnidirectionalEdge](./h3.md#h3getdestinationindexfromunidirectionaledge)
+- [h3GetIndexesFromUnidirectionalEdge](./h3.md#h3getindexesfromunidirectionaledge)
+- [h3GetUnidirectionalEdgesFromHexagon](./h3.md#h3getunidirectionaledgesfromhexagon)
+- [h3GetUnidirectionalEdgeBoundary](./h3.md#h3getunidirectionaledgeboundary)
## S2 Index Functions
-- [geoToS2](./s2.md#geoToS2)
-- [s2ToGeo](./s2.md#s2ToGeo)
-- [s2GetNeighbors](./s2.md#s2GetNeighbors)
-- [s2CellsIntersect](./s2.md#s2CellsIntersect)
-- [s2CapContains](./s2.md#s2CapContains)
-- [s2CapUnion](./s2.md#s2CapUnion)
-- [s2RectAdd](./s2.md#s2RectAdd)
-- [s2RectContains](./s2.md#s2RectContains)
-- [s2RectUinion](./s2.md#s2RectUinion)
-- [s2RectIntersection](./s2.md#s2RectIntersection)
+- [geoToS2](./s2.md#geotos2)
+- [s2ToGeo](./s2.md#s2togeo)
+- [s2GetNeighbors](./s2.md#s2getneighbors)
+- [s2CellsIntersect](./s2.md#s2cellsintersect)
+- [s2CapContains](./s2.md#s2capcontains)
+- [s2CapUnion](./s2.md#s2capunion)
+- [s2RectAdd](./s2.md#s2rectadd)
+- [s2RectContains](./s2.md#s2rectcontains)
+- [s2RectUnion](./s2.md#s2rectunion)
+- [s2RectIntersection](./s2.md#s2rectintersection)
-[Original article](https://clickhouse.com/docs/en/sql-reference/functions/geo/)
diff --git a/docs/en/sql-reference/functions/math-functions.md b/docs/en/sql-reference/functions/math-functions.md
index 430762a1885..bcd118ce0be 100644
--- a/docs/en/sql-reference/functions/math-functions.md
+++ b/docs/en/sql-reference/functions/math-functions.md
@@ -549,3 +549,33 @@ Result:
│ 3.141592653589793 │
└───────────────────┘
```
+
+
+## factorial(n)
+
+Computes the factorial of an integer value. It works with any native integer type including UInt(8|16|32|64) and Int(8|16|32|64). The return type is UInt64.
+
+The factorial of 0 is 1. Likewise, the factorial() function returns 1 for any negative value. The maximum positive value for the input argument is 20; a value of 21 or greater will cause an exception to be thrown.
+
+
+**Syntax**
+
+``` sql
+factorial(n)
+```
+
+**Example**
+
+Query:
+
+``` sql
+SELECT factorial(10);
+```
+
+Result:
+
+``` text
+┌─factorial(10)─┐
+│ 3628800 │
+└───────────────┘
+```
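+
+As a quick check of the stated limit (21! does not fit into UInt64, so this should throw):
+
+``` sql
+SELECT factorial(21); -- expected to raise an exception
+```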
diff --git a/docs/en/sql-reference/functions/other-functions.md b/docs/en/sql-reference/functions/other-functions.md
index b80d75e3611..b9ec21bb59d 100644
--- a/docs/en/sql-reference/functions/other-functions.md
+++ b/docs/en/sql-reference/functions/other-functions.md
@@ -571,7 +571,7 @@ Example:
``` sql
SELECT
- transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s,
+ transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s,
count() AS c
FROM test.hits
GROUP BY domain(Referer)
@@ -593,6 +593,27 @@ LIMIT 10
└────────────────┴─────────┘
```
+## formatReadableDecimalSize(x)
+
+Accepts the size (number of bytes). Returns a rounded size with a suffix (KB, MB, etc.) as a string.
+
+Example:
+
+``` sql
+SELECT
+ arrayJoin([1, 1024, 1024*1024, 192851925]) AS filesize_bytes,
+ formatReadableDecimalSize(filesize_bytes) AS filesize
+```
+
+``` text
+┌─filesize_bytes─┬─filesize───┐
+│ 1 │ 1.00 B │
+│ 1024 │ 1.02 KB │
+│ 1048576 │ 1.05 MB │
+│ 192851925 │ 192.85 MB │
+└────────────────┴────────────┘
+```
+
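+To contrast with the binary-unit function described next (illustrative; 1024 bytes render as `1.02 KB` in decimal units versus `1.00 KiB` in binary units):
+
+``` sql
+SELECT formatReadableDecimalSize(1024) AS decimal, formatReadableSize(1024) AS binary;
+```
+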
## formatReadableSize(x)
Accepts the size (number of bytes). Returns a rounded size with a suffix (KiB, MiB, etc.) as a string.
diff --git a/docs/en/sql-reference/functions/random-functions.md b/docs/en/sql-reference/functions/random-functions.md
index d77cc55e5eb..08f2620a009 100644
--- a/docs/en/sql-reference/functions/random-functions.md
+++ b/docs/en/sql-reference/functions/random-functions.md
@@ -24,6 +24,11 @@ Returns a pseudo-random UInt64 number, evenly distributed among all UInt64-type
Uses a linear congruential generator.
+## canonicalRand
+The function generates pseudo-random results: independent and identically distributed values, uniformly distributed in [0, 1).
+
+Non-deterministic. Return type is Float64.
+
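+A minimal usage sketch (values differ per run, since the function is non-deterministic):
+
+``` sql
+SELECT canonicalRand() AS x FROM numbers(3);
+```
+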
## randConstant
Produces a constant column with a random value.
diff --git a/docs/en/sql-reference/functions/splitting-merging-functions.md b/docs/en/sql-reference/functions/splitting-merging-functions.md
index 70a1f10083b..7cad6b2fbbf 100644
--- a/docs/en/sql-reference/functions/splitting-merging-functions.md
+++ b/docs/en/sql-reference/functions/splitting-merging-functions.md
@@ -6,21 +6,22 @@ sidebar_label: Splitting and Merging Strings and Arrays
# Functions for Splitting and Merging Strings and Arrays
-## splitByChar(separator, s)
+## splitByChar(separator, s[, max_substrings])
-Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consisting of exactly one character.
+Splits a string into substrings separated by a specified character. It uses a constant string `separator` which consists of exactly one character.
Returns an array of selected substrings. Empty substrings may be selected if the separator occurs at the beginning or end of the string, or if there are multiple consecutive separators.
**Syntax**
``` sql
-splitByChar(separator, s)
+splitByChar(separator, s[, max_substrings])
```
**Arguments**
- `separator` — The separator which should contain exactly one character. [String](../../sql-reference/data-types/string.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
**Returned value(s)**
@@ -44,20 +45,22 @@ SELECT splitByChar(',', '1,2,3,abcde');
└─────────────────────────────────┘
```
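+
+For example, a sketch of the limit in action (assuming substrings beyond the limit are not returned, per the description above):
+
+``` sql
+SELECT splitByChar(',', '1,2,3,abcde', 2);
+```
+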
-## splitByString(separator, s)
+## splitByString(separator, s[, max_substrings])
Splits a string into substrings separated by a string. It uses a constant string `separator` of multiple characters as the separator. If the string `separator` is empty, it will split the string `s` into an array of single characters.
**Syntax**
``` sql
-splitByString(separator, s)
+splitByString(separator, s[, max_substrings])
```
**Arguments**
- `separator` — The separator. [String](../../sql-reference/data-types/string.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
+
**Returned value(s)**
@@ -91,20 +94,22 @@ SELECT splitByString('', 'abcde');
└────────────────────────────┘
```
-## splitByRegexp(regexp, s)
+## splitByRegexp(regexp, s[, max_substrings])
Splits a string into substrings separated by a regular expression. It uses a regular expression string `regexp` as the separator. If the `regexp` is empty, it will split the string `s` into an array of single characters. If no match is found for this regular expression, the string `s` won't be split.
**Syntax**
``` sql
-splitByRegexp(regexp, s)
+splitByRegexp(regexp, s[, max_substrings])
```
**Arguments**
- `regexp` — Regular expression. Constant. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
+
**Returned value(s)**
@@ -146,7 +151,7 @@ Result:
└────────────────────────────┘
```
-## splitByWhitespace(s)
+## splitByWhitespace(s[, max_substrings])
Splits a string into substrings separated by whitespace characters.
Returns an array of selected substrings.
@@ -154,12 +159,14 @@ Returns an array of selected substrings.
**Syntax**
``` sql
-splitByWhitespace(s)
+splitByWhitespace(s[, max_substrings])
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
+
**Returned value(s)**
@@ -179,7 +186,7 @@ SELECT splitByWhitespace(' 1! a, b. ');
└─────────────────────────────────────┘
```
-## splitByNonAlpha(s)
+## splitByNonAlpha(s[, max_substrings])
Splits a string into substrings separated by whitespace and punctuation characters.
Returns an array of selected substrings.
@@ -187,12 +194,14 @@ Returns an array of selected substrings.
**Syntax**
``` sql
-splitByNonAlpha(s)
+splitByNonAlpha(s[, max_substrings])
```
**Arguments**
- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
+
**Returned value(s)**
@@ -217,10 +226,28 @@ SELECT splitByNonAlpha(' 1! a, b. ');
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
-## alphaTokens(s)
+## alphaTokens(s[, max_substrings]), splitByAlpha(s[, max_substrings])
+Selects substrings of consecutive bytes from the ranges a-z and A-Z. Returns an array of substrings.
+**Syntax**
+
+``` sql
+alphaTokens(s[, max_substrings])
+splitByAlpha(s[, max_substrings])
+```
+
+**Arguments**
+
+- `s` — The string to split. [String](../../sql-reference/data-types/string.md).
+- `max_substrings` — An optional `Int64` defaulting to 0. When `max_substrings` > 0, the returned array will contain at most `max_substrings` substrings; otherwise the function returns as many substrings as possible.
+
+**Returned value(s)**
+
+Returns an array of selected substrings.
+
+Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
+
**Example**
``` sql
diff --git a/docs/en/sql-reference/functions/string-functions.md b/docs/en/sql-reference/functions/string-functions.md
index a8ba4843279..cdbf29f3e6d 100644
--- a/docs/en/sql-reference/functions/string-functions.md
+++ b/docs/en/sql-reference/functions/string-functions.md
@@ -571,13 +571,13 @@ Similar to base58Decode, but returns an empty string in case of error.
## base64Encode(s)
-Encodes ‘s’ string into base64
+Encodes ‘s’ FixedString or String into base64.
Alias: `TO_BASE64`.
## base64Decode(s)
-Decode base64-encoded string ‘s’ into original string. In case of failure raises an exception.
+Decode base64-encoded FixedString or String ‘s’ into original string. In case of failure raises an exception.
Alias: `FROM_BASE64`.
@@ -1150,3 +1150,13 @@ A text with tags .
The content within CDATA
Do Nothing for 2 Minutes 2:00
```
+
+## ascii(s) {#ascii}
+
+Returns the ASCII code point of the first character of string `s`. The result type is Int32.
+
+If `s` is empty, the result is 0. If the first character is not an ASCII character or not part of the Latin-1 Supplement range of UTF-16, the result is undefined.
+
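+A simple illustration (the ASCII code point of `A` is 65):
+
+``` sql
+SELECT ascii('A');
+```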
diff --git a/docs/en/sql-reference/functions/string-replace-functions.md b/docs/en/sql-reference/functions/string-replace-functions.md
index adf2a07b732..d1f0e44f6b4 100644
--- a/docs/en/sql-reference/functions/string-replace-functions.md
+++ b/docs/en/sql-reference/functions/string-replace-functions.md
@@ -6,28 +6,29 @@ sidebar_label: For Replacing in Strings
# Functions for Searching and Replacing in Strings
-:::note
+:::note
Functions for [searching](../../sql-reference/functions/string-search-functions.md) and [other manipulations with strings](../../sql-reference/functions/string-functions.md) are described separately.
:::
## replaceOne(haystack, pattern, replacement)
-Replaces the first occurrence, if it exists, of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
-Hereafter, ‘pattern’ and ‘replacement’ must be constants.
+Replaces the first occurrence of the substring ‘pattern’ (if it exists) in ‘haystack’ by the ‘replacement’ string.
+‘pattern’ and ‘replacement’ must be constants.
## replaceAll(haystack, pattern, replacement), replace(haystack, pattern, replacement)
-Replaces all occurrences of the ‘pattern’ substring in ‘haystack’ with the ‘replacement’ substring.
+Replaces all occurrences of the substring ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
## replaceRegexpOne(haystack, pattern, replacement)
-Replacement using the ‘pattern’ regular expression. A re2 regular expression.
-Replaces only the first occurrence, if it exists.
-A pattern can be specified as ‘replacement’. This pattern can include substitutions `\0-\9`.
-The substitution `\0` includes the entire regular expression. Substitutions `\1-\9` correspond to the subpattern numbers.To use the `\` character in a template, escape it using `\`.
-Also keep in mind that a string literal requires an extra escape.
+Replaces the first occurrence of the substring matching the regular expression ‘pattern’ in ‘haystack’ by the ‘replacement’ string.
+‘pattern’ must be a constant [re2 regular expression](https://github.com/google/re2/wiki/Syntax).
+‘replacement’ must be a plain constant string or a constant string containing substitutions `\0-\9`.
+Substitutions `\1-\9` correspond to the 1st to 9th capturing group (submatch), substitution `\0` corresponds to the entire match.
+To use a verbatim `\` character in the ‘pattern’ or ‘replacement’ string, escape it using `\`.
+Also keep in mind that string literals require extra escaping.
-Example 1. Converting the date to American format:
+Example 1. Converting ISO dates to American format:
``` sql
SELECT DISTINCT
@@ -62,7 +63,7 @@ SELECT replaceRegexpOne('Hello, World!', '.*', '\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0')
## replaceRegexpAll(haystack, pattern, replacement)
-This does the same thing, but replaces all the occurrences. Example:
+Like ‘replaceRegexpOne’, but replaces all occurrences of the pattern. Example:
``` sql
SELECT replaceRegexpAll('Hello, World!', '.', '\\0\\0') AS res
diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md
index b8f222c2e4e..43542367cd5 100644
--- a/docs/en/sql-reference/functions/uuid-functions.md
+++ b/docs/en/sql-reference/functions/uuid-functions.md
@@ -211,12 +211,19 @@ SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid
## UUIDStringToNum
-Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../sql-reference/data-types/fixedstring.md).
+Accepts a `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../../sql-reference/data-types/fixedstring.md) as its binary representation, with the format optionally specified by `variant` (`Big-endian` by default).
+
+**Syntax**
``` sql
-UUIDStringToNum(String)
+UUIDStringToNum(string[, variant = 1])
```
+**Arguments**
+
+- `string` — String of 36 characters or FixedString(36). [String](../../sql-reference/syntax.md#syntax-string-literal).
+- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`.
+
**Returned value**
FixedString(16)
@@ -235,14 +242,33 @@ SELECT
└──────────────────────────────────────┴──────────────────┘
```
+``` sql
+SELECT
+ '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
+ UUIDStringToNum(uuid, 2) AS bytes
+```
+
+``` text
+┌─uuid─────────────────────────────────┬─bytes────────────┐
+│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @<…              │
+└──────────────────────────────────────┴──────────────────┘
+```
diff --git a/docs/en/sql-reference/statements/alter/projection.md b/docs/en/sql-reference/statements/alter/projection.md
--- a/docs/en/sql-reference/statements/alter/projection.md
+++ b/docs/en/sql-reference/statements/alter/projection.md
-- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
+You can define one or more projections for a table. During query analysis, ClickHouse will select the projection with the least data to scan, without modifying the query provided by the user.
-- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+## Example filtering without using primary keys
-- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+Creating the table:
+```
+CREATE TABLE visits_order
+(
+ `user_id` UInt64,
+ `user_name` String,
+ `pages_visited` Nullable(Float64),
+ `user_agent` String
+)
+ENGINE = MergeTree()
+PRIMARY KEY user_agent
+```
+Using `ALTER TABLE`, we can add the projection to an existing table:
+```
+ALTER TABLE visits_order ADD PROJECTION user_name_projection (
+SELECT *
+ORDER BY user_name
+)
-- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+ALTER TABLE visits_order MATERIALIZE PROJECTION user_name_projection
+```
+Inserting the data:
+```
+INSERT INTO visits_order SELECT
+ number,
+ 'test',
+ 1.5 * (number / 2),
+ 'Android'
+FROM numbers(1, 100);
+```
+
+The projection will allow us to filter by `user_name` quickly, even though `user_name` was not defined as a `PRIMARY KEY` in the original table.
+At query time, ClickHouse determines that less data will be processed if the projection is used, as the data is ordered by `user_name`.
+```
+SELECT
+ *
+FROM visits_order
+WHERE user_name='test'
+LIMIT 2
+```
+
+To verify that a query used the projection, we can review the `system.query_log` table. In the `projections` field we get the name of the projection used, or an empty value if none was used:
+```
+SELECT query, projections FROM system.query_log WHERE query_id='<query_id>'
+```
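+
+Alternatively, the `EXPLAIN` output can be inspected for the same purpose (a rough illustration; the exact plan format varies between versions):
+```
+EXPLAIN SELECT * FROM visits_order WHERE user_name='test' LIMIT 2
+```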
+
+## Example pre-aggregation query
+
+Creating the table with the Projection:
+```
+CREATE TABLE visits
+(
+ `user_id` UInt64,
+ `user_name` String,
+ `pages_visited` Nullable(Float64),
+ `user_agent` String,
+ PROJECTION projection_visits_by_user
+ (
+ SELECT
+ user_agent,
+ sum(pages_visited)
+ GROUP BY user_id, user_agent
+ )
+)
+ENGINE = MergeTree()
+ORDER BY user_agent
+```
+Inserting the data:
+```
+INSERT INTO visits SELECT
+ number,
+ 'test',
+ 1.5 * (number / 2),
+ 'Android'
+FROM numbers(1, 100);
+```
+```
+INSERT INTO visits SELECT
+ number,
+ 'test',
+ 1. * (number / 2),
+ 'IOS'
+FROM numbers(100, 500);
+```
+We will first execute a query using `GROUP BY` on the field `user_agent`; this query will not use the projection defined, as the pre-aggregation does not match.
+```
+SELECT
+ user_agent,
+ count(DISTINCT user_id)
+FROM visits
+GROUP BY user_agent
+```
+
+To use the projection, we can execute queries that select part of, or all of, the pre-aggregation and `GROUP BY` fields.
+```
+SELECT
+ user_agent
+FROM visits
+WHERE user_id > 50 AND user_id < 150
+GROUP BY user_agent
+```
+```
+SELECT
+ user_agent,
+ sum(pages_visited)
+FROM visits
+GROUP BY user_id, user_agent
+```
+
+As mentioned before, we can review the `system.query_log` table. In the `projections` field we get the name of the projection used, or an empty value if none was used:
+```
+SELECT query, projections FROM system.query_log WHERE query_id='<query_id>'
+```
+
+# Manipulating Projections
+
+The following operations with [projections](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#projections) are available:
+
+## ADD PROJECTION
+
+`ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
+
+## DROP PROJECTION
+
+`ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
+
+## MATERIALIZE PROJECTION
+
+`ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
+
+## CLEAR PROJECTION
+
+`ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes projection files from disk without removing description. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
The commands `ADD`, `DROP` and `CLEAR` are lightweight in a sense that they only change metadata or remove files.
-Also, they are replicated, syncing projections metadata via ZooKeeper.
+Also, they are replicated, syncing projections metadata via ClickHouse Keeper or ZooKeeper.
:::note
-Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
+Projection manipulation is supported only for tables with [`*MergeTree`](/docs/en/engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](/docs/en/engines/table-engines/mergetree-family/replication.md) variants).
:::
diff --git a/docs/en/sql-reference/statements/alter/index/index.md b/docs/en/sql-reference/statements/alter/skipping-index.md
similarity index 70%
rename from docs/en/sql-reference/statements/alter/index/index.md
rename to docs/en/sql-reference/statements/alter/skipping-index.md
index 03d4bd47e71..037e4bc38c5 100644
--- a/docs/en/sql-reference/statements/alter/index/index.md
+++ b/docs/en/sql-reference/statements/alter/skipping-index.md
@@ -1,5 +1,6 @@
---
-slug: /en/sql-reference/statements/alter/index
+slug: /en/sql-reference/statements/alter/skipping-index
+
toc_hidden_folder: true
sidebar_position: 42
sidebar_label: INDEX
@@ -13,12 +14,12 @@ The following operations are available:
- `ALTER TABLE [db].table_name [ON CLUSTER cluster] DROP INDEX name` - Removes index description from tables metadata and deletes index files from disk.
-- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](../../../../sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.
+- `ALTER TABLE [db.]table_name [ON CLUSTER cluster] MATERIALIZE INDEX name [IN PARTITION partition_name]` - Rebuilds the secondary index `name` for the specified `partition_name`. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations). If `IN PARTITION` part is omitted then it rebuilds the index for the whole table data.
The first two commands are lightweight in a sense that they only change metadata or remove files.
Also, they are replicated, syncing indices metadata via ZooKeeper.
:::note
-Index manipulation is supported only for tables with [`*MergeTree`](../../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../../engines/table-engines/mergetree-family/replication.md) variants).
+Index manipulation is supported only for tables with [`*MergeTree`](/docs/en/engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](/docs/en/engines/table-engines/mergetree-family/replication.md) variants).
:::
diff --git a/docs/en/sql-reference/statements/alter/update.md b/docs/en/sql-reference/statements/alter/update.md
index e4fb872ae24..5d27c382982 100644
--- a/docs/en/sql-reference/statements/alter/update.md
+++ b/docs/en/sql-reference/statements/alter/update.md
@@ -10,7 +10,7 @@ sidebar_label: UPDATE
ALTER TABLE [db.]table [ON CLUSTER cluster] UPDATE column1 = expr1 [, ...] WHERE filter_expr
```
-Manipulates data matching the specified filtering expression. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
+Manipulates data matching the specified filtering expression. Implemented as a [mutation](/docs/en/sql-reference/statements/alter/index.md#mutations).
:::note
The `ALTER TABLE` prefix makes this syntax different from most other systems supporting SQL. It is intended to signify that unlike similar queries in OLTP databases this is a heavy operation not designed for frequent use.
@@ -20,11 +20,11 @@ The `filter_expr` must be of type `UInt8`. This query updates values of specifie
One query can contain several commands separated by commas.
-The synchronicity of the query processing is defined by the [mutations_sync](../../../operations/settings/settings.md#mutations_sync) setting. By default, it is asynchronous.
+The synchronicity of the query processing is defined by the [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting. By default, it is asynchronous.
**See also**
-- [Mutations](../../../sql-reference/statements/alter/index.md#mutations)
-- [Synchronicity of ALTER Queries](../../../sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
-- [mutations_sync](../../../operations/settings/settings.md#mutations_sync) setting
+- [Mutations](/docs/en/sql-reference/statements/alter/index.md#mutations)
+- [Synchronicity of ALTER Queries](/docs/en/sql-reference/statements/alter/index.md#synchronicity-of-alter-queries)
+- [mutations_sync](/docs/en/operations/settings/settings.md/#mutations_sync) setting
diff --git a/docs/en/sql-reference/statements/alter/user.md b/docs/en/sql-reference/statements/alter/user.md
index 0a68885842a..31db89164d7 100644
--- a/docs/en/sql-reference/statements/alter/user.md
+++ b/docs/en/sql-reference/statements/alter/user.md
@@ -12,7 +12,7 @@ Syntax:
``` sql
ALTER USER [IF EXISTS] name1 [ON CLUSTER cluster_name1] [RENAME TO new_name1]
[, name2 [ON CLUSTER cluster_name2] [RENAME TO new_name2] ...]
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}]
+ [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
[[ADD | DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ]
[GRANTEES {user | role | ANY | NONE} [,...] [EXCEPT {user | role} [,...]]]
diff --git a/docs/en/sql-reference/statements/check-table.md b/docs/en/sql-reference/statements/check-table.md
index 1a2ec69e4f9..8c4b8ab90a2 100644
--- a/docs/en/sql-reference/statements/check-table.md
+++ b/docs/en/sql-reference/statements/check-table.md
@@ -1,14 +1,14 @@
---
slug: /en/sql-reference/statements/check-table
sidebar_position: 41
-sidebar_label: CHECK
+sidebar_label: CHECK TABLE
title: "CHECK TABLE Statement"
---
Checks if the data in the table is corrupted.
``` sql
-CHECK TABLE [db.]name
+CHECK TABLE [db.]name [PARTITION partition_expr]
```
The `CHECK TABLE` query compares actual file sizes with the expected values which are stored on the server. If the file sizes do not match the stored values, it means the data is corrupted. This can be caused, for example, by a system crash during query execution.
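+
+For example, checking a single partition (a sketch; `202201` stands in for a hypothetical partition ID of `test_table`):
+
+``` sql
+CHECK TABLE test_table PARTITION 202201
+```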
diff --git a/docs/en/sql-reference/statements/create/database.md b/docs/en/sql-reference/statements/create/database.md
index 432f5975cc8..7954d1362f1 100644
--- a/docs/en/sql-reference/statements/create/database.md
+++ b/docs/en/sql-reference/statements/create/database.md
@@ -31,7 +31,7 @@ By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/a
### COMMENT
-You can add a comment to the database when you creating it.
+You can add a comment to the database when you are creating it.
The comment is supported for all database engines.
diff --git a/docs/en/sql-reference/statements/create/dictionary.md b/docs/en/sql-reference/statements/create/dictionary.md
index b24ff480c2d..a470b071971 100644
--- a/docs/en/sql-reference/statements/create/dictionary.md
+++ b/docs/en/sql-reference/statements/create/dictionary.md
@@ -5,9 +5,9 @@ sidebar_label: DICTIONARY
title: "CREATE DICTIONARY"
---
-Creates a new [external dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
+Creates a new [dictionary](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) with given [structure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-structure.md), [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md), [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) and [lifetime](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).
-**Syntax**
+## Syntax
``` sql
CREATE [OR REPLACE] DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
@@ -25,17 +25,21 @@ SETTINGS(setting_name = setting_value, setting_name = setting_value, ...)
COMMENT 'Comment'
```
-External dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
+The dictionary structure consists of attributes. Dictionary attributes are specified similarly to table columns. The only required attribute property is its type, all other properties may have default values.
`ON CLUSTER` clause allows creating dictionary on a cluster, see [Distributed DDL](../../../sql-reference/distributed-ddl.md).
Depending on dictionary [layout](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-layout.md) one or more attributes can be specified as dictionary keys.
-For more information, see [External Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
+## SOURCE
-You can add a comment to the dictionary when you creating it using `COMMENT` clause.
+The source for a dictionary can be one of the following:
+- a table in the current ClickHouse service
+- a table in a remote ClickHouse service
+- a file available by HTTP(S)
+- another database
-**Example**
+### Create a dictionary from a table in the current ClickHouse service
Input table `source_table`:
@@ -49,51 +53,81 @@ Input table `source_table`:
Creating the dictionary:
``` sql
-CREATE DICTIONARY dictionary_with_comment
+CREATE DICTIONARY id_value_dictionary
(
id UInt64,
value String
)
PRIMARY KEY id
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'source_table'))
+SOURCE(CLICKHOUSE(TABLE 'source_table'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 1000)
-COMMENT 'The temporary dictionary';
```
Output the dictionary:
``` sql
-SHOW CREATE DICTIONARY dictionary_with_comment;
+SHOW CREATE DICTIONARY id_value_dictionary;
```
-```text
-┌─statement───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
-│ CREATE DICTIONARY default.dictionary_with_comment
+```response
+CREATE DICTIONARY default.id_value_dictionary
(
`id` UInt64,
`value` String
)
PRIMARY KEY id
-SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() TABLE 'source_table'))
+SOURCE(CLICKHOUSE(TABLE 'source_table'))
LIFETIME(MIN 0 MAX 1000)
LAYOUT(FLAT())
-COMMENT 'The temporary dictionary' │
-└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
```
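+
+To retrieve a value from the dictionary (an illustrative check using the standard `dictGet` function):
+
+``` sql
+SELECT dictGet('id_value_dictionary', 'value', toUInt64(1));
+```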
-Output the comment to dictionary:
+### Create a dictionary from a table in a remote ClickHouse service
+
+Input table (in the remote ClickHouse service) `source_table`:
+
+``` text
+┌─id─┬─value──┐
+│ 1 │ First │
+│ 2 │ Second │
+└────┴────────┘
+```
+
+Creating the dictionary:
``` sql
-SELECT comment FROM system.dictionaries WHERE name == 'dictionary_with_comment' AND database == currentDatabase();
+CREATE DICTIONARY id_value_dictionary
+(
+ id UInt64,
+ value String
+)
+PRIMARY KEY id
+SOURCE(CLICKHOUSE(HOST 'HOSTNAME' PORT 9000 USER 'default' PASSWORD 'PASSWORD' TABLE 'source_table' DB 'default'))
+LAYOUT(FLAT())
+LIFETIME(MIN 0 MAX 1000)
```
-```text
-┌─comment──────────────────┐
-│ The temporary dictionary │
-└──────────────────────────┘
+### Create a dictionary from a file available by HTTP(S)
+
+```sql
+CREATE DICTIONARY default.taxi_zone_dictionary
+(
+ `LocationID` UInt16 DEFAULT 0,
+ `Borough` String,
+ `Zone` String,
+ `service_zone` String
+)
+PRIMARY KEY LocationID
+SOURCE(HTTP(URL 'https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/taxi_zone_lookup.csv' FORMAT 'CSVWithNames'))
+LIFETIME(MIN 0 MAX 0)
+LAYOUT(HASHED())
```
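+
+Once created, the dictionary can also be queried directly, like a table (illustrative):
+
+```sql
+SELECT * FROM taxi_zone_dictionary LIMIT 3;
+```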
+### Create a dictionary from another database
+
+Please see the details in [Dictionary sources](/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md/#dbms).
+
**See Also**
-- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [external dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+- For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
+- [system.dictionaries](../../../operations/system-tables/dictionaries.md) — This table contains information about [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
diff --git a/docs/en/sql-reference/statements/create/function.md b/docs/en/sql-reference/statements/create/function.md
index 63c006b1e3e..90be007bf43 100644
--- a/docs/en/sql-reference/statements/create/function.md
+++ b/docs/en/sql-reference/statements/create/function.md
@@ -4,7 +4,7 @@ sidebar_position: 38
sidebar_label: FUNCTION
---
-# CREATE FUNCTION
+# CREATE FUNCTION — user defined function (UDF)
Creates a user defined function from a lambda expression. The expression must consist of function parameters, constants, operators, or other function calls.
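+
+For example, a minimal sketch of such a lambda-based definition (`linear_equation` is an illustrative name):
+
+``` sql
+CREATE FUNCTION linear_equation AS (x, k, b) -> k*x + b;
+SELECT linear_equation(number, 2, 1) FROM numbers(3);
+```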
diff --git a/docs/en/sql-reference/statements/create/user.md b/docs/en/sql-reference/statements/create/user.md
index 56a0560e57e..a756b3d4a0d 100644
--- a/docs/en/sql-reference/statements/create/user.md
+++ b/docs/en/sql-reference/statements/create/user.md
@@ -12,7 +12,7 @@ Syntax:
``` sql
CREATE USER [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1]
[, name2 [ON CLUSTER cluster_name2] ...]
- [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']}]
+ [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password | plaintext_password | sha256_password | sha256_hash | double_sha1_password | double_sha1_hash}] BY {'password' | 'hash'}} | {WITH ldap SERVER 'server_name'} | {WITH kerberos [REALM 'realm']} | {WITH ssl_certificate CN 'common_name'}]
[HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE]
[DEFAULT ROLE role [,...]]
[DEFAULT DATABASE database | NONE]
@@ -34,6 +34,7 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH double_sha1_hash BY 'hash'`
- `IDENTIFIED WITH ldap SERVER 'server_name'`
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
+- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
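+
+For example, a minimal sketch of certificate-based identification (the common name is a placeholder):
+
+``` sql
+CREATE USER certificate_user IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'
+```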
For identification with sha256_hash using `SALT` - the hash must be calculated from the concatenation of 'password' and 'salt'.
@@ -54,7 +55,7 @@ Another way of specifying host is to use `@` syntax following the username. Exam
- `CREATE USER mira@'localhost'` — Equivalent to the `HOST LOCAL` syntax.
- `CREATE USER mira@'192.168.%.%'` — Equivalent to the `HOST LIKE` syntax.
-:::warning
+:::warning
ClickHouse treats `user_name@'address'` as a username as a whole. Thus, technically you can create multiple users with the same `user_name` and different constructions after `@`. However, we do not recommend to do so.
:::
diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md
index 46dd7e6fdd7..5833c43f55d 100644
--- a/docs/en/sql-reference/statements/create/view.md
+++ b/docs/en/sql-reference/statements/create/view.md
@@ -166,23 +166,6 @@ SELECT * FROM [db.]live_view WHERE ...
You can force live view refresh using the `ALTER LIVE VIEW [db.]table_name REFRESH` statement.
-### WITH TIMEOUT Clause
-
-When a live view is created with a `WITH TIMEOUT` clause then the live view will be dropped automatically after the specified number of seconds elapse since the end of the last [WATCH](../../../sql-reference/statements/watch.md) query that was watching the live view.
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
-```
-
-If the timeout value is not specified then the value specified by the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
-
-**Example:**
-
-```sql
-CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
-```
-
### WITH REFRESH Clause
When a live view is created with a `WITH REFRESH` clause then it will be automatically refreshed after the specified number of seconds elapse since the last refresh or trigger.
@@ -212,20 +195,6 @@ WATCH lv
└─────────────────────┴──────────┘
```
-You can combine `WITH TIMEOUT` and `WITH REFRESH` clauses using an `AND` clause.
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
-```
-
-**Example:**
-
-```sql
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
-```
-
-After 15 sec the live view will be automatically dropped if there are no active `WATCH` queries.
-
```sql
WATCH lv
```
diff --git a/docs/en/sql-reference/statements/describe-table.md b/docs/en/sql-reference/statements/describe-table.md
index ff3361caadb..4864743abbc 100644
--- a/docs/en/sql-reference/statements/describe-table.md
+++ b/docs/en/sql-reference/statements/describe-table.md
@@ -1,7 +1,7 @@
---
slug: /en/sql-reference/statements/describe-table
sidebar_position: 42
-sidebar_label: DESCRIBE
+sidebar_label: DESCRIBE TABLE
title: "DESCRIBE TABLE"
---
diff --git a/docs/en/sql-reference/statements/grant.md b/docs/en/sql-reference/statements/grant.md
index 56bb4cd4b65..546a8b0958d 100644
--- a/docs/en/sql-reference/statements/grant.md
+++ b/docs/en/sql-reference/statements/grant.md
@@ -221,7 +221,7 @@ By default, a user account or a role has no privileges.
If a user or a role has no privileges, it is displayed as [NONE](#grant-none) privilege.
-Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
+Some queries by their implementation require a set of privileges. For example, to execute the [RENAME](../../sql-reference/statements/rename.md) query you need the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
### SELECT
@@ -304,11 +304,11 @@ Examples of how this hierarchy is treated:
- The `MODIFY SETTING` privilege allows modifying table engine settings. It does not affect settings or server configuration parameters.
- The `ATTACH` operation needs the [CREATE](#grant-create) privilege.
- The `DETACH` operation needs the [DROP](#grant-drop) privilege.
-- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/misc.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
+- To stop mutation by the [KILL MUTATION](../../sql-reference/statements/kill.md#kill-mutation) query, you need to have a privilege to start this mutation. For example, if you want to stop the `ALTER UPDATE` query, you need the `ALTER UPDATE`, `ALTER TABLE`, or `ALTER` privilege.
### CREATE
-Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/misc.md#attach) DDL-queries according to the following hierarchy of privileges:
+Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](../../sql-reference/statements/attach.md) DDL-queries according to the following hierarchy of privileges:
- `CREATE`. Level: `GROUP`
- `CREATE DATABASE`. Level: `DATABASE`
@@ -323,7 +323,7 @@ Allows executing [CREATE](../../sql-reference/statements/create/index.md) and [A
### DROP
-Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH](../../sql-reference/statements/misc.md#detach) queries according to the following hierarchy of privileges:
+Allows executing [DROP](../../sql-reference/statements/drop.md) and [DETACH](../../sql-reference/statements/detach.md) queries according to the following hierarchy of privileges:
- `DROP`. Level: `GROUP`
- `DROP DATABASE`. Level: `DATABASE`
@@ -333,13 +333,13 @@ Allows executing [DROP](../../sql-reference/statements/misc.md#drop) and [DETACH
### TRUNCATE
-Allows executing [TRUNCATE](../../sql-reference/statements/misc.md#truncate-statement) queries.
+Allows executing [TRUNCATE](../../sql-reference/statements/truncate.md) queries.
Privilege level: `TABLE`.
### OPTIMIZE
-Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/misc.md#misc_operations-optimize) queries.
+Allows executing [OPTIMIZE TABLE](../../sql-reference/statements/optimize.md) queries.
Privilege level: `TABLE`.
@@ -359,7 +359,7 @@ A user has the `SHOW` privilege if it has any other privilege concerning the spe
### KILL QUERY
-Allows executing [KILL](../../sql-reference/statements/misc.md#kill-query-statement) queries according to the following hierarchy of privileges:
+Allows executing [KILL](../../sql-reference/statements/kill.md#kill-query) queries according to the following hierarchy of privileges:
Privilege level: `GLOBAL`.
diff --git a/docs/en/sql-reference/statements/index.md b/docs/en/sql-reference/statements/index.md
index bfb90f4a89f..b286d8c932d 100644
--- a/docs/en/sql-reference/statements/index.md
+++ b/docs/en/sql-reference/statements/index.md
@@ -8,25 +8,25 @@ sidebar_label: Statements
Statements represent various kinds of actions you can perform using SQL queries. Each kind of statement has its own syntax and usage details that are described separately:
-- [SELECT](../../sql-reference/statements/select/index.md)
-- [INSERT INTO](../../sql-reference/statements/insert-into.md)
-- [CREATE](../../sql-reference/statements/create/index.md)
-- [ALTER](../../sql-reference/statements/alter/index.md)
-- [SYSTEM](../../sql-reference/statements/system.md)
-- [SHOW](../../sql-reference/statements/show.md)
-- [GRANT](../../sql-reference/statements/grant.md)
-- [REVOKE](../../sql-reference/statements/revoke.md)
-- [ATTACH](../../sql-reference/statements/attach.md)
-- [CHECK TABLE](../../sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
-- [DETACH](../../sql-reference/statements/detach.md)
-- [DROP](../../sql-reference/statements/drop.md)
-- [EXISTS](../../sql-reference/statements/exists.md)
-- [KILL](../../sql-reference/statements/kill.md)
-- [OPTIMIZE](../../sql-reference/statements/optimize.md)
-- [RENAME](../../sql-reference/statements/rename.md)
-- [SET](../../sql-reference/statements/set.md)
-- [SET ROLE](../../sql-reference/statements/set-role.md)
-- [TRUNCATE](../../sql-reference/statements/truncate.md)
-- [USE](../../sql-reference/statements/use.md)
-- [EXPLAIN](../../sql-reference/statements/explain.md)
+- [SELECT](/docs/en/sql-reference/statements/select/index.md)
+- [INSERT INTO](/docs/en/sql-reference/statements/insert-into.md)
+- [CREATE](/docs/en/sql-reference/statements/create/index.md)
+- [ALTER](/docs/en/sql-reference/statements/alter/index.md)
+- [SYSTEM](/docs/en/sql-reference/statements/system.md)
+- [SHOW](/docs/en/sql-reference/statements/show.md)
+- [GRANT](/docs/en/sql-reference/statements/grant.md)
+- [REVOKE](/docs/en/sql-reference/statements/revoke.md)
+- [ATTACH](/docs/en/sql-reference/statements/attach.md)
+- [CHECK TABLE](/docs/en/sql-reference/statements/check-table.md)
+- [DESCRIBE TABLE](/docs/en/sql-reference/statements/describe-table.md)
+- [DETACH](/docs/en/sql-reference/statements/detach.md)
+- [DROP](/docs/en/sql-reference/statements/drop.md)
+- [EXISTS](/docs/en/sql-reference/statements/exists.md)
+- [KILL](/docs/en/sql-reference/statements/kill.md)
+- [OPTIMIZE](/docs/en/sql-reference/statements/optimize.md)
+- [RENAME](/docs/en/sql-reference/statements/rename.md)
+- [SET](/docs/en/sql-reference/statements/set.md)
+- [SET ROLE](/docs/en/sql-reference/statements/set-role.md)
+- [TRUNCATE](/docs/en/sql-reference/statements/truncate.md)
+- [USE](/docs/en/sql-reference/statements/use.md)
+- [EXPLAIN](/docs/en/sql-reference/statements/explain.md)
diff --git a/docs/en/sql-reference/statements/misc.md b/docs/en/sql-reference/statements/misc.md
deleted file mode 100644
index d812dd2008a..00000000000
--- a/docs/en/sql-reference/statements/misc.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-slug: /en/sql-reference/statements/misc
-toc_hidden: true
-sidebar_position: 70
----
-
-# Miscellaneous Statements
-
-- [ATTACH](../../sql-reference/statements/attach.md)
-- [CHECK TABLE](../../sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
-- [DETACH](../../sql-reference/statements/detach.md)
-- [DROP](../../sql-reference/statements/drop.md)
-- [EXISTS](../../sql-reference/statements/exists.md)
-- [KILL](../../sql-reference/statements/kill.md)
-- [OPTIMIZE](../../sql-reference/statements/optimize.md)
-- [RENAME](../../sql-reference/statements/rename.md)
-- [SET](../../sql-reference/statements/set.md)
-- [SET ROLE](../../sql-reference/statements/set-role.md)
-- [TRUNCATE](../../sql-reference/statements/truncate.md)
-- [USE](../../sql-reference/statements/use.md)
diff --git a/docs/en/sql-reference/statements/optimize.md b/docs/en/sql-reference/statements/optimize.md
index 680ff773992..036d3f0599a 100644
--- a/docs/en/sql-reference/statements/optimize.md
+++ b/docs/en/sql-reference/statements/optimize.md
@@ -22,7 +22,7 @@ The `OPTIMIZE` query is supported for [MergeTree](../../engines/table-engines/me
When `OPTIMIZE` is used with the [ReplicatedMergeTree](../../engines/table-engines/mergetree-family/replication.md) family of table engines, ClickHouse creates a task for merging and waits for execution on all replicas (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `2`) or on current replica (if the [replication_alter_partitions_sync](../../operations/settings/settings.md#replication-alter-partitions-sync) setting is set to `1`).
- If `OPTIMIZE` does not perform a merge for any reason, it does not notify the client. To enable notifications, use the [optimize_throw_if_noop](../../operations/settings/settings.md#setting-optimize_throw_if_noop) setting.
-- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](../../sql-reference/statements/alter/index.md#alter-how-to-specify-part-expr).
+- If you specify a `PARTITION`, only the specified partition is optimized. [How to set partition expression](alter/partition.md#how-to-set-partition-expression).
- If you specify `FINAL`, optimization is performed even when all the data is already in one part. Also merge is forced even if concurrent merges are performed.
- If you specify `DEDUPLICATE`, then completely identical rows (unless by-clause is specified) will be deduplicated (all columns are compared), it makes sense only for the MergeTree engine.
diff --git a/docs/en/sql-reference/statements/select/intersect.md b/docs/en/sql-reference/statements/select/intersect.md
index d3b2b51b6be..f1eb4738543 100644
--- a/docs/en/sql-reference/statements/select/intersect.md
+++ b/docs/en/sql-reference/statements/select/intersect.md
@@ -7,7 +7,7 @@ sidebar_label: INTERSECT
The `INTERSECT` clause returns only those rows that result from both the first and the second queries. The queries must match the number of columns, order, and type. The result of `INTERSECT` can contain duplicate rows.
-Multiple `INTERSECT` statements are executes left to right if parenthesis are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clause.
+Multiple `INTERSECT` statements are executed left to right if parentheses are not specified. The `INTERSECT` operator has a higher priority than the `UNION` and `EXCEPT` clauses.
``` sql
diff --git a/docs/en/sql-reference/statements/select/join.md b/docs/en/sql-reference/statements/select/join.md
index 1890ff081d8..62d3e9fd69a 100644
--- a/docs/en/sql-reference/statements/select/join.md
+++ b/docs/en/sql-reference/statements/select/join.md
@@ -282,7 +282,7 @@ Each time a query is run with the same `JOIN`, the subquery is run again because
In some cases, it is more efficient to use [IN](../../../sql-reference/operators/in.md) instead of `JOIN`.
-If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is an “external dictionaries” feature that you should use instead of `JOIN`. For more information, see the [External dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
+If you need a `JOIN` for joining with dimension tables (these are relatively small tables that contain dimension properties, such as names for advertising campaigns), a `JOIN` might not be very convenient due to the fact that the right table is re-accessed for every query. For such cases, there is a “dictionaries” feature that you should use instead of `JOIN`. For more information, see the [Dictionaries](../../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) section.
### Memory Limitations
diff --git a/docs/en/sql-reference/statements/set-role.md b/docs/en/sql-reference/statements/set-role.md
index bf998d7841e..e017160623e 100644
--- a/docs/en/sql-reference/statements/set-role.md
+++ b/docs/en/sql-reference/statements/set-role.md
@@ -41,7 +41,7 @@ Purge default roles from a user:
SET DEFAULT ROLE NONE TO user
```
-Set all the granted roles as default excepting some of them:
+Set all the granted roles as default except for specific roles `role1` and `role2`:
``` sql
SET DEFAULT ROLE ALL EXCEPT role1, role2 TO user
diff --git a/docs/en/sql-reference/statements/show.md b/docs/en/sql-reference/statements/show.md
index 87248bb115b..0efad3d460f 100644
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@@ -198,7 +198,7 @@ Result:
## SHOW DICTIONARIES
-Displays a list of [external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
+Displays a list of [Dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md).
``` sql
SHOW DICTIONARIES [FROM ] [LIKE ''] [LIMIT ] [INTO OUTFILE ] [FORMAT ]
diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md
index e9ff4d45c79..c8b104ea91f 100644
--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@@ -281,8 +281,8 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name`
### RESTART REPLICA
-Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed.
-Initialization replication queue based on ZooKeeper date happens in the same way as `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.
+Provides the possibility to reinitialize the ZooKeeper session state for a `ReplicatedMergeTree` table; it compares the current state with ZooKeeper as the source of truth and adds tasks to the ZooKeeper queue if needed.
+Initialization of the replication queue based on ZooKeeper data happens in the same way as for the `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations.
``` sql
SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
diff --git a/docs/en/sql-reference/table-functions/file.md b/docs/en/sql-reference/table-functions/file.md
index f40107aaaca..fc81e7cf649 100644
--- a/docs/en/sql-reference/table-functions/file.md
+++ b/docs/en/sql-reference/table-functions/file.md
@@ -81,6 +81,7 @@ Multiple path components can have globs. For being processed file must exist and
- `?` — Substitutes any single character.
- `{some_string,another_string,yet_another_one}` — Substitutes any of strings `'some_string', 'another_string', 'yet_another_one'`.
- `{N..M}` — Substitutes any number in range from N to M including both borders.
+- `**` - Fetches all files inside the folder recursively.
Constructions with `{}` are similar to the [remote](remote.md) table function.
@@ -119,6 +120,22 @@ Query the data from files named `file000`, `file001`, … , `file999`:
SELECT count(*) FROM file('big_dir/file{0..9}{0..9}{0..9}', 'CSV', 'name String, value UInt32');
```
+**Example**
+
+Query the data from all files inside the `big_dir` directory recursively:
+
+``` sql
+SELECT count(*) FROM file('big_dir/**', 'CSV', 'name String, value UInt32');
+```
+
+**Example**
+
+Query the data from all `file002` files in any folder inside the `big_dir` directory recursively:
+
+``` sql
+SELECT count(*) FROM file('big_dir/**/file002', 'CSV', 'name String, value UInt32');
+```
+
## Virtual Columns
- `_path` — Path to the file.
diff --git a/docs/en/sql-reference/table-functions/index.md b/docs/en/sql-reference/table-functions/index.md
index d09adcd13d6..94b23bc695c 100644
--- a/docs/en/sql-reference/table-functions/index.md
+++ b/docs/en/sql-reference/table-functions/index.md
@@ -39,3 +39,7 @@ You can’t use table functions if the [allow_ddl](../../operations/settings/per
| [s3](../../sql-reference/table-functions/s3.md) | Creates a [S3](../../engines/table-engines/integrations/s3.md)-engine table. |
| [sqlite](../../sql-reference/table-functions/sqlite.md) | Creates a [sqlite](../../engines/table-engines/integrations/sqlite.md)-engine table. |
+:::note
+Only these table functions are enabled in readonly mode:
+null, view, viewIfPermitted, numbers, numbers_mt, generateRandom, values, cluster, clusterAllReplicas
+:::
\ No newline at end of file
diff --git a/docs/en/sql-reference/table-functions/mysql.md b/docs/en/sql-reference/table-functions/mysql.md
index f867cda45bd..de1567c052e 100644
--- a/docs/en/sql-reference/table-functions/mysql.md
+++ b/docs/en/sql-reference/table-functions/mysql.md
@@ -110,5 +110,5 @@ SELECT * FROM mysql('localhost:3306', 'test', 'test', 'bayonet', '123');
**See Also**
- [The ‘MySQL’ table engine](../../engines/table-engines/integrations/mysql.md)
-- [Using MySQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
+- [Using MySQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-mysql)
diff --git a/docs/en/sql-reference/table-functions/odbc.md b/docs/en/sql-reference/table-functions/odbc.md
index f8c46fe44d8..7e13424bc8a 100644
--- a/docs/en/sql-reference/table-functions/odbc.md
+++ b/docs/en/sql-reference/table-functions/odbc.md
@@ -101,5 +101,5 @@ SELECT * FROM odbc('DSN=mysqlconn', 'test', 'test')
## See Also
-- [ODBC external dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
+- [ODBC dictionaries](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-odbc)
- [ODBC table engine](../../engines/table-engines/integrations/odbc.md).
diff --git a/docs/en/sql-reference/table-functions/postgresql.md b/docs/en/sql-reference/table-functions/postgresql.md
index 367edbe9a00..e98869de739 100644
--- a/docs/en/sql-reference/table-functions/postgresql.md
+++ b/docs/en/sql-reference/table-functions/postgresql.md
@@ -130,6 +130,6 @@ CREATE TABLE pg_table_schema_with_dots (a UInt32)
**See Also**
- [The PostgreSQL table engine](../../engines/table-engines/integrations/postgresql.md)
-- [Using PostgreSQL as a source of external dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+- [Using PostgreSQL as a dictionary source](../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/postgresql/)
diff --git a/docs/en/sql-reference/table-functions/s3.md b/docs/en/sql-reference/table-functions/s3.md
index 545037665bb..545a89223bf 100644
--- a/docs/en/sql-reference/table-functions/s3.md
+++ b/docs/en/sql-reference/table-functions/s3.md
@@ -127,6 +127,18 @@ INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-
SELECT name, value FROM existing_table;
```
+The glob `**` can be used for recursive directory traversal. The example below fetches all files from the `my-test-bucket-768` directory recursively:
+
+``` sql
+SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
+The query below gets data from all `test-data.csv.gz` files in any folder inside the `my-test-bucket-768` directory recursively:
+
+``` sql
+SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip');
+```
+
## Partitioned Write
If you specify a `PARTITION BY` expression when inserting data into an `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve read efficiency, as sketched below.
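+
+A sketch of what this looks like (the bucket name, schema, and data are hypothetical):
+
+``` sql
+INSERT INTO FUNCTION
+    s3('https://my-bucket.s3.amazonaws.com/part_{_partition_id}.csv', 'CSV', 'year UInt16, value UInt32')
+PARTITION BY year
+SELECT 2022 AS year, 1 AS value;
+```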
diff --git a/docs/ru/development/browse-code.md b/docs/ru/development/browse-code.md
deleted file mode 100644
index 640b1ac3693..00000000000
--- a/docs/ru/development/browse-code.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-slug: /ru/development/browse-code
-sidebar_position: 72
-sidebar_label: "Browsing ClickHouse Code"
----
-
-
-# Browsing the ClickHouse Code {#navigatsiia-po-kodu-clickhouse}
-
-For online code navigation you can use **Woboq**, available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It offers convenient navigation between source files, semantic highlighting, tooltips, indexing, and search. The code snapshot is updated daily.
-
-You can also browse the sources on [GitHub](https://github.com/ClickHouse/ClickHouse).
-
-If you are wondering which IDE to choose for working with ClickHouse, we recommend CLion, QT Creator, VSCode, or KDevelop (with some caveats). You can use your favorite IDE; Vim and Emacs also count.
diff --git a/docs/ru/engines/table-engines/integrations/kafka.md b/docs/ru/engines/table-engines/integrations/kafka.md
index 37fc902e777..a5f091e1b23 100644
--- a/docs/ru/engines/table-engines/integrations/kafka.md
+++ b/docs/ru/engines/table-engines/integrations/kafka.md
@@ -87,14 +87,15 @@ SETTINGS
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects. If possible, switch old projects to the method described above.
+:::note "Attention"
+Do not use this method in new projects. If possible, switch old projects to the method described above.
+:::
``` sql
Kafka(kafka_broker_list, kafka_topic_list, kafka_group_name, kafka_format
[, kafka_row_delimiter, kafka_schema, kafka_num_consumers, kafka_skip_broken_messages])
```
- :::
+
## Description {#opisanie}
diff --git a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
index aa16113192e..86a275767a0 100644
--- a/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/aggregatingmergetree.md
@@ -39,9 +39,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects and, if possible, switch old projects to the method described above.
- :::
+:::note "Attention"
+Do not use this method in new projects and, if possible, switch old projects to the method described above.
+:::
+
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
diff --git a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
index ecaaa6b8417..72b4725c6ed 100644
--- a/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/collapsingmergetree.md
@@ -43,9 +43,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects and, if possible, switch old projects to the method described above.
- :::
+:::note "Attention"
+Do not use this method in new projects and, if possible, switch old projects to the method described above.
+:::
+
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
@@ -59,7 +60,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
- `sign` — Name of the column with the row type: `1` is a "state" row, `-1` is a "cancel" row.
- The column data type is `Int8`.
+    The column data type is `Int8`.
diff --git a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
index 818f85f7e37..324a3fd1633 100644
--- a/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/graphitemergetree.md
@@ -55,9 +55,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects and, if possible, switch old projects to the method described above.
- :::
+:::note "Attention"
+Do not use this method in new projects and, if possible, switch old projects to the method described above.
+:::
+
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
diff --git a/docs/ru/engines/table-engines/mergetree-family/mergetree.md b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
index e01e0006b87..f024d5f1985 100644
--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@@ -115,9 +115,10 @@ ENGINE MergeTree() PARTITION BY toYYYYMM(EventDate) ORDER BY (CounterID, EventDa
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects and, if possible, switch old projects to the method described above.
- :::
+:::note "Attention"
+Do not use this method in new projects and, if possible, switch old projects to the method described above.
+:::
+
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
diff --git a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
index 0d9d268fa46..7b69927e161 100644
--- a/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/summingmergetree.md
@@ -42,9 +42,10 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Deprecated method for creating a table
- :::note "Attention"
- Do not use this method in new projects and, if possible, switch old projects to the method described above.
- :::
+:::note "Attention"
+Do not use this method in new projects and, if possible, switch old projects to the method described above.
+:::
+
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
diff --git a/docs/ru/getting-started/tutorial.md b/docs/ru/getting-started/tutorial.md
index b1abc787c5d..be6d138669a 100644
--- a/docs/ru/getting-started/tutorial.md
+++ b/docs/ru/getting-started/tutorial.md
@@ -488,7 +488,7 @@ FORMAT TSV
max_insert_block_size 1048576 0 "The maximum block size for insertion, if we control the creation of blocks for insertion."
```
-Optionally you can [OPTIMIZE](../sql-reference/statements/misc.md#misc_operations-optimize) the tables after import. Tables that are configured with an engine from MergeTree-family always do merges of data parts in the background to optimize data storage (or at least check if it makes sense). These queries force the table engine to do storage optimization right now instead of some time later:
+Optionally you can [OPTIMIZE](../sql-reference/statements/optimize.md) the tables after import. Tables configured with a MergeTree-family engine always merge data parts in the background to optimize data storage (or at least check whether it makes sense). These queries force the table engine to perform storage optimization right now instead of some time later:
``` bash
clickhouse-client --query "OPTIMIZE TABLE tutorial.hits_v1 FINAL"
diff --git a/docs/ru/interfaces/third-party/client-libraries.md b/docs/ru/interfaces/third-party/client-libraries.md
index ce9f94d5d74..b000208b53b 100644
--- a/docs/ru/interfaces/third-party/client-libraries.md
+++ b/docs/ru/interfaces/third-party/client-libraries.md
@@ -34,6 +34,7 @@ sidebar_label: "Клиентские библиотеки от сторонни
- [node-clickhouse](https://github.com/apla/node-clickhouse)
- [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
- [clickhouse-client](https://github.com/depyronick/clickhouse-client)
+ - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
diff --git a/docs/ru/operations/access-rights.md b/docs/ru/operations/access-rights.md
index 987f7fecc55..7f4e06205be 100644
--- a/docs/ru/operations/access-rights.md
+++ b/docs/ru/operations/access-rights.md
@@ -64,7 +64,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE USER](../sql-reference/statements/create/user.md#create-user-statement)
- [ALTER USER](../sql-reference/statements/alter/user.md)
-- [DROP USER](../sql-reference/statements/misc.md#drop-user-statement)
+- [DROP USER](../sql-reference/statements/drop.md#drop-user)
- [SHOW CREATE USER](../sql-reference/statements/show.md#show-create-user-statement)
### Applying Settings {#access-control-settings-applying}
@@ -91,9 +91,9 @@ ClickHouse поддерживает управление доступом на
- [CREATE ROLE](../sql-reference/statements/create/index.md#create-role-statement)
- [ALTER ROLE](../sql-reference/statements/alter/role.md)
-- [DROP ROLE](../sql-reference/statements/misc.md#drop-role-statement)
-- [SET ROLE](../sql-reference/statements/misc.md#set-role-statement)
-- [SET DEFAULT ROLE](../sql-reference/statements/misc.md#set-default-role-statement)
+- [DROP ROLE](../sql-reference/statements/drop.md#drop-role)
+- [SET ROLE](../sql-reference/statements/set-role.md)
+- [SET DEFAULT ROLE](../sql-reference/statements/set-role.md#set-default-role)
- [SHOW CREATE ROLE](../sql-reference/statements/show.md#show-create-role-statement)
Privileges can be assigned to a role using the [GRANT](../sql-reference/statements/grant.md) statement. To revoke privileges from a role, ClickHouse provides the [REVOKE](../sql-reference/statements/revoke.md) statement.
@@ -106,7 +106,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE ROW POLICY](../sql-reference/statements/create/index.md#create-row-policy-statement)
- [ALTER ROW POLICY](../sql-reference/statements/alter/row-policy.md)
-- [DROP ROW POLICY](../sql-reference/statements/misc.md#drop-row-policy-statement)
+- [DROP ROW POLICY](../sql-reference/statements/drop.md#drop-row-policy)
- [SHOW CREATE ROW POLICY](../sql-reference/statements/show.md#show-create-row-policy-statement)
@@ -118,7 +118,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE SETTINGS PROFILE](../sql-reference/statements/create/index.md#create-settings-profile-statement)
- [ALTER SETTINGS PROFILE](../sql-reference/statements/alter/settings-profile.md)
-- [DROP SETTINGS PROFILE](../sql-reference/statements/misc.md#drop-settings-profile-statement)
+- [DROP SETTINGS PROFILE](../sql-reference/statements/drop.md#drop-settings-profile)
- [SHOW CREATE SETTINGS PROFILE](../sql-reference/statements/show.md#show-create-settings-profile-statement)
@@ -132,7 +132,7 @@ ClickHouse поддерживает управление доступом на
- [CREATE QUOTA](../sql-reference/statements/create/index.md#create-quota-statement)
- [ALTER QUOTA](../sql-reference/statements/alter/quota.md)
-- [DROP QUOTA](../sql-reference/statements/misc.md#drop-quota-statement)
+- [DROP QUOTA](../sql-reference/statements/drop.md#drop-quota)
- [SHOW CREATE QUOTA](../sql-reference/statements/show.md#show-create-quota-statement)
diff --git a/docs/ru/operations/server-configuration-parameters/settings.md b/docs/ru/operations/server-configuration-parameters/settings.md
index e1fb30ced0d..bffa3c39a60 100644
--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@@ -624,6 +624,7 @@ ClickHouse поддерживает динамическое изменение
- `http_proxy` - Configures an HTTP proxy for sending crash reports.
- `debug` - Puts the Sentry client library into debug mode.
- `tmp_path` - Filesystem path for temporary storage of crash report state before it is sent to the Sentry server.
+- `environment` - An arbitrary name for the environment in which the ClickHouse server runs; it is mentioned in every crash report. The default value is `test` or `prod` depending on the ClickHouse version.
**Recommended settings**
diff --git a/docs/ru/operations/settings/index.md b/docs/ru/operations/settings/index.md
index 4e055405847..6806aea5135 100644
--- a/docs/ru/operations/settings/index.md
+++ b/docs/ru/operations/settings/index.md
@@ -24,7 +24,7 @@ slug: /ru/operations/settings/
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- Specify the required settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of a SELECT query. These settings apply only within that query and are reset to the previous or default value after it finishes.
+ - Specify the required settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select-query) clause of a SELECT query. These settings apply only within that query and are reset to the previous or default value after it finishes.
Settings that can only be set in the server configuration file are not covered in this section.
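+
+For example, the last method looks like this (a minimal sketch):
+
+``` sql
+SELECT sum(number) FROM numbers(1000000) SETTINGS max_threads = 2;
+```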
diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md
index 3d765b03d58..58894611386 100644
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@@ -479,7 +479,7 @@ SELECT * FROM table_with_enum_column_for_tsv_insert;
Enables or disables inserting [default values](../../sql-reference/statements/create/table.md#create-default-values) instead of [NULL](../../sql-reference/syntax.md#null-literal) into columns that do not allow [storing NULL](../../sql-reference/data-types/nullable.md#data_type-nullable).
If a column does not allow storing `NULL` and this setting is disabled, inserting `NULL` raises an exception. If the column allows storing `NULL`, `NULL` values are inserted regardless of this setting.
-This setting applies to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select) queries. The `SELECT` subqueries may be combined with `UNION ALL`.
+This setting applies to [INSERT ... SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select) queries. The `SELECT` subqueries may be combined with `UNION ALL`.
Possible values:
@@ -1986,7 +1986,7 @@ SELECT * FROM test_table
## optimize_throw_if_noop {#setting-optimize_throw_if_noop}
-Enables or disables throwing an exception when an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query does not perform a merge.
+Enables or disables throwing an exception when an [OPTIMIZE](../../sql-reference/statements/optimize.md) query does not perform a merge.
By default, `OPTIMIZE` completes successfully even if it did nothing. This setting lets you distinguish such cases and raises an exception with an explanatory message.
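+
+For instance (a sketch; the table name is hypothetical):
+
+``` sql
+SET optimize_throw_if_noop = 1;
+OPTIMIZE TABLE t; -- throws an explanatory exception if no merge was performed
+```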
@@ -3258,12 +3258,6 @@ SELECT * FROM test2;
Default value: `64`.
-## temporary_live_view_timeout {#temporary-live-view-timeout}
-
-Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) is deleted.
-
-Default value: `5`.
-
## periodic_live_view_refresh {#periodic-live-view-refresh}
Sets the time in seconds after which a [LIVE VIEW](../../sql-reference/statements/create/view.md#live-view) with periodic refresh enabled is refreshed.
diff --git a/docs/ru/operations/system-tables/columns.md b/docs/ru/operations/system-tables/columns.md
index 818da3d6ac6..cade6f0a557 100644
--- a/docs/ru/operations/system-tables/columns.md
+++ b/docs/ru/operations/system-tables/columns.md
@@ -5,7 +5,7 @@ slug: /ru/operations/system-tables/columns
Contains information about the columns of all tables.
-Using this table, you can get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/misc.md#misc-describe-table) query, but for many tables at once.
+Using this table, you can get information similar to the [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md) query, but for many tables at once.
Columns of [temporary tables](../../sql-reference/statements/create/table.md#temporary-tables) appear in `system.columns` only in the sessions where those tables were created. The `database` field for such columns is empty.
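+
+For example (a minimal sketch):
+
+``` sql
+SELECT database, table, name, type
+FROM system.columns
+WHERE database = 'system' AND table = 'disks';
+```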
diff --git a/docs/ru/operations/system-tables/crash-log.md b/docs/ru/operations/system-tables/crash-log.md
index 4ca8be5a199..68148fec6bd 100644
--- a/docs/ru/operations/system-tables/crash-log.md
+++ b/docs/ru/operations/system-tables/crash-log.md
@@ -7,8 +7,8 @@ slug: /ru/operations/system-tables/crash-log
Columns:
-- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — Event date.
-- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Event time.
+- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event date.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Event time with nanoseconds.
- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — Number of the signal received by the thread.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Thread identifier.
diff --git a/docs/ru/operations/system-tables/disks.md b/docs/ru/operations/system-tables/disks.md
index fc4c370cc1a..1d540b277d1 100644
--- a/docs/ru/operations/system-tables/disks.md
+++ b/docs/ru/operations/system-tables/disks.md
@@ -11,5 +11,6 @@ Cодержит информацию о дисках, заданных в [ко
- `path` ([String](../../sql-reference/data-types/string.md)) — Path to the mount point in the file system.
- `free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free disk space in bytes.
- `total_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Disk size in bytes.
+- `unreserved_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Free space not taken by reservations (`free_space` minus the size of space reserved for merges, inserts, and other disk write operations currently running).
- `keep_free_space` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The amount of disk space that should remain free, in bytes. Defined by the `keep_free_space_bytes` parameter of the disk configuration.
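+
+A quick way to inspect these values (a minimal sketch):
+
+``` sql
+SELECT name, free_space, unreserved_space, keep_free_space
+FROM system.disks;
+```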
diff --git a/docs/ru/operations/system-tables/mutations.md b/docs/ru/operations/system-tables/mutations.md
index 20e4ebfdaf1..bb0bd44ed7a 100644
--- a/docs/ru/operations/system-tables/mutations.md
+++ b/docs/ru/operations/system-tables/mutations.md
@@ -15,7 +15,7 @@ slug: /ru/operations/system-tables/mutations
- `command` ([String](../../sql-reference/data-types/string.md)) — The mutation command (the part of the query after `ALTER TABLE [db.]table`).
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the mutation was created.
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the mutation was created.
- `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — For mutations of replicated tables, the array contains partition IDs (one record per partition). For mutations of non-replicated tables, the array is empty.
@@ -39,7 +39,7 @@ slug: /ru/operations/system-tables/mutations
- `latest_failed_part` ([String](../../sql-reference/data-types/string.md)) — Name of the most recent part whose mutation failed.
-- `latest_fail_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time of the most recent mutation failure.
+- `latest_fail_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time of the most recent mutation failure.
- `latest_fail_reason` ([String](../../sql-reference/data-types/string.md)) — Reason for the most recent mutation failure.
diff --git a/docs/ru/operations/system-tables/replication_queue.md b/docs/ru/operations/system-tables/replication_queue.md
index 25de174e98f..60d42133153 100644
--- a/docs/ru/operations/system-tables/replication_queue.md
+++ b/docs/ru/operations/system-tables/replication_queue.md
@@ -29,7 +29,7 @@ slug: /ru/operations/system-tables/replication_queue
- `MUTATE_PART` — Apply one or several mutations to a part.
- `ALTER_METADATA` — Apply table structure changes resulting from queries with `ALTER`.
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was submitted for execution.
- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of replicas waiting for the task to complete with confirmation of completion. This column is only relevant for the `GET_PARTS` task.
@@ -47,13 +47,13 @@ slug: /ru/operations/system-tables/replication_queue
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — Message about the most recent error, if any.
-- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time of the last attempt to execute the task.
+- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time of the last attempt to execute the task.
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — The number of postponed tasks.
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — The reason the task was postponed.
-- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
+- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Date and time when the task was last postponed.
- `merge_type` ([String](../../sql-reference/data-types/string.md)) — Type of the current merge. Empty if it is a mutation.
diff --git a/docs/ru/sql-reference/data-types/date.md b/docs/ru/sql-reference/data-types/date.md
index 7254b82f461..185fe28d567 100644
--- a/docs/ru/sql-reference/data-types/date.md
+++ b/docs/ru/sql-reference/data-types/date.md
@@ -6,7 +6,7 @@ sidebar_label: Date
# Date {#data-type-date}
-A date. Stored as two bytes containing the (unsigned) number of days since 1970-01-01. Allows storing values from just after the start of the Unix epoch up to an upper bound defined by a compile-time constant (currently up to the year 2149; the last fully supported year is 2148).
+A date. Stored as two bytes containing the (unsigned) number of days since 1970-01-01. Allows storing values from just after the start of the Unix epoch up to an upper bound defined by a compile-time constant (currently up to the year 2106; the last fully supported year is 2105).
Value range: \[1970-01-01, 2149-06-06\].
diff --git a/docs/ru/sql-reference/data-types/date32.md b/docs/ru/sql-reference/data-types/date32.md
index fcb7d688c20..958b8e9763e 100644
--- a/docs/ru/sql-reference/data-types/date32.md
+++ b/docs/ru/sql-reference/data-types/date32.md
@@ -6,7 +6,7 @@ sidebar_label: Date32
# Date32 {#data_type-datetime32}
-A date. Supports the same date range as [Datetime64](../../sql-reference/data-types/datetime64.md). Stored as four bytes, corresponding to the number of days from 1900-01-01 through 2299-12-31.
+A date. Supports the same date range as [DateTime64](../../sql-reference/data-types/datetime64.md). Stored as four bytes, corresponding to the number of days from 1900-01-01 through 2299-12-31.
**Example**
diff --git a/docs/ru/sql-reference/functions/date-time-functions.md b/docs/ru/sql-reference/functions/date-time-functions.md
index 897c4b3e86a..f430f5cae51 100644
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@@ -272,15 +272,9 @@ SELECT toUnixTimestamp('2017-11-05 08:07:47', 'Asia/Tokyo') AS unix_timestamp;
Behavior for
* `enable_extended_results_for_datetime_functions = 0`: Functions `toStartOf*`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime`. Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime`. Although these functions can accept `Date32` or `DateTime64` values as arguments, passing an argument outside the normal range (1970 to 2148 for `Date`, and `1970-01-01 00:00:00` to `2106-02-07 08:28:15` for `DateTime`) produces an incorrect result.
-If the argument value is outside the normal range:
- * `1970-01-01 (00:00:00)` is returned for moments in time before 1970,
- * `2106-02-07 08:28:15` is used as the argument if the given argument exceeds this value and the return type is `DateTime`,
- * `2149-06-06` is used as the argument if the given argument exceeds this value and the return type is `Date`,
- * `2149-05-31` is the result of `toLastDayOfMonth` for any argument greater than `2149-05-31`.
* `enable_extended_results_for_datetime_functions = 1`:
 * Functions `toStartOfYear`, `toStartOfISOYear`, `toStartOfQuarter`, `toStartOfMonth`, `toStartOfWeek`, `toLastDayOfMonth`, `toMonday` return `Date` or `DateTime` if their argument is a `Date` or `DateTime`, and `Date32` or `DateTime64` if their argument is a `Date32` or `DateTime64`.
 * Functions `toStartOfDay`, `toStartOfHour`, `toStartOfFifteenMinutes`, `toStartOfTenMinutes`, `toStartOfFiveMinutes`, `toStartOfMinute`, `timeSlot` return `DateTime` if their argument is a `Date` or `DateTime`, and `DateTime64` if their argument is a `Date32` or `DateTime64`.
-
:::
## toStartOfYear {#tostartofyear}
@@ -321,20 +315,20 @@ SELECT toStartOfISOYear(toDate('2017-01-01')) AS ISOYear20170101;
Rounds a date or date-with-time down to the last day of the month.
Returns the date.
-If `toLastDayOfMonth` is called with a `Date` argument greater than 2149-05-31, the result is computed from the argument 2149-05-31.
+:::note "Attention"
+The return value for incorrect dates is implementation-specific. ClickHouse may return the zero date, throw an exception, or perform a "natural" overflow of dates between months.
+:::
## toMonday {#tomonday}
Rounds a date or date-with-time down to the nearest Monday.
-Special case: for the dates `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04`, the result is `1970-01-01`.
Returns the date.
## toStartOfWeek(t[,mode]) {#tostartofweek}
Rounds a date or date-with-time down to the nearest Sunday or Monday, according to mode.
Returns the date.
-Special case: for the dates `1970-01-01`, `1970-01-02`, `1970-01-03` and `1970-01-04` (and `1970-01-05` if `mode` is `1`), the result is `1970-01-01`.
-The `mode` argument works exactly like the mode argument of [toWeek()](#toweek). If the mode argument is omitted, mode 0 is used.
+The mode argument works exactly like the mode argument of [toWeek()](#toweek). If the mode argument is omitted, mode 0 is used.
## toStartOfDay {#tostartofday}
@@ -608,7 +602,7 @@ date_trunc(unit, value[, timezone])
- Date and time truncated to the specified part.
-Type: [Datetime](../../sql-reference/data-types/datetime.md).
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Examples**
@@ -721,9 +715,9 @@ date_diff('unit', startdate, enddate, [timezone])
- `quarter`
- `year`
-- `startdate` — The first date or date-with-time, which is subtracted from `enddate`. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `startdate` — The first date or date-with-time, which is subtracted from `enddate`. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
-- `enddate` — The second date or date-with-time, from which `startdate` is subtracted. [Date](../../sql-reference/data-types/date.md) or [DateTime](../../sql-reference/data-types/datetime.md).
+- `enddate` — The second date or date-with-time, from which `startdate` is subtracted. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — [Time zone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) (optional). If specified, it applies to both `startdate` and `enddate`. If not specified, the time zones of `startdate` and `enddate` are used; if they differ, the result is undefined. [String](../../sql-reference/data-types/string.md).
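+
+For instance (a minimal sketch):
+
+``` sql
+SELECT date_diff('day', toDate('2022-01-01'), toDate('2022-02-01')) AS days; -- returns 31
+```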
@@ -919,7 +913,7 @@ now([timezone])
- Current date and time.
-Type: [Datetime](../../sql-reference/data-types/datetime.md).
+Type: [DateTime](../../sql-reference/data-types/datetime.md).
**Example**
@@ -975,8 +969,7 @@ SELECT now('Europe/Moscow');
## timeSlots(StartTime, Duration,\[, Size\]) {#timeslotsstarttime-duration-size}
For an interval starting at `StartTime` and lasting `Duration` seconds, returns an array of moments in time that are multiples of `Size`. The `Size` parameter is optional and defaults to 1800 seconds (30 minutes).
-This function can be used, for example, to analyze the number of page views during the corresponding session.
-The `StartTime` argument can be of type `DateTime` or `DateTime64`. For `DateTime`, the `Duration` and `Size` arguments must be of type `UInt32`; for `DateTime64` they must be `Decimal64`.
+
Returns an array of DateTime/DateTime64 values (the type matches the type of `StartTime`). For DateTime64, the scale of the returned value can differ from the scale of the `StartTime` argument; the result takes the largest scale among all arguments.
Usage example:
@@ -1053,6 +1046,7 @@ formatDateTime(Time, Format[, Timezone])
| %w | weekday number, with Sunday as 0 (0-6) | 2 |
| %y | Year, last two digits (00-99) | 18 |
| %Y | Year, four digits | 2018 |
+| %z | Time offset from UTC as +HHMM or -HHMM | -0500 |
| %% | the % character | % |
**Example**
@@ -1084,7 +1078,7 @@ dateName(date_part, date)
**Arguments**
- `date_part` — Part of the date. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
-- `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
+- `date` — Date. [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).
- `timezone` — Time zone. Optional argument. [String](../../sql-reference/data-types/string.md).
**Returned value**
@@ -1132,8 +1126,7 @@ SELECT FROM_UNIXTIME(423543535);
└──────────────────────────┘
```
-If there are two arguments, where the first is of type [Integer](../../sql-reference/data-types/int-uint.md) or [DateTime](../../sql-reference/data-types/datetime.md) and the second is a constant format string, the function works the same as [formatDateTime](#formatdatetime) and returns a [String](../../sql-reference/data-types/string.md#string) value.
-
+If there are two or three arguments, where the first is of type [Integer](../../sql-reference/data-types/int-uint.md), [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md), the second is a constant format string, and the third is a constant time-zone string, the function works the same as [formatDateTime](#formatdatetime) and returns a [String](../../sql-reference/data-types/string.md#string) value.
Query:
diff --git a/docs/ru/sql-reference/functions/other-functions.md b/docs/ru/sql-reference/functions/other-functions.md
index 5c8584cd2a0..af21ccd6bed 100644
--- a/docs/ru/sql-reference/functions/other-functions.md
+++ b/docs/ru/sql-reference/functions/other-functions.md
@@ -568,7 +568,7 @@ ORDER BY c DESC
``` sql
SELECT
- transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s,
+ transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s,
count() AS c
FROM test.hits
GROUP BY domain(Referer)
diff --git a/docs/ru/sql-reference/index.md b/docs/ru/sql-reference/index.md
index f55c5e859f1..95e2d6a3918 100644
--- a/docs/ru/sql-reference/index.md
+++ b/docs/ru/sql-reference/index.md
@@ -10,5 +10,4 @@ sidebar_position: 28
- [INSERT INTO](statements/insert-into.md)
- [CREATE](statements/create/index.md)
- [ALTER](statements/alter/index.md#query_language_queries_alter)
-- [Other kinds of queries](statements/misc.md)
diff --git a/docs/ru/sql-reference/operators/in.md b/docs/ru/sql-reference/operators/in.md
index 2b3d87a877f..fa679b890a7 100644
--- a/docs/ru/sql-reference/operators/in.md
+++ b/docs/ru/sql-reference/operators/in.md
@@ -122,9 +122,9 @@ FROM t_null
There are two variants of IN with subqueries (and similarly for JOIN): the ordinary `IN` / `JOIN` and `GLOBAL IN` / `GLOBAL JOIN`. They differ in how they are executed during distributed query processing.
- :::note "Attention"
- Keep in mind that the algorithms described below may work differently depending on the `distributed_product_mode` [setting](../../operations/settings/settings.md).
- :::
+:::note "Attention"
+Keep in mind that the algorithms described below may work differently depending on the `distributed_product_mode` [setting](../../operations/settings/settings.md).
+:::
With the ordinary IN, the query is sent to remote servers, and each of them runs the subqueries in the `IN` / `JOIN` clauses.
With `GLOBAL IN` / `GLOBAL JOIN`, all the subqueries for `GLOBAL IN` / `GLOBAL JOIN` are run first, and the results are collected into temporary tables. Each temporary table is then sent to every remote server, and the queries there are run using this transmitted temporary data.
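+
+A sketch of the difference (the table names are hypothetical):
+
+``` sql
+-- Ordinary IN: the subquery is executed independently on every remote server.
+SELECT count() FROM distributed_table
+WHERE UserID IN (SELECT UserID FROM local_table);
+
+-- GLOBAL IN: the subquery runs once; its result is shipped to every remote server.
+SELECT count() FROM distributed_table
+WHERE UserID GLOBAL IN (SELECT UserID FROM distributed_table);
+```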
diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md
index c337b64f1d6..a8ace213075 100644
--- a/docs/ru/sql-reference/statements/alter/column.md
+++ b/docs/ru/sql-reference/statements/alter/column.md
@@ -128,7 +128,7 @@ COMMENT COLUMN [IF EXISTS] name 'Text comment'
Each column can have only one comment. Running the query replaces any existing comment with the new one.
-Comments can be viewed in the `comment_expression` column of the [DESCRIBE TABLE](../misc.md#misc-describe-table) query.
+Comments can be viewed in the `comment_expression` column of the [DESCRIBE TABLE](../describe-table.md) query.
Example:
@@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp;
Columns that are part of the primary key or the sampling key (in general, of the `ENGINE` expression) cannot be deleted. Changing the type of a primary-key column is only possible if the change does not alter the data (for example, adding a value to an Enum, or changing the type from `DateTime` to `UInt32`).
-If the `ALTER` query is not sufficient for the table change you need, you can create a new table, copy the data into it with an [INSERT SELECT](../insert-into.md#insert_query_insert-select) query, swap the tables with a [RENAME](../misc.md#misc_operations-rename) query, and drop the old table. As an alternative to `INSERT SELECT`, you can use the [clickhouse-copier](../../../sql-reference/statements/alter/index.md) tool.
+If the `ALTER` query is not sufficient for the table change you need, you can create a new table, copy the data into it with an [INSERT SELECT](../insert-into.md#inserting-the-results-of-select) query, swap the tables with a [RENAME](../rename.md#rename-table) query, and drop the old table. As an alternative to `INSERT SELECT`, you can use the [clickhouse-copier](../../../sql-reference/statements/alter/index.md) tool.
The `ALTER` query blocks all reads and writes for the table. That is, if a long `SELECT` is running when the `ALTER` query is issued, the `ALTER` waits for it to finish, and all new queries to the same table wait while this `ALTER` runs.
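+
+A sketch of that workaround (the table names and schema are hypothetical):
+
+``` sql
+CREATE TABLE new_table (id UInt64, s String) ENGINE = MergeTree ORDER BY id;
+INSERT INTO new_table SELECT id, s FROM old_table;
+RENAME TABLE old_table TO old_table_backup, new_table TO old_table;
+DROP TABLE old_table_backup;
+```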
diff --git a/docs/ru/sql-reference/statements/check-table.md b/docs/ru/sql-reference/statements/check-table.md
index 77d246b631e..633c3899006 100644
--- a/docs/ru/sql-reference/statements/check-table.md
+++ b/docs/ru/sql-reference/statements/check-table.md
@@ -1,7 +1,7 @@
---
slug: /ru/sql-reference/statements/check-table
sidebar_position: 41
-sidebar_label: CHECK
+sidebar_label: CHECK TABLE
---
# CHECK TABLE Statement {#check-table}
diff --git a/docs/ru/sql-reference/statements/create/role.md b/docs/ru/sql-reference/statements/create/role.md
index bd1141be4c5..1aa222d4de1 100644
--- a/docs/ru/sql-reference/statements/create/role.md
+++ b/docs/ru/sql-reference/statements/create/role.md
@@ -17,13 +17,13 @@ CREATE ROLE [IF NOT EXISTS | OR REPLACE] name1 [ON CLUSTER cluster_name1] [, nam
## Managing Roles {#managing-roles}
-A user can be assigned several roles. Users can apply the assigned roles in arbitrary combinations using the [SET ROLE](../misc.md#set-role-statement) statement. The final scope of privileges is the combination of all privileges of all applied roles. If a user also has privileges granted directly to their account, these are added to the privileges granted through roles.
+A user can be assigned several roles. Users can apply the assigned roles in arbitrary combinations using the [SET ROLE](../set-role.md) statement. The final scope of privileges is the combination of all privileges of all applied roles. If a user also has privileges granted directly to their account, these are added to the privileges granted through roles.
-Default roles are applied when the user logs in. Default roles can be set with the [SET DEFAULT ROLE](../misc.md#set-default-role-statement) or [ALTER USER](../alter/index.md#alter-user-statement) statements.
+Default roles are applied when the user logs in. Default roles can be set with the [SET DEFAULT ROLE](../set-role.md#set-default-role) or [ALTER USER](../alter/index.md#alter-user-statement) statements.
To revoke a role, use the [REVOKE](../../../sql-reference/statements/revoke.md) statement.
-To delete a role, use the [DROP ROLE](../misc.md#drop-role-statement) statement. A deleted role is automatically revoked from all users it was assigned to.
+To delete a role, use the [DROP ROLE](../drop.md#drop-role) statement. A deleted role is automatically revoked from all users it was assigned to.
## Examples {#create-role-examples}
diff --git a/docs/ru/sql-reference/statements/create/view.md b/docs/ru/sql-reference/statements/create/view.md
index 573db8938b2..6cbd4c6a30c 100644
--- a/docs/ru/sql-reference/statements/create/view.md
+++ b/docs/ru/sql-reference/statements/create/view.md
@@ -156,23 +156,6 @@ SELECT * FROM [db.]live_view WHERE ...
To force a refresh of a LIVE view, use the `ALTER LIVE VIEW [db.]table_name REFRESH` query.
-### WITH TIMEOUT Clause {#live-view-with-timeout}
-
-A LIVE view created with the `WITH TIMEOUT` parameter is deleted automatically after the specified number of seconds have passed since the last [WATCH](../../../sql-reference/statements/watch.md) query was applied to the view.
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
-```
-
-If the time interval is not specified, the value of the [temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout) setting is used.
-
-**Example:**
-
-```sql
-CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
-```
-
### WITH REFRESH Clause {#live-view-with-refresh}
A LIVE view created with the `WITH REFRESH` parameter is refreshed automatically at the specified intervals, starting from the moment of the last refresh.
@@ -202,20 +185,6 @@ WATCH lv;
└─────────────────────┴──────────┘
```
-The `WITH TIMEOUT` and `WITH REFRESH` parameters can be combined using `AND`.
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
-```
-
-**Example:**
-
-```sql
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
-```
-
-After 15 seconds the view is deleted automatically if there is no active `WATCH` query.
-
```sql
WATCH lv;
```
diff --git a/docs/ru/sql-reference/statements/describe-table.md b/docs/ru/sql-reference/statements/describe-table.md
index 73b4278352a..14f97af1dd5 100644
--- a/docs/ru/sql-reference/statements/describe-table.md
+++ b/docs/ru/sql-reference/statements/describe-table.md
@@ -1,7 +1,7 @@
---
slug: /ru/sql-reference/statements/describe-table
sidebar_position: 42
-sidebar_label: DESCRIBE
+sidebar_label: DESCRIBE TABLE
---
# DESCRIBE TABLE {#misc-describe-table}
diff --git a/docs/ru/sql-reference/statements/grant.md b/docs/ru/sql-reference/statements/grant.md
index 79e3006d4ad..7c281634c98 100644
--- a/docs/ru/sql-reference/statements/grant.md
+++ b/docs/ru/sql-reference/statements/grant.md
@@ -221,7 +221,7 @@ GRANT SELECT(x,y) ON db.table TO john WITH GRANT OPTION
The absence of privileges for a user or role is displayed as the [NONE](#grant-none) privilege.
-Some queries require a certain set of privileges. For example, the [RENAME](misc.md#misc_operations-rename) query requires the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
+Some queries require a certain set of privileges. For example, the [RENAME](rename.md#rename-table) query requires the following privileges: `SELECT`, `CREATE TABLE`, `INSERT` and `DROP TABLE`.
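+
+For instance (a sketch; the user, database, and table names are hypothetical):
+
+``` sql
+GRANT SELECT, CREATE TABLE, INSERT, DROP TABLE ON db.* TO john;
+-- john can now run:
+RENAME TABLE db.t1 TO db.t2;
+```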
### SELECT {#grant-select}
@@ -309,7 +309,7 @@ GRANT INSERT(x,y) ON db.table TO john
### CREATE {#grant-create}
-Allows executing the [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](misc.md#attach) DDL queries according to the following hierarchy of privileges:
+Allows executing the [CREATE](../../sql-reference/statements/create/index.md) and [ATTACH](attach.md) DDL queries according to the following hierarchy of privileges:
- `CREATE`. Уровень: `GROUP`
- `CREATE DATABASE`. Уровень: `DATABASE`
@@ -324,7 +324,7 @@ GRANT INSERT(x,y) ON db.table TO john
### DROP {#grant-drop}
-Allows executing the [DROP](misc.md#drop) and [DETACH](misc.md#detach-statement) queries according to the following hierarchy of privileges:
+Allows executing the [DROP](drop.md) and [DETACH](detach.md) queries according to the following hierarchy of privileges:
- `DROP`. Уровень: `GROUP`
- `DROP DATABASE`. Уровень: `DATABASE`
@@ -340,7 +340,7 @@ GRANT INSERT(x,y) ON db.table TO john
### OPTIMIZE {#grant-optimize}
-Allows executing [OPTIMIZE TABLE](misc.md#misc_operations-optimize) queries.
+Allows executing [OPTIMIZE TABLE](optimize.md) queries.
Level: `TABLE`.
diff --git a/docs/ru/sql-reference/statements/insert-into.md b/docs/ru/sql-reference/statements/insert-into.md
index 573b8d39926..4fa6ac4ce66 100644
--- a/docs/ru/sql-reference/statements/insert-into.md
+++ b/docs/ru/sql-reference/statements/insert-into.md
@@ -95,7 +95,7 @@ INSERT INTO t FORMAT TabSeparated
If the table has [constraints](../../sql-reference/statements/create/table.md#constraints) declared, they are checked for every inserted row. If any constraint is not satisfied for at least one row, the query is stopped, as sketched below.
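+
+A minimal illustration (the table name and constraint are hypothetical):
+
+``` sql
+CREATE TABLE t (x UInt8, CONSTRAINT c CHECK x < 10) ENGINE = MergeTree ORDER BY x;
+INSERT INTO t VALUES (5);  -- OK
+INSERT INTO t VALUES (42); -- fails: constraint c is violated
+```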
-### Inserting the results of `SELECT` {#insert_query_insert-select}
+### Inserting the results of `SELECT` {#inserting-the-results-of-select}
**Syntax**
diff --git a/docs/ru/sql-reference/statements/misc.md b/docs/ru/sql-reference/statements/misc.md
deleted file mode 100644
index 437215f20ce..00000000000
--- a/docs/ru/sql-reference/statements/misc.md
+++ /dev/null
@@ -1,21 +0,0 @@
----
-slug: /ru/sql-reference/statements/misc
-sidebar_position: 41
----
-
-# Other Kinds of Queries {#prochie-vidy-zaprosov}
-
-- [ATTACH](../../sql-reference/statements/attach.md)
-- [CHECK TABLE](../../sql-reference/statements/check-table.md)
-- [DESCRIBE TABLE](../../sql-reference/statements/describe-table.md)
-- [DETACH](../../sql-reference/statements/detach.md)
-- [DROP](../../sql-reference/statements/drop.md)
-- [EXISTS](../../sql-reference/statements/exists.md)
-- [KILL](../../sql-reference/statements/kill.md)
-- [OPTIMIZE](../../sql-reference/statements/optimize.md)
-- [RENAME](../../sql-reference/statements/rename.md)
-- [SET](../../sql-reference/statements/set.md)
-- [SET ROLE](../../sql-reference/statements/set-role.md)
-- [TRUNCATE](../../sql-reference/statements/truncate.md)
-- [USE](../../sql-reference/statements/use.md)
-
diff --git a/docs/ru/sql-reference/statements/select/index.md b/docs/ru/sql-reference/statements/select/index.md
index 4479e24000b..f360a09eb10 100644
--- a/docs/ru/sql-reference/statements/select/index.md
+++ b/docs/ru/sql-reference/statements/select/index.md
@@ -270,7 +270,7 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
└─────────────────┴────────┘
```
-## SETTINGS in the SELECT Query {#settings-in-select}
+## SETTINGS in the SELECT Query {#settings-in-select-query}
You can set the required settings directly in the `SELECT` query, in the `SETTINGS` clause. These settings apply only within that query and are reset to the previous or default value after it finishes.
diff --git a/docs/zh/development/browse-code.md b/docs/zh/development/browse-code.md
deleted file mode 100644
index 16382a94ed5..00000000000
--- a/docs/zh/development/browse-code.md
+++ /dev/null
@@ -1,13 +0,0 @@
----
-slug: /zh/development/browse-code
-sidebar_position: 63
-sidebar_label: "Browse Source Code"
----
-
-# Browsing the ClickHouse Source Code {#browse-clickhouse-source-code}
-
-You can use the **Woboq** online code browser, available [here](https://clickhouse.com/codebrowser/ClickHouse/src/index.html). It provides code navigation, semantic highlighting, search, and indexing. The code snapshot is updated daily.
-
-You can also browse the sources on [GitHub](https://github.com/ClickHouse/ClickHouse) as usual.
-
-If you are wondering which IDE to choose, we recommend CLion, QT Creator, VS Code, or KDevelop (with caveats). You can use any IDE you like; Vim and Emacs also work.
diff --git a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx
index c35e96718b1..6db4982f50f 100644
--- a/docs/zh/getting-started/example-datasets/brown-benchmark.mdx
+++ b/docs/zh/getting-started/example-datasets/brown-benchmark.mdx
@@ -1,10 +1,460 @@
---
slug: /zh/getting-started/example-datasets/brown-benchmark
-sidebar_label: Brown University Benchmark
-description: A new analytical benchmark for machine-generated log data
-title: "Brown University Benchmark"
+sidebar_label: Brown University Benchmark
+description: A new analytical benchmark for machine-generated log data
+title: "Brown University Benchmark"
---
-import Content from '@site/docs/en/getting-started/example-datasets/brown-benchmark.md';
+`MgBench` is a new analytical benchmark for machine-generated log data, created by [Andrew Crotty](http://cs.brown.edu/people/acrotty/).
-
+Download the data:
+
+```bash
+wget https://datasets.clickhouse.com/mgbench{1..3}.csv.xz
+```
+
+Decompress the data:
+
+```bash
+xz -v -d mgbench{1..3}.csv.xz
+```
+
+Create the database and tables:
+
+```sql
+CREATE DATABASE mgbench;
+```
+
+```sql
+USE mgbench;
+```
+
+```sql
+CREATE TABLE mgbench.logs1 (
+ log_time DateTime,
+ machine_name LowCardinality(String),
+ machine_group LowCardinality(String),
+ cpu_idle Nullable(Float32),
+ cpu_nice Nullable(Float32),
+ cpu_system Nullable(Float32),
+ cpu_user Nullable(Float32),
+ cpu_wio Nullable(Float32),
+ disk_free Nullable(Float32),
+ disk_total Nullable(Float32),
+ part_max_used Nullable(Float32),
+ load_fifteen Nullable(Float32),
+ load_five Nullable(Float32),
+ load_one Nullable(Float32),
+ mem_buffers Nullable(Float32),
+ mem_cached Nullable(Float32),
+ mem_free Nullable(Float32),
+ mem_shared Nullable(Float32),
+ swap_free Nullable(Float32),
+ bytes_in Nullable(Float32),
+ bytes_out Nullable(Float32)
+)
+ENGINE = MergeTree()
+ORDER BY (machine_group, machine_name, log_time);
+```
+
+
+```sql
+CREATE TABLE mgbench.logs2 (
+ log_time DateTime,
+ client_ip IPv4,
+ request String,
+ status_code UInt16,
+ object_size UInt64
+)
+ENGINE = MergeTree()
+ORDER BY log_time;
+```
+
+
+```sql
+CREATE TABLE mgbench.logs3 (
+ log_time DateTime64,
+ device_id FixedString(15),
+ device_name LowCardinality(String),
+ device_type LowCardinality(String),
+ device_floor UInt8,
+ event_type LowCardinality(String),
+ event_unit FixedString(1),
+ event_value Nullable(Float32)
+)
+ENGINE = MergeTree()
+ORDER BY (event_type, log_time);
+```
+
+Insert the data:
+
+```bash
+clickhouse-client --query "INSERT INTO mgbench.logs1 FORMAT CSVWithNames" < mgbench1.csv
+clickhouse-client --query "INSERT INTO mgbench.logs2 FORMAT CSVWithNames" < mgbench2.csv
+clickhouse-client --query "INSERT INTO mgbench.logs3 FORMAT CSVWithNames" < mgbench3.csv
+```
+
+## Run the benchmark queries
+
+```sql
+USE mgbench;
+```
+
+```sql
+-- Q1.1: What is the CPU/network utilization for each web server since midnight?
+
+SELECT machine_name,
+ MIN(cpu) AS cpu_min,
+ MAX(cpu) AS cpu_max,
+ AVG(cpu) AS cpu_avg,
+ MIN(net_in) AS net_in_min,
+ MAX(net_in) AS net_in_max,
+ AVG(net_in) AS net_in_avg,
+ MIN(net_out) AS net_out_min,
+ MAX(net_out) AS net_out_max,
+ AVG(net_out) AS net_out_avg
+FROM (
+ SELECT machine_name,
+ COALESCE(cpu_user, 0.0) AS cpu,
+ COALESCE(bytes_in, 0.0) AS net_in,
+ COALESCE(bytes_out, 0.0) AS net_out
+ FROM logs1
+ WHERE machine_name IN ('anansi','aragog','urd')
+ AND log_time >= TIMESTAMP '2017-01-11 00:00:00'
+) AS r
+GROUP BY machine_name;
+```
+
+
+```sql
+-- Q1.2: Which computer lab machines have been offline in the past day?
+
+SELECT machine_name,
+ log_time
+FROM logs1
+WHERE (machine_name LIKE 'cslab%' OR
+ machine_name LIKE 'mslab%')
+ AND load_one IS NULL
+ AND log_time >= TIMESTAMP '2017-01-10 00:00:00'
+ORDER BY machine_name,
+ log_time;
+```
+
+```sql
+-- Q1.3: What are the hourly average metrics for a specific workstation over the past 10 days?
+
+SELECT dt,
+ hr,
+ AVG(load_fifteen) AS load_fifteen_avg,
+ AVG(load_five) AS load_five_avg,
+ AVG(load_one) AS load_one_avg,
+ AVG(mem_free) AS mem_free_avg,
+ AVG(swap_free) AS swap_free_avg
+FROM (
+ SELECT CAST(log_time AS DATE) AS dt,
+ EXTRACT(HOUR FROM log_time) AS hr,
+ load_fifteen,
+ load_five,
+ load_one,
+ mem_free,
+ swap_free
+ FROM logs1
+ WHERE machine_name = 'babbage'
+ AND load_fifteen IS NOT NULL
+ AND load_five IS NOT NULL
+ AND load_one IS NOT NULL
+ AND mem_free IS NOT NULL
+ AND swap_free IS NOT NULL
+ AND log_time >= TIMESTAMP '2017-01-01 00:00:00'
+) AS r
+GROUP BY dt,
+ hr
+ORDER BY dt,
+ hr;
+```
+
+```sql
+-- Q1.4: Over one month, how often was each server blocked on disk I/O?
+
+SELECT machine_name,
+ COUNT(*) AS spikes
+FROM logs1
+WHERE machine_group = 'Servers'
+ AND cpu_wio > 0.99
+ AND log_time >= TIMESTAMP '2016-12-01 00:00:00'
+ AND log_time < TIMESTAMP '2017-01-01 00:00:00'
+GROUP BY machine_name
+ORDER BY spikes DESC
+LIMIT 10;
+```
+
+```sql
+-- Q1.5: Which externally reachable VMs have run low on memory?
+
+SELECT machine_name,
+ dt,
+ MIN(mem_free) AS mem_free_min
+FROM (
+ SELECT machine_name,
+ CAST(log_time AS DATE) AS dt,
+ mem_free
+ FROM logs1
+ WHERE machine_group = 'DMZ'
+ AND mem_free IS NOT NULL
+) AS r
+GROUP BY machine_name,
+ dt
+HAVING MIN(mem_free) < 10000
+ORDER BY machine_name,
+ dt;
+```
+
+```sql
+-- Q1.6: What is the total hourly network traffic across all file servers?
+
+SELECT dt,
+ hr,
+ SUM(net_in) AS net_in_sum,
+ SUM(net_out) AS net_out_sum,
+ SUM(net_in) + SUM(net_out) AS both_sum
+FROM (
+ SELECT CAST(log_time AS DATE) AS dt,
+ EXTRACT(HOUR FROM log_time) AS hr,
+ COALESCE(bytes_in, 0.0) / 1000000000.0 AS net_in,
+ COALESCE(bytes_out, 0.0) / 1000000000.0 AS net_out
+ FROM logs1
+ WHERE machine_name IN ('allsorts','andes','bigred','blackjack','bonbon',
+ 'cadbury','chiclets','cotton','crows','dove','fireball','hearts','huey',
+ 'lindt','milkduds','milkyway','mnm','necco','nerds','orbit','peeps',
+ 'poprocks','razzles','runts','smarties','smuggler','spree','stride',
+ 'tootsie','trident','wrigley','york')
+) AS r
+GROUP BY dt,
+ hr
+ORDER BY both_sum DESC
+LIMIT 10;
+```
+
+```sql
+-- Q2.1: Which requests have caused server errors within the past two weeks?
+
+SELECT *
+FROM logs2
+WHERE status_code >= 500
+ AND log_time >= TIMESTAMP '2012-12-18 00:00:00'
+ORDER BY log_time;
+```
+
+```sql
+-- Q2.2: During a specific two-week period, was the user password file leaked?
+
+SELECT *
+FROM logs2
+WHERE status_code >= 200
+ AND status_code < 300
+ AND request LIKE '%/etc/passwd%'
+ AND log_time >= TIMESTAMP '2012-05-06 00:00:00'
+ AND log_time < TIMESTAMP '2012-05-20 00:00:00';
+```
+
+
+```sql
+-- Q2.3: What was the average path depth for top-level requests in the past month?
+
+SELECT top_level,
+ AVG(LENGTH(request) - LENGTH(REPLACE(request, '/', ''))) AS depth_avg
+FROM (
+ SELECT SUBSTRING(request FROM 1 FOR len) AS top_level,
+ request
+ FROM (
+ SELECT POSITION(SUBSTRING(request FROM 2), '/') AS len,
+ request
+ FROM logs2
+ WHERE status_code >= 200
+ AND status_code < 300
+ AND log_time >= TIMESTAMP '2012-12-01 00:00:00'
+ ) AS r
+ WHERE len > 0
+) AS s
+WHERE top_level IN ('/about','/courses','/degrees','/events',
+ '/grad','/industry','/news','/people',
+ '/publications','/research','/teaching','/ugrad')
+GROUP BY top_level
+ORDER BY top_level;
+```
+
+
+```sql
+-- Q2.4: During the past three months, which clients have made an excessive number of requests?
+
+SELECT client_ip,
+ COUNT(*) AS num_requests
+FROM logs2
+WHERE log_time >= TIMESTAMP '2012-10-01 00:00:00'
+GROUP BY client_ip
+HAVING COUNT(*) >= 100000
+ORDER BY num_requests DESC;
+```
+
+
+```sql
+-- Q2.5: What are the daily unique visitor counts?
+
+SELECT dt,
+ COUNT(DISTINCT client_ip)
+FROM (
+ SELECT CAST(log_time AS DATE) AS dt,
+ client_ip
+ FROM logs2
+) AS r
+GROUP BY dt
+ORDER BY dt;
+```
+
+
+```sql
+-- Q2.6: What are the average and maximum data transfer rates (Gbps)?
+
+SELECT AVG(transfer) / 125000000.0 AS transfer_avg,
+ MAX(transfer) / 125000000.0 AS transfer_max
+FROM (
+ SELECT log_time,
+ SUM(object_size) AS transfer
+ FROM logs2
+ GROUP BY log_time
+) AS r;
+```
+
+
+```sql
+-- Q3.1: Has the indoor temperature reached freezing at any point since 2019/11/29 17:00?
+
+SELECT *
+FROM logs3
+WHERE event_type = 'temperature'
+ AND event_value <= 32.0
+ AND log_time >= '2019-11-29 17:00:00.000';
+```
+
+
+```sql
+-- Q3.4: Over the past six months, how frequently was each door opened?
+
+SELECT device_name,
+ device_floor,
+ COUNT(*) AS ct
+FROM logs3
+WHERE event_type = 'door_open'
+ AND log_time >= '2019-06-01 00:00:00.000'
+GROUP BY device_name,
+ device_floor
+ORDER BY ct DESC;
+```
+
+Query 3.5 below uses the UNION keyword. Set the mode for combining SELECT query results; this setting is only used when UNION is specified without an explicit UNION ALL or UNION DISTINCT.
+
+```sql
+SET union_default_mode = 'DISTINCT'
+```
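+
+With this mode, a bare `UNION` deduplicates rows, i.e. it behaves like `UNION DISTINCT`. A minimal sketch (not part of the benchmark):
+
+```sql
+-- Returns a single row because the duplicate is eliminated
+SELECT 1 AS x
+UNION
+SELECT 1;
+```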
+
+```sql
+-- Q3.5: Where in the building do large temperature variations occur in winter and summer?
+
+WITH temperature AS (
+ SELECT dt,
+ device_name,
+ device_type,
+ device_floor
+ FROM (
+ SELECT dt,
+ hr,
+ device_name,
+ device_type,
+ device_floor,
+ AVG(event_value) AS temperature_hourly_avg
+ FROM (
+ SELECT CAST(log_time AS DATE) AS dt,
+ EXTRACT(HOUR FROM log_time) AS hr,
+ device_name,
+ device_type,
+ device_floor,
+ event_value
+ FROM logs3
+ WHERE event_type = 'temperature'
+ ) AS r
+ GROUP BY dt,
+ hr,
+ device_name,
+ device_type,
+ device_floor
+ ) AS s
+ GROUP BY dt,
+ device_name,
+ device_type,
+ device_floor
+ HAVING MAX(temperature_hourly_avg) - MIN(temperature_hourly_avg) >= 25.0
+)
+SELECT DISTINCT device_name,
+ device_type,
+ device_floor,
+ 'WINTER'
+FROM temperature
+WHERE dt >= DATE '2018-12-01'
+ AND dt < DATE '2019-03-01'
+UNION
+SELECT DISTINCT device_name,
+ device_type,
+ device_floor,
+ 'SUMMER'
+FROM temperature
+WHERE dt >= DATE '2019-06-01'
+ AND dt < DATE '2019-09-01';
+```
+
+
+```sql
+-- Q3.6: For each device category, what are the monthly power consumption metrics?
+
+SELECT yr,
+ mo,
+ SUM(coffee_hourly_avg) AS coffee_monthly_sum,
+ AVG(coffee_hourly_avg) AS coffee_monthly_avg,
+ SUM(printer_hourly_avg) AS printer_monthly_sum,
+ AVG(printer_hourly_avg) AS printer_monthly_avg,
+ SUM(projector_hourly_avg) AS projector_monthly_sum,
+ AVG(projector_hourly_avg) AS projector_monthly_avg,
+ SUM(vending_hourly_avg) AS vending_monthly_sum,
+ AVG(vending_hourly_avg) AS vending_monthly_avg
+FROM (
+ SELECT dt,
+ yr,
+ mo,
+ hr,
+ AVG(coffee) AS coffee_hourly_avg,
+ AVG(printer) AS printer_hourly_avg,
+ AVG(projector) AS projector_hourly_avg,
+ AVG(vending) AS vending_hourly_avg
+ FROM (
+ SELECT CAST(log_time AS DATE) AS dt,
+ EXTRACT(YEAR FROM log_time) AS yr,
+ EXTRACT(MONTH FROM log_time) AS mo,
+ EXTRACT(HOUR FROM log_time) AS hr,
+ CASE WHEN device_name LIKE 'coffee%' THEN event_value END AS coffee,
+ CASE WHEN device_name LIKE 'printer%' THEN event_value END AS printer,
+ CASE WHEN device_name LIKE 'projector%' THEN event_value END AS projector,
+ CASE WHEN device_name LIKE 'vending%' THEN event_value END AS vending
+ FROM logs3
+ WHERE device_type = 'meter'
+ ) AS r
+ GROUP BY dt,
+ yr,
+ mo,
+ hr
+) AS s
+GROUP BY yr,
+ mo
+ORDER BY yr,
+ mo;
+```
+
+This dataset is available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1hY2hpbmVfbmFtZSwKICAgICAgIE1JTihjcHUpIEFTIGNwdV9taW4sCiAgICAgICBNQVgoY3B1KSBBUyBjcHVfbWF4LAogICAgICAgQVZHKGNwdSkgQVMgY3B1X2F2ZywKICAgICAgIE1JTihuZXRfaW4pIEFTIG5ldF9pbl9taW4sCiAgICAgICBNQVgobmV0X2luKSBBUyBuZXRfaW5fbWF4LAogICAgICAgQVZHKG5ldF9pbikgQVMgbmV0X2luX2F2ZywKICAgICAgIE1JTihuZXRfb3V0KSBBUyBuZXRfb3V0X21pbiwKICAgICAgIE1BWChuZXRfb3V0KSBBUyBuZXRfb3V0X21heCwKICAgICAgIEFWRyhuZXRfb3V0KSBBUyBuZXRfb3V0X2F2ZwpGUk9NICgKICBTRUxFQ1QgbWFjaGluZV9uYW1lLAogICAgICAgICBDT0FMRVNDRShjcHVfdXNlciwgMC4wKSBBUyBjcHUsCiAgICAgICAgIENPQUxFU0NFKGJ5dGVzX2luLCAwLjApIEFTIG5ldF9pbiwKICAgICAgICAgQ09BTEVTQ0UoYnl0ZXNfb3V0LCAwLjApIEFTIG5ldF9vdXQKICBGUk9NIG1nYmVuY2gubG9nczEKICBXSEVSRSBtYWNoaW5lX25hbWUgSU4gKCdhbmFuc2knLCdhcmFnb2cnLCd1cmQnKQogICAgQU5EIGxvZ190aW1lID49IFRJTUVTVEFNUCAnMjAxNy0wMS0xMSAwMDowMDowMCcKKSBBUyByCkdST1VQIEJZIG1hY2hpbmVfbmFtZQ==).
diff --git a/docs/zh/getting-started/example-datasets/cell-towers.mdx b/docs/zh/getting-started/example-datasets/cell-towers.mdx
index ece13445210..9738680519a 100644
--- a/docs/zh/getting-started/example-datasets/cell-towers.mdx
+++ b/docs/zh/getting-started/example-datasets/cell-towers.mdx
@@ -1,9 +1,232 @@
---
slug: /zh/getting-started/example-datasets/cell-towers
-sidebar_label: Cell Towers
-title: "Cell Towers"
+sidebar_label: Cell Towers
+sidebar_position: 3
+title: "Cell Towers"
---
-import Content from '@site/docs/en/getting-started/example-datasets/cell-towers.md';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import CodeBlock from '@theme/CodeBlock';
+import ActionsMenu from '@site/docs/en/_snippets/_service_actions_menu.md';
+import SQLConsoleDetail from '@site/docs/en/_snippets/_launch_sql_console.md';
+
+This dataset is from [OpenCellid](https://www.opencellid.org/) - the world's largest open database of cell towers.
+
+As of 2021, it contains more than 40 million records about cell towers (GSM, LTE, UMTS, etc.) around the world, together with their geographical coordinates and metadata (country code, network, etc.).
+
+The OpenCelliD Project is licensed under the `Creative Commons Attribution-ShareAlike 4.0 International License`, and we redistribute a snapshot of this dataset under the terms of the same license. The up-to-date version of the dataset is available to download after sign-in.
+
+
+## Get the Dataset {#get-the-dataset}
+
+In ClickHouse Cloud this dataset can be loaded from S3 with a single button click. Log in to your ClickHouse Cloud organization, or create a free trial at [ClickHouse.cloud](https://clickhouse.cloud).
+
+Choose the **Cell Towers** dataset from the **Sample data** tab, and then select **Load data**:
+
+![Load the dataset](@site/docs/en/_snippets/images/cloud-load-data-sample.png)
+
+Examine the schema of the cell_towers table:
+
+```sql
+DESCRIBE TABLE cell_towers
+```
+
+1. Download a snapshot of the dataset from February 2021: [cell_towers.csv.xz](https://datasets.clickhouse.com/cell_towers.csv.xz) (729 MB).
+
+2. Validate the integrity (optional step):
+
+```bash
+md5sum cell_towers.csv.xz
+```
+
+```response
+8cf986f4a0d9f12c6f384a0e9192c908 cell_towers.csv.xz
+```
+
+3. Decompress it with the following command:
+
+```bash
+xz -d cell_towers.csv.xz
+```
+
+4. Create the table:
+
+```sql
+CREATE TABLE cell_towers
+(
+ radio Enum8('' = 0, 'CDMA' = 1, 'GSM' = 2, 'LTE' = 3, 'NR' = 4, 'UMTS' = 5),
+ mcc UInt16,
+ net UInt16,
+ area UInt16,
+ cell UInt64,
+ unit Int16,
+ lon Float64,
+ lat Float64,
+ range UInt32,
+ samples UInt32,
+ changeable UInt8,
+ created DateTime,
+ updated DateTime,
+ averageSignal UInt8
+)
+ENGINE = MergeTree ORDER BY (radio, mcc, net, created);
+```
+
+5. Insert the dataset:
+
+```bash
+clickhouse-client --query "INSERT INTO cell_towers FORMAT CSVWithNames" < cell_towers.csv
+```
+
+## Example queries {#examples}
+
+1. The number of cell towers by type:
+
+```sql
+SELECT radio, count() AS c FROM cell_towers GROUP BY radio ORDER BY c DESC
+```
+```response
+┌─radio─┬────────c─┐
+│ UMTS │ 20686487 │
+│ LTE │ 12101148 │
+│ GSM │ 9931312 │
+│ CDMA │ 556344 │
+│ NR │ 867 │
+└───────┴──────────┘
+
+5 rows in set. Elapsed: 0.011 sec. Processed 43.28 million rows, 43.28 MB (3.83 billion rows/s., 3.83 GB/s.)
+```
+
+2. The number of cell towers per [Mobile Country Code (MCC)](https://en.wikipedia.org/wiki/Mobile_country_code):
+
+```sql
+SELECT mcc, count() FROM cell_towers GROUP BY mcc ORDER BY count() DESC LIMIT 10
+```
+```response
+┌─mcc─┬─count()─┐
+│ 310 │ 5024650 │
+│ 262 │ 2622423 │
+│ 250 │ 1953176 │
+│ 208 │ 1891187 │
+│ 724 │ 1836150 │
+│ 404 │ 1729151 │
+│ 234 │ 1618924 │
+│ 510 │ 1353998 │
+│ 440 │ 1343355 │
+│ 311 │ 1332798 │
+└─────┴─────────┘
+
+10 rows in set. Elapsed: 0.019 sec. Processed 43.28 million rows, 86.55 MB (2.33 billion rows/s., 4.65 GB/s.)
+```
+
+The top countries are: the USA, Germany, and Russia.
+
+You may want to create an [External Dictionary](../../sql-reference/dictionaries/external-dictionaries/external-dicts.md) in ClickHouse to decode these values.
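+
+A rough sketch of such a dictionary, assuming a hypothetical `mcc_codes` lookup table with `mcc` and `country` columns (both the table and its contents are illustrative assumptions, not part of this dataset):
+
+```sql
+-- Hypothetical lookup table; you would need to fill it with MCC-to-country data yourself
+CREATE TABLE mcc_codes (mcc UInt64, country String) ENGINE = MergeTree ORDER BY mcc;
+
+CREATE DICTIONARY mcc_names
+(
+    mcc UInt64,
+    country String
+)
+PRIMARY KEY mcc
+SOURCE(CLICKHOUSE(TABLE 'mcc_codes'))
+LAYOUT(FLAT())
+LIFETIME(300);
+
+-- Resolve an MCC to a country name at query time
+SELECT dictGet('mcc_names', 'country', toUInt64(310)) AS country;
+```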
+
+## Use case: Incorporate geo data {#use-case}
+
+We will use the `pointInPolygon` function.
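+
+As a minimal illustration of the function itself (the polygon below is arbitrary, not Moscow):
+
+```sql
+-- Returns 1: the point (3, 3) lies inside the quadrilateral
+SELECT pointInPolygon((3., 3.), [(6, 0), (8, 4), (5, 8), (0, 2)]) AS inside;
+```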
+
+1. Create a table where we will store the polygon (a regular table, or a temporary one if persistent tables cannot be created):
+
+```sql
+CREATE TABLE moscow (polygon Array(Tuple(Float64, Float64)))
+ORDER BY polygon;
+```
+
+Or, as a temporary table:
+
+```sql
+CREATE TEMPORARY TABLE
+moscow (polygon Array(Tuple(Float64, Float64)));
+```
+
+2. The following points roughly trace the geo-fence of Moscow (excluding "New Moscow"):
+
+```sql
+INSERT INTO moscow VALUES ([(37.84172564285271, 55.78000432402266),
+(37.8381207618713, 55.775874525970494), (37.83979446823122, 55.775626746008065), (37.84243326983639, 55.77446586811748), (37.84262672750849, 55.771974101091104), (37.84153238623039, 55.77114545193181), (37.841124690460184, 55.76722010265554),
+(37.84239076983644, 55.76654891107098), (37.842283558197025, 55.76258709833121), (37.8421759312134, 55.758073999993734), (37.84198330422974, 55.75381499999371), (37.8416827275085, 55.749277102484484), (37.84157576190186, 55.74794544108413),
+(37.83897929098507, 55.74525257875241), (37.83739676451868, 55.74404373042019), (37.838732481460525, 55.74298009816793), (37.841183997352545, 55.743060321833575), (37.84097476190185, 55.73938799999373), (37.84048155819702, 55.73570799999372),
+(37.840095812164286, 55.73228210777237), (37.83983814285274, 55.73080491981639), (37.83846476321406, 55.729799917464675), (37.83835745269769, 55.72919751082619), (37.838636380279524, 55.72859509486539), (37.8395161005249, 55.727705075632784),
+(37.83897964285276, 55.722727886185154), (37.83862557539366, 55.72034817326636), (37.83559735744853, 55.71944437307499), (37.835370708803126, 55.71831419154461), (37.83738169402022, 55.71765218986692), (37.83823396494291, 55.71691750159089),
+(37.838056931213345, 55.71547311301385), (37.836812846557606, 55.71221445615604), (37.83522525396725, 55.709331054395555), (37.83269301586908, 55.70953687463627), (37.829667367706236, 55.70903403789297), (37.83311126588435, 55.70552351822608),
+(37.83058993121339, 55.70041317726053), (37.82983872750851, 55.69883771404813), (37.82934501586913, 55.69718947487017), (37.828926414016685, 55.69504441658371), (37.82876530422971, 55.69287499999378), (37.82894754100031, 55.690759754047335),
+(37.827697554878185, 55.68951421135665), (37.82447346292115, 55.68965045405069), (37.83136543914793, 55.68322046195302), (37.833554015869154, 55.67814012759211), (37.83544184655761, 55.67295011628339), (37.837480388885474, 55.6672498719639),
+(37.838960677246064, 55.66316274139358), (37.83926093121332, 55.66046999999383), (37.839025050262435, 55.65869897264431), (37.83670784390257, 55.65794084879904), (37.835656529083245, 55.65694309303843), (37.83704060449217, 55.65689306460552),
+(37.83696819873806, 55.65550363526252), (37.83760389616388, 55.65487847246661), (37.83687972750851, 55.65356745541324), (37.83515216004943, 55.65155951234079), (37.83312418518067, 55.64979413590619), (37.82801726983639, 55.64640836412121),
+(37.820614174591, 55.64164525405531), (37.818908190475426, 55.6421883258084), (37.81717543386075, 55.64112490388471), (37.81690987037274, 55.63916106913107), (37.815099354492155, 55.637925371757085), (37.808769150787356, 55.633798276884455),
+(37.80100123544311, 55.62873670012244), (37.79598013491824, 55.62554336109055), (37.78634567724606, 55.62033499605651), (37.78334147619623, 55.618768681480326), (37.77746201055901, 55.619855533402706), (37.77527329626457, 55.61909966711279),
+(37.77801986242668, 55.618770300976294), (37.778212973541216, 55.617257701952106), (37.77784818518065, 55.61574504433011), (37.77016867724609, 55.61148576294007), (37.760191219573976, 55.60599579539028), (37.75338926983641, 55.60227892751446),
+(37.746329965606634, 55.59920577639331), (37.73939925396728, 55.59631430313617), (37.73273665739439, 55.5935318803559), (37.7299954450912, 55.59350760316188), (37.7268679946899, 55.59469840523759), (37.72626726983634, 55.59229549697373),
+(37.7262673598022, 55.59081598950582), (37.71897193121335, 55.5877595845419), (37.70871550793456, 55.58393177431724), (37.700497489410374, 55.580917323756644), (37.69204305026244, 55.57778089778455), (37.68544477378839, 55.57815154690915),
+(37.68391050793454, 55.57472945079756), (37.678803592590306, 55.57328235936491), (37.6743402539673, 55.57255251445782), (37.66813862698363, 55.57216388774464), (37.617927457672096, 55.57505691895805), (37.60443099999999, 55.5757737568051),
+(37.599683515869145, 55.57749105910326), (37.59754177842709, 55.57796291823627), (37.59625834786988, 55.57906686095235), (37.59501783265684, 55.57746616444403), (37.593090671936025, 55.57671634534502), (37.587018007904, 55.577944600233785),
+(37.578692203704804, 55.57982895000019), (37.57327546607398, 55.58116294118248), (37.57385012109279, 55.581550362779), (37.57399562266922, 55.5820107079112), (37.5735356072979, 55.58226289171689), (37.57290393054962, 55.582393529795155),
+(37.57037722355653, 55.581919415056234), (37.5592298306885, 55.584471614867844), (37.54189249206543, 55.58867650795186), (37.5297256269836, 55.59158133551745), (37.517837865081766, 55.59443656218868), (37.51200186508174, 55.59635625174229),
+(37.506808949737554, 55.59907823904434), (37.49820432275389, 55.6062944994944), (37.494406071441674, 55.60967103463367), (37.494760001358024, 55.61066689753365), (37.49397137107085, 55.61220931698269), (37.49016528606031, 55.613417718449064),
+(37.48773249206542, 55.61530616333343), (37.47921386508177, 55.622640129112334), (37.470652153442394, 55.62993723476164), (37.46273446298218, 55.6368075123157), (37.46350692265317, 55.64068225239439), (37.46050283203121, 55.640794546982576),
+(37.457627470916734, 55.64118904154646), (37.450718034393326, 55.64690488145138), (37.44239252645875, 55.65397824729769), (37.434587576721185, 55.66053543155961), (37.43582144975277, 55.661693766520735), (37.43576786245721, 55.662755031737014),
+(37.430982915344174, 55.664610641628116), (37.428547447097685, 55.66778515273695), (37.42945134592044, 55.668633314343566), (37.42859571562949, 55.66948145750025), (37.4262836402282, 55.670813882451405), (37.418709037048295, 55.6811141674414),
+(37.41922139651101, 55.68235377885389), (37.419218771842885, 55.68359335082235), (37.417196501327446, 55.684375235224735), (37.41607020370478, 55.68540557585352), (37.415640857147146, 55.68686637150793), (37.414632153442334, 55.68903015131686),
+(37.413344899475064, 55.690896881757396), (37.41171432275391, 55.69264232162232), (37.40948282275393, 55.69455101638112), (37.40703674603271, 55.69638690385348), (37.39607169577025, 55.70451821283731), (37.38952706878662, 55.70942491932811),
+(37.387778313491815, 55.71149057784176), (37.39049275399779, 55.71419814298992), (37.385557272491454, 55.7155489617061), (37.38388335714726, 55.71849856042102), (37.378368238098155, 55.7292763261685), (37.37763597123337, 55.730845879211614),
+(37.37890062088197, 55.73167906388319), (37.37750451918789, 55.734703664681774), (37.375610832015965, 55.734851959522246), (37.3723813571472, 55.74105626086403), (37.37014935714723, 55.746115620904355), (37.36944173016362, 55.750883999993725),
+(37.36975304365541, 55.76335905525834), (37.37244070571134, 55.76432079697595), (37.3724259757175, 55.76636979670426), (37.369922155757884, 55.76735417953104), (37.369892695770275, 55.76823419316575), (37.370214730163575, 55.782312184391266),
+(37.370493611114505, 55.78436801120489), (37.37120164550783, 55.78596427165359), (37.37284851456452, 55.7874378183096), (37.37608325135799, 55.7886695054807), (37.3764587460632, 55.78947647305964), (37.37530000265506, 55.79146512926804),
+(37.38235915344241, 55.79899647809345), (37.384344043655396, 55.80113596939471), (37.38594269577028, 55.80322699999366), (37.38711208598329, 55.804919036911976), (37.3880239841309, 55.806610999993666), (37.38928977249147, 55.81001864976979),
+(37.39038389947512, 55.81348641242801), (37.39235781481933, 55.81983538336746), (37.393709457672124, 55.82417822811877), (37.394685720901464, 55.82792275755836), (37.39557615344238, 55.830447148154136), (37.39844478226658, 55.83167107969975),
+(37.40019761214057, 55.83151823557964), (37.400398790382326, 55.83264967594742), (37.39659544313046, 55.83322180909622), (37.39667059524539, 55.83402792148566), (37.39682089947515, 55.83638877400216), (37.39643489154053, 55.83861656112751),
+(37.3955338994751, 55.84072348043264), (37.392680272491454, 55.84502158126453), (37.39241188227847, 55.84659117913199), (37.392529730163616, 55.84816071336481), (37.39486835714723, 55.85288092980303), (37.39873052645878, 55.859893456073635),
+(37.40272161111449, 55.86441833633205), (37.40697072750854, 55.867579567544375), (37.410007082016016, 55.868369880337), (37.4120992989502, 55.86920843741314), (37.412668021163924, 55.87055369615854), (37.41482461111453, 55.87170587948249),
+(37.41862266137694, 55.873183961039565), (37.42413732540892, 55.874879126654704), (37.4312182698669, 55.875614937236705), (37.43111093783558, 55.8762723478417), (37.43332105622856, 55.87706546369396), (37.43385747619623, 55.87790681284802),
+(37.441303050262405, 55.88027084462084), (37.44747234260555, 55.87942070143253), (37.44716141796871, 55.88072960917233), (37.44769797085568, 55.88121221323979), (37.45204320500181, 55.882080694420715), (37.45673176190186, 55.882346110794586),
+(37.463383999999984, 55.88252729504517), (37.46682797486874, 55.88294937719063), (37.470014457672086, 55.88361266759345), (37.47751410450743, 55.88546991372396), (37.47860317658232, 55.88534929207307), (37.48165826025772, 55.882563306475106),
+(37.48316434442331, 55.8815803226785), (37.483831555817645, 55.882427612793315), (37.483182967125686, 55.88372791409729), (37.483092277908824, 55.88495581062434), (37.4855716508179, 55.8875561994203), (37.486440636245746, 55.887827444039566),
+(37.49014203439328, 55.88897899871799), (37.493210285705544, 55.890208937135604), (37.497512451065035, 55.891342397444696), (37.49780744510645, 55.89174030252967), (37.49940333499519, 55.89239745507079), (37.50018383334346, 55.89339220941865),
+(37.52421672750851, 55.903869074155224), (37.52977457672118, 55.90564076517974), (37.53503220370484, 55.90661661218259), (37.54042858064267, 55.90714113744566), (37.54320461007303, 55.905645048442985), (37.545686966066306, 55.906608607018505),
+(37.54743976120755, 55.90788552162358), (37.55796999999999, 55.90901557907218), (37.572711542327866, 55.91059395704873), (37.57942799999998, 55.91073854155573), (37.58502865872187, 55.91009969268444), (37.58739968913264, 55.90794809960554),
+(37.59131567193598, 55.908713267595054), (37.612687423278814, 55.902866854295375), (37.62348079629517, 55.90041967242986), (37.635797880950896, 55.898141151686396), (37.649487626983664, 55.89639275532968), (37.65619302513125, 55.89572360207488),
+(37.66294133862307, 55.895295577183965), (37.66874564418033, 55.89505457604897), (37.67375601586915, 55.89254677027454), (37.67744661901856, 55.8947775867987), (37.688347, 55.89450045676125), (37.69480554232789, 55.89422926332761),
+(37.70107096560668, 55.89322256101114), (37.705962965606716, 55.891763491662616), (37.711885134918205, 55.889110234998974), (37.71682005026245, 55.886577568759876), (37.7199315476074, 55.88458159806678), (37.72234560316464, 55.882281005794134),
+(37.72364385977171, 55.8809452036196), (37.725371142837474, 55.8809722706006), (37.727870902099546, 55.88037213862385), (37.73394330422971, 55.877941504088696), (37.745339592590376, 55.87208120378722), (37.75525267724611, 55.86703807949492),
+(37.76919976190188, 55.859821640197474), (37.827835219574, 55.82962968399116), (37.83341438888553, 55.82575289922351), (37.83652584655761, 55.82188784027888), (37.83809213491821, 55.81612575504693), (37.83605359521481, 55.81460347077685),
+(37.83632178569025, 55.81276696067908), (37.838623105812026, 55.811486181656385), (37.83912198147584, 55.807329380532785), (37.839079078033414, 55.80510270463816), (37.83965844708251, 55.79940712529036), (37.840581150787344, 55.79131399999368),
+(37.84172564285271, 55.78000432402266)]);
+```
+
+3. Check how many cell towers there are in Moscow:
+
+```sql
+SELECT count() FROM cell_towers
+WHERE pointInPolygon((lon, lat), (SELECT * FROM moscow))
+```
+```response
+┌─count()─┐
+│ 310463 │
+└─────────┘
+
+1 rows in set. Elapsed: 0.067 sec. Processed 43.28 million rows, 692.42 MB (645.83 million rows/s., 10.33 GB/s.)
+```
+
+Although you cannot create temporary tables there, this dataset is still available for interactive queries in the [Playground](https://play.clickhouse.com/play?user=play), [example](https://play.clickhouse.com/play?user=play#U0VMRUNUIG1jYywgY291bnQoKSBGUk9NIGNlbGxfdG93ZXJzIEdST1VQIEJZIG1jYyBPUkRFUiBCWSBjb3VudCgpIERFU0M=).
-
diff --git a/docs/zh/getting-started/example-datasets/menus.mdx b/docs/zh/getting-started/example-datasets/menus.mdx
index 250b8a4cd37..10e9f2bd318 100644
--- a/docs/zh/getting-started/example-datasets/menus.mdx
+++ b/docs/zh/getting-started/example-datasets/menus.mdx
@@ -1,9 +1,352 @@
----
-slug: /zh/getting-started/example-datasets/menus
-sidebar_label: New York Public Library "What's on the Menu?" Dataset
-title: "New York Public Library \"What's on the Menu?\" Dataset"
+---
+slug: /zh/getting-started/example-datasets/menus
+sidebar_label: "New York Public Library \"What's on the Menu?\" Dataset"
+title: "New York Public Library \"What's on the Menu?\" Dataset"
---
-import Content from '@site/docs/en/getting-started/example-datasets/menus.md';
+The dataset is created by the New York Public Library. It contains historical data on the menus of hotels, restaurants, and cafes, with the dishes and their prices.
-
+Source: http://menus.nypl.org/data
+The data is in the public domain.
+
+The data comes from the library's archives, so it may be incomplete and difficult to use for statistical analysis. Nevertheless, the dataset is very interesting. It contains only 1.3 million records about dishes in menus - a very small data volume for ClickHouse, but still a good example.
+
+## Download the Dataset {#download-dataset}
+
+Run the command:
+
+```bash
+wget https://s3.amazonaws.com/menusdata.nypl.org/gzips/2021_08_01_07_01_17_data.tgz
+```
+
+Replace the link with an up-to-date one from http://menus.nypl.org/data if needed. The download size is about 35 MB.
+
+## Unpack the Dataset {#unpack-dataset}
+
+```bash
+tar xvf 2021_08_01_07_01_17_data.tgz
+```
+
+The uncompressed size is about 150 MB.
+
+The dataset consists of four tables:
+
+- `Menu` - Information about menus: the name of the restaurant, the date when the menu was seen, etc.
+- `Dish` - Information about dishes: the name of the dish along with some characteristics.
+- `MenuPage` - Information about the pages in the menus; every page belongs to some `Menu`.
+- `MenuItem` - An item of the menu: a dish along with its price on some menu page, with links to `Dish` and `MenuPage`.
+
+## Create the Tables {#create-tables}
+
+We use the [Decimal](/docs/zh/sql-reference/data-types/decimal.md) data type to store prices; `Decimal64(3)` keeps three digits after the decimal point.
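+
+A quick sketch of why `Decimal` is preferable to `Float` for money: decimal arithmetic is exact at the declared scale, while binary floating point is not:
+
+```sql
+SELECT toDecimal64(0.1, 3) + toDecimal64(0.2, 3) AS exact,  -- 0.3
+       0.1 + 0.2 AS approximate;                            -- 0.30000000000000004
+```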
+
+```sql
+CREATE TABLE dish
+(
+ id UInt32,
+ name String,
+ description String,
+ menus_appeared UInt32,
+ times_appeared Int32,
+ first_appeared UInt16,
+ last_appeared UInt16,
+ lowest_price Decimal64(3),
+ highest_price Decimal64(3)
+) ENGINE = MergeTree ORDER BY id;
+
+CREATE TABLE menu
+(
+ id UInt32,
+ name String,
+ sponsor String,
+ event String,
+ venue String,
+ place String,
+ physical_description String,
+ occasion String,
+ notes String,
+ call_number String,
+ keywords String,
+ language String,
+ date String,
+ location String,
+ location_type String,
+ currency String,
+ currency_symbol String,
+ status String,
+ page_count UInt16,
+ dish_count UInt16
+) ENGINE = MergeTree ORDER BY id;
+
+CREATE TABLE menu_page
+(
+ id UInt32,
+ menu_id UInt32,
+ page_number UInt16,
+ image_id String,
+ full_height UInt16,
+ full_width UInt16,
+ uuid UUID
+) ENGINE = MergeTree ORDER BY id;
+
+CREATE TABLE menu_item
+(
+ id UInt32,
+ menu_page_id UInt32,
+ price Decimal64(3),
+ high_price Decimal64(3),
+ dish_id UInt32,
+ created_at DateTime,
+ updated_at DateTime,
+ xpos Float64,
+ ypos Float64
+) ENGINE = MergeTree ORDER BY id;
+```
+
+## Import the Data {#import-data}
+
+Run the following commands to import the data into ClickHouse:
+
+```bash
+clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO dish FORMAT CSVWithNames" < Dish.csv
+clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu FORMAT CSVWithNames" < Menu.csv
+clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --query "INSERT INTO menu_page FORMAT CSVWithNames" < MenuPage.csv
+clickhouse-client --format_csv_allow_single_quotes 0 --input_format_null_as_default 0 --date_time_input_format best_effort --query "INSERT INTO menu_item FORMAT CSVWithNames" < MenuItem.csv
+```
+
+We use the [CSVWithNames](/docs/zh/interfaces/formats.md#csvwithnames) format because the data is represented as CSV with a header.
+
+We disable `format_csv_allow_single_quotes` because only double quotes are used to enclose data fields, and single quotes may appear inside values, so they must not confuse the CSV parser.
+
+We disable [input_format_null_as_default](/docs/zh/operations/settings/settings.md#settings-input-format-null-as-default) because our data has no [NULL](/docs/zh/sql-reference/syntax.md#null-literal) values. Otherwise ClickHouse would try to parse `\N` sequences and could confuse them with `\` in the data.
+
+The setting [date_time_input_format best_effort](/docs/zh/operations/settings/settings.md#settings-date_time_input_format) allows parsing [DateTime](/docs/zh/sql-reference/data-types/datetime.md) fields in a wide variety of formats. For example, ISO-8601 timestamps without seconds, such as '2000-01-01 01:02', are recognized. Without this setting, only fixed DateTime formats are allowed.
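+
+The same relaxed parsing is available in SQL via the `parseDateTimeBestEffort` function, so you can try it directly (a quick sketch, not part of the import):
+
+```sql
+SELECT parseDateTimeBestEffort('2000-01-01 01:02') AS without_seconds,
+       parseDateTimeBestEffort('2000-01-01T01:02:03+01:00') AS with_tz_offset;
+```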
+
+## Denormalize the Data {#denormalize-data}
+
+The data is presented in [normalized form](https://en.wikipedia.org/wiki/Database_normalization#Normal_forms) across multiple tables. It means that you have to perform a [JOIN](/docs/zh/sql-reference/statements/select/join.md#select-join) if you want to query, for example, dish names from menu items. For typical analytical tasks it is far more efficient to work with pre-JOINed data to avoid doing the JOIN every time. This operation is called data "denormalization".
+
+We will create a table `menu_item_denorm` that contains all the data joined together:
+
+```sql
+CREATE TABLE menu_item_denorm
+ENGINE = MergeTree ORDER BY (dish_name, created_at)
+AS SELECT
+ price,
+ high_price,
+ created_at,
+ updated_at,
+ xpos,
+ ypos,
+ dish.id AS dish_id,
+ dish.name AS dish_name,
+ dish.description AS dish_description,
+ dish.menus_appeared AS dish_menus_appeared,
+ dish.times_appeared AS dish_times_appeared,
+ dish.first_appeared AS dish_first_appeared,
+ dish.last_appeared AS dish_last_appeared,
+ dish.lowest_price AS dish_lowest_price,
+ dish.highest_price AS dish_highest_price,
+ menu.id AS menu_id,
+ menu.name AS menu_name,
+ menu.sponsor AS menu_sponsor,
+ menu.event AS menu_event,
+ menu.venue AS menu_venue,
+ menu.place AS menu_place,
+ menu.physical_description AS menu_physical_description,
+ menu.occasion AS menu_occasion,
+ menu.notes AS menu_notes,
+ menu.call_number AS menu_call_number,
+ menu.keywords AS menu_keywords,
+ menu.language AS menu_language,
+ menu.date AS menu_date,
+ menu.location AS menu_location,
+ menu.location_type AS menu_location_type,
+ menu.currency AS menu_currency,
+ menu.currency_symbol AS menu_currency_symbol,
+ menu.status AS menu_status,
+ menu.page_count AS menu_page_count,
+ menu.dish_count AS menu_dish_count
+FROM menu_item
+ JOIN dish ON menu_item.dish_id = dish.id
+ JOIN menu_page ON menu_item.menu_page_id = menu_page.id
+ JOIN menu ON menu_page.menu_id = menu.id;
+```
+
+## Validate the Data {#validate-data}
+
+Query:
+
+```sql
+SELECT count() FROM menu_item_denorm;
+```
+
+Result:
+
+```text
+┌─count()─┐
+│ 1329175 │
+└─────────┘
+```
+
+## Run Some Queries {#run-queries}
+
+### Averaged historical prices of dishes {#query-averaged-historical-prices}
+
+Query:
+
+```sql
+SELECT
+ round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
+ count(),
+ round(avg(price), 2),
+ bar(avg(price), 0, 100, 100)
+FROM menu_item_denorm
+WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022)
+GROUP BY d
+ORDER BY d ASC;
+```
+
+Result:
+
+```text
+┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 100, 100)─┐
+│ 1850 │ 618 │ 1.5 │ █▍ │
+│ 1860 │ 1634 │ 1.29 │ █▎ │
+│ 1870 │ 2215 │ 1.36 │ █▎ │
+│ 1880 │ 3909 │ 1.01 │ █ │
+│ 1890 │ 8837 │ 1.4 │ █▍ │
+│ 1900 │ 176292 │ 0.68 │ ▋ │
+│ 1910 │ 212196 │ 0.88 │ ▊ │
+│ 1920 │ 179590 │ 0.74 │ ▋ │
+│ 1930 │ 73707 │ 0.6 │ ▌ │
+│ 1940 │ 58795 │ 0.57 │ ▌ │
+│ 1950 │ 41407 │ 0.95 │ ▊ │
+│ 1960 │ 51179 │ 1.32 │ █▎ │
+│ 1970 │ 12914 │ 1.86 │ █▋ │
+│ 1980 │ 7268 │ 4.35 │ ████▎ │
+│ 1990 │ 11055 │ 6.03 │ ██████ │
+│ 2000 │ 2467 │ 11.85 │ ███████████▋ │
+│ 2010 │ 597 │ 25.66 │ █████████████████████████▋ │
+└──────┴─────────┴──────────────────────┴──────────────────────────────┘
+```
+
+Take it with a grain of salt.
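+
+The `round(..., -1)` in the grouping key above buckets years into decades by rounding to the nearest multiple of ten; a minimal sketch:
+
+```sql
+SELECT round(1987, -1) AS decade;  -- 1990: a negative scale rounds to tens
+```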
+
+### Burger Prices {#query-burger-prices}
+
+Query:
+
+```sql
+SELECT
+ round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
+ count(),
+ round(avg(price), 2),
+ bar(avg(price), 0, 50, 100)
+FROM menu_item_denorm
+WHERE (menu_currency = 'Dollars') AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%burger%')
+GROUP BY d
+ORDER BY d ASC;
+```
+
+Result:
+
+```text
+┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)───────────┐
+│ 1880 │ 2 │ 0.42 │ ▋ │
+│ 1890 │ 7 │ 0.85 │ █▋ │
+│ 1900 │ 399 │ 0.49 │ ▊ │
+│ 1910 │ 589 │ 0.68 │ █▎ │
+│ 1920 │ 280 │ 0.56 │ █ │
+│ 1930 │ 74 │ 0.42 │ ▋ │
+│ 1940 │ 119 │ 0.59 │ █▏ │
+│ 1950 │ 134 │ 1.09 │ ██▏ │
+│ 1960 │ 272 │ 0.92 │ █▋ │
+│ 1970 │ 108 │ 1.18 │ ██▎ │
+│ 1980 │ 88 │ 2.82 │ █████▋ │
+│ 1990 │ 184 │ 3.68 │ ███████▎ │
+│ 2000 │ 21 │ 7.14 │ ██████████████▎ │
+│ 2010 │ 6 │ 18.42 │ ████████████████████████████████████▋ │
+└──────┴─────────┴──────────────────────┴───────────────────────────────────────┘
+```
+
+### Vodka {#query-vodka}
+
+Query:
+
+```sql
+SELECT
+ round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
+ count(),
+ round(avg(price), 2),
+ bar(avg(price), 0, 50, 100)
+FROM menu_item_denorm
+WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%vodka%')
+GROUP BY d
+ORDER BY d ASC;
+```
+
+Result:
+
+```text
+┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)─┐
+│ 1910 │ 2 │ 0 │ │
+│ 1920 │ 1 │ 0.3 │ ▌ │
+│ 1940 │ 21 │ 0.42 │ ▋ │
+│ 1950 │ 14 │ 0.59 │ █▏ │
+│ 1960 │ 113 │ 2.17 │ ████▎ │
+│ 1970 │ 37 │ 0.68 │ █▎ │
+│ 1980 │ 19 │ 2.55 │ █████ │
+│ 1990 │ 86 │ 3.6 │ ███████▏ │
+│ 2000 │ 2 │ 3.98 │ ███████▊ │
+└──────┴─────────┴──────────────────────┴─────────────────────────────┘
+```
+
+To get vodka, we have to specify `ILIKE '%vodka%'` in the query.
+
+### Caviar {#query-caviar}
+
+Let's print caviar prices, along with the name of any dish containing caviar.
+
+Query:
+
+```sql
+SELECT
+ round(toUInt32OrZero(extract(menu_date, '^\\d{4}')), -1) AS d,
+ count(),
+ round(avg(price), 2),
+ bar(avg(price), 0, 50, 100),
+ any(dish_name)
+FROM menu_item_denorm
+WHERE (menu_currency IN ('Dollars', '')) AND (d > 0) AND (d < 2022) AND (dish_name ILIKE '%caviar%')
+GROUP BY d
+ORDER BY d ASC;
+```
+
+Result:
+
+```text
+┌────d─┬─count()─┬─round(avg(price), 2)─┬─bar(avg(price), 0, 50, 100)──────┬─any(dish_name)──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ 1090 │ 1 │ 0 │ │ Caviar │
+│ 1880 │ 3 │ 0 │ │ Caviar │
+│ 1890 │ 39 │ 0.59 │ █▏ │ Butter and caviar │
+│ 1900 │ 1014 │ 0.34 │ ▋ │ Anchovy Caviar on Toast │
+│ 1910 │ 1588 │ 1.35 │ ██▋ │ 1/1 Brötchen Caviar │
+│ 1920 │ 927 │ 1.37 │ ██▋ │ ASTRAKAN CAVIAR │
+│ 1930 │ 289 │ 1.91 │ ███▋ │ Astrachan caviar │
+│ 1940 │ 201 │ 0.83 │ █▋ │ (SPECIAL) Domestic Caviar Sandwich │
+│ 1950 │ 81 │ 2.27 │ ████▌ │ Beluga Caviar │
+│ 1960 │ 126 │ 2.21 │ ████▍ │ Beluga Caviar │
+│ 1970 │ 105 │ 0.95 │ █▊ │ BELUGA MALOSSOL CAVIAR AMERICAN DRESSING │
+│ 1980 │ 12 │ 7.22 │ ██████████████▍ │ Authentic Iranian Beluga Caviar the world's finest black caviar presented in ice garni and a sampling of chilled 100° Russian vodka │
+│ 1990 │ 74 │ 14.42 │ ████████████████████████████▋ │ Avocado Salad, Fresh cut avocado with caviare │
+│ 2000 │ 3 │ 7.82 │ ███████████████▋ │ Aufgeschlagenes Kartoffelsueppchen mit Forellencaviar │
+│ 2010 │ 6 │ 15.58 │ ███████████████████████████████▏ │ "OYSTERS AND PEARLS" "Sabayon" of Pearl Tapioca with Island Creek Oysters and Russian Sevruga Caviar │
+└──────┴─────────┴──────────────────────┴──────────────────────────────────┴─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
+
+At least they have caviar with vodka. Very nice.
+
+## Online Playground {#playground}
+
+The dataset is also uploaded to the ClickHouse Playground, [example](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICByb3VuZCh0b1VJbnQzMk9yWmVybyhleHRyYWN0KG1lbnVfZGF0ZSwgJ15cXGR7NH0nKSksIC0xKSBBUyBkLAogICAgY291bnQoKSwKICAgIHJvdW5kKGF2ZyhwcmljZSksIDIpLAogICAgYmFyKGF2ZyhwcmljZSksIDAsIDUwLCAxMDApLAogICAgYW55KGRpc2hfbmFtZSkKRlJPTSBtZW51X2l0ZW1fZGVub3JtCldIRVJFIChtZW51X2N1cnJlbmN5IElOICgnRG9sbGFycycsICcnKSkgQU5EIChkID4gMCkgQU5EIChkIDwgMjAyMikgQU5EIChkaXNoX25hbWUgSUxJS0UgJyVjYXZpYXIlJykKR1JPVVAgQlkgZApPUkRFUiBCWSBkIEFTQw==).
diff --git a/docs/zh/getting-started/example-datasets/opensky.mdx b/docs/zh/getting-started/example-datasets/opensky.mdx
index e8d5367e970..92cd104e06e 100644
--- a/docs/zh/getting-started/example-datasets/opensky.mdx
+++ b/docs/zh/getting-started/example-datasets/opensky.mdx
@@ -1,9 +1,416 @@
----
+---
slug: /zh/getting-started/example-datasets/opensky
-sidebar_label: Air Traffic Data
-title: "Crowdsourced air traffic data from The OpenSky Network 2020"
+sidebar_label: Air Traffic Data
+description: The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic.
+title: "Crowdsourced air traffic data from The OpenSky Network 2020"
---
-import Content from '@site/docs/en/getting-started/example-datasets/opensky.md';
+The data in this dataset is derived and cleaned from the full OpenSky dataset to illustrate the development of air traffic during the COVID-19 pandemic. It spans all flights seen by the network's more than 2500 members since 1 January 2019. More data will be periodically included in the dataset until the end of the COVID-19 pandemic.
-
+Source: https://zenodo.org/record/5092942#.YRBCyTpRXYd
+
+Martin Strohmeier, Xavier Olive, Jannis Lübbe, Matthias Schäfer, and Vincent Lenders "Crowdsourced air traffic data from the OpenSky Network 2019-2020" Earth System Science Data 13(2), 2021 https://doi.org/10.5194/essd-13-357-2021
+
+## Download the Dataset {#download-dataset}
+
+Run the command:
+
+```bash
+wget -O- https://zenodo.org/record/5092942 | grep -oP 'https://zenodo.org/record/5092942/files/flightlist_\d+_\d+\.csv\.gz' | xargs wget
+```
+
+The download will take about 2 minutes with a good internet connection. There are 30 files with a total size of 4.3 GB.
+
+## Create the Table {#create-table}
+
+```sql
+CREATE TABLE opensky
+(
+ callsign String,
+ number String,
+ icao24 String,
+ registration String,
+ typecode String,
+ origin String,
+ destination String,
+ firstseen DateTime,
+ lastseen DateTime,
+ day DateTime,
+ latitude_1 Float64,
+ longitude_1 Float64,
+ altitude_1 Float64,
+ latitude_2 Float64,
+ longitude_2 Float64,
+ altitude_2 Float64
+) ENGINE = MergeTree ORDER BY (origin, destination, callsign);
+```
+
+## Import the Data {#import-data}
+
+Import the data into ClickHouse in parallel:
+
+```bash
+ls -1 flightlist_*.csv.gz | xargs -P100 -I{} bash -c 'gzip -c -d "{}" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"'
+```
+
+- Here we pass the list of files (`ls -1 flightlist_*.csv.gz`) to `xargs` for parallel processing. `xargs -P100` specifies up to 100 parallel workers, but since we only have 30 files, the number of workers will be just 30.
+- For every file, `xargs` runs a script via `bash -c`. The script uses `{}` as a placeholder for the file name, which `xargs` substitutes (as requested with `-I{}`).
+- The script decompresses the file (`gzip -c -d "{}"`) to standard output (the `-c` parameter) and redirects the output to `clickhouse-client`.
+- We also ask to parse [DateTime](../../sql-reference/data-types/datetime.md) fields with the extended parser ([--date_time_input_format best_effort](../../operations/settings/settings.md#settings-date_time_input_format)) to recognize ISO-8601 format with timezone offsets.
+
+Finally, `clickhouse-client` reads the input data in [CSVWithNames](../../interfaces/formats.md#csvwithnames) format and performs the insertion.
+
+The parallel import takes 24 seconds.
+
+If you prefer not to import in parallel, here is the sequential variant:
+
+```bash
+for file in flightlist_*.csv.gz; do gzip -c -d "$file" | clickhouse-client --date_time_input_format best_effort --query "INSERT INTO opensky FORMAT CSVWithNames"; done
+```
+
+## Validate the Data {#validate-data}
+
+Query:
+
+```sql
+SELECT count() FROM opensky;
+```
+
+Result:
+
+```text
+┌──count()─┐
+│ 66010819 │
+└──────────┘
+```
+
+The size of the dataset in ClickHouse is just 2.66 GiB; let's check it.
+
+Query:
+
+```sql
+SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'opensky';
+```
+
+Result:
+
+```text
+┌─formatReadableSize(total_bytes)─┐
+│ 2.66 GiB │
+└─────────────────────────────────┘
+```
+
+## Run Some Queries {#run-queries}
+
+The total traveled distance is 68 billion kilometers.
+
+Query:
+
+```sql
+SELECT formatReadableQuantity(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) / 1000) FROM opensky;
+```
+
+Result:
+
+```text
+┌─formatReadableQuantity(divide(sum(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)), 1000))─┐
+│ 68.72 billion │
+└──────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+```
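+
+`geoDistance(lon1, lat1, lon2, lat2)` returns the great-circle distance in meters, hence the division by 1000 above. A quick sanity check (the coordinates below are approximate):
+
+```sql
+-- Roughly the Moscow to Saint Petersburg distance, in kilometers
+SELECT geoDistance(37.62, 55.75, 30.32, 59.94) / 1000 AS km;
+```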
+
+The average flight distance is around 1000 km.
+
+Query:
+
+```sql
+SELECT avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2)) FROM opensky;
+```
+
+Result:
+
+```text
+┌─avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))─┐
+│ 1041090.6465708319 │
+└────────────────────────────────────────────────────────────────────┘
+```
+
+### Most busy origin airports and the average distance seen {#busy-airports-average-distance}
+
+Query:
+
+```sql
+SELECT
+ origin,
+ count(),
+ round(avg(geoDistance(longitude_1, latitude_1, longitude_2, latitude_2))) AS distance,
+ bar(distance, 0, 10000000, 100) AS bar
+FROM opensky
+WHERE origin != ''
+GROUP BY origin
+ORDER BY count() DESC
+LIMIT 100;
+```
+
+Result:
+
+```text
+ ┌─origin─┬─count()─┬─distance─┬─bar────────────────────────────────────┐
+ 1. │ KORD │ 745007 │ 1546108 │ ███████████████▍ │
+ 2. │ KDFW │ 696702 │ 1358721 │ █████████████▌ │
+ 3. │ KATL │ 667286 │ 1169661 │ ███████████▋ │
+ 4. │ KDEN │ 582709 │ 1287742 │ ████████████▊ │
+ 5. │ KLAX │ 581952 │ 2628393 │ ██████████████████████████▎ │
+ 6. │ KLAS │ 447789 │ 1336967 │ █████████████▎ │
+ 7. │ KPHX │ 428558 │ 1345635 │ █████████████▍ │
+ 8. │ KSEA │ 412592 │ 1757317 │ █████████████████▌ │
+ 9. │ KCLT │ 404612 │ 880355 │ ████████▋ │
+ 10. │ VIDP │ 363074 │ 1445052 │ ██████████████▍ │
+ 11. │ EDDF │ 362643 │ 2263960 │ ██████████████████████▋ │
+ 12. │ KSFO │ 361869 │ 2445732 │ ████████████████████████▍ │
+ 13. │ KJFK │ 349232 │ 2996550 │ █████████████████████████████▊ │
+ 14. │ KMSP │ 346010 │ 1287328 │ ████████████▋ │
+ 15. │ LFPG │ 344748 │ 2206203 │ ██████████████████████ │
+ 16. │ EGLL │ 341370 │ 3216593 │ ████████████████████████████████▏ │
+ 17. │ EHAM │ 340272 │ 2116425 │ █████████████████████▏ │
+ 18. │ KEWR │ 337696 │ 1826545 │ ██████████████████▎ │
+ 19. │ KPHL │ 320762 │ 1291761 │ ████████████▊ │
+ 20. │ OMDB │ 308855 │ 2855706 │ ████████████████████████████▌ │
+ 21. │ UUEE │ 307098 │ 1555122 │ ███████████████▌ │
+ 22. │ KBOS │ 304416 │ 1621675 │ ████████████████▏ │
+ 23. │ LEMD │ 291787 │ 1695097 │ ████████████████▊ │
+ 24. │ YSSY │ 272979 │ 1875298 │ ██████████████████▋ │
+ 25. │ KMIA │ 265121 │ 1923542 │ ███████████████████▏ │
+ 26. │ ZGSZ │ 263497 │ 745086 │ ███████▍ │
+ 27. │ EDDM │ 256691 │ 1361453 │ █████████████▌ │
+ 28. │ WMKK │ 254264 │ 1626688 │ ████████████████▎ │
+ 29. │ CYYZ │ 251192 │ 2175026 │ █████████████████████▋ │
+ 30. │ KLGA │ 248699 │ 1106935 │ ███████████ │
+ 31. │ VHHH │ 248473 │ 3457658 │ ██████████████████████████████████▌ │
+ 32. │ RJTT │ 243477 │ 1272744 │ ████████████▋ │
+ 33. │ KBWI │ 241440 │ 1187060 │ ███████████▋ │
+ 34. │ KIAD │ 239558 │ 1683485 │ ████████████████▋ │
+ 35. │ KIAH │ 234202 │ 1538335 │ ███████████████▍ │
+ 36. │ KFLL │ 223447 │ 1464410 │ ██████████████▋ │
+ 37. │ KDAL │ 212055 │ 1082339 │ ██████████▋ │
+ 38. │ KDCA │ 207883 │ 1013359 │ ██████████▏ │
+ 39. │ LIRF │ 207047 │ 1427965 │ ██████████████▎ │
+ 40. │ PANC │ 206007 │ 2525359 │ █████████████████████████▎ │
+ 41. │ LTFJ │ 205415 │ 860470 │ ████████▌ │
+ 42. │ KDTW │ 204020 │ 1106716 │ ███████████ │
+ 43. │ VABB │ 201679 │ 1300865 │ █████████████ │
+ 44. │ OTHH │ 200797 │ 3759544 │ █████████████████████████████████████▌ │
+ 45. │ KMDW │ 200796 │ 1232551 │ ████████████▎ │
+ 46. │ KSAN │ 198003 │ 1495195 │ ██████████████▊ │
+ 47. │ KPDX │ 197760 │ 1269230 │ ████████████▋ │
+ 48. │ SBGR │ 197624 │ 2041697 │ ████████████████████▍ │
+ 49. │ VOBL │ 189011 │ 1040180 │ ██████████▍ │
+ 50. │ LEBL │ 188956 │ 1283190 │ ████████████▋ │
+ 51. │ YBBN │ 188011 │ 1253405 │ ████████████▌ │
+ 52. │ LSZH │ 187934 │ 1572029 │ ███████████████▋ │
+ 53. │ YMML │ 187643 │ 1870076 │ ██████████████████▋ │
+ 54. │ RCTP │ 184466 │ 2773976 │ ███████████████████████████▋ │
+ 55. │ KSNA │ 180045 │ 778484 │ ███████▋ │
+ 56. │ EGKK │ 176420 │ 1694770 │ ████████████████▊ │
+ 57. │ LOWW │ 176191 │ 1274833 │ ████████████▋ │
+ 58. │ UUDD │ 176099 │ 1368226 │ █████████████▋ │
+ 59. │ RKSI │ 173466 │ 3079026 │ ██████████████████████████████▋ │
+ 60. │ EKCH │ 172128 │ 1229895 │ ████████████▎ │
+ 61. │ KOAK │ 171119 │ 1114447 │ ███████████▏ │
+ 62. │ RPLL │ 170122 │ 1440735 │ ██████████████▍ │
+ 63. │ KRDU │ 167001 │ 830521 │ ████████▎ │
+ 64. │ KAUS │ 164524 │ 1256198 │ ████████████▌ │
+ 65. │ KBNA │ 163242 │ 1022726 │ ██████████▏ │
+ 66. │ KSDF │ 162655 │ 1380867 │ █████████████▋ │
+ 67. │ ENGM │ 160732 │ 910108 │ █████████ │
+ 68. │ LIMC │ 160696 │ 1564620 │ ███████████████▋ │
+ 69. │ KSJC │ 159278 │ 1081125 │ ██████████▋ │
+ 70. │ KSTL │ 157984 │ 1026699 │ ██████████▎ │
+ 71. │ UUWW │ 156811 │ 1261155 │ ████████████▌ │
+ 72. │ KIND │ 153929 │ 987944 │ █████████▊ │
+ 73. │ ESSA │ 153390 │ 1203439 │ ████████████ │
+ 74. │ KMCO │ 153351 │ 1508657 │ ███████████████ │
+ 75. │ KDVT │ 152895 │ 74048 │ ▋ │
+ 76. │ VTBS │ 152645 │ 2255591 │ ██████████████████████▌ │
+ 77. │ CYVR │ 149574 │ 2027413 │ ████████████████████▎ │
+ 78. │ EIDW │ 148723 │ 1503985 │ ███████████████ │
+ 79. │ LFPO │ 143277 │ 1152964 │ ███████████▌ │
+ 80. │ EGSS │ 140830 │ 1348183 │ █████████████▍ │
+ 81. │ KAPA │ 140776 │ 420441 │ ████▏ │
+ 82. │ KHOU │ 138985 │ 1068806 │ ██████████▋ │
+ 83. │ KTPA │ 138033 │ 1338223 │ █████████████▍ │
+ 84. │ KFFZ │ 137333 │ 55397 │ ▌ │
+ 85. │ NZAA │ 136092 │ 1581264 │ ███████████████▋ │
+ 86. │ YPPH │ 133916 │ 1271550 │ ████████████▋ │
+ 87. │ RJBB │ 133522 │ 1805623 │ ██████████████████ │
+ 88. │ EDDL │ 133018 │ 1265919 │ ████████████▋ │
+ 89. │ ULLI │ 130501 │ 1197108 │ ███████████▊ │
+ 90. │ KIWA │ 127195 │ 250876 │ ██▌ │
+ 91. │ KTEB │ 126969 │ 1189414 │ ███████████▊ │
+ 92. │ VOMM │ 125616 │ 1127757 │ ███████████▎ │
+ 93. │ LSGG │ 123998 │ 1049101 │ ██████████▍ │
+ 94. │ LPPT │ 122733 │ 1779187 │ █████████████████▋ │
+ 95. │ WSSS │ 120493 │ 3264122 │ ████████████████████████████████▋ │
+ 96. │ EBBR │ 118539 │ 1579939 │ ███████████████▋ │
+ 97. │ VTBD │ 118107 │ 661627 │ ██████▌ │
+ 98. │ KVNY │ 116326 │ 692960 │ ██████▊ │
+ 99. │ EDDT │ 115122 │ 941740 │ █████████▍ │
+100. │ EFHK │ 114860 │ 1629143 │ ████████████████▎ │
+ └────────┴─────────┴──────────┴────────────────────────────────────────┘
+```
+
+### Number of flights from three major Moscow airports, weekly {#flights-from-moscow}
+
+Query:
+
+```sql
+SELECT
+ toMonday(day) AS k,
+ count() AS c,
+ bar(c, 0, 10000, 100) AS bar
+FROM opensky
+WHERE origin IN ('UUEE', 'UUDD', 'UUWW')
+GROUP BY k
+ORDER BY k ASC;
+```
+
+Result:
+
+```text
+ ┌──────────k─┬────c─┬─bar──────────────────────────────────────────────────────────────────────────┐
+ 1. │ 2018-12-31 │ 5248 │ ████████████████████████████████████████████████████▍ │
+ 2. │ 2019-01-07 │ 6302 │ ███████████████████████████████████████████████████████████████ │
+ 3. │ 2019-01-14 │ 5701 │ █████████████████████████████████████████████████████████ │
+ 4. │ 2019-01-21 │ 5638 │ ████████████████████████████████████████████████████████▍ │
+ 5. │ 2019-01-28 │ 5731 │ █████████████████████████████████████████████████████████▎ │
+ 6. │ 2019-02-04 │ 5683 │ ████████████████████████████████████████████████████████▋ │
+ 7. │ 2019-02-11 │ 5759 │ █████████████████████████████████████████████████████████▌ │
+ 8. │ 2019-02-18 │ 5736 │ █████████████████████████████████████████████████████████▎ │
+ 9. │ 2019-02-25 │ 5873 │ ██████████████████████████████████████████████████████████▋ │
+ 10. │ 2019-03-04 │ 5965 │ ███████████████████████████████████████████████████████████▋ │
+ 11. │ 2019-03-11 │ 5900 │ ███████████████████████████████████████████████████████████ │
+ 12. │ 2019-03-18 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
+ 13. │ 2019-03-25 │ 5899 │ ██████████████████████████████████████████████████████████▊ │
+ 14. │ 2019-04-01 │ 6043 │ ████████████████████████████████████████████████████████████▍ │
+ 15. │ 2019-04-08 │ 6098 │ ████████████████████████████████████████████████████████████▊ │
+ 16. │ 2019-04-15 │ 6196 │ █████████████████████████████████████████████████████████████▊ │
+ 17. │ 2019-04-22 │ 6486 │ ████████████████████████████████████████████████████████████████▋ │
+ 18. │ 2019-04-29 │ 6682 │ ██████████████████████████████████████████████████████████████████▋ │
+ 19. │ 2019-05-06 │ 6739 │ ███████████████████████████████████████████████████████████████████▍ │
+ 20. │ 2019-05-13 │ 6600 │ ██████████████████████████████████████████████████████████████████ │
+ 21. │ 2019-05-20 │ 6575 │ █████████████████████████████████████████████████████████████████▋ │
+ 22. │ 2019-05-27 │ 6786 │ ███████████████████████████████████████████████████████████████████▋ │
+ 23. │ 2019-06-03 │ 6872 │ ████████████████████████████████████████████████████████████████████▋ │
+ 24. │ 2019-06-10 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
+ 25. │ 2019-06-17 │ 7045 │ ██████████████████████████████████████████████████████████████████████▍ │
+ 26. │ 2019-06-24 │ 6852 │ ████████████████████████████████████████████████████████████████████▌ │
+ 27. │ 2019-07-01 │ 7248 │ ████████████████████████████████████████████████████████████████████████▍ │
+ 28. │ 2019-07-08 │ 7284 │ ████████████████████████████████████████████████████████████████████████▋ │
+ 29. │ 2019-07-15 │ 7142 │ ███████████████████████████████████████████████████████████████████████▍ │
+ 30. │ 2019-07-22 │ 7108 │ ███████████████████████████████████████████████████████████████████████ │
+ 31. │ 2019-07-29 │ 7251 │ ████████████████████████████████████████████████████████████████████████▌ │
+ 32. │ 2019-08-05 │ 7403 │ ██████████████████████████████████████████████████████████████████████████ │
+ 33. │ 2019-08-12 │ 7457 │ ██████████████████████████████████████████████████████████████████████████▌ │
+ 34. │ 2019-08-19 │ 7502 │ ███████████████████████████████████████████████████████████████████████████ │
+ 35. │ 2019-08-26 │ 7540 │ ███████████████████████████████████████████████████████████████████████████▍ │
+ 36. │ 2019-09-02 │ 7237 │ ████████████████████████████████████████████████████████████████████████▎ │
+ 37. │ 2019-09-09 │ 7328 │ █████████████████████████████████████████████████████████████████████████▎ │
+ 38. │ 2019-09-16 │ 5566 │ ███████████████████████████████████████████████████████▋ │
+ 39. │ 2019-09-23 │ 7049 │ ██████████████████████████████████████████████████████████████████████▍ │
+ 40. │ 2019-09-30 │ 6880 │ ████████████████████████████████████████████████████████████████████▋ │
+ 41. │ 2019-10-07 │ 6518 │ █████████████████████████████████████████████████████████████████▏ │
+ 42. │ 2019-10-14 │ 6688 │ ██████████████████████████████████████████████████████████████████▊ │
+ 43. │ 2019-10-21 │ 6667 │ ██████████████████████████████████████████████████████████████████▋ │
+ 44. │ 2019-10-28 │ 6303 │ ███████████████████████████████████████████████████████████████ │
+ 45. │ 2019-11-04 │ 6298 │ ██████████████████████████████████████████████████████████████▊ │
+ 46. │ 2019-11-11 │ 6137 │ █████████████████████████████████████████████████████████████▎ │
+ 47. │ 2019-11-18 │ 6051 │ ████████████████████████████████████████████████████████████▌ │
+ 48. │ 2019-11-25 │ 5820 │ ██████████████████████████████████████████████████████████▏ │
+ 49. │ 2019-12-02 │ 5942 │ ███████████████████████████████████████████████████████████▍ │
+ 50. │ 2019-12-09 │ 4891 │ ████████████████████████████████████████████████▊ │
+ 51. │ 2019-12-16 │ 5682 │ ████████████████████████████████████████████████████████▋ │
+ 52. │ 2019-12-23 │ 6111 │ █████████████████████████████████████████████████████████████ │
+ 53. │ 2019-12-30 │ 5870 │ ██████████████████████████████████████████████████████████▋ │
+ 54. │ 2020-01-06 │ 5953 │ ███████████████████████████████████████████████████████████▌ │
+ 55. │ 2020-01-13 │ 5698 │ ████████████████████████████████████████████████████████▊ │
+ 56. │ 2020-01-20 │ 5339 │ █████████████████████████████████████████████████████▍ │
+ 57. │ 2020-01-27 │ 5566 │ ███████████████████████████████████████████████████████▋ │
+ 58. │ 2020-02-03 │ 5801 │ ██████████████████████████████████████████████████████████ │
+ 59. │ 2020-02-10 │ 5692 │ ████████████████████████████████████████████████████████▊ │
+ 60. │ 2020-02-17 │ 5912 │ ███████████████████████████████████████████████████████████ │
+ 61. │ 2020-02-24 │ 6031 │ ████████████████████████████████████████████████████████████▎ │
+ 62. │ 2020-03-02 │ 6105 │ █████████████████████████████████████████████████████████████ │
+ 63. │ 2020-03-09 │ 5823 │ ██████████████████████████████████████████████████████████▏ │
+ 64. │ 2020-03-16 │ 4659 │ ██████████████████████████████████████████████▌ │
+ 65. │ 2020-03-23 │ 3720 │ █████████████████████████████████████▏ │
+ 66. │ 2020-03-30 │ 1720 │ █████████████████▏ │
+ 67. │ 2020-04-06 │ 849 │ ████████▍ │
+ 68. │ 2020-04-13 │ 710 │ ███████ │
+ 69. │ 2020-04-20 │ 725 │ ███████▏ │
+ 70. │ 2020-04-27 │ 920 │ █████████▏ │
+ 71. │ 2020-05-04 │ 859 │ ████████▌ │
+ 72. │ 2020-05-11 │ 1047 │ ██████████▍ │
+ 73. │ 2020-05-18 │ 1135 │ ███████████▎ │
+ 74. │ 2020-05-25 │ 1266 │ ████████████▋ │
+ 75. │ 2020-06-01 │ 1793 │ █████████████████▊ │
+ 76. │ 2020-06-08 │ 1979 │ ███████████████████▋ │
+ 77. │ 2020-06-15 │ 2297 │ ██████████████████████▊ │
+ 78. │ 2020-06-22 │ 2788 │ ███████████████████████████▊ │
+ 79. │ 2020-06-29 │ 3389 │ █████████████████████████████████▊ │
+ 80. │ 2020-07-06 │ 3545 │ ███████████████████████████████████▍ │
+ 81. │ 2020-07-13 │ 3569 │ ███████████████████████████████████▋ │
+ 82. │ 2020-07-20 │ 3784 │ █████████████████████████████████████▋ │
+ 83. │ 2020-07-27 │ 3960 │ ███████████████████████████████████████▌ │
+ 84. │ 2020-08-03 │ 4323 │ ███████████████████████████████████████████▏ │
+ 85. │ 2020-08-10 │ 4581 │ █████████████████████████████████████████████▋ │
+ 86. │ 2020-08-17 │ 4791 │ ███████████████████████████████████████████████▊ │
+ 87. │ 2020-08-24 │ 4928 │ █████████████████████████████████████████████████▎ │
+ 88. │ 2020-08-31 │ 4687 │ ██████████████████████████████████████████████▋ │
+ 89. │ 2020-09-07 │ 4643 │ ██████████████████████████████████████████████▍ │
+ 90. │ 2020-09-14 │ 4594 │ █████████████████████████████████████████████▊ │
+ 91. │ 2020-09-21 │ 4478 │ ████████████████████████████████████████████▋ │
+ 92. │ 2020-09-28 │ 4382 │ ███████████████████████████████████████████▋ │
+ 93. │ 2020-10-05 │ 4261 │ ██████████████████████████████████████████▌ │
+ 94. │ 2020-10-12 │ 4243 │ ██████████████████████████████████████████▍ │
+ 95. │ 2020-10-19 │ 3941 │ ███████████████████████████████████████▍ │
+ 96. │ 2020-10-26 │ 3616 │ ████████████████████████████████████▏ │
+ 97. │ 2020-11-02 │ 3586 │ ███████████████████████████████████▋ │
+ 98. │ 2020-11-09 │ 3403 │ ██████████████████████████████████ │
+ 99. │ 2020-11-16 │ 3336 │ █████████████████████████████████▎ │
+100. │ 2020-11-23 │ 3230 │ ████████████████████████████████▎ │
+101. │ 2020-11-30 │ 3183 │ ███████████████████████████████▋ │
+102. │ 2020-12-07 │ 3285 │ ████████████████████████████████▋ │
+103. │ 2020-12-14 │ 3367 │ █████████████████████████████████▋ │
+104. │ 2020-12-21 │ 3748 │ █████████████████████████████████████▍ │
+105. │ 2020-12-28 │ 3986 │ ███████████████████████████████████████▋ │
+106. │ 2021-01-04 │ 3906 │ ███████████████████████████████████████ │
+107. │ 2021-01-11 │ 3425 │ ██████████████████████████████████▎ │
+108. │ 2021-01-18 │ 3144 │ ███████████████████████████████▍ │
+109. │ 2021-01-25 │ 3115 │ ███████████████████████████████▏ │
+110. │ 2021-02-01 │ 3285 │ ████████████████████████████████▋ │
+111. │ 2021-02-08 │ 3321 │ █████████████████████████████████▏ │
+112. │ 2021-02-15 │ 3475 │ ██████████████████████████████████▋ │
+113. │ 2021-02-22 │ 3549 │ ███████████████████████████████████▍ │
+114. │ 2021-03-01 │ 3755 │ █████████████████████████████████████▌ │
+115. │ 2021-03-08 │ 3080 │ ██████████████████████████████▋ │
+116. │ 2021-03-15 │ 3789 │ █████████████████████████████████████▊ │
+117. │ 2021-03-22 │ 3804 │ ██████████████████████████████████████ │
+118. │ 2021-03-29 │ 4238 │ ██████████████████████████████████████████▍ │
+119. │ 2021-04-05 │ 4307 │ ███████████████████████████████████████████ │
+120. │ 2021-04-12 │ 4225 │ ██████████████████████████████████████████▎ │
+121. │ 2021-04-19 │ 4391 │ ███████████████████████████████████████████▊ │
+122. │ 2021-04-26 │ 4868 │ ████████████████████████████████████████████████▋ │
+123. │ 2021-05-03 │ 4977 │ █████████████████████████████████████████████████▋ │
+124. │ 2021-05-10 │ 5164 │ ███████████████████████████████████████████████████▋ │
+125. │ 2021-05-17 │ 4986 │ █████████████████████████████████████████████████▋ │
+126. │ 2021-05-24 │ 5024 │ ██████████████████████████████████████████████████▏ │
+127. │ 2021-05-31 │ 4824 │ ████████████████████████████████████████████████▏ │
+128. │ 2021-06-07 │ 5652 │ ████████████████████████████████████████████████████████▌ │
+129. │ 2021-06-14 │ 5613 │ ████████████████████████████████████████████████████████▏ │
+130. │ 2021-06-21 │ 6061 │ ████████████████████████████████████████████████████████████▌ │
+131. │ 2021-06-28 │ 2554 │ █████████████████████████▌ │
+ └────────────┴──────┴──────────────────────────────────────────────────────────────────────────────┘
+```
+
+### Online Playground {#playground}
+
+You can test other queries against this dataset using the interactive resource [Online Playground](https://play.clickhouse.com/play?user=play). For example, [run this query](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBvcmlnaW4sCiAgICBjb3VudCgpLAogICAgcm91bmQoYXZnKGdlb0Rpc3RhbmNlKGxvbmdpdHVkZV8xLCBsYXRpdHVkZV8xLCBsb25naXR1ZGVfMiwgbGF0aXR1ZGVfMikpKSBBUyBkaXN0YW5jZSwKICAgIGJhcihkaXN0YW5jZSwgMCwgMTAwMDAwMDAsIDEwMCkgQVMgYmFyCkZST00gb3BlbnNreQpXSEVSRSBvcmlnaW4gIT0gJycKR1JPVVAgQlkgb3JpZ2luCk9SREVSIEJZIGNvdW50KCkgREVTQwpMSU1JVCAxMDA=). Note, however, that temporary tables cannot be created in the Playground.
diff --git a/docs/zh/getting-started/example-datasets/recipes.mdx b/docs/zh/getting-started/example-datasets/recipes.mdx
index da3a2ac541b..b7ed92962c5 100644
--- a/docs/zh/getting-started/example-datasets/recipes.mdx
+++ b/docs/zh/getting-started/example-datasets/recipes.mdx
@@ -1,9 +1,339 @@
----
-slug: /zh/getting-started/example-datasets/recipes
-sidebar_label: Recipes Dataset
-title: "Recipes Dataset"
+---
+slug: /zh/getting-started/example-datasets/recipes
+sidebar_label: Recipes Dataset
+title: "Recipes Dataset"
---
-import Content from '@site/docs/en/getting-started/example-datasets/recipes.md';
+The RecipeNLG dataset is available for download [here](https://recipenlg.cs.put.poznan.pl/dataset). It contains 2.2 million recipes, and its size is slightly less than 1 GB.
-
+## Download and Unpack the Dataset
+
+1. Go to the download page [https://recipenlg.cs.put.poznan.pl/dataset](https://recipenlg.cs.put.poznan.pl/dataset).
+2. Accept the Terms and Conditions and download the zip file.
+3. Unpack the zip file with `unzip` to get the `full_dataset.csv` file.
+
+## Create the Table
+
+Run clickhouse-client and execute the following CREATE query:
+
+``` sql
+CREATE TABLE recipes
+(
+ title String,
+ ingredients Array(String),
+ directions Array(String),
+ link String,
+ source LowCardinality(String),
+ NER Array(String)
+) ENGINE = MergeTree ORDER BY title;
+```
+
+## Insert the Data
+
+Run the following command:
+
+``` bash
+clickhouse-client --query "
+ INSERT INTO recipes
+ SELECT
+ title,
+ JSONExtract(ingredients, 'Array(String)'),
+ JSONExtract(directions, 'Array(String)'),
+ link,
+ source,
+ JSONExtract(NER, 'Array(String)')
+ FROM input('num UInt32, title String, ingredients String, directions String, link String, source LowCardinality(String), NER String')
+ FORMAT CSVWithNames
+" --input_format_with_names_use_header 0 --format_csv_allow_single_quotes 0 --input_format_allow_errors_num 10 < full_dataset.csv
+```
+
+This is an example of how to parse custom CSV; it requires several tweaks.
+
+Explanation:
+- The dataset is in CSV format, but it requires some preprocessing on insertion; we use the table function [input](../../sql-reference/table-functions/input.md) to perform the preprocessing;
+- The structure of the CSV file is specified in the argument of the table function `input`;
+- The field `num` (row number) is unneeded - we parse it from the file and ignore it;
+- We use `FORMAT CSVWithNames`, but the header in CSV will be ignored (by the command line parameter `--input_format_with_names_use_header 0`), because the header does not contain the name for the first field;
+- The file uses only double quotes to enclose CSV strings; some strings are not enclosed in double quotes, and a single quote must not be parsed as a string enclosure - that is why we also add the `--format_csv_allow_single_quotes 0` parameter;
+- Some strings from CSV cannot be parsed because they contain the `\M/` sequence at the beginning of the value; the only value that can start with a backslash in CSV is `\N`, which is parsed as SQL NULL. We add the `--input_format_allow_errors_num 10` parameter so that up to ten malformed records may be skipped during import;
+- There are arrays for the ingredients, directions, and NER fields; they are represented in an unusual form: serialized into strings as JSON and then placed into CSV - we parse them as String on import and then convert them to arrays with the [JSONExtract](../../sql-reference/functions/json-functions.md) function (see the sketch after this list).
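+
+A minimal sketch of that last step, turning a JSON-serialized array string into `Array(String)`:
+
+```sql
+-- ['salt','sugar','butter']
+SELECT JSONExtract('["salt", "sugar", "butter"]', 'Array(String)') AS ingredients;
+```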
+
+## Validate the Inserted Data
+
+By checking the number of rows:
+
+Query:
+
+``` sql
+SELECT count() FROM recipes;
+```
+
+Result:
+
+``` text
+┌─count()─┐
+│ 2231141 │
+└─────────┘
+```
+
+## Example Queries
+
+### Top Components by the Number of Recipes:
+
+In this example, we learn how to use the [arrayJoin](../../sql-reference/functions/array-join/) function to expand an array into a set of rows.
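+
+A minimal illustration of `arrayJoin` on a literal array:
+
+```sql
+-- Produces two rows, 'salt' and 'sugar', instead of one row with an array
+SELECT arrayJoin(['salt', 'sugar']) AS ingredient;
+```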
+
+Query:
+
+``` sql
+SELECT
+ arrayJoin(NER) AS k,
+ count() AS c
+FROM recipes
+GROUP BY k
+ORDER BY c DESC
+LIMIT 50
+```
+
+Result:
+
+``` text
+┌─k────────────────────┬──────c─┐
+│ salt │ 890741 │
+│ sugar │ 620027 │
+│ butter │ 493823 │
+│ flour │ 466110 │
+│ eggs │ 401276 │
+│ onion │ 372469 │
+│ garlic │ 358364 │
+│ milk │ 346769 │
+│ water │ 326092 │
+│ vanilla │ 270381 │
+│ olive oil │ 197877 │
+│ pepper │ 179305 │
+│ brown sugar │ 174447 │
+│ tomatoes │ 163933 │
+│ egg │ 160507 │
+│ baking powder │ 148277 │
+│ lemon juice │ 146414 │
+│ Salt │ 122557 │
+│ cinnamon │ 117927 │
+│ sour cream │ 116682 │
+│ cream cheese │ 114423 │
+│ margarine │ 112742 │
+│ celery │ 112676 │
+│ baking soda │ 110690 │
+│ parsley │ 102151 │
+│ chicken │ 101505 │
+│ onions │ 98903 │
+│ vegetable oil │ 91395 │
+│ oil │ 85600 │
+│ mayonnaise │ 84822 │
+│ pecans │ 79741 │
+│ nuts │ 78471 │
+│ potatoes │ 75820 │
+│ carrots │ 75458 │
+│ pineapple │ 74345 │
+│ soy sauce │ 70355 │
+│ black pepper │ 69064 │
+│ thyme │ 68429 │
+│ mustard │ 65948 │
+│ chicken broth │ 65112 │
+│ bacon │ 64956 │
+│ honey │ 64626 │
+│ oregano │ 64077 │
+│ ground beef │ 64068 │
+│ unsalted butter │ 63848 │
+│ mushrooms │ 61465 │
+│ Worcestershire sauce │ 59328 │
+│ cornstarch │ 58476 │
+│ green pepper │ 58388 │
+│ Cheddar cheese │ 58354 │
+└──────────────────────┴────────┘
+
+50 rows in set. Elapsed: 0.112 sec. Processed 2.23 million rows, 361.57 MB (19.99 million rows/s., 3.24 GB/s.)
+```
+
+### The Most Complex Recipes with Strawberry
+
+``` sql
+SELECT
+ title,
+ length(NER),
+ length(directions)
+FROM recipes
+WHERE has(NER, 'strawberry')
+ORDER BY length(directions) DESC
+LIMIT 10
+```
+
+Result:
+
+``` text
+┌─title────────────────────────────────────────────────────────────┬─length(NER)─┬─length(directions)─┐
+│ Chocolate-Strawberry-Orange Wedding Cake │ 24 │ 126 │
+│ Strawberry Cream Cheese Crumble Tart │ 19 │ 47 │
+│ Charlotte-Style Ice Cream │ 11 │ 45 │
+│ Sinfully Good a Million Layers Chocolate Layer Cake, With Strawb │ 31 │ 45 │
+│ Sweetened Berries With Elderflower Sherbet │ 24 │ 44 │
+│ Chocolate-Strawberry Mousse Cake │ 15 │ 42 │
+│ Rhubarb Charlotte with Strawberries and Rum │ 20 │ 42 │
+│ Chef Joey's Strawberry Vanilla Tart │ 7 │ 37 │
+│ Old-Fashioned Ice Cream Sundae Cake │ 17 │ 37 │
+│ Watermelon Cake │ 16 │ 36 │
+└──────────────────────────────────────────────────────────────────┴─────────────┴────────────────────┘
+
+10 rows in set. Elapsed: 0.215 sec. Processed 2.23 million rows, 1.48 GB (10.35 million rows/s., 6.86 GB/s.)
+```
+
+In this example, we use the [has](../../sql-reference/functions/array-functions/#hasarr-elem) function to filter by array elements, and we sort by the number of directions; a minimal illustration of `has` follows below.
+
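+As a tiny sketch with made-up literals, `has(arr, elem)` returns 1 when the element is present in the array:
+
+``` sql
+-- Returns 1, because 'strawberry' is in the array
+SELECT has(['strawberry', 'sugar'], 'strawberry') AS found;
+```
+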
+There is a wedding cake that takes a whole 126 steps to produce! Let's display those directions:
+
+Query:
+
+``` sql
+SELECT arrayJoin(directions)
+FROM recipes
+WHERE title = 'Chocolate-Strawberry-Orange Wedding Cake'
+```
+
+Result:
+
+``` text
+┌─arrayJoin(directions)───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
+│ Position 1 rack in center and 1 rack in bottom third of oven and preheat to 350F. │
+│ Butter one 5-inch-diameter cake pan with 2-inch-high sides, one 8-inch-diameter cake pan with 2-inch-high sides and one 12-inch-diameter cake pan with 2-inch-high sides. │
+│ Dust pans with flour; line bottoms with parchment. │
+│ Combine 1/3 cup orange juice and 2 ounces unsweetened chocolate in heavy small saucepan. │
+│ Stir mixture over medium-low heat until chocolate melts. │
+│ Remove from heat. │
+│ Gradually mix in 1 2/3 cups orange juice. │
+│ Sift 3 cups flour, 2/3 cup cocoa, 2 teaspoons baking soda, 1 teaspoon salt and 1/2 teaspoon baking powder into medium bowl. │
+│ using electric mixer, beat 1 cup (2 sticks) butter and 3 cups sugar in large bowl until blended (mixture will look grainy). │
+│ Add 4 eggs, 1 at a time, beating to blend after each. │
+│ Beat in 1 tablespoon orange peel and 1 tablespoon vanilla extract. │
+│ Add dry ingredients alternately with orange juice mixture in 3 additions each, beating well after each addition. │
+│ Mix in 1 cup chocolate chips. │
+│ Transfer 1 cup plus 2 tablespoons batter to prepared 5-inch pan, 3 cups batter to prepared 8-inch pan and remaining batter (about 6 cups) to 12-inch pan. │
+│ Place 5-inch and 8-inch pans on center rack of oven. │
+│ Place 12-inch pan on lower rack of oven. │
+│ Bake cakes until tester inserted into center comes out clean, about 35 minutes. │
+│ Transfer cakes in pans to racks and cool completely. │
+│ Mark 4-inch diameter circle on one 6-inch-diameter cardboard cake round. │
+│ Cut out marked circle. │
+│ Mark 7-inch-diameter circle on one 8-inch-diameter cardboard cake round. │
+│ Cut out marked circle. │
+│ Mark 11-inch-diameter circle on one 12-inch-diameter cardboard cake round. │
+│ Cut out marked circle. │
+│ Cut around sides of 5-inch-cake to loosen. │
+│ Place 4-inch cardboard over pan. │
+│ Hold cardboard and pan together; turn cake out onto cardboard. │
+│ Peel off parchment.Wrap cakes on its cardboard in foil. │
+│ Repeat turning out, peeling off parchment and wrapping cakes in foil, using 7-inch cardboard for 8-inch cake and 11-inch cardboard for 12-inch cake. │
+│ Using remaining ingredients, make 1 more batch of cake batter and bake 3 more cake layers as described above. │
+│ Cool cakes in pans. │
+│ Cover cakes in pans tightly with foil. │
+│ (Can be prepared ahead. │
+│ Let stand at room temperature up to 1 day or double-wrap all cake layers and freeze up to 1 week. │
+│ Bring cake layers to room temperature before using.) │
+│ Place first 12-inch cake on its cardboard on work surface. │
+│ Spread 2 3/4 cups ganache over top of cake and all the way to edge. │
+│ Spread 2/3 cup jam over ganache, leaving 1/2-inch chocolate border at edge. │
+│ Drop 1 3/4 cups white chocolate frosting by spoonfuls over jam. │
+│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
+│ Rub some cocoa powder over second 12-inch cardboard. │
+│ Cut around sides of second 12-inch cake to loosen. │
+│ Place cardboard, cocoa side down, over pan. │
+│ Turn cake out onto cardboard. │
+│ Peel off parchment. │
+│ Carefully slide cake off cardboard and onto filling on first 12-inch cake. │
+│ Refrigerate. │
+│ Place first 8-inch cake on its cardboard on work surface. │
+│ Spread 1 cup ganache over top all the way to edge. │
+│ Spread 1/4 cup jam over, leaving 1/2-inch chocolate border at edge. │
+│ Drop 1 cup white chocolate frosting by spoonfuls over jam. │
+│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
+│ Rub some cocoa over second 8-inch cardboard. │
+│ Cut around sides of second 8-inch cake to loosen. │
+│ Place cardboard, cocoa side down, over pan. │
+│ Turn cake out onto cardboard. │
+│ Peel off parchment. │
+│ Slide cake off cardboard and onto filling on first 8-inch cake. │
+│ Refrigerate. │
+│ Place first 5-inch cake on its cardboard on work surface. │
+│ Spread 1/2 cup ganache over top of cake and all the way to edge. │
+│ Spread 2 tablespoons jam over, leaving 1/2-inch chocolate border at edge. │
+│ Drop 1/3 cup white chocolate frosting by spoonfuls over jam. │
+│ Gently spread frosting over jam, leaving 1/2-inch chocolate border at edge. │
+│ Rub cocoa over second 6-inch cardboard. │
+│ Cut around sides of second 5-inch cake to loosen. │
+│ Place cardboard, cocoa side down, over pan. │
+│ Turn cake out onto cardboard. │
+│ Peel off parchment. │
+│ Slide cake off cardboard and onto filling on first 5-inch cake. │
+│ Chill all cakes 1 hour to set filling. │
+│ Place 12-inch tiered cake on its cardboard on revolving cake stand. │
+│ Spread 2 2/3 cups frosting over top and sides of cake as a first coat. │
+│ Refrigerate cake. │
+│ Place 8-inch tiered cake on its cardboard on cake stand. │
+│ Spread 1 1/4 cups frosting over top and sides of cake as a first coat. │
+│ Refrigerate cake. │
+│ Place 5-inch tiered cake on its cardboard on cake stand. │
+│ Spread 3/4 cup frosting over top and sides of cake as a first coat. │
+│ Refrigerate all cakes until first coats of frosting set, about 1 hour. │
+│ (Cakes can be made to this point up to 1 day ahead; cover and keep refrigerate.) │
+│ Prepare second batch of frosting, using remaining frosting ingredients and following directions for first batch. │
+│ Spoon 2 cups frosting into pastry bag fitted with small star tip. │
+│ Place 12-inch cake on its cardboard on large flat platter. │
+│ Place platter on cake stand. │
+│ Using icing spatula, spread 2 1/2 cups frosting over top and sides of cake; smooth top. │
+│ Using filled pastry bag, pipe decorative border around top edge of cake. │
+│ Refrigerate cake on platter. │
+│ Place 8-inch cake on its cardboard on cake stand. │
+│ Using icing spatula, spread 1 1/2 cups frosting over top and sides of cake; smooth top. │
+│ Using pastry bag, pipe decorative border around top edge of cake. │
+│ Refrigerate cake on its cardboard. │
+│ Place 5-inch cake on its cardboard on cake stand. │
+│ Using icing spatula, spread 3/4 cup frosting over top and sides of cake; smooth top. │
+│ Using pastry bag, pipe decorative border around top edge of cake, spooning more frosting into bag if necessary. │
+│ Refrigerate cake on its cardboard. │
+│ Keep all cakes refrigerated until frosting sets, about 2 hours. │
+│ (Can be prepared 2 days ahead. │
+│ Cover loosely; keep refrigerated.) │
+│ Place 12-inch cake on platter on work surface. │
+│ Press 1 wooden dowel straight down into and completely through center of cake. │
+│ Mark dowel 1/4 inch above top of frosting. │
+│ Remove dowel and cut with serrated knife at marked point. │
+│ Cut 4 more dowels to same length. │
+│ Press 1 cut dowel back into center of cake. │
+│ Press remaining 4 cut dowels into cake, positioning 3 1/2 inches inward from cake edges and spacing evenly. │
+│ Place 8-inch cake on its cardboard on work surface. │
+│ Press 1 dowel straight down into and completely through center of cake. │
+│ Mark dowel 1/4 inch above top of frosting. │
+│ Remove dowel and cut with serrated knife at marked point. │
+│ Cut 3 more dowels to same length. │
+│ Press 1 cut dowel back into center of cake. │
+│ Press remaining 3 cut dowels into cake, positioning 2 1/2 inches inward from edges and spacing evenly. │
+│ Using large metal spatula as aid, place 8-inch cake on its cardboard atop dowels in 12-inch cake, centering carefully. │
+│ Gently place 5-inch cake on its cardboard atop dowels in 8-inch cake, centering carefully. │
+│ Using citrus stripper, cut long strips of orange peel from oranges. │
+│ Cut strips into long segments. │
+│ To make orange peel coils, wrap peel segment around handle of wooden spoon; gently slide peel off handle so that peel keeps coiled shape. │
+│ Garnish cake with orange peel coils, ivy or mint sprigs, and some berries. │
+│ (Assembled cake can be made up to 8 hours ahead. │
+│ Let stand at cool room temperature.) │
+│ Remove top and middle cake tiers. │
+│ Remove dowels from cakes. │
+│ Cut top and middle cakes into slices. │
+│ To cut 12-inch cake: Starting 3 inches inward from edge and inserting knife straight down, cut through from top to bottom to make 6-inch-diameter circle in center of cake. │
+│ Cut outer portion of cake into slices; cut inner portion into slices and serve with strawberries. │
+└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
+
+126 rows in set. Elapsed: 0.011 sec. Processed 8.19 thousand rows, 5.34 MB (737.75 thousand rows/s., 480.59 MB/s.)
+```
+
+### Online Playground
+
+The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUCiAgICBhcnJheUpvaW4oTkVSKSBBUyBrLAogICAgY291bnQoKSBBUyBjCkZST00gcmVjaXBlcwpHUk9VUCBCWSBrCk9SREVSIEJZIGMgREVTQwpMSU1JVCA1MA==).
+
+[Original article](https://clickhouse.com/docs/en/getting-started/example-datasets/recipes/)
diff --git a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
index 1583af60843..058f0ae421a 100644
--- a/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
+++ b/docs/zh/getting-started/example-datasets/uk-price-paid.mdx
@@ -1,10 +1,450 @@
---
slug: /zh/getting-started/example-datasets/uk-price-paid
-sidebar_label: UK Property Price Paid
+sidebar_label: 英国房地产支付价格
sidebar_position: 1
-title: "UK Property Price Paid"
+title: "英国房地产支付价格"
---
-import Content from '@site/docs/en/getting-started/example-datasets/uk-price-paid.md';
+The dataset contains data about prices paid for real-estate property in England and Wales since 1995. The uncompressed size is about 4 GiB, and it takes about 278 MiB in ClickHouse.
-<Content />
+Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
+Description of the fields: https://www.gov.uk/guidance/about-the-price-data
+
+Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
+
+## Create the Table {#create-table}
+
+```sql
+CREATE TABLE uk_price_paid
+(
+ price UInt32,
+ date Date,
+ postcode1 LowCardinality(String),
+ postcode2 LowCardinality(String),
+ type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
+ is_new UInt8,
+ duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
+ addr1 String,
+ addr2 String,
+ street LowCardinality(String),
+ locality LowCardinality(String),
+ town LowCardinality(String),
+ district LowCardinality(String),
+ county LowCardinality(String)
+)
+ENGINE = MergeTree
+ORDER BY (postcode1, postcode2, addr1, addr2);
+```
+
+## Preprocess and Import the Data {#preprocess-import-data}
+
+We will use the `url` function to stream the data into ClickHouse. Some of the incoming data needs to be preprocessed first, which includes:
+
+- splitting the `postcode` into two different columns - `postcode1` and `postcode2`, which is better for storage and queries
+- converting the `time` field to a date, because it only contains 00:00 time
+- ignoring the [UUID](/docs/zh/sql-reference/data-types/uuid.md) field, because we do not need it for analysis
+- transforming the `Enum` fields `type` and `duration` into more readable `Enum` fields using the [transform](/docs/zh/sql-reference/functions/other-functions.md#transform) function (see the short sketch after this list)
+- transforming the `is_new` field from a single-character string (`Y`/`N`) to a [UInt8](/docs/zh/sql-reference/data-types/int-uint.md#uint8-uint16-uint32-uint64-uint256-int8-int16-int32-int64-int128-int256) field with values 0 or 1
+- dropping the last two columns, since they all have the same value (which is 0)
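+
+As a small standalone sketch of two helpers used in the insert below (the postcode and type literals are made-up examples), `splitByChar` splits a string into an array, and the three-argument form of `transform` maps values through a lookup:
+
+```sql
+SELECT
+    splitByChar(' ', 'SW19 5AE') AS parts,   -- ['SW19', '5AE']
+    transform('T',
+              ['T', 'S', 'D', 'F', 'O'],
+              ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type
+```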
+
+The `url` function streams data from a web server into ClickHouse. The following command inserts about 27 million rows into the `uk_price_paid` table:
+
+```sql
+INSERT INTO uk_price_paid
+WITH
+ splitByChar(' ', postcode) AS p
+SELECT
+ toUInt32(price_string) AS price,
+ parseDateTimeBestEffortUS(time) AS date,
+ p[1] AS postcode1,
+ p[2] AS postcode2,
+ transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
+ b = 'Y' AS is_new,
+ transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
+ addr1,
+ addr2,
+ street,
+ locality,
+ town,
+ district,
+ county
+FROM url(
+ 'http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv',
+ 'CSV',
+ 'uuid_string String,
+ price_string String,
+ time String,
+ postcode String,
+ a String,
+ b String,
+ c String,
+ addr1 String,
+ addr2 String,
+ street String,
+ locality String,
+ town String,
+ district String,
+ county String,
+ d String,
+ e String'
+) SETTINGS max_http_get_redirects=10;
+```
+
+The data takes a minute or two to insert, depending on the network speed.
+
+## Validate the Data {#validate-data}
+
+Let's verify it worked by checking how many rows were inserted:
+
+```sql
+SELECT count()
+FROM uk_price_paid
+```
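+
+The output should look roughly like this (a sketch; the exact count grows as new data is published):
+
+```response
+┌──count()─┐
+│ 27450499 │
+└──────────┘
+```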
+
+At the time this was written, the dataset had 27,450,499 rows. Let's see how much storage the table uses in ClickHouse:
+
+```sql
+SELECT formatReadableSize(total_bytes)
+FROM system.tables
+WHERE name = 'uk_price_paid'
+```
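+
+A hedged sketch of the expected output (the exact size depends on the server version and the amount of data loaded):
+
+```response
+┌─formatReadableSize(total_bytes)─┐
+│ 221.43 MiB                      │
+└─────────────────────────────────┘
+```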
+
+Notice the size of the table is just 221.43 MiB!
+
+## Run Some Queries {#run-queries}
+
+Let's run some queries to analyze the data:
+
+### Query 1. Average Price Per Year {#average-price}
+
+```sql
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+    bar(price, 0, 1000000, 80)
+FROM uk_price_paid
+GROUP BY year
+ORDER BY year
+```
+
+The result looks like:
+
+```response
+┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
+│ 1995 │ 67934 │ █████▍ │
+│ 1996 │ 71508 │ █████▋ │
+│ 1997 │ 78536 │ ██████▎ │
+│ 1998 │ 85441 │ ██████▋ │
+│ 1999 │ 96038 │ ███████▋ │
+│ 2000 │ 107487 │ ████████▌ │
+│ 2001 │ 118888 │ █████████▌ │
+│ 2002 │ 137948 │ ███████████ │
+│ 2003 │ 155893 │ ████████████▍ │
+│ 2004 │ 178888 │ ██████████████▎ │
+│ 2005 │ 189359 │ ███████████████▏ │
+│ 2006 │ 203532 │ ████████████████▎ │
+│ 2007 │ 219375 │ █████████████████▌ │
+│ 2008 │ 217056 │ █████████████████▎ │
+│ 2009 │ 213419 │ █████████████████ │
+│ 2010 │ 236110 │ ██████████████████▊ │
+│ 2011 │ 232805 │ ██████████████████▌ │
+│ 2012 │ 238381 │ ███████████████████ │
+│ 2013 │ 256927 │ ████████████████████▌ │
+│ 2014 │ 280008 │ ██████████████████████▍ │
+│ 2015 │ 297263 │ ███████████████████████▋ │
+│ 2016 │ 313518 │ █████████████████████████ │
+│ 2017 │ 346371 │ ███████████████████████████▋ │
+│ 2018 │ 350556 │ ████████████████████████████ │
+│ 2019 │ 352184 │ ████████████████████████████▏ │
+│ 2020 │ 375808 │ ██████████████████████████████ │
+│ 2021 │ 381105 │ ██████████████████████████████▍ │
+│ 2022 │ 362572 │ █████████████████████████████ │
+└──────┴────────┴────────────────────────────────────────┘
+```
+
+### Query 2. Average Price per Year in London {#average-price-london}
+
+```sql
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+    bar(price, 0, 2000000, 100)
+FROM uk_price_paid
+WHERE town = 'LONDON'
+GROUP BY year
+ORDER BY year
+```
+
+The result looks like:
+
+```response
+┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
+│ 1995 │ 109110 │ █████▍ │
+│ 1996 │ 118659 │ █████▊ │
+│ 1997 │ 136526 │ ██████▋ │
+│ 1998 │ 153002 │ ███████▋ │
+│ 1999 │ 180633 │ █████████ │
+│ 2000 │ 215849 │ ██████████▋ │
+│ 2001 │ 232987 │ ███████████▋ │
+│ 2002 │ 263668 │ █████████████▏ │
+│ 2003 │ 278424 │ █████████████▊ │
+│ 2004 │ 304664 │ ███████████████▏ │
+│ 2005 │ 322887 │ ████████████████▏ │
+│ 2006 │ 356195 │ █████████████████▋ │
+│ 2007 │ 404062 │ ████████████████████▏ │
+│ 2008 │ 420741 │ █████████████████████ │
+│ 2009 │ 427754 │ █████████████████████▍ │
+│ 2010 │ 480322 │ ████████████████████████ │
+│ 2011 │ 496278 │ ████████████████████████▋ │
+│ 2012 │ 519482 │ █████████████████████████▊ │
+│ 2013 │ 616195 │ ██████████████████████████████▋ │
+│ 2014 │ 724121 │ ████████████████████████████████████▏ │
+│ 2015 │ 792101 │ ███████████████████████████████████████▌ │
+│ 2016 │ 843589 │ ██████████████████████████████████████████▏ │
+│ 2017 │ 983523 │ █████████████████████████████████████████████████▏ │
+│ 2018 │ 1016753 │ ██████████████████████████████████████████████████▋ │
+│ 2019 │ 1041673 │ ████████████████████████████████████████████████████ │
+│ 2020 │ 1060027 │ █████████████████████████████████████████████████████ │
+│ 2021 │ 958249 │ ███████████████████████████████████████████████▊ │
+│ 2022 │ 902596 │ █████████████████████████████████████████████▏ │
+└──────┴─────────┴───────────────────────────────────────────────────────┘
+```
+
+Something happened to home prices in 2020! But that is probably not a surprise...
+
+### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods}
+
+```sql
+SELECT
+ town,
+ district,
+ count() AS c,
+ round(avg(price)) AS price,
+ bar(price, 0, 5000000, 100)
+FROM uk_price_paid
+WHERE date >= '2020-01-01'
+GROUP BY
+ town,
+ district
+HAVING c >= 100
+ORDER BY price DESC
+LIMIT 100
+```
+
+The result looks like:
+
+```response
+┌─town─────────────────┬─district───────────────┬─────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)─────────────────────────┐
+│ LONDON │ CITY OF LONDON │ 578 │ 3149590 │ ██████████████████████████████████████████████████████████████▊ │
+│ LONDON │ CITY OF WESTMINSTER │ 7083 │ 2903794 │ ██████████████████████████████████████████████████████████ │
+│ LONDON │ KENSINGTON AND CHELSEA │ 4986 │ 2333782 │ ██████████████████████████████████████████████▋ │
+│ LEATHERHEAD │ ELMBRIDGE │ 203 │ 2071595 │ █████████████████████████████████████████▍ │
+│ VIRGINIA WATER │ RUNNYMEDE │ 308 │ 1939465 │ ██████████████████████████████████████▋ │
+│ LONDON │ CAMDEN │ 5750 │ 1673687 │ █████████████████████████████████▍ │
+│ WINDLESHAM │ SURREY HEATH │ 182 │ 1428358 │ ████████████████████████████▌ │
+│ NORTHWOOD │ THREE RIVERS │ 112 │ 1404170 │ ████████████████████████████ │
+│ BARNET │ ENFIELD │ 259 │ 1338299 │ ██████████████████████████▋ │
+│ LONDON │ ISLINGTON │ 5504 │ 1275520 │ █████████████████████████▌ │
+│ LONDON │ RICHMOND UPON THAMES │ 1345 │ 1261935 │ █████████████████████████▏ │
+│ COBHAM │ ELMBRIDGE │ 727 │ 1251403 │ █████████████████████████ │
+│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 680 │ 1199970 │ ███████████████████████▊ │
+│ LONDON │ TOWER HAMLETS │ 10012 │ 1157827 │ ███████████████████████▏ │
+│ LONDON │ HOUNSLOW │ 1278 │ 1144389 │ ██████████████████████▊ │
+│ BURFORD │ WEST OXFORDSHIRE │ 182 │ 1139393 │ ██████████████████████▋ │
+│ RICHMOND │ RICHMOND UPON THAMES │ 1649 │ 1130076 │ ██████████████████████▌ │
+│ KINGSTON UPON THAMES │ RICHMOND UPON THAMES │ 147 │ 1126111 │ ██████████████████████▌ │
+│ ASCOT │ WINDSOR AND MAIDENHEAD │ 773 │ 1106109 │ ██████████████████████ │
+│ LONDON │ HAMMERSMITH AND FULHAM │ 6162 │ 1056198 │ █████████████████████ │
+│ RADLETT │ HERTSMERE │ 513 │ 1045758 │ ████████████████████▊ │
+│ LEATHERHEAD │ GUILDFORD │ 354 │ 1045175 │ ████████████████████▊ │
+│ WEYBRIDGE │ ELMBRIDGE │ 1275 │ 1036702 │ ████████████████████▋ │
+│ FARNHAM │ EAST HAMPSHIRE │ 107 │ 1033682 │ ████████████████████▋ │
+│ ESHER │ ELMBRIDGE │ 915 │ 1032753 │ ████████████████████▋ │
+│ FARNHAM │ HART │ 102 │ 1002692 │ ████████████████████ │
+│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 845 │ 983639 │ ███████████████████▋ │
+│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 286 │ 973993 │ ███████████████████▍ │
+│ SALCOMBE │ SOUTH HAMS │ 215 │ 965724 │ ███████████████████▎ │
+│ SURBITON │ ELMBRIDGE │ 181 │ 960346 │ ███████████████████▏ │
+│ BROCKENHURST │ NEW FOREST │ 226 │ 951278 │ ███████████████████ │
+│ SUTTON COLDFIELD │ LICHFIELD │ 110 │ 930757 │ ██████████████████▌ │
+│ EAST MOLESEY │ ELMBRIDGE │ 372 │ 927026 │ ██████████████████▌ │
+│ LLANGOLLEN │ WREXHAM │ 127 │ 925681 │ ██████████████████▌ │
+│ OXFORD │ SOUTH OXFORDSHIRE │ 638 │ 923830 │ ██████████████████▍ │
+│ LONDON │ MERTON │ 4383 │ 923194 │ ██████████████████▍ │
+│ GUILDFORD │ WAVERLEY │ 261 │ 905733 │ ██████████████████ │
+│ TEDDINGTON │ RICHMOND UPON THAMES │ 1147 │ 894856 │ █████████████████▊ │
+│ HARPENDEN │ ST ALBANS │ 1271 │ 893079 │ █████████████████▋ │
+│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 1042 │ 887557 │ █████████████████▋ │
+│ POTTERS BAR │ WELWYN HATFIELD │ 314 │ 863037 │ █████████████████▎ │
+│ LONDON │ WANDSWORTH │ 13210 │ 857318 │ █████████████████▏ │
+│ BILLINGSHURST │ CHICHESTER │ 255 │ 856508 │ █████████████████▏ │
+│ LONDON │ SOUTHWARK │ 7742 │ 843145 │ ████████████████▋ │
+│ LONDON │ HACKNEY │ 6656 │ 839716 │ ████████████████▋ │
+│ LUTTERWORTH │ HARBOROUGH │ 1096 │ 836546 │ ████████████████▋ │
+│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 1846 │ 828990 │ ████████████████▌ │
+│ LONDON │ EALING │ 5583 │ 820135 │ ████████████████▍ │
+│ INGATESTONE │ CHELMSFORD │ 120 │ 815379 │ ████████████████▎ │
+│ MARLOW │ BUCKINGHAMSHIRE │ 718 │ 809943 │ ████████████████▏ │
+│ EAST GRINSTEAD │ TANDRIDGE │ 105 │ 809461 │ ████████████████▏ │
+│ CHIGWELL │ EPPING FOREST │ 484 │ 809338 │ ████████████████▏ │
+│ EGHAM │ RUNNYMEDE │ 989 │ 807858 │ ████████████████▏ │
+│ HASLEMERE │ CHICHESTER │ 223 │ 804173 │ ████████████████ │
+│ PETWORTH │ CHICHESTER │ 288 │ 803206 │ ████████████████ │
+│ TWICKENHAM │ RICHMOND UPON THAMES │ 2194 │ 802616 │ ████████████████ │
+│ WEMBLEY │ BRENT │ 1698 │ 801733 │ ████████████████ │
+│ HINDHEAD │ WAVERLEY │ 233 │ 801482 │ ████████████████ │
+│ LONDON │ BARNET │ 8083 │ 792066 │ ███████████████▋ │
+│ WOKING │ GUILDFORD │ 343 │ 789360 │ ███████████████▋ │
+│ STOCKBRIDGE │ TEST VALLEY │ 318 │ 777909 │ ███████████████▌ │
+│ BERKHAMSTED │ DACORUM │ 1049 │ 776138 │ ███████████████▌ │
+│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 236 │ 775572 │ ███████████████▌ │
+│ SOLIHULL │ STRATFORD-ON-AVON │ 142 │ 770727 │ ███████████████▍ │
+│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 431 │ 764493 │ ███████████████▎ │
+│ TADWORTH │ REIGATE AND BANSTEAD │ 920 │ 757511 │ ███████████████▏ │
+│ LONDON │ BRENT │ 4124 │ 757194 │ ███████████████▏ │
+│ THAMES DITTON │ ELMBRIDGE │ 470 │ 750828 │ ███████████████ │
+│ LONDON │ LAMBETH │ 10431 │ 750532 │ ███████████████ │
+│ RICKMANSWORTH │ THREE RIVERS │ 1500 │ 747029 │ ██████████████▊ │
+│ KINGS LANGLEY │ DACORUM │ 281 │ 746536 │ ██████████████▊ │
+│ HARLOW │ EPPING FOREST │ 172 │ 739423 │ ██████████████▋ │
+│ TONBRIDGE │ SEVENOAKS │ 103 │ 738740 │ ██████████████▋ │
+│ BELVEDERE │ BEXLEY │ 686 │ 736385 │ ██████████████▋ │
+│ CRANBROOK │ TUNBRIDGE WELLS │ 769 │ 734328 │ ██████████████▋ │
+│ SOLIHULL │ WARWICK │ 116 │ 733286 │ ██████████████▋ │
+│ ALDERLEY EDGE │ CHESHIRE EAST │ 357 │ 732882 │ ██████████████▋ │
+│ WELWYN │ WELWYN HATFIELD │ 404 │ 730281 │ ██████████████▌ │
+│ CHISLEHURST │ BROMLEY │ 870 │ 730279 │ ██████████████▌ │
+│ LONDON │ HARINGEY │ 6488 │ 726715 │ ██████████████▌ │
+│ AMERSHAM │ BUCKINGHAMSHIRE │ 965 │ 725426 │ ██████████████▌ │
+│ SEVENOAKS │ SEVENOAKS │ 2183 │ 725102 │ ██████████████▌ │
+│ BOURNE END │ BUCKINGHAMSHIRE │ 269 │ 724595 │ ██████████████▍ │
+│ NORTHWOOD │ HILLINGDON │ 568 │ 722436 │ ██████████████▍ │
+│ PURFLEET │ THURROCK │ 143 │ 722205 │ ██████████████▍ │
+│ SLOUGH │ BUCKINGHAMSHIRE │ 832 │ 721529 │ ██████████████▍ │
+│ INGATESTONE │ BRENTWOOD │ 301 │ 718292 │ ██████████████▎ │
+│ EPSOM │ REIGATE AND BANSTEAD │ 315 │ 709264 │ ██████████████▏ │
+│ ASHTEAD │ MOLE VALLEY │ 524 │ 708646 │ ██████████████▏ │
+│ BETCHWORTH │ MOLE VALLEY │ 155 │ 708525 │ ██████████████▏ │
+│ OXTED │ TANDRIDGE │ 645 │ 706946 │ ██████████████▏ │
+│ READING │ SOUTH OXFORDSHIRE │ 593 │ 705466 │ ██████████████ │
+│ FELTHAM │ HOUNSLOW │ 1536 │ 703815 │ ██████████████ │
+│ TUNBRIDGE WELLS │ WEALDEN │ 207 │ 703296 │ ██████████████ │
+│ LEWES │ WEALDEN │ 116 │ 701349 │ ██████████████ │
+│ OXFORD │ OXFORD │ 3656 │ 700813 │ ██████████████ │
+│ MAYFIELD │ WEALDEN │ 177 │ 698158 │ █████████████▊ │
+│ PINNER │ HARROW │ 997 │ 697876 │ █████████████▊ │
+│ LECHLADE │ COTSWOLD │ 155 │ 696262 │ █████████████▊ │
+│ WALTON-ON-THAMES │ ELMBRIDGE │ 1850 │ 690102 │ █████████████▋ │
+└──────────────────────┴────────────────────────┴───────┴─────────┴─────────────────────────────────────────────────────────────────┘
+```
+
+## Speed Up Queries Using Projections {#speedup-with-projections}
+
+[Projections](/docs/zh/sql-reference/statements/alter/projection.mdx) allow us to improve query speed by storing pre-aggregated data in whatever format we want. In this example we create a projection that keeps the average price, total price, and count of properties grouped by year, district, and town. At execution time, ClickHouse will use the projection if it thinks the projection can improve the performance of the query (when to use it is up to ClickHouse).
+
+### Build a Projection {#build-projection}
+
+Let's create an aggregate projection by the dimensions `toYear(date)`, `district`, and `town`:
+
+```sql
+ALTER TABLE uk_price_paid
+ ADD PROJECTION projection_by_year_district_town
+ (
+ SELECT
+ toYear(date),
+ district,
+ town,
+ avg(price),
+ sum(price),
+ count()
+ GROUP BY
+ toYear(date),
+ district,
+ town
+ )
+```
+
+Populate the projection for existing data. (Without materializing it, ClickHouse will create the projection only for newly inserted data):
+
+```sql
+ALTER TABLE uk_price_paid
+ MATERIALIZE PROJECTION projection_by_year_district_town
+SETTINGS mutations_sync = 1
+```
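+
+Since `mutations_sync = 1` makes the statement wait until the materialization finishes, an extra check should not normally be needed; still, a quick way to confirm that no mutation is pending is to query `system.mutations` (a sketch):
+
+```sql
+SELECT is_done, command
+FROM system.mutations
+WHERE table = 'uk_price_paid'
+```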
+
+## Test Performance {#test-performance}
+
+Let's run the same 3 queries again:
+
+### Query 1. Average Price Per Year {#average-price-projections}
+
+```sql
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+ bar(price, 0, 1000000, 80)
+FROM uk_price_paid
+GROUP BY year
+ORDER BY year ASC
+```
+
+The result is the same, but the performance is much better!
+```response
+No projection: 28 rows in set. Elapsed: 1.775 sec. Processed 27.45 million rows, 164.70 MB (15.47 million rows/s., 92.79 MB/s.)
+With projection: 28 rows in set. Elapsed: 0.665 sec. Processed 87.51 thousand rows, 3.21 MB (131.51 thousand rows/s., 4.82 MB/s.)
+```
+
+
+### Query 2. Average Price per Year in London {#average-price-london-projections}
+
+```sql
+SELECT
+ toYear(date) AS year,
+ round(avg(price)) AS price,
+ bar(price, 0, 2000000, 100)
+FROM uk_price_paid
+WHERE town = 'LONDON'
+GROUP BY year
+ORDER BY year ASC
+```
+
+Same result, but notice the improvement in query performance:
+
+```response
+No projection: 28 rows in set. Elapsed: 0.720 sec. Processed 27.45 million rows, 46.61 MB (38.13 million rows/s., 64.74 MB/s.)
+With projection: 28 rows in set. Elapsed: 0.015 sec. Processed 87.51 thousand rows, 3.51 MB (5.74 million rows/s., 230.24 MB/s.)
+```
+
+### Query 3. The Most Expensive Neighborhoods {#most-expensive-neighborhoods-projections}
+
+Note: the condition (date >= '2020-01-01') needs to be modified to (`toYear(date) >= 2020`) so that it matches the dimensions of the projection:
+
+```sql
+SELECT
+ town,
+ district,
+ count() AS c,
+ round(avg(price)) AS price,
+ bar(price, 0, 5000000, 100)
+FROM uk_price_paid
+WHERE toYear(date) >= 2020
+GROUP BY
+ town,
+ district
+HAVING c >= 100
+ORDER BY price DESC
+LIMIT 100
+```
+
+Again, the result is the same, but notice the improvement in query performance:
+
+```response
+No projection: 100 rows in set. Elapsed: 0.928 sec. Processed 27.45 million rows, 103.80 MB (29.56 million rows/s., 111.80 MB/s.)
+With projection: 100 rows in set. Elapsed: 0.336 sec. Processed 17.32 thousand rows, 1.23 MB (51.61 thousand rows/s., 3.65 MB/s.)
+```
+
+### Test It in the Playground {#playground}
+
+The dataset is also available in the [Online Playground](https://play.clickhouse.com/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).
diff --git a/docs/zh/interfaces/third-party/client-libraries.md b/docs/zh/interfaces/third-party/client-libraries.md
index d4959e37668..1b7bff02b1a 100644
--- a/docs/zh/interfaces/third-party/client-libraries.md
+++ b/docs/zh/interfaces/third-party/client-libraries.md
@@ -35,6 +35,9 @@ Yandex**没有**维护下面列出的库,也没有做过任何广泛的测试
- NodeJs
- [clickhouse (NodeJs)](https://github.com/TimonKK/clickhouse)
- [node-clickhouse](https://github.com/apla/node-clickhouse)
+ - [nestjs-clickhouse](https://github.com/depyronick/nestjs-clickhouse)
+ - [clickhouse-client](https://github.com/depyronick/clickhouse-client)
+ - [node-clickhouse-orm](https://github.com/zimv/node-clickhouse-orm)
- Perl
- [perl-DBD-ClickHouse](https://github.com/elcamlost/perl-DBD-ClickHouse)
- [HTTP-ClickHouse](https://metacpan.org/release/HTTP-ClickHouse)
diff --git a/docs/zh/introduction/distinctive-features.md b/docs/zh/introduction/distinctive-features.md
index c97ab082c09..a9097e0ecaa 100644
--- a/docs/zh/introduction/distinctive-features.md
+++ b/docs/zh/introduction/distinctive-features.md
@@ -67,7 +67,7 @@ ClickHouse提供各种各样在允许牺牲数据精度的情况下对查询进
2. 基于数据的部分样本进行近似查询。这时,仅会从磁盘检索少部分比例的数据。
3. 不使用全部的聚合条件,通过随机选择有限个数据聚合条件进行聚合。这在数据聚合条件满足某些分布条件下,在提供相当准确的聚合结果的同时降低了计算资源的使用。
-## Adaptive Join Algorithm {#adaptive-join-algorithm}
+## 自适应连接算法 {#adaptive-join-algorithm}
ClickHouse支持自定义[JOIN](../sql-reference/statements/select/join.md)多个表,它更倾向于散列连接算法,如果有多个大表,则使用合并-连接算法
diff --git a/docs/zh/operations/system-tables/crash-log.md b/docs/zh/operations/system-tables/crash-log.md
index d0ed406fa0c..06087a34f35 100644
--- a/docs/zh/operations/system-tables/crash-log.md
+++ b/docs/zh/operations/system-tables/crash-log.md
@@ -7,8 +7,8 @@ slug: /zh/operations/system-tables/crash-log
列信息:
-- `event_date` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件日期.
-- `event_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 事件时间.
+- `event_date` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件日期.
+- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 事件时间.
- `timestamp_ns` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 以纳秒为单位的事件时间戳.
- `signal` ([Int32](../../sql-reference/data-types/int-uint.md)) — 信号编号.
- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — 线程ID.
diff --git a/docs/zh/operations/system-tables/mutations.md b/docs/zh/operations/system-tables/mutations.md
index dbce0a59063..f5f82c1717a 100644
--- a/docs/zh/operations/system-tables/mutations.md
+++ b/docs/zh/operations/system-tables/mutations.md
@@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/mutations
- `command` ([String](../../sql-reference/data-types/string.md)) — mutation命令字符串(`ALTER TABLE [db.]table`语句之后的部分)。
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — mutation命令提交执行的日期和时间。
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — mutation命令提交执行的日期和时间。
- `block_numbers.partition_id` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — 对于复制表的mutation,该数组包含分区的ID(每个分区都有一条记录)。对于非复制表的mutation,该数组为空。
@@ -39,7 +39,7 @@ slug: /zh/operations/system-tables/mutations
- `latest_failed_part`([String](../../sql-reference/data-types/string.md)) — 最近不能mutation的part的名称。
-- `latest_fail_time`([Datetime](../../sql-reference/data-types/datetime.md)) — 最近的一个mutation失败的时间。
+- `latest_fail_time`([DateTime](../../sql-reference/data-types/datetime.md)) — 最近的一个mutation失败的时间。
- `latest_fail_reason`([String](../../sql-reference/data-types/string.md)) — 导致最近part的mutation失败的异常消息。
diff --git a/docs/zh/operations/system-tables/replication_queue.md b/docs/zh/operations/system-tables/replication_queue.md
index e82569e378d..95a183cf9f7 100644
--- a/docs/zh/operations/system-tables/replication_queue.md
+++ b/docs/zh/operations/system-tables/replication_queue.md
@@ -29,7 +29,7 @@ slug: /zh/operations/system-tables/replication_queue
- `MUTATE_PART` — 对分片应用一个或多个突变.
- `ALTER_METADATA` — 根据全局 /metadata 和 /columns 路径应用alter修改.
-- `create_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 提交任务执行的日期和时间.
+- `create_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 提交任务执行的日期和时间.
- `required_quorum` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 等待任务完成并确认完成的副本数. 此列仅与 `GET_PARTS` 任务相关.
@@ -47,13 +47,13 @@ slug: /zh/operations/system-tables/replication_queue
- `last_exception` ([String](../../sql-reference/data-types/string.md)) — 发生的最后一个错误的短信(如果有).
-- `last_attempt_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次尝试任务的日期和时间.
+- `last_attempt_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 上次尝试任务的日期和时间.
- `num_postponed` ([UInt32](../../sql-reference/data-types/int-uint.md)) — 延期任务数.
- `postpone_reason` ([String](../../sql-reference/data-types/string.md)) — 任务延期的原因.
-- `last_postpone_time` ([Datetime](../../sql-reference/data-types/datetime.md)) — 上次推迟任务的日期和时间.
+- `last_postpone_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — 上次推迟任务的日期和时间.
- `merge_type` ([String](../../sql-reference/data-types/string.md)) — 当前合并的类型. 如果是突变则为空.
diff --git a/docs/zh/sql-reference/ansi.md b/docs/zh/sql-reference/ansi.md
index 9cf335f89ef..cdccee0084f 100644
--- a/docs/zh/sql-reference/ansi.md
+++ b/docs/zh/sql-reference/ansi.md
@@ -152,7 +152,7 @@ sidebar_label: "ANSI\u517C\u5BB9\u6027"
| F051-02 | TIME(时间)数据类型(并支持用于表达时间的字面量),小数秒精度至少为0 | 否 {.text-danger} | |
| F051-03 | 时间戳数据类型(并支持用于表达时间戳的字面量),小数秒精度至少为0和6 | 是 {.text-danger} | |
| F051-04 | 日期、时间和时间戳数据类型的比较谓词 | 是 {.text-success} | |
-| F051-05 | Datetime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | |
+| F051-05 | DateTime 类型和字符串形式表达的时间之间的显式转换 | 是 {.text-success} | |
| F051-06 | CURRENT_DATE | 否 {.text-danger} | 使用`today()`替代 |
| F051-07 | LOCALTIME | 否 {.text-danger} | 使用`now()`替代 |
| F051-08 | LOCALTIMESTAMP | 否 {.text-danger} | |
diff --git a/docs/zh/sql-reference/data-types/date.md b/docs/zh/sql-reference/data-types/date.md
index 9b1acdbe939..a8874151e75 100644
--- a/docs/zh/sql-reference/data-types/date.md
+++ b/docs/zh/sql-reference/data-types/date.md
@@ -3,7 +3,7 @@ slug: /zh/sql-reference/data-types/date
---
# 日期 {#date}
-日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2149年,但最终完全支持的年份为2148)。最小值输出为1970-01-01。
+日期类型,用两个字节存储,表示从 1970-01-01 (无符号) 到当前的日期值。允许存储从 Unix 纪元开始到编译阶段定义的上限阈值常量(目前上限是2149年,但最终完全支持的年份为2148)。最小值输出为1970-01-01。
值的范围: \[1970-01-01, 2149-06-06\]。
diff --git a/docs/zh/sql-reference/data-types/datetime64.md b/docs/zh/sql-reference/data-types/datetime64.md
index ee2d7a6f258..24888645cba 100644
--- a/docs/zh/sql-reference/data-types/datetime64.md
+++ b/docs/zh/sql-reference/data-types/datetime64.md
@@ -6,7 +6,7 @@ sidebar_position: 49
sidebar_label: DateTime64
---
-# Datetime64 {#data_type-datetime64}
+# DateTime64 {#data_type-datetime64}
此类型允许以日期(date)加时间(time)的形式来存储一个时刻的时间值,具有定义的亚秒精度
diff --git a/docs/zh/sql-reference/functions/date-time-functions.md b/docs/zh/sql-reference/functions/date-time-functions.md
index c666d01d15f..4bbd0e5b69b 100644
--- a/docs/zh/sql-reference/functions/date-time-functions.md
+++ b/docs/zh/sql-reference/functions/date-time-functions.md
@@ -539,7 +539,7 @@ date_trunc(unit, value[, timezone])
- 按指定的单位向前取整后的DateTime。
-类型: [Datetime](../../sql-reference/data-types/datetime.md).
+类型: [DateTime](../../sql-reference/data-types/datetime.md).
**示例**
@@ -850,7 +850,7 @@ now([timezone])
- 当前日期和时间。
-类型: [Datetime](../../sql-reference/data-types/datetime.md).
+类型: [DateTime](../../sql-reference/data-types/datetime.md).
**示例**
diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md
index a475420ba64..62d2a377ff1 100644
--- a/docs/zh/sql-reference/functions/other-functions.md
+++ b/docs/zh/sql-reference/functions/other-functions.md
@@ -237,7 +237,7 @@ ORDER BY c DESC
``` sql
SELECT
- transform(domain(Referer), ['yandex.ru', 'google.ru', 'vk.com'], ['www.yandex', 'example.com']) AS s,
+ transform(domain(Referer), ['yandex.ru', 'google.ru', 'vkontakte.ru'], ['www.yandex', 'example.com', 'vk.com']) AS s,
count() AS c
FROM test.hits
GROUP BY domain(Referer)
diff --git a/docs/zh/sql-reference/statements/alter.md b/docs/zh/sql-reference/statements/alter.md
index 23edfd633db..fd73be4fd93 100644
--- a/docs/zh/sql-reference/statements/alter.md
+++ b/docs/zh/sql-reference/statements/alter.md
@@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
不支持对primary key或者sampling key中的列(在 `ENGINE` 表达式中用到的列)进行删除操作。改变包含在primary key中的列的类型时,如果操作不会导致数据的变化(例如,往Enum中添加一个值,或者将`DateTime` 类型改成 `UInt32`),那么这种操作是可行的。
-如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#insert_query_insert-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。
+如果 `ALTER` 操作不足以完成你想要的表变动操作,你可以创建一张新的表,通过 [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select)将数据拷贝进去,然后通过 [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename)将新的表改成和原有表一样的名称,并删除原有的表。你可以使用 [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) 代替 `INSERT SELECT`。
`ALTER` 操作会阻塞对表的所有读写操作。换句话说,当一个大的 `SELECT` 语句和 `ALTER`同时执行时,`ALTER`会等待,直到 `SELECT` 执行结束。与此同时,当 `ALTER` 运行时,新的 sql 语句将会等待。
diff --git a/docs/zh/sql-reference/statements/create/view.md b/docs/zh/sql-reference/statements/create/view.md
index 12ffe35dde0..be2f8d6ded3 100644
--- a/docs/zh/sql-reference/statements/create/view.md
+++ b/docs/zh/sql-reference/statements/create/view.md
@@ -164,23 +164,6 @@ SELECT * FROM [db.]live_view WHERE ...
您可以使用`ALTER LIVE VIEW [db.]table_name REFRESH`语法.
-### WITH TIMEOUT条件 {#live-view-with-timeout}
-
-当使用`WITH TIMEOUT`子句创建实时视图时,[WATCH](../../../sql-reference/statements/watch.md)观察实时视图的查询。
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AS SELECT ...
-```
-
-如果未指定超时值,则由指定的值[temporary_live_view_timeout](../../../operations/settings/settings.md#temporary-live-view-timeout)决定.
-
-**示例:**
-
-```sql
-CREATE TABLE mt (x Int8) Engine = MergeTree ORDER BY x;
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AS SELECT sum(x) FROM mt;
-```
-
### WITH REFRESH条件 {#live-view-with-refresh}
当使用`WITH REFRESH`子句创建实时视图时,它将在自上次刷新或触发后经过指定的秒数后自动刷新。
@@ -210,20 +193,6 @@ WATCH lv
└─────────────────────┴──────────┘
```
-您可以使用`AND`子句组合`WITH TIMEOUT`和`WITH REFRESH`子句。
-
-```sql
-CREATE LIVE VIEW [db.]table_name WITH TIMEOUT [value_in_sec] AND REFRESH [value_in_sec] AS SELECT ...
-```
-
-**示例:**
-
-```sql
-CREATE LIVE VIEW lv WITH TIMEOUT 15 AND REFRESH 5 AS SELECT now();
-```
-
-15 秒后,如果没有活动的`WATCH`查询,实时视图将自动删除。
-
```sql
WATCH lv
```
diff --git a/docs/zh/sql-reference/statements/insert-into.md b/docs/zh/sql-reference/statements/insert-into.md
index 69762bf43bc..f199329829c 100644
--- a/docs/zh/sql-reference/statements/insert-into.md
+++ b/docs/zh/sql-reference/statements/insert-into.md
@@ -90,7 +90,7 @@ INSERT INTO t FORMAT TabSeparated
如果表中有一些[限制](../../sql-reference/statements/create/table.mdx#constraints),,数据插入时会逐行进行数据校验,如果这里面包含了不符合限制条件的数据,服务将会抛出包含限制信息的异常,这个语句也会被停止执行。
-### 使用`SELECT`的结果写入 {#insert_query_insert-select}
+### 使用`SELECT`的结果写入 {#inserting-the-results-of-select}
``` sql
INSERT INTO [db.]table [(c1, c2, c3)] SELECT ...
diff --git a/packages/build b/packages/build
index c5ebf8641a3..531e068338d 100755
--- a/packages/build
+++ b/packages/build
@@ -26,8 +26,10 @@ SOURCE=${SOURCE:-$PKG_ROOT}
HELP="${0} [--test] [--rpm] [-h|--help]
--test - adds '+test' prefix to version
--apk - build APK packages
+ --archlinux - build archlinux packages
--rpm - build RPM packages
--tgz - build tarball package
+  --deb - build DEB packages
--help - show this help and exit
Used envs:
@@ -47,16 +49,21 @@ fi
export CLICKHOUSE_VERSION_STRING
-
while [[ $1 == --* ]]
do
case "$1" in
--test )
VERSION_POSTFIX+='+test'
shift ;;
+ --deb )
+ MAKE_DEB=1
+ shift ;;
--apk )
MAKE_APK=1
shift ;;
+ --archlinux )
+ MAKE_ARCHLINUX=1
+ shift ;;
--rpm )
MAKE_RPM=1
shift ;;
@@ -131,18 +138,24 @@ CLICKHOUSE_VERSION_STRING+=$VERSION_POSTFIX
echo -e "\nCurrent version is $CLICKHOUSE_VERSION_STRING"
for config in clickhouse*.yaml; do
- echo "Building deb package for $config"
+ if [ -n "$MAKE_DEB" ] || [ -n "$MAKE_TGZ" ]; then
+ echo "Building deb package for $config"
- # Preserve package path
- exec 9>&1
- PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
- PKG_PATH=${PKG_PATH##*created package: }
- exec 9>&-
+ # Preserve package path
+ exec 9>&1
+ PKG_PATH=$(nfpm package --target "$OUTPUT_DIR" --config "$config" --packager deb | tee /dev/fd/9)
+ PKG_PATH=${PKG_PATH##*created package: }
+ exec 9>&-
+ fi
if [ -n "$MAKE_APK" ]; then
echo "Building apk package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager apk
fi
+ if [ -n "$MAKE_ARCHLINUX" ]; then
+ echo "Building archlinux package for $config"
+ nfpm package --target "$OUTPUT_DIR" --config "$config" --packager archlinux
+ fi
if [ -n "$MAKE_RPM" ]; then
echo "Building rpm package for $config"
nfpm package --target "$OUTPUT_DIR" --config "$config" --packager rpm
diff --git a/packages/clickhouse-client.yaml b/packages/clickhouse-client.yaml
index 459a09ee0b8..d4fd9300208 100644
--- a/packages/clickhouse-client.yaml
+++ b/packages/clickhouse-client.yaml
@@ -37,7 +37,7 @@ deb:
contents:
- src: root/etc/clickhouse-client/config.xml
dst: /etc/clickhouse-client/config.xml
- type: config
+ type: config|noreplace
- src: root/usr/bin/clickhouse-benchmark
dst: /usr/bin/clickhouse-benchmark
- src: root/usr/bin/clickhouse-compressor
diff --git a/packages/clickhouse-keeper.yaml b/packages/clickhouse-keeper.yaml
index 7803729c469..f2095dda02a 100644
--- a/packages/clickhouse-keeper.yaml
+++ b/packages/clickhouse-keeper.yaml
@@ -27,9 +27,9 @@ deb:
Source: clickhouse
contents:
-- src: root/etc/clickhouse-keeper
- dst: /etc/clickhouse-keeper
- type: config
+- src: root/etc/clickhouse-keeper/keeper_config.xml
+ dst: /etc/clickhouse-keeper/keeper_config.xml
+ type: config|noreplace
- src: root/usr/bin/clickhouse-keeper
dst: /usr/bin/clickhouse-keeper
# docs
diff --git a/packages/clickhouse-server.init b/packages/clickhouse-server.init
index 13aeffe13a7..f215e52b6f3 100755
--- a/packages/clickhouse-server.init
+++ b/packages/clickhouse-server.init
@@ -120,7 +120,11 @@ use_cron()
if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then
return 1
fi
- # 2. disabled by config
+    # 2. check whether the cron config file exists
+ if [ ! -f "$CLICKHOUSE_CRONFILE" ]; then
+ return 1
+ fi
+ # 3. disabled by config
if [ -z "$CLICKHOUSE_CRONFILE" ]; then
return 2
fi
diff --git a/packages/clickhouse-server.yaml b/packages/clickhouse-server.yaml
index a94ad1e9169..fe59828ca43 100644
--- a/packages/clickhouse-server.yaml
+++ b/packages/clickhouse-server.yaml
@@ -42,9 +42,12 @@ deb:
Source: clickhouse
contents:
-- src: root/etc/clickhouse-server
- dst: /etc/clickhouse-server
- type: config
+- src: root/etc/clickhouse-server/config.xml
+ dst: /etc/clickhouse-server/config.xml
+ type: config|noreplace
+- src: root/etc/clickhouse-server/users.xml
+ dst: /etc/clickhouse-server/users.xml
+ type: config|noreplace
- src: clickhouse-server.init
dst: /etc/init.d/clickhouse-server
- src: clickhouse-server.service
diff --git a/programs/CMakeLists.txt b/programs/CMakeLists.txt
index 67ccc121e24..5b6c5b26633 100644
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@@ -189,7 +189,7 @@ else()
message(STATUS "ClickHouse su: OFF")
endif()
-configure_file (config_tools.h.in ${ConfigIncludePath}/config_tools.h)
+configure_file (config_tools.h.in ${CONFIG_INCLUDE_PATH}/config_tools.h)
macro(clickhouse_target_link_split_lib target name)
if(NOT CLICKHOUSE_ONE_SHARED)
diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp
index 303c8c2ce4f..6e289b57845 100644
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@@ -12,10 +12,11 @@
#include
#include "Client.h"
#include "Core/Protocol.h"
+#include "Parsers/formatAST.h"
#include
-#include
+#include "config_version.h"
#include
#include
#include
@@ -242,6 +243,7 @@ try
registerAggregateFunctions();
processConfig();
+ initTtyBuffer(toProgressOption(config().getString("progress", "default")));
/// Includes delayed_interactive.
if (is_interactive)
@@ -514,6 +516,66 @@ static bool queryHasWithClause(const IAST & ast)
return false;
}
+std::optional<bool> Client::processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query)
+{
+ processParsedSingleQuery(query_to_execute, query_to_execute, parsed_query);
+
+ const auto * exception = server_exception ? server_exception.get() : client_exception.get();
+ // Sometimes you may get TOO_DEEP_RECURSION from the server,
+ // and TOO_DEEP_RECURSION should not fail the fuzzer check.
+ if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
+ {
+ have_error = false;
+ server_exception.reset();
+ client_exception.reset();
+ return true;
+ }
+
+ if (have_error)
+ {
+ fmt::print(stderr, "Error on processing query '{}': {}\n", parsed_query->formatForErrorMessage(), exception->message());
+
+ // Try to reconnect after errors, for two reasons:
+ // 1. We might not have realized that the server died, e.g. if
+ // it sent us a trace and closed connection properly.
+ // 2. The connection might have gotten into a wrong state and
+ // the next query will get false positive about
+ // "Unknown packet from server".
+ try
+ {
+ connection->forceConnected(connection_parameters.timeouts);
+ }
+ catch (...)
+ {
+ // Just report it, we'll terminate below.
+ fmt::print(stderr,
+ "Error while reconnecting to the server: {}\n",
+ getCurrentExceptionMessage(true));
+
+ // The reconnection might fail, but we'll still be connected
+ // in the sense of `connection->isConnected() = true`,
+ // in case when the requested database doesn't exist.
+ // Disconnect manually now, so that the following code doesn't
+ // have any doubts, and the connection state is predictable.
+ connection->disconnect();
+ }
+ }
+
+ if (!connection->isConnected())
+ {
+ // Probably the server is dead because we found an assertion
+ // failure. Fail fast.
+ fmt::print(stderr, "Lost connection to the server.\n");
+
+ // Print the changed settings because they might be needed to
+ // reproduce the error.
+ printChangedSettings();
+
+ return false;
+ }
+
+ return std::nullopt;
+}
/// Returns false when server is not available.
bool Client::processWithFuzzing(const String & full_query)
@@ -558,18 +620,33 @@ bool Client::processWithFuzzing(const String & full_query)
// - SET -- The time to fuzz the settings has not yet come
// (see comments in Client/QueryFuzzer.cpp)
size_t this_query_runs = query_fuzzer_runs;
-    if (orig_ast->as<ASTInsertQuery>() ||
-        orig_ast->as<ASTCreateQuery>() ||
-        orig_ast->as<ASTDropQuery>() ||
-        orig_ast->as<ASTSetQuery>())
+ ASTs queries_for_fuzzed_tables;
+
+    if (orig_ast->as<ASTSetQuery>())
{
this_query_runs = 1;
}
+    else if (const auto * create = orig_ast->as<ASTCreateQuery>())
+ {
+ if (QueryFuzzer::isSuitableForFuzzing(*create))
+ this_query_runs = create_query_fuzzer_runs;
+ else
+ this_query_runs = 1;
+ }
+    else if (const auto * insert = orig_ast->as<ASTInsertQuery>())
+ {
+ this_query_runs = 1;
+ queries_for_fuzzed_tables = fuzzer.getInsertQueriesForFuzzedTables(full_query);
+ }
+    else if (const auto * drop = orig_ast->as<ASTDropQuery>())
+ {
+ this_query_runs = 1;
+ queries_for_fuzzed_tables = fuzzer.getDropQueriesForFuzzedTables(*drop);
+ }
String query_to_execute;
- ASTPtr parsed_query;
-
ASTPtr fuzz_base = orig_ast;
+
for (size_t fuzz_step = 0; fuzz_step < this_query_runs; ++fuzz_step)
{
fmt::print(stderr, "Fuzzing step {} out of {}\n", fuzz_step, this_query_runs);
@@ -630,9 +707,9 @@ bool Client::processWithFuzzing(const String & full_query)
continue;
}
- parsed_query = ast_to_process;
- query_to_execute = parsed_query->formatForErrorMessage();
- processParsedSingleQuery(full_query, query_to_execute, parsed_query);
+ query_to_execute = ast_to_process->formatForErrorMessage();
+ if (auto res = processFuzzingStep(query_to_execute, ast_to_process))
+ return *res;
}
catch (...)
{
@@ -645,60 +722,6 @@ bool Client::processWithFuzzing(const String & full_query)
have_error = true;
}
- const auto * exception = server_exception ? server_exception.get() : client_exception.get();
- // Sometimes you may get TOO_DEEP_RECURSION from the server,
- // and TOO_DEEP_RECURSION should not fail the fuzzer check.
- if (have_error && exception->code() == ErrorCodes::TOO_DEEP_RECURSION)
- {
- have_error = false;
- server_exception.reset();
- client_exception.reset();
- return true;
- }
-
- if (have_error)
- {
- fmt::print(stderr, "Error on processing query '{}': {}\n", ast_to_process->formatForErrorMessage(), exception->message());
-
- // Try to reconnect after errors, for two reasons:
- // 1. We might not have realized that the server died, e.g. if
- // it sent us a trace and closed connection properly.
- // 2. The connection might have gotten into a wrong state and
- // the next query will get false positive about
- // "Unknown packet from server".
- try
- {
- connection->forceConnected(connection_parameters.timeouts);
- }
- catch (...)
- {
- // Just report it, we'll terminate below.
- fmt::print(stderr,
- "Error while reconnecting to the server: {}\n",
- getCurrentExceptionMessage(true));
-
- // The reconnection might fail, but we'll still be connected
- // in the sense of `connection->isConnected() = true`,
- // in case when the requested database doesn't exist.
- // Disconnect manually now, so that the following code doesn't
- // have any doubts, and the connection state is predictable.
- connection->disconnect();
- }
- }
-
- if (!connection->isConnected())
- {
- // Probably the server is dead because we found an assertion
- // failure. Fail fast.
- fmt::print(stderr, "Lost connection to the server.\n");
-
- // Print the changed settings because they might be needed to
- // reproduce the error.
- printChangedSettings();
-
- return false;
- }
-
// Check that after the query is formatted, we can parse it back,
// format again and get the same result. Unfortunately, we can't
// compare the ASTs, which would be more sensitive to errors. This
@@ -729,13 +752,12 @@ bool Client::processWithFuzzing(const String & full_query)
// query, but second and third.
// If you have to add any more workarounds to this check, just remove
// it altogether, it's not so useful.
- if (parsed_query && !have_error && !queryHasWithClause(*parsed_query))
+ if (ast_to_process && !have_error && !queryHasWithClause(*ast_to_process))
{
ASTPtr ast_2;
try
{
const auto * tmp_pos = query_to_execute.c_str();
-
ast_2 = parseQuery(tmp_pos, tmp_pos + query_to_execute.size(), false /* allow_multi_statements */);
}
catch (Exception & e)
@@ -762,7 +784,7 @@ bool Client::processWithFuzzing(const String & full_query)
"Got the following (different) text after formatting the fuzzed query and parsing it back:\n'{}'\n, expected:\n'{}'\n",
text_3, text_2);
fmt::print(stderr, "In more detail:\n");
- fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", parsed_query->dumpTree());
+ fmt::print(stderr, "AST-1 (generated by fuzzer):\n'{}'\n", ast_to_process->dumpTree());
fmt::print(stderr, "Text-1 (AST-1 formatted):\n'{}'\n", query_to_execute);
fmt::print(stderr, "AST-2 (Text-1 parsed):\n'{}'\n", ast_2->dumpTree());
fmt::print(stderr, "Text-2 (AST-2 formatted):\n'{}'\n", text_2);
@@ -784,6 +806,7 @@ bool Client::processWithFuzzing(const String & full_query)
// so that it doesn't influence the exit code.
server_exception.reset();
client_exception.reset();
+ fuzzer.notifyQueryFailed(ast_to_process);
have_error = false;
}
else if (ast_to_process->formatForErrorMessage().size() > 500)
@@ -800,6 +823,35 @@ bool Client::processWithFuzzing(const String & full_query)
}
}
+ for (const auto & query : queries_for_fuzzed_tables)
+ {
+ std::cout << std::endl;
+ WriteBufferFromOStream ast_buf(std::cout, 4096);
+ formatAST(*query, ast_buf, false /*highlight*/);
+ ast_buf.next();
+ std::cout << std::endl << std::endl;
+
+ try
+ {
+ query_to_execute = query->formatForErrorMessage();
+ if (auto res = processFuzzingStep(query_to_execute, query))
+ return *res;
+ }
+ catch (...)
+ {
+            client_exception = std::make_unique<Exception>(getCurrentExceptionMessage(print_stack_trace), getCurrentExceptionCode());
+ have_error = true;
+ }
+
+ if (have_error)
+ {
+ server_exception.reset();
+ client_exception.reset();
+ fuzzer.notifyQueryFailed(query);
+ have_error = false;
+ }
+ }
+
return true;
}
@@ -834,6 +886,7 @@ void Client::addOptions(OptionsDescription & options_description)
("compression", po::value(), "enable or disable compression (enabled by default for remote communication and disabled for localhost communication).")
("query-fuzzer-runs", po::value()->default_value(0), "After executing every SELECT query, do random mutations in it and run again specified number of times. This is used for testing to discover unexpected corner cases.")
+        ("create-query-fuzzer-runs", po::value<int>()->default_value(0), "")
("interleave-queries-file", po::value>()->multitoken(),
"file path with queries to execute before every file from 'queries-file'; multiple files can be specified (--queries-file file1 file2...); this is needed to enable more aggressive fuzzing of newly added tests (see 'query-fuzzer-runs' option)")
@@ -994,6 +1047,17 @@ void Client::processOptions(const OptionsDescription & options_description,
ignore_error = true;
}
+    if ((create_query_fuzzer_runs = options["create-query-fuzzer-runs"].as<int>()))
+ {
+ // Fuzzer implies multiquery.
+ config().setBool("multiquery", true);
+ // Ignore errors in parsing queries.
+ config().setBool("ignore-error", true);
+
+ global_context->setSetting("allow_suspicious_low_cardinality_types", true);
+ ignore_error = true;
+ }
+
if (options.count("opentelemetry-traceparent"))
{
String traceparent = options["opentelemetry-traceparent"].as();
@@ -1025,7 +1089,6 @@ void Client::processConfig()
}
else
{
- need_render_progress = config().getBool("progress", false);
echo_queries = config().getBool("echo", false);
ignore_error = config().getBool("ignore-error", false);
@@ -1045,15 +1108,21 @@ void Client::processConfig()
else
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
- format_max_block_size = config().getInt("format_max_block_size", global_context->getSettingsRef().max_block_size);
+ format_max_block_size = config().getUInt64("format_max_block_size",
+ global_context->getSettingsRef().max_block_size);
insert_format = "Values";
/// Setting value from cmd arg overrides one from config
if (global_context->getSettingsRef().max_insert_block_size.changed)
+ {
insert_format_max_block_size = global_context->getSettingsRef().max_insert_block_size;
+ }
else
- insert_format_max_block_size = config().getInt("insert_format_max_block_size", global_context->getSettingsRef().max_insert_block_size);
+ {
+ insert_format_max_block_size = config().getUInt64("insert_format_max_block_size",
+ global_context->getSettingsRef().max_insert_block_size);
+ }
ClientInfo & client_info = global_context->getClientInfo();
client_info.setInitialQuery();
diff --git a/programs/client/Client.h b/programs/client/Client.h
index 1fec282be51..63f28ca96a2 100644
--- a/programs/client/Client.h
+++ b/programs/client/Client.h
@@ -17,6 +17,7 @@ public:
protected:
bool processWithFuzzing(const String & full_query) override;
+    std::optional<bool> processFuzzingStep(const String & query_to_execute, const ASTPtr & parsed_query);
void connect() override;
diff --git a/programs/client/clickhouse-client.xml b/programs/client/clickhouse-client.xml
index 66e7afd8f8c..00f5b26eddf 100644
--- a/programs/client/clickhouse-client.xml
+++ b/programs/client/clickhouse-client.xml
@@ -19,7 +19,6 @@
{host}
{port}
{user}
- {database}
{display_name}
Terminal colors: https://misc.flogisoft.com/bash/tip_colors_and_formatting
See also: https://wiki.hackzine.org/development/misc/readline-color-prompt.html
diff --git a/programs/config_tools.h.in b/programs/config_tools.h.in
index f1787801dc4..30444e8c84e 100644
--- a/programs/config_tools.h.in
+++ b/programs/config_tools.h.in
@@ -1,6 +1,6 @@
-#pragma once
+/// This file was autogenerated by CMake
-// .h autogenerated by cmake !
+#pragma once
#cmakedefine01 ENABLE_CLICKHOUSE_SERVER
#cmakedefine01 ENABLE_CLICKHOUSE_CLIENT
diff --git a/programs/copier/Aliases.h b/programs/copier/Aliases.h
index c4d9c40d9f1..02be3441acd 100644
--- a/programs/copier/Aliases.h
+++ b/programs/copier/Aliases.h
@@ -1,6 +1,10 @@
#pragma once
-#include
+#include
+
+#include
+
+#include
namespace DB
{
@@ -8,21 +12,4 @@ namespace DB
    using DatabaseAndTableName = std::pair<String, String>;
    using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
-
- /// Hierarchical description of the tasks
- struct ShardPartitionPiece;
- struct ShardPartition;
- struct TaskShard;
- struct TaskTable;
- struct TaskCluster;
- struct ClusterPartition;
-
- using PartitionPieces = std::vector;
- using TasksPartition = std::map>;
- using ShardInfo = Cluster::ShardInfo;
- using TaskShardPtr = std::shared_ptr;
- using TasksShard = std::vector;
- using TasksTable = std::list;
- using ClusterPartitions = std::map>;
}
-
diff --git a/programs/copier/CMakeLists.txt b/programs/copier/CMakeLists.txt
index 57e0996ed78..2c17e70bc5e 100644
--- a/programs/copier/CMakeLists.txt
+++ b/programs/copier/CMakeLists.txt
@@ -1,7 +1,13 @@
set(CLICKHOUSE_COPIER_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
- "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp")
+ "${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
+ "${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")
set (CLICKHOUSE_COPIER_LINK
PRIVATE
diff --git a/programs/copier/ClusterCopier.h b/programs/copier/ClusterCopier.h
index b354fc59eee..063b13e9078 100644
--- a/programs/copier/ClusterCopier.h
+++ b/programs/copier/ClusterCopier.h
@@ -3,7 +3,8 @@
#include "Aliases.h"
#include "Internals.h"
#include "TaskCluster.h"
-#include "TaskTableAndShard.h"
+#include "TaskShard.h"
+#include "TaskTable.h"
#include "ShardPartition.h"
#include "ShardPartitionPiece.h"
#include "ZooKeeperStaff.h"
diff --git a/programs/copier/ClusterPartition.h b/programs/copier/ClusterPartition.h
index ed69bfa8c26..22063989e22 100644
--- a/programs/copier/ClusterPartition.h
+++ b/programs/copier/ClusterPartition.h
@@ -1,17 +1,22 @@
#pragma once
-#include "Aliases.h"
+#include
+#include