Merge branch 'master' of github.com:ClickHouse/ClickHouse into clickhouse-local-improve

kssenii 2021-11-02 20:01:15 +00:00
commit feb3d8bc75
573 changed files with 30593 additions and 5033 deletions


@@ -21,7 +21,6 @@ jobs:
           python3 run_check.py
   DockerHubPush:
     needs: CheckLabels
-    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Check out repository code
@@ -56,9 +55,36 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
+  DocsCheck:
+    needs: DockerHubPush
+    runs-on: [self-hosted, func-tester]
+    steps:
+      - name: Download changed images
+        uses: actions/download-artifact@v2
+        with:
+          name: changed_images
+          path: ${{ runner.temp }}/docs_check
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Docs Check
+        env:
+          TEMP_PATH: ${{runner.temp}}/docs_check
+          REPO_COPY: ${{runner.temp}}/docs_check/ClickHouse
+        run: |
+          cp -r $GITHUB_WORKSPACE $TEMP_PATH
+          cd $REPO_COPY/tests/ci
+          python3 docs_check.py
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr $TEMP_PATH
   BuilderDebDebug:
     needs: DockerHubPush
+    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Download changed images
@@ -93,6 +119,7 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
   BuilderReport:
     needs: [BuilderDebDebug]
@@ -118,6 +145,7 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
   FunctionalStatelessTestDebug:
     needs: [BuilderDebDebug]
@@ -147,6 +175,7 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
   FunctionalStatefulTestDebug:
     needs: [BuilderDebDebug]
@@ -176,9 +205,40 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
+  StressTestDebug:
+    needs: [BuilderDebDebug]
+    runs-on: [self-hosted, stress-tester]
+    steps:
+      - name: Download json reports
+        uses: actions/download-artifact@v2
+        with:
+          path: ${{runner.temp}}/reports_dir
+      - name: Check out repository code
+        uses: actions/checkout@v2
+      - name: Stress test
+        env:
+          TEMP_PATH: ${{runner.temp}}/stress_debug
+          REPORTS_PATH: ${{runner.temp}}/reports_dir
+          CHECK_NAME: 'Stress tests (debug, actions)'
+          REPO_COPY: ${{runner.temp}}/stress_debug/ClickHouse
+          REQUIRED_BUILD_NUMBER: 7
+        run: |
+          sudo rm -fr $TEMP_PATH
+          mkdir -p $TEMP_PATH
+          cp -r $GITHUB_WORKSPACE $TEMP_PATH
+          cd $REPO_COPY/tests/ci
+          python3 stress_check.py "$CHECK_NAME" $REQUIRED_BUILD_NUMBER
+      - name: Cleanup
+        if: always()
+        run: |
+          docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
+          sudo rm -fr $TEMP_PATH
   FastTest:
     needs: DockerHubPush
+    if: ${{ !contains(github.event.pull_request.labels.*.name, 'pr-documentation') && !contains(github.event.pull_request.labels.*.name, 'pr-doc-fix') }}
     runs-on: [self-hosted, builder]
     steps:
       - name: Check out repository code
@@ -197,9 +257,10 @@ jobs:
         if: always()
         run: |
           docker kill $(docker ps -q) ||:
+          docker rm -f $(docker ps -a -q) ||:
           sudo rm -fr $TEMP_PATH
   FinishCheck:
-    needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug]
+    needs: [StyleCheck, DockerHubPush, CheckLabels, BuilderReport, FastTest, FunctionalStatelessTestDebug, FunctionalStatefulTestDebug, DocsCheck, StressTestDebug]
     runs-on: [self-hosted, style-checker]
     steps:
       - name: Check out repository code

55 .github/workflows/release.yml (new file)

@@ -0,0 +1,55 @@
name: DocsReleaseChecks
concurrency:
  group: master-release
  cancel-in-progress: true
on: # yamllint disable-line rule:truthy
  push:
    branches:
      - master
    paths:
      - 'docs/**'
      - 'website/**'
      - 'benchmark/**'
      - 'docker/**'
jobs:
  DockerHubPush:
    runs-on: [self-hosted, style-checker]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Images check
        run: |
          cd $GITHUB_WORKSPACE/tests/ci
          python3 docker_images_check.py
      - name: Upload images files to artifacts
        uses: actions/upload-artifact@v2
        with:
          name: changed_images
          path: ${{ runner.temp }}/docker_images_check/changed_images.json
  DocsRelease:
    needs: DockerHubPush
    runs-on: [self-hosted, func-tester]
    steps:
      - name: Check out repository code
        uses: actions/checkout@v2
      - name: Download changed images
        uses: actions/download-artifact@v2
        with:
          name: changed_images
          path: ${{runner.temp}}/docs_release
      - name: Docs Release
        env:
          TEMP_PATH: ${{runner.temp}}/docs_release
          REPO_COPY: ${{runner.temp}}/docs_release/ClickHouse
          CLOUDFLARE_TOKEN: ${{secrets.CLOUDFLARE}}
          ROBOT_CLICKHOUSE_SSH_KEY: ${{secrets.ROBOT_CLICKHOUSE_SSH_KEY}}
        run: |
          cp -r $GITHUB_WORKSPACE $TEMP_PATH
          cd $REPO_COPY/tests/ci
          python3 docs_release.py
      - name: Cleanup
        if: always()
        run: |
          docker kill $(docker ps -q) ||:
          docker rm -f $(docker ps -a -q) ||:
          sudo rm -fr $TEMP_PATH
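
DocsReleaseChecks only fires on pushes to master that touch the four path globs above. A quick local sketch (a hypothetical helper, not part of the repository) to see whether a commit would trigger it:

```
# Would the most recent commit trigger DocsReleaseChecks?
git diff --name-only HEAD~1 HEAD | grep -qE '^(docs|website|benchmark|docker)/' \
    && echo "would trigger DocsReleaseChecks" \
    || echo "no trigger"
```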

2 .gitmodules

@@ -76,7 +76,7 @@
 	url = https://github.com/ClickHouse-Extras/libcxxabi.git
 [submodule "contrib/snappy"]
 	path = contrib/snappy
-	url = https://github.com/google/snappy
+	url = https://github.com/ClickHouse-Extras/snappy.git
 [submodule "contrib/cppkafka"]
 	path = contrib/cppkafka
 	url = https://github.com/mfontanini/cppkafka.git
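
A note on the snappy URL change: checkouts cloned before this commit keep the old remote until the submodule is re-synced. A minimal sketch:

```
# Point an existing checkout's snappy submodule at the new fork URL.
git submodule sync contrib/snappy
git submodule update --init contrib/snappy
```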


@@ -1,4 +1,5 @@
 #include <stdexcept>
+#include <fstream>
 #include <base/getMemoryAmount.h>
 #include <base/getPageSize.h>
@@ -15,6 +16,17 @@
  */
uint64_t getMemoryAmountOrZero()
{
+#if defined(OS_LINUX)
+    // Try to look up the cgroup memory limit first
+    std::ifstream cgroup_limit("/sys/fs/cgroup/memory/memory.limit_in_bytes");
+    if (cgroup_limit.is_open())
+    {
+        uint64_t amount = 0; // in case of read error
+        cgroup_limit >> amount;
+        return amount;
+    }
+#endif
     int64_t num_pages = sysconf(_SC_PHYS_PAGES);
     if (num_pages <= 0)
         return 0;
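
The new branch consults the cgroup v1 limit file before falling back to sysconf. A small sketch for checking what a given host will report (assumption: on cgroup v2 systems the limit lives at /sys/fs/cgroup/memory.max, which this patch does not read, so such hosts fall through to sysconf):

```
# Inspect the memory limit the patched getMemoryAmountOrZero() would see.
V1=/sys/fs/cgroup/memory/memory.limit_in_bytes
V2=/sys/fs/cgroup/memory.max  # cgroup v2 equivalent, not consulted by this patch
if [ -r "$V1" ]; then
    echo "cgroup v1 limit: $(cat "$V1") bytes"
elif [ -r "$V2" ]; then
    echo "cgroup v2 limit: $(cat "$V2") (bytes, or 'max' if unlimited)"
else
    # Fallback mirrors the sysconf path: physical pages * page size.
    echo "$(( $(getconf _PHYS_PAGES) * $(getconf PAGE_SIZE) )) bytes"
fi
```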

15906 benchmark/duckdb/log (new file)

File diff suppressed because one or more lines are too long


@@ -0,0 +1,43 @@
SELECT count(*) FROM hits;
SELECT count(*) FROM hits WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits;
SELECT sum(UserID) FROM hits;
SELECT COUNT(DISTINCT UserID) FROM hits;
SELECT COUNT(DISTINCT SearchPhrase) FROM hits;
SELECT min(EventDate), max(EventDate) FROM hits;
SELECT AdvEngineID, count(*) FROM hits WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(MobilePhoneModel) > 0 GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM hits GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM hits GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS m, SearchPhrase, count(*) FROM hits GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM hits WHERE UserID = 12345678901234567890;
SELECT count(*) FROM hits WHERE URL::TEXT LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits WHERE URL::TEXT LIKE '%metrika%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits WHERE Title::TEXT LIKE '%Яндекс%' AND URL::TEXT NOT LIKE '%.yandex.%' AND octet_length(SearchPhrase) > 0 GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM hits WHERE URL::TEXT LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM hits WHERE octet_length(SearchPhrase) > 0 ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(octet_length(URL)) AS l, count(*) AS c FROM hits WHERE octet_length(URL) > 0 GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT regexp_replace(Referer::TEXT, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(octet_length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits WHERE octet_length(Referer) > 0 GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits;
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits WHERE octet_length(SearchPhrase) > 0 GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM hits GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM hits GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(URL) > 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND octet_length(Title) > 0 GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) AS "Minute", count(*) AS PageViews FROM hits WHERE CounterID = 62 AND (DATE '1970-01-01' + EventDate) >= '2013-07-01' AND (DATE '1970-01-01' + EventDate) <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime))) ORDER BY DATE_TRUNC('minute', (TIMESTAMP '1970-01-01 00:00:00' + to_seconds(EventTime)));

File diff suppressed because one or more lines are too long


@@ -0,0 +1,12 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_pg/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
# For some reason JIT does not work on my machine
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
done;
done;
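
Each query runs three times with the page cache dropped beforehand, so the first timing per query is cold-cache (visible in the logs below, where the first run is often an order of magnitude slower). A sketch for pulling the best warm time per query out of the resulting log, assuming exactly three `Time:` lines per query as produced above:

```
# Report the fastest of the three runs for each query in ./log.
grep -oP 'Time: \K[0-9.]+' log | paste - - - | awk '{
    best = $1
    if ($2 + 0 < best + 0) best = $2
    if ($3 + 0 < best + 0) best = $3
    printf "query %d: best %s ms\n", NR, best
}'
```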


@@ -0,0 +1,142 @@
Create a table in PostgreSQL:
```
CREATE TABLE hits_100m_pg
(
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title TEXT NOT NULL,
GoodEvent SMALLINT NOT NULL,
EventTime TIMESTAMP NOT NULL,
EventDate Date NOT NULL,
CounterID INTEGER NOT NULL,
ClientIP INTEGER NOT NULL,
RegionID INTEGER NOT NULL,
UserID BIGINT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL TEXT NOT NULL,
Referer TEXT NOT NULL,
Refresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INTEGER NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INTEGER NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 TEXT NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor CHAR(2) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel TEXT NOT NULL,
Params TEXT NOT NULL,
IPNetworkID INTEGER NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase TEXT NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime TIMESTAMP NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INTEGER NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset TEXT NOT NULL,
CodeVersion INTEGER NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL TEXT NOT NULL,
HID INTEGER NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime TIMESTAMP NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INTEGER NOT NULL,
WindowName INTEGER NOT NULL,
OpenerName INTEGER NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage TEXT NOT NULL,
BrowserCountry TEXT NOT NULL,
SocialNetwork TEXT NOT NULL,
SocialAction TEXT NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INTEGER NOT NULL,
DNSTiming INTEGER NOT NULL,
ConnectTiming INTEGER NOT NULL,
ResponseStartTiming INTEGER NOT NULL,
ResponseEndTiming INTEGER NOT NULL,
FetchTiming INTEGER NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage TEXT NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID TEXT NOT NULL,
ParamCurrency TEXT NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName TEXT NOT NULL,
OpenstatCampaignID TEXT NOT NULL,
OpenstatAdID TEXT NOT NULL,
OpenstatSourceID TEXT NOT NULL,
UTMSource TEXT NOT NULL,
UTMMedium TEXT NOT NULL,
UTMCampaign TEXT NOT NULL,
UTMContent TEXT NOT NULL,
UTMTerm TEXT NOT NULL,
FromTag TEXT NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INTEGER NOT NULL
);
```
Create a dump from ClickHouse:
```
SELECT WatchID::Int64, JavaEnable, replaceAll(replaceAll(replaceAll(toValidUTF8(Title), '\0', ''), '"', ''), '\\', ''), GoodEvent, EventTime, EventDate, CounterID::Int32, ClientIP::Int32, RegionID::Int32,
UserID::Int64, CounterClass, OS, UserAgent, replaceAll(replaceAll(replaceAll(toValidUTF8(URL), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Referer), '\0', ''), '"', ''), '\\', ''), Refresh, RefererCategoryID::Int16, RefererRegionID::Int32,
URLCategoryID::Int16, URLRegionID::Int32, ResolutionWidth::Int16, ResolutionHeight::Int16, ResolutionDepth, FlashMajor, FlashMinor,
FlashMinor2, NetMajor, NetMinor, UserAgentMajor::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(UserAgentMinor::String), '\0', ''), '"', ''), '\\', ''), CookieEnable, JavascriptEnable, IsMobile, MobilePhone,
replaceAll(replaceAll(replaceAll(toValidUTF8(MobilePhoneModel), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(Params), '\0', ''), '"', ''), '\\', ''), IPNetworkID::Int32, TraficSourceID, SearchEngineID::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(SearchPhrase), '\0', ''), '"', ''), '\\', ''),
AdvEngineID, IsArtifical, WindowClientWidth::Int16, WindowClientHeight::Int16, ClientTimeZone, ClientEventTime,
SilverlightVersion1, SilverlightVersion2, SilverlightVersion3::Int32, SilverlightVersion4::Int16, replaceAll(replaceAll(replaceAll(toValidUTF8(PageCharset), '\0', ''), '"', ''), '\\', ''),
CodeVersion::Int32, IsLink, IsDownload, IsNotBounce, FUniqID::Int64, replaceAll(replaceAll(replaceAll(toValidUTF8(OriginalURL), '\0', ''), '"', ''), '\\', ''), HID::Int32, IsOldCounter, IsEvent,
IsParameter, DontCountHits, WithHash, replaceAll(replaceAll(replaceAll(toValidUTF8(HitColor::String), '\0', ''), '"', ''), '\\', ''), LocalEventTime, Age, Sex, Income, Interests::Int16, Robotness, RemoteIP::Int32,
WindowName, OpenerName, HistoryLength, replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserLanguage::String), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(BrowserCountry::String), '\0', ''), '"', ''), '\\', ''),
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialNetwork), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(SocialAction), '\0', ''), '"', ''), '\\', ''),
HTTPError, least(SendTiming, 30000), least(DNSTiming, 30000), least(ConnectTiming, 30000), least(ResponseStartTiming, 30000),
least(ResponseEndTiming, 30000), least(FetchTiming, 30000), SocialSourceNetworkID,
replaceAll(replaceAll(replaceAll(toValidUTF8(SocialSourcePage), '\0', ''), '"', ''), '\\', ''), ParamPrice, replaceAll(replaceAll(replaceAll(toValidUTF8(ParamOrderID), '\0', ''), '"', ''), '\\', ''), replaceAll(replaceAll(replaceAll(toValidUTF8(ParamCurrency::String), '\0', ''), '"', ''), '\\', ''),
ParamCurrencyID::Int16, OpenstatServiceName, OpenstatCampaignID, OpenstatAdID, OpenstatSourceID,
UTMSource, UTMMedium, UTMCampaign, UTMContent, UTMTerm, FromTag, HasGCLID, RefererHash::Int64, URLHash::Int64, CLID::Int32
FROM hits_100m_obfuscated
INTO OUTFILE 'dump.tsv'
FORMAT TSV
```
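Before loading, it can be worth confirming the dump is complete (a sketch; dump.tsv is the file written by the query above):
```
wc -l dump.tsv   # TSV format: one line per exported row
```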
Insert data into PostgreSQL:
```
\copy hits_100m_pg FROM 'dump.tsv';
```
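A final sanity check after the import (a sketch, reusing the psql invocation from benchmark.sh; the count should match the wc -l result above):
```
sudo -u postgres psql tutorial -t -c 'SELECT count(*) FROM hits_100m_pg;'
```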

129 benchmark/postgresql/log (new file)

@@ -0,0 +1,129 @@
Time: 122020.258 ms (02:02.020)
Time: 5060.281 ms (00:05.060)
Time: 5052.692 ms (00:05.053)
Time: 129594.172 ms (02:09.594)
Time: 8079.623 ms (00:08.080)
Time: 7866.964 ms (00:07.867)
Time: 129584.717 ms (02:09.585)
Time: 8276.161 ms (00:08.276)
Time: 8153.295 ms (00:08.153)
Time: 123707.890 ms (02:03.708)
Time: 6835.297 ms (00:06.835)
Time: 6607.039 ms (00:06.607)
Time: 166640.676 ms (02:46.641)
Time: 75401.239 ms (01:15.401)
Time: 73526.027 ms (01:13.526)
Time: 272715.750 ms (04:32.716)
Time: 182721.613 ms (03:02.722)
Time: 182880.525 ms (03:02.881)
Time: 127108.191 ms (02:07.108)
Time: 6542.913 ms (00:06.543)
Time: 6339.887 ms (00:06.340)
Time: 127339.314 ms (02:07.339)
Time: 8376.381 ms (00:08.376)
Time: 7831.872 ms (00:07.832)
Time: 179176.439 ms (02:59.176)
Time: 58559.297 ms (00:58.559)
Time: 58139.265 ms (00:58.139)
Time: 182019.101 ms (03:02.019)
Time: 58435.027 ms (00:58.435)
Time: 58130.994 ms (00:58.131)
Time: 132449.502 ms (02:12.450)
Time: 11203.104 ms (00:11.203)
Time: 11048.435 ms (00:11.048)
Time: 128445.641 ms (02:08.446)
Time: 11602.145 ms (00:11.602)
Time: 11418.356 ms (00:11.418)
Time: 162831.387 ms (02:42.831)
Time: 41510.710 ms (00:41.511)
Time: 41682.899 ms (00:41.683)
Time: 171898.965 ms (02:51.899)
Time: 47379.274 ms (00:47.379)
Time: 47429.908 ms (00:47.430)
Time: 161607.811 ms (02:41.608)
Time: 41674.409 ms (00:41.674)
Time: 40854.340 ms (00:40.854)
Time: 175247.929 ms (02:55.248)
Time: 46721.776 ms (00:46.722)
Time: 46507.631 ms (00:46.508)
Time: 335961.271 ms (05:35.961)
Time: 248535.866 ms (04:08.536)
Time: 247383.678 ms (04:07.384)
Time: 132852.983 ms (02:12.853)
Time: 14939.304 ms (00:14.939)
Time: 14607.525 ms (00:14.608)
Time: 243461.844 ms (04:03.462)
Time: 157307.904 ms (02:37.308)
Time: 155093.101 ms (02:35.093)
Time: 122090.761 ms (02:02.091)
Time: 6411.266 ms (00:06.411)
Time: 6308.178 ms (00:06.308)
Time: 126584.819 ms (02:06.585)
Time: 8836.471 ms (00:08.836)
Time: 8532.176 ms (00:08.532)
Time: 125225.097 ms (02:05.225)
Time: 10236.910 ms (00:10.237)
Time: 9849.757 ms (00:09.850)
Time: 139140.064 ms (02:19.140)
Time: 21797.859 ms (00:21.798)
Time: 21559.214 ms (00:21.559)
Time: 124757.485 ms (02:04.757)
Time: 8728.403 ms (00:08.728)
Time: 8714.130 ms (00:08.714)
Time: 120687.258 ms (02:00.687)
Time: 8366.245 ms (00:08.366)
Time: 8146.856 ms (00:08.147)
Time: 122327.148 ms (02:02.327)
Time: 8698.359 ms (00:08.698)
Time: 8480.807 ms (00:08.481)
Time: 123958.614 ms (02:03.959)
Time: 8595.931 ms (00:08.596)
Time: 8241.773 ms (00:08.242)
Time: 128982.905 ms (02:08.983)
Time: 11252.783 ms (00:11.253)
Time: 10957.931 ms (00:10.958)
Time: 208455.385 ms (03:28.455)
Time: 102530.897 ms (01:42.531)
Time: 102049.298 ms (01:42.049)
Time: 131268.420 ms (02:11.268)
Time: 21094.466 ms (00:21.094)
Time: 20934.610 ms (00:20.935)
Time: 164084.134 ms (02:44.084)
Time: 77418.547 ms (01:17.419)
Time: 75422.290 ms (01:15.422)
Time: 174800.022 ms (02:54.800)
Time: 87859.594 ms (01:27.860)
Time: 85733.954 ms (01:25.734)
Time: 419357.463 ms (06:59.357)
Time: 339047.269 ms (05:39.047)
Time: 334808.230 ms (05:34.808)
Time: 475011.901 ms (07:55.012)
Time: 344406.246 ms (05:44.406)
Time: 347197.731 ms (05:47.198)
Time: 464657.732 ms (07:44.658)
Time: 332084.079 ms (05:32.084)
Time: 330921.322 ms (05:30.921)
Time: 152490.615 ms (02:32.491)
Time: 30954.343 ms (00:30.954)
Time: 31379.062 ms (00:31.379)
Time: 128539.127 ms (02:08.539)
Time: 12802.672 ms (00:12.803)
Time: 12494.088 ms (00:12.494)
Time: 125850.120 ms (02:05.850)
Time: 10318.773 ms (00:10.319)
Time: 9953.030 ms (00:09.953)
Time: 126602.092 ms (02:06.602)
Time: 8935.571 ms (00:08.936)
Time: 8711.184 ms (00:08.711)
Time: 133222.456 ms (02:13.222)
Time: 11848.869 ms (00:11.849)
Time: 11752.640 ms (00:11.753)
Time: 126950.067 ms (02:06.950)
Time: 11260.892 ms (00:11.261)
Time: 10943.649 ms (00:10.944)
Time: 128451.171 ms (02:08.451)
Time: 10984.980 ms (00:10.985)
Time: 10770.609 ms (00:10.771)
Time: 124621.000 ms (02:04.621)
Time: 8885.466 ms (00:08.885)
Time: 8857.296 ms (00:08.857)


@@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);


@@ -0,0 +1,11 @@
#!/bin/bash
grep -v -P '^#' queries.sql | sed -e 's/{table}/hits_100m_obfuscated/' | while read query; do
echo 3 | sudo tee /proc/sys/vm/drop_caches
echo "$query";
for i in {1..3}; do
sudo -u postgres psql tutorial -t -c 'set jit = off' -c '\timing' -c "$query" | grep 'Time' | tee --append log
done;
done;

215 benchmark/timescaledb/log (new file)

@@ -0,0 +1,215 @@
3
SELECT count(*) FROM hits_100m_obfuscated;
Time: 3259.733 ms (00:03.260)
Time: 3135.484 ms (00:03.135)
Time: 3135.579 ms (00:03.136)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0;
Time: 146854.557 ms (02:26.855)
Time: 6921.736 ms (00:06.922)
Time: 6619.892 ms (00:06.620)
3
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM hits_100m_obfuscated;
Time: 146568.297 ms (02:26.568)
Time: 7481.610 ms (00:07.482)
Time: 7258.209 ms (00:07.258)
3
SELECT sum(UserID) FROM hits_100m_obfuscated;
Time: 146864.106 ms (02:26.864)
Time: 5690.024 ms (00:05.690)
Time: 5381.820 ms (00:05.382)
3
SELECT COUNT(DISTINCT UserID) FROM hits_100m_obfuscated;
Time: 227507.331 ms (03:47.507)
Time: 69165.471 ms (01:09.165)
Time: 72216.950 ms (01:12.217)
3
SELECT COUNT(DISTINCT SearchPhrase) FROM hits_100m_obfuscated;
Time: 323644.397 ms (05:23.644)
Time: 177578.740 ms (02:57.579)
Time: 175055.738 ms (02:55.056)
3
SELECT min(EventDate), max(EventDate) FROM hits_100m_obfuscated;
Time: 146147.843 ms (02:26.148)
Time: 5735.128 ms (00:05.735)
Time: 5428.638 ms (00:05.429)
3
SELECT AdvEngineID, count(*) FROM hits_100m_obfuscated WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
Time: 148658.450 ms (02:28.658)
Time: 7014.882 ms (00:07.015)
Time: 6599.736 ms (00:06.600)
3
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY u DESC LIMIT 10;
Time: 202423.122 ms (03:22.423)
Time: 54439.047 ms (00:54.439)
Time: 54800.354 ms (00:54.800)
3
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM hits_100m_obfuscated GROUP BY RegionID ORDER BY c DESC LIMIT 10;
Time: 201152.491 ms (03:21.152)
Time: 55875.854 ms (00:55.876)
Time: 55200.330 ms (00:55.200)
3
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 146042.603 ms (02:26.043)
Time: 9931.633 ms (00:09.932)
Time: 10037.032 ms (00:10.037)
3
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
Time: 150811.952 ms (02:30.812)
Time: 10320.230 ms (00:10.320)
Time: 9993.232 ms (00:09.993)
3
SELECT SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 173071.218 ms (02:53.071)
Time: 34314.835 ms (00:34.315)
Time: 34420.919 ms (00:34.421)
3
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
Time: 172874.155 ms (02:52.874)
Time: 43704.494 ms (00:43.704)
Time: 43918.380 ms (00:43.918)
3
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 178484.822 ms (02:58.485)
Time: 36850.436 ms (00:36.850)
Time: 35789.029 ms (00:35.789)
3
SELECT UserID, count(*) FROM hits_100m_obfuscated GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
Time: 169720.759 ms (02:49.721)
Time: 24125.730 ms (00:24.126)
Time: 23782.745 ms (00:23.783)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 182335.631 ms (03:02.336)
Time: 37324.563 ms (00:37.325)
Time: 37124.250 ms (00:37.124)
3
SELECT UserID, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, SearchPhrase LIMIT 10;
Time: 163799.714 ms (02:43.800)
Time: 18514.031 ms (00:18.514)
Time: 18968.524 ms (00:18.969)
3
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM hits_100m_obfuscated GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
Time: 294799.480 ms (04:54.799)
Time: 149592.992 ms (02:29.593)
Time: 149466.291 ms (02:29.466)
3
SELECT UserID FROM hits_100m_obfuscated WHERE UserID = -6101065172474983726;
Time: 140797.496 ms (02:20.797)
Time: 5312.321 ms (00:05.312)
Time: 5020.502 ms (00:05.021)
3
SELECT count(*) FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%';
Time: 143092.287 ms (02:23.092)
Time: 7893.874 ms (00:07.894)
Time: 7661.326 ms (00:07.661)
3
SELECT SearchPhrase, min(URL), count(*) AS c FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 143682.424 ms (02:23.682)
Time: 9249.962 ms (00:09.250)
Time: 9073.876 ms (00:09.074)
3
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM hits_100m_obfuscated WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
Time: 150965.884 ms (02:30.966)
Time: 20350.812 ms (00:20.351)
Time: 20074.939 ms (00:20.075)
3
SELECT * FROM hits_100m_obfuscated WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
Time: 4674.669 ms (00:04.675)
Time: 4532.389 ms (00:04.532)
Time: 4555.457 ms (00:04.555)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
Time: 5.177 ms
Time: 5.031 ms
Time: 4.419 ms
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
Time: 141152.210 ms (02:21.152)
Time: 7492.968 ms (00:07.493)
Time: 7300.428 ms (00:07.300)
3
SELECT SearchPhrase FROM hits_100m_obfuscated WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
Time: 30.736 ms
Time: 5.018 ms
Time: 5.132 ms
3
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM hits_100m_obfuscated WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 144034.016 ms (02:24.034)
Time: 10701.672 ms (00:10.702)
Time: 10348.565 ms (00:10.349)
3
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www.)?([^/]+)/.*$', '1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM hits_100m_obfuscated WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
Time: 191575.080 ms (03:11.575)
Time: 97836.706 ms (01:37.837)
Time: 97673.219 ms (01:37.673)
3
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM hits_100m_obfuscated;
Time: 143652.317 ms (02:23.652)
Time: 22185.656 ms (00:22.186)
Time: 21887.411 ms (00:21.887)
3
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 153481.944 ms (02:33.482)
Time: 17748.628 ms (00:17.749)
Time: 17551.116 ms (00:17.551)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 167448.684 ms (02:47.449)
Time: 25902.961 ms (00:25.903)
Time: 25592.018 ms (00:25.592)
3
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM hits_100m_obfuscated GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
Time: 299183.443 ms (04:59.183)
Time: 145349.772 ms (02:25.350)
Time: 143214.688 ms (02:23.215)
3
SELECT URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY URL ORDER BY c DESC LIMIT 10;
Time: 389851.369 ms (06:29.851)
Time: 228158.639 ms (03:48.159)
Time: 231811.118 ms (03:51.811)
3
SELECT 1, URL, count(*) AS c FROM hits_100m_obfuscated GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
Time: 407458.343 ms (06:47.458)
Time: 230125.530 ms (03:50.126)
Time: 230764.511 ms (03:50.765)
3
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM hits_100m_obfuscated GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
Time: 174098.556 ms (02:54.099)
Time: 23503.975 ms (00:23.504)
Time: 24322.856 ms (00:24.323)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
Time: 145906.025 ms (02:25.906)
Time: 10824.695 ms (00:10.825)
Time: 10484.885 ms (00:10.485)
3
SELECT Title, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
Time: 144063.711 ms (02:24.064)
Time: 8947.980 ms (00:08.948)
Time: 8608.434 ms (00:08.608)
3
SELECT URL, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
Time: 141883.596 ms (02:21.884)
Time: 7977.257 ms (00:07.977)
Time: 7673.547 ms (00:07.674)
3
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
Time: 147100.084 ms (02:27.100)
Time: 9527.812 ms (00:09.528)
Time: 9457.663 ms (00:09.458)
3
SELECT URLHash, EventDate, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
Time: 144585.669 ms (02:24.586)
Time: 10815.223 ms (00:10.815)
Time: 10594.707 ms (00:10.595)
3
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
Time: 145738.341 ms (02:25.738)
Time: 10592.979 ms (00:10.593)
Time: 10181.477 ms (00:10.181)
3
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM hits_100m_obfuscated WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);
Time: 145023.796 ms (02:25.024)
Time: 8035.337 ms (00:08.035)
Time: 7865.698 ms (00:07.866)


@@ -0,0 +1,129 @@
Time: 1784.299 ms (00:01.784)
Time: 1223.461 ms (00:01.223)
Time: 1200.665 ms (00:01.201)
Time: 22730.141 ms (00:22.730)
Time: 1379.227 ms (00:01.379)
Time: 1361.595 ms (00:01.362)
Time: 29888.235 ms (00:29.888)
Time: 3160.611 ms (00:03.161)
Time: 3207.363 ms (00:03.207)
Time: 53922.569 ms (00:53.923)
Time: 2301.456 ms (00:02.301)
Time: 2277.009 ms (00:02.277)
Time: 45363.999 ms (00:45.364)
Time: 43765.848 ms (00:43.766)
Time: 44066.621 ms (00:44.067)
Time: 172945.633 ms (02:52.946)
Time: 136944.098 ms (02:16.944)
Time: 138268.413 ms (02:18.268)
Time: 16764.579 ms (00:16.765)
Time: 2579.907 ms (00:02.580)
Time: 2590.390 ms (00:02.590)
Time: 1498.034 ms (00:01.498)
Time: 1434.534 ms (00:01.435)
Time: 1448.123 ms (00:01.448)
Time: 113533.016 ms (01:53.533)
Time: 78465.335 ms (01:18.465)
Time: 80778.839 ms (01:20.779)
Time: 90456.388 ms (01:30.456)
Time: 87050.166 ms (01:27.050)
Time: 88426.851 ms (01:28.427)
Time: 45021.632 ms (00:45.022)
Time: 12486.342 ms (00:12.486)
Time: 12222.489 ms (00:12.222)
Time: 44246.843 ms (00:44.247)
Time: 15606.856 ms (00:15.607)
Time: 15251.554 ms (00:15.252)
Time: 29654.719 ms (00:29.655)
Time: 29441.858 ms (00:29.442)
Time: 29608.141 ms (00:29.608)
Time: 103547.383 ms (01:43.547)
Time: 104733.648 ms (01:44.734)
Time: 105779.016 ms (01:45.779)
Time: 29695.834 ms (00:29.696)
Time: 15395.447 ms (00:15.395)
Time: 15819.650 ms (00:15.820)
Time: 27841.552 ms (00:27.842)
Time: 29521.849 ms (00:29.522)
Time: 27508.521 ms (00:27.509)
Time: 56665.709 ms (00:56.666)
Time: 56459.321 ms (00:56.459)
Time: 56407.620 ms (00:56.408)
Time: 27488.888 ms (00:27.489)
Time: 25557.427 ms (00:25.557)
Time: 25634.140 ms (00:25.634)
Time: 97376.463 ms (01:37.376)
Time: 96047.902 ms (01:36.048)
Time: 99918.341 ms (01:39.918)
Time: 6294.887 ms (00:06.295)
Time: 6407.262 ms (00:06.407)
Time: 6376.369 ms (00:06.376)
Time: 40787.808 ms (00:40.788)
Time: 11206.256 ms (00:11.206)
Time: 11219.871 ms (00:11.220)
Time: 12420.227 ms (00:12.420)
Time: 12548.301 ms (00:12.548)
Time: 12468.458 ms (00:12.468)
Time: 57679.878 ms (00:57.680)
Time: 35466.123 ms (00:35.466)
Time: 35562.064 ms (00:35.562)
Time: 13551.276 ms (00:13.551)
Time: 13417.313 ms (00:13.417)
Time: 13645.287 ms (00:13.645)
Time: 150.297 ms
Time: 55.995 ms
Time: 55.796 ms
Time: 3059.796 ms (00:03.060)
Time: 3038.246 ms (00:03.038)
Time: 3041.210 ms (00:03.041)
Time: 4461.720 ms (00:04.462)
Time: 4446.691 ms (00:04.447)
Time: 4424.526 ms (00:04.425)
Time: 29275.463 ms (00:29.275)
Time: 17558.747 ms (00:17.559)
Time: 17438.621 ms (00:17.439)
Time: 203316.184 ms (03:23.316)
Time: 190037.946 ms (03:10.038)
Time: 189276.624 ms (03:09.277)
Time: 36921.542 ms (00:36.922)
Time: 36963.771 ms (00:36.964)
Time: 36660.406 ms (00:36.660)
Time: 38307.345 ms (00:38.307)
Time: 17597.355 ms (00:17.597)
Time: 17324.776 ms (00:17.325)
Time: 39857.567 ms (00:39.858)
Time: 26776.411 ms (00:26.776)
Time: 26592.819 ms (00:26.593)
Time: 162782.290 ms (02:42.782)
Time: 160722.582 ms (02:40.723)
Time: 162487.263 ms (02:42.487)
Time: 261494.290 ms (04:21.494)
Time: 263594.014 ms (04:23.594)
Time: 260436.201 ms (04:20.436)
Time: 265758.455 ms (04:25.758)
Time: 270087.523 ms (04:30.088)
Time: 266617.218 ms (04:26.617)
Time: 30677.159 ms (00:30.677)
Time: 28933.542 ms (00:28.934)
Time: 29815.271 ms (00:29.815)
Time: 19754.932 ms (00:19.755)
Time: 16851.157 ms (00:16.851)
Time: 16703.289 ms (00:16.703)
Time: 10379.500 ms (00:10.379)
Time: 10267.336 ms (00:10.267)
Time: 10287.944 ms (00:10.288)
Time: 17320.582 ms (00:17.321)
Time: 9786.410 ms (00:09.786)
Time: 9760.578 ms (00:09.761)
Time: 33487.352 ms (00:33.487)
Time: 26056.528 ms (00:26.057)
Time: 25958.258 ms (00:25.958)
Time: 28020.227 ms (00:28.020)
Time: 5609.725 ms (00:05.610)
Time: 5538.744 ms (00:05.539)
Time: 15119.473 ms (00:15.119)
Time: 5057.455 ms (00:05.057)
Time: 5063.154 ms (00:05.063)
Time: 3627.703 ms (00:03.628)
Time: 3645.232 ms (00:03.645)
Time: 3546.855 ms (00:03.547)


@@ -0,0 +1,43 @@
SELECT count(*) FROM {table};
SELECT count(*) FROM {table} WHERE AdvEngineID != 0;
SELECT sum(AdvEngineID), count(*), avg(ResolutionWidth) FROM {table};
SELECT sum(UserID) FROM {table};
SELECT COUNT(DISTINCT UserID) FROM {table};
SELECT COUNT(DISTINCT SearchPhrase) FROM {table};
SELECT min(EventDate), max(EventDate) FROM {table};
SELECT AdvEngineID, count(*) FROM {table} WHERE AdvEngineID != 0 GROUP BY AdvEngineID ORDER BY count(*) DESC;
SELECT RegionID, COUNT(DISTINCT UserID) AS u FROM {table} GROUP BY RegionID ORDER BY u DESC LIMIT 10;
SELECT RegionID, sum(AdvEngineID), count(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) FROM {table} GROUP BY RegionID ORDER BY c DESC LIMIT 10;
SELECT MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT MobilePhone, MobilePhoneModel, COUNT(DISTINCT UserID) AS u FROM {table} WHERE MobilePhoneModel != '' GROUP BY MobilePhone, MobilePhoneModel ORDER BY u DESC LIMIT 10;
SELECT SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, COUNT(DISTINCT UserID) AS u FROM {table} WHERE SearchPhrase != '' GROUP BY SearchPhrase ORDER BY u DESC LIMIT 10;
SELECT SearchEngineID, SearchPhrase, count(*) AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT UserID, count(*) FROM {table} GROUP BY UserID ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID, SearchPhrase, count(*) FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10;
SELECT UserID, extract(minute FROM EventTime) AS m, SearchPhrase, count(*) FROM {table} GROUP BY UserID, m, SearchPhrase ORDER BY count(*) DESC LIMIT 10;
SELECT UserID FROM {table} WHERE UserID = -6101065172474983726;
SELECT count(*) FROM {table} WHERE URL LIKE '%metrika%';
SELECT SearchPhrase, min(URL), count(*) AS c FROM {table} WHERE URL LIKE '%metrika%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT SearchPhrase, min(URL), min(Title), count(*) AS c, COUNT(DISTINCT UserID) FROM {table} WHERE Title LIKE '%Яндекс%' AND URL NOT LIKE '%.yandex.%' AND SearchPhrase != '' GROUP BY SearchPhrase ORDER BY c DESC LIMIT 10;
SELECT * FROM {table} WHERE URL LIKE '%metrika%' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY SearchPhrase LIMIT 10;
SELECT SearchPhrase FROM {table} WHERE SearchPhrase != '' ORDER BY EventTime, SearchPhrase LIMIT 10;
SELECT CounterID, avg(length(URL)) AS l, count(*) AS c FROM {table} WHERE URL != '' GROUP BY CounterID HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT REGEXP_REPLACE(Referer, '^https?://(?:www\.)?([^/]+)/.*$', '\1') AS key, avg(length(Referer)) AS l, count(*) AS c, min(Referer) FROM {table} WHERE Referer != '' GROUP BY key HAVING count(*) > 100000 ORDER BY l DESC LIMIT 25;
SELECT sum(ResolutionWidth), sum(ResolutionWidth + 1), sum(ResolutionWidth + 2), sum(ResolutionWidth + 3), sum(ResolutionWidth + 4), sum(ResolutionWidth + 5), sum(ResolutionWidth + 6), sum(ResolutionWidth + 7), sum(ResolutionWidth + 8), sum(ResolutionWidth + 9), sum(ResolutionWidth + 10), sum(ResolutionWidth + 11), sum(ResolutionWidth + 12), sum(ResolutionWidth + 13), sum(ResolutionWidth + 14), sum(ResolutionWidth + 15), sum(ResolutionWidth + 16), sum(ResolutionWidth + 17), sum(ResolutionWidth + 18), sum(ResolutionWidth + 19), sum(ResolutionWidth + 20), sum(ResolutionWidth + 21), sum(ResolutionWidth + 22), sum(ResolutionWidth + 23), sum(ResolutionWidth + 24), sum(ResolutionWidth + 25), sum(ResolutionWidth + 26), sum(ResolutionWidth + 27), sum(ResolutionWidth + 28), sum(ResolutionWidth + 29), sum(ResolutionWidth + 30), sum(ResolutionWidth + 31), sum(ResolutionWidth + 32), sum(ResolutionWidth + 33), sum(ResolutionWidth + 34), sum(ResolutionWidth + 35), sum(ResolutionWidth + 36), sum(ResolutionWidth + 37), sum(ResolutionWidth + 38), sum(ResolutionWidth + 39), sum(ResolutionWidth + 40), sum(ResolutionWidth + 41), sum(ResolutionWidth + 42), sum(ResolutionWidth + 43), sum(ResolutionWidth + 44), sum(ResolutionWidth + 45), sum(ResolutionWidth + 46), sum(ResolutionWidth + 47), sum(ResolutionWidth + 48), sum(ResolutionWidth + 49), sum(ResolutionWidth + 50), sum(ResolutionWidth + 51), sum(ResolutionWidth + 52), sum(ResolutionWidth + 53), sum(ResolutionWidth + 54), sum(ResolutionWidth + 55), sum(ResolutionWidth + 56), sum(ResolutionWidth + 57), sum(ResolutionWidth + 58), sum(ResolutionWidth + 59), sum(ResolutionWidth + 60), sum(ResolutionWidth + 61), sum(ResolutionWidth + 62), sum(ResolutionWidth + 63), sum(ResolutionWidth + 64), sum(ResolutionWidth + 65), sum(ResolutionWidth + 66), sum(ResolutionWidth + 67), sum(ResolutionWidth + 68), sum(ResolutionWidth + 69), sum(ResolutionWidth + 70), sum(ResolutionWidth + 71), sum(ResolutionWidth + 72), sum(ResolutionWidth + 73), sum(ResolutionWidth + 74), sum(ResolutionWidth + 75), sum(ResolutionWidth + 76), sum(ResolutionWidth + 77), sum(ResolutionWidth + 78), sum(ResolutionWidth + 79), sum(ResolutionWidth + 80), sum(ResolutionWidth + 81), sum(ResolutionWidth + 82), sum(ResolutionWidth + 83), sum(ResolutionWidth + 84), sum(ResolutionWidth + 85), sum(ResolutionWidth + 86), sum(ResolutionWidth + 87), sum(ResolutionWidth + 88), sum(ResolutionWidth + 89) FROM {table};
SELECT SearchEngineID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} WHERE SearchPhrase != '' GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT WatchID, ClientIP, count(*) AS c, sum("refresh"), avg(ResolutionWidth) FROM {table} GROUP BY WatchID, ClientIP ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS c FROM {table} GROUP BY URL ORDER BY c DESC LIMIT 10;
SELECT 1, URL, count(*) AS c FROM {table} GROUP BY 1, URL ORDER BY c DESC LIMIT 10;
SELECT ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3, count(*) AS c FROM {table} GROUP BY ClientIP, ClientIP - 1, ClientIP - 2, ClientIP - 3 ORDER BY c DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND URL != '' GROUP BY URL ORDER BY PageViews DESC LIMIT 10;
SELECT Title, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND DontCountHits = 0 AND "refresh" = 0 AND Title != '' GROUP BY Title ORDER BY PageViews DESC LIMIT 10;
SELECT URL, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND IsLink != 0 AND IsDownload = 0 GROUP BY URL ORDER BY PageViews DESC LIMIT 1000;
SELECT TraficSourceID, SearchEngineID, AdvEngineID, CASE WHEN (SearchEngineID = 0 AND AdvEngineID = 0) THEN Referer ELSE '' END AS Src, URL AS Dst, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 GROUP BY TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst ORDER BY PageViews DESC LIMIT 1000;
SELECT URLHash, EventDate, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND TraficSourceID IN (-1, 6) AND RefererHash = 686716256552154761 GROUP BY URLHash, EventDate ORDER BY PageViews DESC LIMIT 100;
SELECT WindowClientWidth, WindowClientHeight, count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-31' AND "refresh" = 0 AND DontCountHits = 0 AND URLHash = 686716256552154761 GROUP BY WindowClientWidth, WindowClientHeight ORDER BY PageViews DESC LIMIT 10000;
SELECT DATE_TRUNC('minute', EventTime) AS "Minute", count(*) AS PageViews FROM {table} WHERE CounterID = 62 AND EventDate >= '2013-07-01' AND EventDate <= '2013-07-02' AND "refresh" = 0 AND DontCountHits = 0 GROUP BY DATE_TRUNC('minute', EventTime) ORDER BY DATE_TRUNC('minute', EventTime);

File diff suppressed because it is too large.


@ -2,11 +2,11 @@
# NOTE: has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
SET(VERSION_REVISION 54456)
SET(VERSION_REVISION 54457)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 11)
SET(VERSION_MINOR 12)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH 7a4a0b0edef0ad6e0aa662cd3b90c3f4acf796e7)
SET(VERSION_DESCRIBE v21.11.1.1-prestable)
SET(VERSION_STRING 21.11.1.1)
SET(VERSION_GITHASH 503a418dedf0011e9040c3a1b6913e0b5488be4c)
SET(VERSION_DESCRIBE v21.12.1.1-prestable)
SET(VERSION_STRING 21.12.1.1)
# end of autochange


@ -1,10 +1,3 @@
option (ENABLE_FILELOG "Enable FILELOG" ON)
if (NOT ENABLE_FILELOG)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use StorageFileLog with ENABLE_FILELOG=OFF")
return()
endif()
# StorageFileLog only supports the Linux platform
if (OS_LINUX)
set (USE_FILELOG 1)

contrib/snappy

@ -1 +1 @@
Subproject commit 3f194acb57e0487531c96b97af61dcbd025a78a3
Subproject commit fb057edfed820212076239fd32cb2ff23e9016bf

debian/changelog

@ -1,5 +1,5 @@
clickhouse (21.11.1.1) unstable; urgency=low
clickhouse (21.12.1.1) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Thu, 09 Sep 2021 12:03:26 +0300
-- clickhouse-release <clickhouse-release@yandex-team.ru> Tue, 02 Nov 2021 00:56:42 +0300


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.11.1.*
ARG version=21.12.1.*
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list


@ -0,0 +1,43 @@
# docker build -t clickhouse/docs-build .
FROM ubuntu:20.04
ENV LANG=C.UTF-8
RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
python3-setuptools \
virtualenv \
wget \
bash \
python \
curl \
python3-requests \
sudo \
git \
openssl \
python3-pip \
software-properties-common \
language-pack-zh* \
chinese* \
fonts-arphic-ukai \
fonts-arphic-uming \
fonts-ipafont-mincho \
fonts-ipafont-gothic \
fonts-unfonts-core \
xvfb \
nodejs \
npm \
openjdk-11-jdk \
ssh-client \
&& pip --no-cache-dir install scipy \
&& apt-get autoremove --yes \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN wget 'https://github.com/wkhtmltopdf/packaging/releases/download/0.12.6-1/wkhtmltox_0.12.6-1.focal_amd64.deb'
RUN npm i -g purify-css
RUN pip3 install --ignore-installed --upgrade setuptools pip virtualenv


@ -0,0 +1,9 @@
# docker build -t clickhouse/docs-check .
FROM clickhouse/docs-builder
COPY run.sh /
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]

docker/docs/check/run.sh

@ -0,0 +1,9 @@
#!/usr/bin/env bash
set -euo pipefail
cd $REPO_PATH/docs/tools
mkdir venv
virtualenv -p $(which python3) venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt
./build.py --skip-git-log 2>&1 | tee $OUTPUT_PATH/output.log


@ -0,0 +1,9 @@
# docker build -t clickhouse/docs-release .
FROM clickhouse/docs-builder
COPY run.sh /
ENV REPO_PATH=/repo_path
ENV OUTPUT_PATH=/output_path
CMD ["/bin/bash", "/run.sh"]


@ -0,0 +1,10 @@
#!/usr/bin/env bash
set -euo pipefail
cd $REPO_PATH/docs/tools
mkdir venv
virtualenv -p $(which python3) venv
source venv/bin/activate
python3 -m pip install --ignore-installed -r requirements.txt
mkdir -p ~/.ssh && ssh-keyscan -t rsa github.com >> ~/.ssh/known_hosts
./release.sh 2>&1 | tee $OUTPUT_PATH/output.log


@ -166,5 +166,20 @@
"docker/test/keeper-jepsen": {
"name": "clickhouse/keeper-jepsen-test",
"dependent": []
},
"docker/docs/builder": {
"name": "clickhouse/docs-builder",
"dependent": [
"docker/docs/check",
"docker/docs/release"
]
},
"docker/docs/check": {
"name": "clickhouse/docs-check",
"dependent": []
},
"docker/docs/release": {
"name": "clickhouse/docs-release",
"dependent": []
}
}


@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.11.1.*
ARG version=21.12.1.*
ARG gosu_ver=1.10
# set non-empty deb_location_url url to create a docker image


@ -86,7 +86,7 @@ done
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<clickhouse>
<!-- Docs: <https://clickhouse.com/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
@ -103,7 +103,7 @@ if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CL
<access_management>${CLICKHOUSE_ACCESS_MANAGEMENT}</access_management>
</${CLICKHOUSE_USER}>
</users>
</yandex>
</clickhouse>
EOT
fi


@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.com/deb/stable/ main/"
ARG version=21.11.1.*
ARG version=21.12.1.*
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \


@ -264,7 +264,7 @@ function run_tests
set +e
time clickhouse-test --hung-check -j 8 --order=random \
--fast-tests-only --no-long --testname --shard --zookeeper \
--fast-tests-only --no-long --testname --shard --zookeeper --check-zookeeper-session \
-- "$FASTTEST_FOCUS" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee "$FASTTEST_OUTPUT/test_result.txt"


@ -36,10 +36,11 @@ function clone
git diff --name-only master HEAD | tee ci-changed-files.txt
else
if [ -v COMMIT_SHA ]; then
git fetch --depth 1 origin "$SHA_TO_TEST"
git fetch --depth 2 origin "$SHA_TO_TEST"
git checkout "$SHA_TO_TEST"
echo "Checked out nominal SHA $SHA_TO_TEST for master"
else
git fetch --depth 2 origin
echo "Using default repository head $(git rev-parse HEAD)"
fi
git diff --name-only HEAD~1 HEAD | tee ci-changed-files.txt


@ -634,7 +634,7 @@ create view query_display_names as select * from
create view partial_query_times as select * from
file('analyze/partial-query-times.tsv', TSVWithNamesAndTypes,
'test text, query_index int, time_stddev float, time_median float')
'test text, query_index int, time_stddev float, time_median double')
;
-- Report for partial queries that we could only run on the new server (e.g.


@ -109,7 +109,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --no-stateless --hung-check --print-time "${ADDITIONAL_OPTIONS[@]}" \
"$SKIP_TESTS_OPTION" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee test_output/test_result.txt
set -e
}


@ -97,7 +97,7 @@ function run_tests()
fi
set +e
clickhouse-test --testname --shard --zookeeper --hung-check --print-time \
clickhouse-test --testname --shard --zookeeper --check-zookeeper-session --hung-check --print-time \
--test-runs "$NUM_TRIES" "${ADDITIONAL_OPTIONS[@]}" 2>&1 \
| ts '%Y-%m-%d %H:%M:%S' \
| tee -a test_output/test_result.txt


@ -26,4 +26,6 @@ COPY ./stress /stress
COPY run.sh /
ENV DATASETS="hits visits"
ENV S3_URL="https://clickhouse-datasets.s3.yandex.net"
CMD ["/bin/bash", "/run.sh"]


@ -46,11 +46,11 @@ function configure()
sudo chown root: /var/lib/clickhouse
# Set more frequent update period of asynchronous metrics to more frequently update information about real memory usage (less chance of OOM).
echo "<yandex><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></yandex>" \
echo "<clickhouse><asynchronous_metrics_update_period_s>1</asynchronous_metrics_update_period_s></clickhouse>" \
> /etc/clickhouse-server/config.d/asynchronous_metrics_update_period_s.xml
# Set maximum memory usage as half of total memory (less chance of OOM).
echo "<yandex><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></yandex>" \
echo "<clickhouse><max_server_memory_usage_to_ram_ratio>0.5</max_server_memory_usage_to_ram_ratio></clickhouse>" \
> /etc/clickhouse-server/config.d/max_server_memory_usage_to_ram_ratio.xml
}
@ -112,7 +112,7 @@ configure
start
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --dataset-names $DATASETS
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"


@ -50,7 +50,7 @@ URL="https://builds.clickhouse.com/master/${DIR}/clickhouse"
echo
echo "Will download ${URL}"
echo
curl -O "${URL}" && chmod a+x clickhouse &&
curl -O "${URL}" && chmod a+x clickhouse || exit 1
echo
echo "Successfully downloaded the ClickHouse binary, you can run it as:
./clickhouse"


@ -16,10 +16,13 @@ The supported formats are:
| [TabSeparatedRaw](#tabseparatedraw) | ✔ | ✔ |
| [TabSeparatedWithNames](#tabseparatedwithnames) | ✔ | ✔ |
| [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes) | ✔ | ✔ |
| [TabSeparatedRawWithNames](#tabseparatedrawwithnames) | ✔ | ✔ |
| [TabSeparatedRawWithNamesAndTypes](#tabseparatedrawwithnamesandtypes) | ✔ | ✔ |
| [Template](#format-template) | ✔ | ✔ |
| [TemplateIgnoreSpaces](#templateignorespaces) | ✔ | ✗ |
| [CSV](#csv) | ✔ | ✔ |
| [CSVWithNames](#csvwithnames) | ✔ | ✔ |
| [CSVWithNamesAndTypes](#csvwithnamesandtypes) | ✔ | ✔ |
| [CustomSeparated](#format-customseparated) | ✔ | ✔ |
| [Values](#data-format-values) | ✔ | ✔ |
| [Vertical](#vertical) | ✗ | ✔ |
@ -33,8 +36,10 @@ The supported formats are:
| [JSONStringsEachRow](#jsonstringseachrow) | ✔ | ✔ |
| [JSONStringsEachRowWithProgress](#jsonstringseachrowwithprogress) | ✗ | ✔ |
| [JSONCompactEachRow](#jsoncompacteachrow) | ✔ | ✔ |
| [JSONCompactEachRowWithNames](#jsoncompacteachrowwithnames) | ✔ | ✔ |
| [JSONCompactEachRowWithNamesAndTypes](#jsoncompacteachrowwithnamesandtypes) | ✔ | ✔ |
| [JSONCompactStringsEachRow](#jsoncompactstringseachrow) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNames](#jsoncompactstringseachrowwithnames) | ✔ | ✔ |
| [JSONCompactStringsEachRowWithNamesAndTypes](#jsoncompactstringseachrowwithnamesandtypes) | ✔ | ✔ |
| [TSKV](#tskv) | ✔ | ✔ |
| [Pretty](#pretty) | ✗ | ✔ |
@ -51,6 +56,7 @@ The supported formats are:
| [ArrowStream](#data-format-arrow-stream) | ✔ | ✔ |
| [ORC](#data-format-orc) | ✔ | ✔ |
| [RowBinary](#rowbinary) | ✔ | ✔ |
| [RowBinaryWithNames](#rowbinarywithnames) | ✔ | ✔ |
| [RowBinaryWithNamesAndTypes](#rowbinarywithnamesandtypes) | ✔ | ✔ |
| [Native](#native) | ✔ | ✔ |
| [Null](#null) | ✗ | ✔ |
@ -126,6 +132,9 @@ Arrays are written as a list of comma-separated values in square brackets. Numbe
[NULL](../sql-reference/syntax.md) is formatted as `\N`.
If setting [input_format_tsv_empty_as_default](../operations/settings/settings.md#settings-input_format_tsv_empty_as_default) is enabled,
empty input fields are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too.
Each element of [Nested](../sql-reference/data-types/nested-data-structures/nested.md) structures is represented as an array.
For example:
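(A minimal sketch of this behavior; the table `nestedt` and its layout are illustrative, not part of the original example.)
``` sql
CREATE TABLE nestedt (id UInt8, aux Nested(a UInt8, b String)) ENGINE = TinyLog;
INSERT INTO nestedt VALUES (1, [1], ['a']);
-- each element of the Nested structure comes out as an array per column
SELECT * FROM nestedt FORMAT TSV;
```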
@ -164,17 +173,35 @@ This format is also available under the name `TSVRaw`.
## TabSeparatedWithNames {#tabseparatedwithnames}
Differs from the `TabSeparated` format in that the column names are written in the first row.
During parsing, the first row is expected to contain the column names. You can use column names to determine their position and to check their correctness.
If setting [input_format_with_names_use_header](../operations/settings/settings.md#settings-input_format_with_names_use_header) is set to 1,
the columns from input data will be mapped to the columns from the table by their names; columns with unknown names will be skipped if setting [input_format_skip_unknown_fields](../operations/settings/settings.md#settings-input_format_skip_unknown_fields) is set to 1.
Otherwise, the first row will be skipped.
This format is also available under the name `TSVWithNames`.
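A quick illustration of the header row (a minimal sketch, not taken from the surrounding text):
``` sql
SELECT 1 AS a, 'x' AS b FORMAT TabSeparatedWithNames;
-- output (tab-separated):
-- a  b
-- 1  x
```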
## TabSeparatedWithNamesAndTypes {#tabseparatedwithnamesandtypes}
Differs from the `TabSeparated` format in that the column names are written to the first row, while the column types are in the second row.
The first row with names is processed the same way as in `TabSeparatedWithNames` format.
If setting [input_format_with_types_use_header](../operations/settings/settings.md#settings-input_format_with_types_use_header) is set to 1,
the types from input data will be compared with the types of the corresponding columns from the table. Otherwise, the second row will be skipped.
This format is also available under the name `TSVWithNamesAndTypes`.
## TabSeparatedRawWithNames {#tabseparatedrawwithnames}
Differs from `TabSeparatedWithNames` format in that the rows are written without escaping.
When parsing with this format, tabs or linefeeds are not allowed in each field.
This format is also available under the name `TSVRawWithNames`.
## TabSeparatedRawWithNamesAndTypes {#tabseparatedrawwithnamesandtypes}
Differs from `TabSeparatedWithNamesAndTypes` format in that the rows are written without escaping.
When parsing with this format, tabs or linefeeds are not allowed in each field.
This format is also available under the name `TSVRawWithNamesAndTypes`.
## Template {#format-template}
This format allows specifying a custom format string with placeholders for values with a specified escaping rule.
@ -195,7 +222,7 @@ where `delimiter_i` is a delimiter between values (`$` symbol can be escaped as
- `Raw` (without escaping, similarly to `TSVRaw`)
- `None` (no escaping rule, see further)
If an escaping rule is omitted, then `None` will be used. `XML` and `Raw` are suitable only for output.
If an escaping rule is omitted, then `None` will be used. `XML` is suitable only for output.
So, for the following format string:
@ -375,9 +402,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
When parsing, all values can be parsed either with or without quotes. Both double and single quotes are supported. Rows can also be arranged without quotes. In this case, they are parsed up to the delimiter character or line feed (CR or LF). In violation of the RFC, when parsing rows without quotes, the leading and trailing spaces and tabs are ignored. For the line feed, Unix (LF), Windows (CR LF) and Mac OS Classic (CR) types are all supported.
If setting [input_format_csv_empty_as_default](../operations/settings/settings.md#settings-input_format_csv_empty_as_default) is enabled,
empty unquoted input values are replaced with default values. For complex default expressions [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#settings-input_format_defaults_for_omitted_fields) must be enabled too.
`NULL` is formatted as `\N` or `NULL` or an empty unquoted string (see settings [input_format_csv_unquoted_null_literal_as_null](../operations/settings/settings.md#settings-input_format_csv_unquoted_null_literal_as_null) and [input_format_defaults_for_omitted_fields](../operations/settings/settings.md#session_settings-input_format_defaults_for_omitted_fields)).
@ -385,7 +411,11 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
## CSVWithNames {#csvwithnames}
Also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
## CSVWithNamesAndTypes {#csvwithnamesandtypes}
Also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
## CustomSeparated {#format-customseparated}
@ -657,10 +687,21 @@ Differs from `JSONEachRow`/`JSONStringsEachRow` in that ClickHouse will also yie
{"progress":{"read_rows":"3","read_bytes":"24","written_rows":"0","written_bytes":"0","total_rows_to_read":"3"}}
```
## JSONCompactEachRowWithNames {#jsoncompacteachrowwithnames}
Differs from `JSONCompactEachRow` format in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
## JSONCompactEachRowWithNamesAndTypes {#jsoncompacteachrowwithnamesandtypes}
Differs from `JSONCompactEachRow` format in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
## JSONCompactStringsEachRowWithNames {#jsoncompactstringseachrowwithnames}
Differs from `JSONCompactStringsEachRow` in that it also prints the header row with column names, similar to [TabSeparatedWithNames](#tabseparatedwithnames).
## JSONCompactStringsEachRowWithNamesAndTypes {#jsoncompactstringseachrowwithnamesandtypes}
Differs from `JSONCompactStringsEachRow` in that it also prints two header rows with column names and types, similar to [TabSeparatedWithNamesAndTypes](#tabseparatedwithnamesandtypes).
```json
["'hello'", "multiply(42, number)", "range(5)"]
@ -703,7 +744,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, then the default value for `x` equals `0`, but the default value of `a` equals `x * 2`.
!!! note "Warning"
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources, compared to insertion with `input_format_defaults_for_omitted_fields = 0`.
### Selecting Data {#selecting-data}
@ -910,6 +951,13 @@ Array is represented as a varint length (unsigned [LEB128](https://en.wikipedia.
For [NULL](../sql-reference/syntax.md#null-literal) support, an additional byte containing 1 or 0 is added before each [Nullable](../sql-reference/data-types/nullable.md) value. If 1, then the value is `NULL` and this byte is interpreted as a separate value. If 0, the value after the byte is not `NULL`.
## RowBinaryWithNames {#rowbinarywithnames}
Similar to [RowBinary](#rowbinary), but with added header:
- [LEB128](https://en.wikipedia.org/wiki/LEB128)-encoded number of columns (N)
- N `String`s specifying column names
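A minimal sketch of producing such a header (the query itself is illustrative):
``` sql
-- the output starts with the LEB128-encoded column count (2),
-- then the column names 'n' and 's' as Strings, then the binary rows
SELECT number AS n, toString(number) AS s
FROM system.numbers
LIMIT 3
FORMAT RowBinaryWithNames;
```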
## RowBinaryWithNamesAndTypes {#rowbinarywithnamesandtypes}
Similar to [RowBinary](#rowbinary), but with added header:


@ -29,7 +29,7 @@ toc_title: Adopters
| <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
| <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Job opening, September 2021](https://careers.bloomberg.com/job/detail/94913), [slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
| <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
| <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
| <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
@ -170,5 +170,7 @@ toc_title: Adopters
| <a href="https://cft.ru/" class="favicon">ЦФТ</a> | Banking, Financial products, Payments | — | — | — | [Meetup in Russian, April 2020](https://team.cft.ru/events/162) |
| <a href="https://promo.croc.ru/digitalworker" class="favicon">Цифровой Рабочий</a> | Industrial IoT, Analytics | — | — | — | [Blog post in Russian, March 2021](https://habr.com/en/company/croc/blog/548018/) |
| <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
| <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
| <a href="https://www.deepl.com/" class="favicon">Deepl</a> | Machine Learning | — | — | — | [Video, October 2021](https://www.youtube.com/watch?v=WIYJiPwxXdM&t=1182s) |
[Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->


@ -7,7 +7,7 @@ toc_title: Configuration Files
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note, that any configuration file can be written either in XML or YAML, but mixing formats in one file is not supported. For example, you can have main configs as `config.xml` and `users.xml` and write additional files in `config.d` and `users.d` directories in `.yaml`.
All XML files should have the same root element, usually `<yandex>`. As for YAML, `yandex:` should not be present, the parser will insert it automatically.
All XML files should have the same root element, usually `<clickhouse>`. As for YAML, `clickhouse:` should not be present, the parser will insert it automatically.
## Override {#override}
@ -21,13 +21,13 @@ Some settings specified in the main configuration file can be overridden in othe
You can also declare attributes as coming from environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Substitution {#substitution}
@ -39,7 +39,7 @@ If you want to replace an entire element with a substitution use `include` as el
XML substitution example:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -48,7 +48,7 @@ XML substitution example:
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node and it will be fully inserted into the source element.
@ -72,7 +72,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -83,7 +83,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
## YAML examples {#example}


@ -23,32 +23,32 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This
Example (goes into `config.xml`):
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
With principal specification:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
With filtering by realm:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Note"
@ -80,7 +80,7 @@ Parameters:
Example (goes into `users.xml`):
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -91,7 +91,7 @@ Example (goes into `users.xml`):
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
!!! warning "Warning"


@ -14,7 +14,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<ldap_servers>
<!- Typical LDAP server. -->
@ -45,7 +45,7 @@ To define LDAP server you must add `ldap_servers` section to the `config.xml`.
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Note that you can define multiple LDAP servers inside the `ldap_servers` section using distinct names.
@ -90,7 +90,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -101,7 +101,7 @@ At each login attempt, ClickHouse tries to "bind" to the specified DN defined by
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Note that user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described previously.
@ -125,7 +125,7 @@ At each login attempt, ClickHouse tries to find the user definition locally and
Goes into `config.xml`.
```xml
<yandex>
<clickhouse>
<!- ... -->
<user_directories>
<!- Typical LDAP server. -->
@ -156,7 +156,7 @@ Goes into `config.xml`.
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Note that `my_ldap_server` referred to in the `ldap` section inside the `user_directories` section must be a previously defined LDAP server that is configured in the `config.xml` (see [LDAP Server Definition](#ldap-server-definition)).


@ -23,7 +23,7 @@ chmod a+x ./hardware.sh
./hardware.sh
```
3. Copy the output and send it to clickhouse-feedback@yandex-team.com
3. Copy the output and send it to feedback@clickhouse.com
All the results are published here: https://clickhouse.com/benchmark/hardware/


@ -69,6 +69,8 @@ If no conditions are met for a data part, ClickHouse uses the `lz4` compression.
</compression>
```
<!--
## encryption {#server-settings-encryption}
Configures a command to obtain a key to be used by [encryption codecs](../../sql-reference/statements/create/table.md#create-query-encryption-codecs). Key (or keys) should be written in environment variables or set in the configuration file.
@ -131,7 +133,7 @@ Also, users can add nonce that must be 12 bytes long (by default encryption and
```xml
<encryption_codecs>
<aes_128_gcm_siv>
<nonce>0123456789101</nonce>
<nonce>012345678910</nonce>
</aes_128_gcm_siv>
</encryption_codecs>
```
@ -148,6 +150,8 @@ Or it can be set in hex:
Everything mentioned above can be applied for `aes_256_gcm_siv` (but the key must be 32 bytes long).
-->
## custom_settings_prefixes {#custom_settings_prefixes}
List of prefixes for [custom settings](../../operations/settings/index.md#custom_settings). The prefixes must be separated with commas.
@ -482,16 +486,12 @@ Backlog (queue size of pending connections) of the listen socket.
Default value: `4096` (as in linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Usually this value does not need to be changed, since:
- the default value is large enough,
- and the server has a separate thread for accepting client connections.
So even if `TcpExtListenOverflows` (from `nstat`) is non-zero and this counter grows for the ClickHouse server, it does not mean that this value needs to be increased, since:
- usually, if 4096 is not enough, it indicates some internal ClickHouse scaling issue, so it is better to report an issue.
- and it does not mean that the server can handle more connections later (and even if it could, by that moment clients may be gone or disconnected).
Examples:
@ -786,14 +786,14 @@ It is enabled by default. If it's not, you can do this manually.
To manually turn on metrics history collection [`system.metric_log`](../../operations/system-tables/metric_log.md), create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Disabling**
@ -801,9 +801,9 @@ To manually turn on metrics history collection [`system.metric_log`](../../opera
To disable `metric_log` setting, you should create the following file `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log remove="1" />
</yandex>
</clickhouse>
```
## replicated_merge_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1039,7 +1039,7 @@ Parameters:
**Example**
```xml
<yandex>
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1048,7 +1048,7 @@ Parameters:
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</yandex>
</clickhouse>
```
@ -1290,6 +1290,7 @@ This section contains the following parameters:
- [Replication](../../engines/table-engines/mergetree-family/replication.md)
- [ZooKeeper Programmers Guide](http://zookeeper.apache.org/doc/current/zookeeperProgrammers.html)
- [Optional secured communication between ClickHouse and Zookeeper](../ssl-zookeeper.md#secured-communication-with-zookeeper)
## use_minimalistic_part_header_in_zookeeper {#server-settings-use_minimalistic_part_header_in_zookeeper}


@ -399,7 +399,7 @@ Default value: 1.
## input_format_defaults_for_omitted_fields {#session_settings-input_format_defaults_for_omitted_fields}
When performing `INSERT` queries, replace omitted input column values with default values of the respective columns. This option only applies to [JSONEachRow](../../interfaces/formats.md#jsoneachrow), [CSV](../../interfaces/formats.md#csv), [TabSeparated](../../interfaces/formats.md#tabseparated) formats and formats with `WithNames`/`WithNamesAndTypes` suffixes.
!!! note "Note"
When this option is enabled, extended table metadata are sent from server to client. It consumes additional computing resources on the server and can reduce performance.
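A minimal sketch, assuming a hypothetical table `t` with a defaulted column:
``` sql
CREATE TABLE t (x UInt32, a UInt32 DEFAULT x * 2) ENGINE = Memory;
SET input_format_defaults_for_omitted_fields = 1;
-- the omitted column "a" is computed from its DEFAULT expression, so a = 2
INSERT INTO t FORMAT JSONEachRow {"x": 1}
```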
@ -417,6 +417,12 @@ When enabled, replace empty input fields in TSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too.
Disabled by default.
## input_format_csv_empty_as_default {#settings-input-format-csv-empty-as-default}
When enabled, replace empty input fields in CSV with default values. For complex default expressions `input_format_defaults_for_omitted_fields` must be enabled too.
Enabled by default.
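A minimal sketch (the table `csv_t` is hypothetical):
``` sql
CREATE TABLE csv_t (s String DEFAULT 'none', n UInt8) ENGINE = Memory;
-- the empty first CSV field is replaced with the column default 'none'
INSERT INTO csv_t FORMAT CSV ,42
```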
## input_format_tsv_enum_as_number {#settings-input_format_tsv_enum_as_number}
Enables or disables parsing enum values as enum ids for TSV input format.
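A minimal sketch (the table `enum_t` is hypothetical):
``` sql
CREATE TABLE enum_t (e Enum8('first' = 1, 'second' = 2)) ENGINE = Memory;
SET input_format_tsv_enum_as_number = 1;
-- the TSV field carries the enum id 2 instead of the name 'second'
INSERT INTO enum_t FORMAT TSV 2
```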
@ -540,8 +546,40 @@ To improve insert performance, we recommend disabling this check if you are sure
Supported formats:
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 1.
## input_format_with_types_use_header {#settings-input-format-with-types-use-header}
Controls whether the format parser should check if data types from the input data match data types from the target table.
Supported formats:
- [CSVWithNames](../../interfaces/formats.md#csvwithnames)
- [CSVWithNamesAndTypes](../../interfaces/formats.md#csvwithnamesandtypes)
- [TabSeparatedWithNames](../../interfaces/formats.md#tabseparatedwithnames)
- [TabSeparatedWithNamesAndTypes](../../interfaces/formats.md#tabseparatedwithnamesandtypes)
- [JSONCompactEachRowWithNames](../../interfaces/formats.md#jsoncompacteachrowwithnames)
- [JSONCompactEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompacteachrowwithnamesandtypes)
- [JSONCompactStringsEachRowWithNames](../../interfaces/formats.md#jsoncompactstringseachrowwithnames)
- [JSONCompactStringsEachRowWithNamesAndTypes](../../interfaces/formats.md#jsoncompactstringseachrowwithnamesandtypes)
- [RowBinaryWithNames](../../interfaces/formats.md#rowbinarywithnames)
- [RowBinaryWithNamesAndTypes](../../interfaces/formats.md#rowbinarywithnamesandtypes)
Possible values:
@ -1397,6 +1435,32 @@ Minimum count of executing the same expression before it gets compiled.
Default value: `3`.
## compile_aggregate_expressions {#compile_aggregate_expressions}
Enables or disables JIT-compilation of aggregate functions to native code. Enabling this setting can improve performance.
Possible values:
- 0 — Aggregation is done without JIT compilation.
- 1 — Aggregation is done using JIT compilation.
Default value: `1`.
**See Also**
- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)
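A minimal usage sketch (the table `t` and its columns are hypothetical):
``` sql
SET compile_aggregate_expressions = 1;
-- aggregation in this query may now be JIT-compiled to native code
SELECT key, sum(value), avg(value) FROM t GROUP BY key;
```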
## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression}
The minimum number of identical aggregate expressions to start JIT-compilation. Works only if the [compile_aggregate_expressions](#compile_aggregate_expressions) setting is enabled.
Possible values:
- Positive integer.
- 0 — Identical aggregate expressions are always JIT-compiled.
Default value: `3`.
## output_format_json_quote_64bit_integers {#session_settings-output_format_json_quote_64bit_integers}
Controls quoting of 64-bit or bigger [integers](../../sql-reference/data-types/int-uint.md) (like `UInt64` or `Int128`) when they are output in a [JSON](../../interfaces/formats.md#json) format.
@ -1751,9 +1815,11 @@ Do not merge aggregation states from different servers for distributed query processing.
Possible values:
- `0` — Disabled (final query processing is done on the initiator node).
- `1` — Do not merge aggregation states from different servers for distributed query processing (the query is completely processed on the shard, the initiator only proxies the data); can be used when it is certain that there are different keys on different shards.
- `2` — Same as `1`, but applies `ORDER BY` and `LIMIT` on the initiator (this is not possible when the query is processed completely on the remote node, as with `distributed_group_by_no_merge=1`); can be used for queries with `ORDER BY` and/or `LIMIT`.
Default value: `0`
**Example**
@ -1784,29 +1850,27 @@ FORMAT PrettyCompactMonoBlock
└───────┘
```
## distributed_push_down_limit {#distributed-push-down-limit}
Enables or disables applying [LIMIT](#limit) on each shard separately.
This allows avoiding:
- sending extra rows over the network,
- processing rows behind the limit on the initiator.
Starting from version 21.9 you can no longer get inaccurate results, since `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met:
- [distributed_group_by_no_merge](#distributed-group-by-no-merge) > 0.
- The query **does not have** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but it has `ORDER BY`/`LIMIT`.
- The query **has** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
    - [optimize_skip_unused_shards](#optimize-skip-unused-shards) is enabled.
    - [optimize_distributed_group_by_sharding_key](#optimize-distributed-group-by-sharding-key) is enabled.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: `1`.
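A minimal sketch against a hypothetical Distributed table `dist`:
``` sql
SET distributed_push_down_limit = 1;
-- no GROUP BY/DISTINCT/LIMIT BY here, so the LIMIT is applied on each
-- shard and remote nodes send at most 10 rows each to the initiator
SELECT * FROM dist ORDER BY key LIMIT 10;
```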
See also:
@ -1920,6 +1984,7 @@ Default value: 0
See also:
- [distributed_group_by_no_merge](#distributed-group-by-no-merge)
- [distributed_push_down_limit](#distributed-push-down-limit)
- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
!!! note "Note"
@ -3831,6 +3896,21 @@ Default value: `0`.
- [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## describe_include_subcolumns {#describe_include_subcolumns}
Enables describing subcolumns for a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. For example, members of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of a [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or an [Array](../../sql-reference/data-types/array.md#array-size) data type.
Possible values:
- 0 — Subcolumns are not included in `DESCRIBE` queries.
- 1 — Subcolumns are included in `DESCRIBE` queries.
Default value: `0`.
**Example**
See an example for the [DESCRIBE](../../sql-reference/statements/describe-table.md) statement.
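A minimal sketch (the table `d_t` with a `Tuple` column is hypothetical):
``` sql
CREATE TABLE d_t (p Tuple(x Int32, y Int32)) ENGINE = Memory;
SET describe_include_subcolumns = 1;
DESCRIBE TABLE d_t; -- now also lists the subcolumns p.x and p.y
```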
## async_insert {#async-insert}
Enables or disables asynchronous inserts. This makes sense only for insertion over the HTTP protocol. Note that deduplication does not work for such inserts.


@ -0,0 +1,74 @@
---
toc_priority: 45
toc_title: Secured communication with Zookeeper
---
# Optional secured communication between ClickHouse and Zookeeper {#secured-communication-with-zookeeper}
You should specify `ssl.keyStore.location`, `ssl.keyStore.password` and `ssl.trustStore.location`, `ssl.trustStore.password` to enable SSL communication with the ClickHouse client. These options are available starting from ZooKeeper version 3.5.2.
You can add `zookeeper.crt` to trusted certificates.
``` bash
sudo cp zookeeper.crt /usr/local/share/ca-certificates/zookeeper.crt
sudo update-ca-certificates
```
The client section in `config.xml` will look like this:
``` xml
<client>
<certificateFile>/etc/clickhouse-server/client.crt</certificateFile>
<privateKeyFile>/etc/clickhouse-server/client.key</privateKeyFile>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<invalidCertificateHandler>
<name>RejectCertificateHandler</name>
</invalidCertificateHandler>
</client>
```
Add Zookeeper to ClickHouse config with some cluster and macros:
``` xml
<yandex>
<zookeeper>
<node>
<host>localhost</host>
<port>2281</port>
<secure>1</secure>
</node>
</zookeeper>
</yandex>
```
Start `clickhouse-server`. In the logs you should see:
```text
<Trace> ZooKeeper: initialized, hosts: secure://localhost:2281
```
Prefix `secure://` indicates that connection is secured by SSL.
To ensure the traffic is encrypted, run `tcpdump` on the secured port:
```bash
tcpdump -i any dst port 2281 -nnXS
```
Then run a query in `clickhouse-client`:
```sql
SELECT * FROM system.zookeeper WHERE path = '/';
```
On an unencrypted connection, the `tcpdump` output will contain something like this:
```text
..../zookeeper/q
uota.
```
On an encrypted connection, you should not see this.


@ -22,7 +22,7 @@ ClickHouse supports zero-copy replication for `S3` and `HDFS` disks, which means
Configuration markup:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -44,7 +44,7 @@ Configuration markup:
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</yandex>
</clickhouse>
```
Required parameters:
@ -96,7 +96,7 @@ Optional parameters:
Example of disk configuration:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -113,7 +113,7 @@ Example of disk configuration:
</disk_s3_encrypted>
</disks>
</storage_configuration>
</yandex>
</clickhouse>
```
## Storing Data on Web Server {#storing-data-on-webserver}
@ -127,7 +127,7 @@ Web server storage is supported only for the [MergeTree](../engines/table-engine
A ready-made test case. You need to add this configuration to the config:
``` xml
<yandex>
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -145,7 +145,7 @@ A ready test case. You need to add this configuration to config:
</web>
</policies>
</storage_configuration>
</yandex>
</clickhouse>
```
And then execute this query:


@ -24,6 +24,11 @@ Columns:
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the primary key expression.
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column is in the sampling key expression.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — Compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the precision of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it is 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of the `DateTime64` data type. For other data types, the `NULL` value is returned.
**Example**
@ -34,10 +39,11 @@ SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```text
Row 1:
──────
database: system
table: aggregate_function_combinators
name: name
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_catalog
type: String
position: 1
default_kind:
default_expression:
data_compressed_bytes: 0
@ -49,13 +55,19 @@ is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
Row 2:
──────
database: system
table: aggregate_function_combinators
name: is_internal
type: UInt8
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_schema
type: String
position: 2
default_kind:
default_expression:
data_compressed_bytes: 0
@ -67,6 +79,11 @@ is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```
The `system.columns` table contains the following columns (the column type is shown in brackets):


@ -9,6 +9,7 @@ Columns:
- `data_path` ([String](../../sql-reference/data-types/string.md)) — Data path.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — Metadata path.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Database UUID.
- `comment` ([String](../../sql-reference/data-types/string.md)) — Database comment.
The `name` column from this system table is used for implementing the `SHOW DATABASES` query.
@ -17,22 +18,20 @@ The `name` column from this system table is used for implementing the `SHOW DATABASES` query.
Create a database.
``` sql
CREATE DATABASE test;
```
Check all of the databases available to the user:
``` sql
SELECT * FROM system.databases;
```
``` text
┌─name───────────────────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─────────────────────────────────uuid─┐
_temporary_and_external_tables │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │
│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ d317b4bd-3595-4386-81ee-c2334694128a
test │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/39b/39bf0cc5-4c06-4717-87fe-c75ff3bd8ebb/ │ 39bf0cc5-4c06-4717-87fe-c75ff3bd8ebb
│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 1d1c869d-e465-4b1b-a51f-be033436ebf9
└────────────────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┘
┌─name───────────────┬─engine─┬─data_path──────────────────┬─metadata_path───────────────────────────────────────────────────────┬─uuid─────────────────────────────────┬─comment─┐
INFORMATION_SCHEMA │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │
│ default │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/d31/d317b4bd-3595-4386-81ee-c2334694128a/ │ 24363899-31d7-42a0-a436-389931d752a0 │
information_schema │ Memory │ /var/lib/clickhouse/ │ │ 00000000-0000-0000-0000-000000000000 │
│ system │ Atomic │ /var/lib/clickhouse/store/ │ /var/lib/clickhouse/store/1d1/1d1c869d-e465-4b1b-a51f-be033436ebf9/ │ 03e9f3d1-cc88-4a49-83e9-f3d1cc881a49 │
└────────────────────┴────────┴────────────────────────────┴─────────────────────────────────────────────────────────────────────┴──────────────────────────────────────┴─────────┘
```
[Original article](https://clickhouse.com/docs/en/operations/system-tables/databases) <!--hide-->


@ -34,7 +34,7 @@ System log tables can be customized by creating a config file with the same name
An example:
```xml
<yandex>
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ An example:
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</yandex>
</clickhouse>
```
By default, table growth is unlimited. To control the size of a table, you can use [TTL](../../sql-reference/statements/alter/ttl.md#manipulations-with-table-ttl) settings for removing outdated log records. You can also use the partitioning feature of `MergeTree`-engine tables.


@ -0,0 +1,210 @@
# INFORMATION_SCHEMA {#information-schema}
`INFORMATION_SCHEMA` (`information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
The structure and composition of system tables may change in different versions of the product, but the support of the `information_schema` makes it possible to change the structure of system tables without changing the method of access to metadata. Metadata requests do not depend on the DBMS used.
``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;
```
``` text
┌─name─────┐
│ COLUMNS │
│ SCHEMATA │
│ TABLES │
│ VIEWS │
└──────────┘
```
`INFORMATION_SCHEMA` contains the following views:
- [COLUMNS](#columns)
- [SCHEMATA](#schemata)
- [TABLES](#tables)
- [VIEWS](#views)
## COLUMNS {#columns}
Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but must be present according to the standard.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `column_name` ([String](../../sql-reference/data-types/string.md)) — Column name.
- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Ordinal position of a column in a table starting with 1.
- `column_default` ([String](../../sql-reference/data-types/string.md)) — Expression for the default value, or an empty string if it is not defined.
- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the column type is `Nullable`.
- `data_type` ([String](../../sql-reference/data-types/string.md)) — Column type.
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, the `NULL` value is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The base of the number system in which the precision of approximate numeric data, exact numeric data, integer data, or monetary data is expressed. In ClickHouse it is 2 for integer types and 10 for `Decimal` types. Otherwise, the `NULL` value is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — The scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, the `NULL` value is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — Decimal precision of the `DateTime64` data type. For other data types, the `NULL` value is returned.
- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
**Example**
Query:
``` sql
SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: describe_example
column_name: id
ordinal_position: 1
column_default:
is_nullable: 0
data_type: UInt64
character_maximum_length: ᴺᵁᴸᴸ
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: 64
numeric_precision_radix: 2
numeric_scale: 0
datetime_precision: ᴺᵁᴸᴸ
character_set_catalog: ᴺᵁᴸᴸ
character_set_schema: ᴺᵁᴸᴸ
character_set_name: ᴺᵁᴸᴸ
collation_catalog: ᴺᵁᴸᴸ
collation_schema: ᴺᵁᴸᴸ
collation_name: ᴺᵁᴸᴸ
domain_catalog: ᴺᵁᴸᴸ
domain_schema: ᴺᵁᴸᴸ
domain_name: ᴺᵁᴸᴸ
```
## SCHEMATA {#schemata}
Contains columns read from the [system.databases](../../operations/system-tables/databases.md) system table and columns that are not supported in ClickHouse or do not make sense (always `NULL`), but are required by the standard.
Columns:
- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database.
- `schema_name` ([String](../../sql-reference/data-types/string.md)) — The name of the database.
- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — Schema owner name, always `'default'`.
- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
**Example**
Query:
``` sql
SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
catalog_name: INFORMATION_SCHEMA
schema_name: INFORMATION_SCHEMA
schema_owner: default
default_character_set_catalog: ᴺᵁᴸᴸ
default_character_set_schema: ᴺᵁᴸᴸ
default_character_set_name: ᴺᵁᴸᴸ
sql_path: ᴺᵁᴸᴸ
```
## TABLES {#tables}
Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Table type. Possible values:
- `BASE TABLE`
- `VIEW`
- `FOREIGN TABLE`
- `LOCAL TEMPORARY`
- `SYSTEM VIEW`
**Example**
Query:
``` sql
SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: describe_example
table_type: BASE TABLE
```
## VIEWS {#views}
Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table, when the table engine [View](../../engines/table-engines/special/view.md) is used.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — The name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for view.
- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking.
- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updatable.
- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — Shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values:
- `NO` — The created view is not materialized.
- `YES` — The created view is materialized.
- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updatable.
- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deletable.
- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger.
**Example**
Query:
``` sql
CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t;
CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one;
SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: mv
view_definition: SELECT * FROM system.one
check_option: NONE
is_updatable: NO
is_insertable_into: YES
is_trigger_updatable: NO
is_trigger_deletable: NO
is_trigger_insertable_into: NO
```
View File
@ -30,6 +30,8 @@ Columns:
- `engine_full` ([String](../../sql-reference/data-types/string.md)) - Parameters of the table engine.
- `as_select` ([String](../../sql-reference/data-types/string.md)) - `SELECT` query for view.
- `partition_key` ([String](../../sql-reference/data-types/string.md)) - The partition key expression specified in the table.
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) - The sorting key expression specified in the table.
@ -56,6 +58,7 @@ Columns:
- `comment` ([String](../../sql-reference/data-types/string.md)) - The comment for the table.
- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Flag that indicates whether the table itself stores some data on disk or only accesses some other source.
The `system.tables` table is used in `SHOW TABLES` query implementation.
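As a rough sketch, `SHOW TABLES` behaves like the following query over `system.tables` (the actual implementation may differ in details):
``` sql
SELECT name FROM system.tables WHERE database = currentDatabase();
```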
@ -80,6 +83,7 @@ dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192
as_select: SELECT database AS table_catalog
partition_key:
sorting_key: n
primary_key: n
@ -90,6 +94,7 @@ total_bytes: 99
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
Row 2:
──────
@ -105,6 +110,7 @@ dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192
as_select: SELECT name AS catalog_name
partition_key:
sorting_key: id
primary_key: id
@ -115,6 +121,5 @@ total_bytes: 155
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
```
[Original article](https://clickhouse.com/docs/en/operations/system-tables/tables) <!--hide-->
View File
@ -47,7 +47,7 @@ Parameters:
## Format of Zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -60,13 +60,13 @@ Parameters:
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -179,7 +179,7 @@ Parameters:
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.
View File
@ -26,7 +26,7 @@ You can view the list of external dictionaries and their statuses in the `system
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ The configuration looks like this:
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Corresponding [DDL-query](../../../sql-reference/statements/create/dictionary.md):
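A minimal sketch of such a DDL query (the dictionary name, structure, and source below are hypothetical):
``` sql
CREATE DICTIONARY dict_name
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT 9000 TABLE 'source_table' DB 'default'))
LAYOUT(FLAT())
LIFETIME(MIN 0 MAX 300)
```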
@ -289,7 +289,7 @@ Details of the algorithm:
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -317,7 +317,7 @@ Configuration example:
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
View File
@ -10,7 +10,7 @@ An external dictionary can be connected from many different sources.
If dictionary is configured using xml-file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ If dictionary is configured using xml-file, the configuration looks like this:
...
</dictionary>
...
</yandex>
</clickhouse>
```
In the case of a [DDL-query](../../../sql-reference/statements/create/dictionary.md), the equivalent configuration looks like this:
@ -311,7 +311,7 @@ Configuring `/etc/odbc.ini` (or `~/.odbc.ini` if you signed in under a user that
The dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ The dictionary configuration in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
@ -416,7 +416,7 @@ Remarks:
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ Configuring the dictionary in ClickHouse:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
View File
@ -26,7 +26,7 @@ The [dictionaries](../../../operations/system-tables/dictionaries.md#system_tabl
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -38,7 +38,7 @@ The dictionary configuration file has the following format:
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict.md) any number of dictionaries in the same file.
View File
@ -53,7 +53,7 @@ The first column is `id`, the second column is `c1`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
@ -113,7 +113,7 @@ The first column is `id`, the second is `c1`, the third is `c2`.
Configure the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ Configure the external dictionary:
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
View File
@ -2,13 +2,13 @@
toc_title: S2 Geometry
---
# Functions for Working with S2 Index {#s2Index}
# Functions for Working with S2 Index {#s2index}
[S2](https://s2geometry.io/) is a geographical indexing system where all geographical data is represented on a three-dimensional sphere (similar to a globe).
In the S2 library points are represented as unit length vectors called S2 point indices (points on the surface of a three dimensional unit sphere) as opposed to traditional (latitude, longitude) pairs.
In the S2 library points are represented as the S2 Index, a specific number which internally encodes a point on the surface of a unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 point index for a given point specified in the format (latitude, longitude) use the [geoToS2](#geotos2) function. Also, you can use the [s2ToGeo](#s2togeo) function for getting geographical coordinates corresponding to the specified S2 point index.
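As a quick round-trip sketch (reusing the coordinates from the examples below), converting a point to an S2 point index and back returns approximately the original coordinates:
``` sql
SELECT s2ToGeo(geoToS2(37.79506683, 55.71290588)) AS coords;
```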
## geoToS2 {#geoToS2}
## geoToS2 {#geotos2}
Returns [S2](#s2index) point index corresponding to the provided coordinates `(longitude, latitude)`.
@ -34,7 +34,7 @@ Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
SELECT geoToS2(37.79506683, 55.71290588) as s2Index;
SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
```
Result:
@ -45,7 +45,7 @@ Result:
└─────────────────────┘
```
## s2ToGeo {#s2ToGeo}
## s2ToGeo {#s2togeo}
Returns geo coordinates `(longitude, latitude)` corresponding to the provided [S2](#s2index) point index.
@ -57,20 +57,20 @@ s2ToGeo(s2index)
**Arguments**
- `s2Index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2index` — S2 Index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- A tuple consisting of two values: `tuple(lon,lat)`.
Type: `lon` - [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
Type: `lon` [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT s2ToGeo(4704772434919038107) as s2Coodrinates;
SELECT s2ToGeo(4704772434919038107) AS s2Coordinates;
```
Result:
@ -81,9 +81,9 @@ Result:
└──────────────────────────────────────┘
```
## s2GetNeighbors {#s2GetNeighbors}
## s2GetNeighbors {#s2getneighbors}
Returns S2 neighbor indices corresponding to the provided [S2](#s2index)). Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
Returns S2 neighbor indexes corresponding to the provided [S2](#s2index) point index. Each cell in the S2 system is a quadrilateral bounded by four geodesics. So, each cell has 4 neighbors.
**Syntax**
@ -97,16 +97,16 @@ s2GetNeighbors(s2index)
**Returned values**
- An array consisting of the 4 neighbor indices: `array[s2index1, s2index3, s2index2, s2index4]`.
- An array consisting of 4 neighbor indexes: `array[s2index1, s2index3, s2index2, s2index4]`.
Type: Each S2 index is [UInt64](../../../sql-reference/data-types/int-uint.md).
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
select s2GetNeighbors(5074766849661468672) AS s2Neighbors;
SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
```
Result:
@ -117,9 +117,9 @@ Result:
└───────────────────────────────────────────────────────────────────────────────────┘
```
## s2CellsIntersect {#s2CellsIntersect}
## s2CellsIntersect {#s2cellsintersect}
Determines if the two provided [S2](#s2index)) cell indices intersect or not.
Determines if the two provided [S2](#s2index) cells intersect or not.
**Syntax**
@ -133,8 +133,8 @@ s2CellsIntersect(s2index1, s2index2)
**Returned values**
- 1 — If the S2 cell indices intersect.
- 0 — If the S2 cell indices don't intersect.
- 1 — If the cells intersect.
- 0 — If the cells don't intersect.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
@ -143,7 +143,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
select s2CellsIntersect(9926595209846587392, 9926594385212866560) as intersect;
SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
```
Result:
@ -154,11 +154,9 @@ Result:
└───────────┘
```
## s2CapContains {#s2CapContains}
## s2CapContains {#s2capcontains}
A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
Determines if a cap contains a s2 point index.
Determines if a cap contains a S2 point. A cap represents a part of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@ -168,9 +166,9 @@ s2CapContains(center, degrees, point)
**Arguments**
- `center` - S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` - Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `point` - S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `center` S2 point index corresponding to the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` Radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `point` S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@ -184,7 +182,7 @@ Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
Query:
``` sql
select s2CapContains(1157339245694594829, 1.0, 1157347770437378819) as capContains;
SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
```
Result:
@ -195,11 +193,9 @@ Result:
└─────────────┘
```
## s2CapUnion {#s2CapUnion}
## s2CapUnion {#s2capunion}
A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
Determines the smallest cap that contains the given two input caps.
Determines the smallest cap that contains the given two input caps. A cap represents a portion of the sphere that has been cut off by a plane. It is defined by a point on a sphere and a radius in degrees.
**Syntax**
@ -209,13 +205,13 @@ s2CapUnion(center1, radius1, center2, radius2)
**Arguments**
- `center1`, `center2` - S2 point indices corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` - Radii of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `center1`, `center2` — S2 point indexes corresponding to the two input caps. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` — Radii of the two input caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- `center` - S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` - Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
- `center` S2 point index corresponding the center of the smallest cap containing the two input caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` Radius of the smallest cap containing the two input caps. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
@ -233,11 +229,9 @@ Result:
└────────────────────────────────────────┘
```
## s2RectAdd{#s2RectAdd}
## s2RectAdd {#s2rectadd}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Increases the size of the bounding rectangle to include the given S2 point index.
Increases the size of the bounding rectangle to include the given S2 point. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -247,21 +241,21 @@ s2RectAdd(s2pointLow, s2pointHigh, s2Point)
**Arguments**
- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` - Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointLow` Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` Target S2 point index that the bound rectangle should be grown to include. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2PointLow` - Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
- `s2PointLow` Low S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` Hight S2 cell id corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) as rectAdd;
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
```
Result:
@ -272,11 +266,9 @@ Result:
└───────────────────────────────────────────┘
```
## s2RectContains{#s2RectContains}
## s2RectContains {#s2rectcontains}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Determines if a given rectangle contains a S2 point index.
Determines if a given rectangle contains a S2 point. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -286,9 +278,9 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
**Arguments**
- `s2PointLow` - Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` - High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` - Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointLow` Low S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` High S2 point index corresponding to the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` Target S2 point index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
@ -300,7 +292,7 @@ s2RectContains(s2PointLow, s2PointHi, s2Point)
Query:
``` sql
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
```
Result:
@ -311,11 +303,9 @@ Result:
└──────────────┘
```
## s2RectUinion{#s2RectUnion}
## s2RectUnion {#s2rectunion}
In the S2 system, a rectangle is represented by a type of S2Region called a S2LatLngRect that represents a rectangle in latitude-longitude space.
Returns the smallest rectangle containing the union of this rectangle and the given rectangle.
Returns the smallest rectangle containing the union of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -325,20 +315,20 @@ s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` - High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointLow` Low S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` High S2 cell id corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
```
Result:
@ -349,9 +339,9 @@ Result:
└───────────────────────────────────────────┘
```
## s2RectIntersection{#s2RectIntersection}
## s2RectIntersection {#s2rectintersection}
Returns the smallest Rectangle containing the intersection of this rectangle and the given rectangle.
Returns the smallest rectangle containing the intersection of this rectangle and the given rectangle. In the S2 system, a rectangle is represented by a type of S2Region called a `S2LatLngRect` that represents a rectangle in latitude-longitude space.
**Syntax**
@ -361,20 +351,20 @@ s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2Poin
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` - Low and High S2 point indices corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` - Low and High S2 point indices corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect1PointLow`, `s2Rect1PointHi` — Low and High S2 point indexes corresponding to the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — Low and High S2 point indexes corresponding to the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` - Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` - Hi S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointLow` Low S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — High S2 cell id corresponding to the rectangle containing the intersection of the given rectangles. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
```
Result:
View File
@ -16,81 +16,3 @@ The [stochasticLinearRegression](../../sql-reference/aggregate-functions/referen
## stochasticLogisticRegression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/aggregate-functions/reference/stochasticlogisticregression.md#agg_functions-stochasticlogisticregression) aggregate function implements the stochastic gradient descent method for the binary classification problem. Uses `evalMLMethod` to predict on new data.
## bayesAB {#bayesab}
Compares test groups (variants) and calculates for each group the probability to be the best one. The first group is used as a control group.
**Syntax**
``` sql
bayesAB(distribution_name, higher_is_better, variant_names, x, y)
```
**Arguments**
- `distribution_name` — Name of the probability distribution. [String](../../sql-reference/data-types/string.md). Possible values:
- `beta` for [Beta distribution](https://en.wikipedia.org/wiki/Beta_distribution)
- `gamma` for [Gamma distribution](https://en.wikipedia.org/wiki/Gamma_distribution)
- `higher_is_better` — Boolean flag. [Boolean](../../sql-reference/data-types/boolean.md). Possible values:
- `0` — lower values are considered to be better than higher
- `1` — higher values are considered to be better than lower
- `variant_names` — Variant names. [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
- `x` — Numbers of tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
- `y` — Numbers of successful tests for the corresponding variants. [Array](../../sql-reference/data-types/array.md)([Float64](../../sql-reference/data-types/float.md)).
!!! note "Note"
All three arrays must have the same size. All `x` and `y` values must be non-negative constant numbers. `y` cannot be larger than `x`.
**Returned values**
For each variant the function calculates:
- `beats_control` — long-term probability to out-perform the first (control) variant
- `to_be_best` — long-term probability to out-perform all other variants
Type: JSON.
**Example**
Query:
``` sql
SELECT bayesAB('beta', 1, ['Control', 'A', 'B'], [3000., 3000., 3000.], [100., 90., 110.]) FORMAT PrettySpace;
```
Result:
``` text
{
"data":[
{
"variant_name":"Control",
"x":3000,
"y":100,
"beats_control":0,
"to_be_best":0.22619
},
{
"variant_name":"A",
"x":3000,
"y":90,
"beats_control":0.23469,
"to_be_best":0.04671
},
{
"variant_name":"B",
"x":3000,
"y":110,
"beats_control":0.7580899999999999,
"to_be_best":0.7271
}
]
}
```
View File
@ -213,7 +213,7 @@ SELECT splitByNonAlpha(' 1! a, b. ');
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Concatenates the strings (values of type String or Nullable(String)) listed in the array with the separator. separator is an optional parameter: a constant string, set to an empty string by default.
Concatenates string representations of values listed in the array with the separator. `separator` is an optional parameter: a constant string, set to an empty string by default.
Returns the string.
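A minimal usage sketch:
``` sql
SELECT arrayStringConcat(['Click', 'House', 'DB'], '-') AS res;
```
This returns the string `Click-House-DB`.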
## alphaTokens(s) {#alphatokenss}
@ -307,3 +307,33 @@ Result:
│ ['Cli','lic','ick','ckH','kHo','Hou','ous','use'] │
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from the input string.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Result:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
View File
@ -313,32 +313,6 @@ SELECT toValidUTF8('\x61\xF0\x80\x80\x80b');
└───────────────────────┘
```
## tokens {#tokens}
Split string into tokens using non-alpha numeric ASCII characters as separators.
**Arguments**
- `input_string` — Any set of bytes represented as the [String](../../sql-reference/data-types/string.md) data type object.
**Returned value**
- The resulting array of tokens from input string.
Type: [Array](../data-types/array.md).
**Example**
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```
## repeat {#repeat}
Repeats a string as many times as specified and concatenates the replicated values as a single string.
View File
@ -166,6 +166,80 @@ Result:
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Turns a named tuple into an array of (name, value) pairs. For a `Tuple(a T, b T, ..., c T)` it returns `Array(Tuple(String, T), ...)`
in which the `String`s represent the named fields of the tuple and `T` are the values associated with those names. All values in the tuple should be of the same type.
**Syntax**
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
- `tuple` — Named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any types of values.
**Returned value**
- An array with (name, value) pairs.
Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Example**
Query:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple(100, 2502)), (tuple(1, 100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Result:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
It is possible to transform columns to rows using this function:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES(tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
If you pass a simple tuple to the function, ClickHouse uses the indexes of the values as their names:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Result:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of corresponding values of two tuples of the same size.
@ -895,7 +969,6 @@ Result:
Calculates the unit vector of a given vector (the values of the tuple are the coordinates) in `Lp` space (using [p-norm](https://en.wikipedia.org/wiki/Norm_(mathematics)#p-norm)).
**Syntax**
```sql
View File
@ -22,7 +22,7 @@ map(key1, value1[, key2, value2, ...])
**Returned value**
- Data structure as `key:value` pairs.
- Data structure as `key:value` pairs.
Type: [Map(key, value)](../../sql-reference/data-types/map.md).
@ -165,9 +165,6 @@ Result:
## mapPopulateSeries {#function-mappopulateseries}
Fills missing keys in the maps (key and value array pair), where keys are integers. Also, it supports specifying the max key, which is used to extend the keys array.
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represent keys, and the second array contains values for the each key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Syntax**
@ -178,12 +175,17 @@ mapPopulateSeries(map[, max])
Generates a map (a tuple with two arrays or a value of `Map` type, depending on the arguments), where keys are a series of numbers, from the minimum to the maximum key (or the `max` argument if it is specified) taken from the map with a step size of one, and the corresponding values. If a value is not specified for a key, the resulting map uses the default value. For repeated keys, only the first value (in order of appearance) gets associated with the key.
For array arguments the number of elements in `keys` and `values` must be the same for each row.
**Arguments**
Arguments are [maps](../../sql-reference/data-types/map.md) or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents keys, and the second array contains values for each key.
Mapped arrays:
- `keys` — Array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `values` — Array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#uint-ranges)).
- `max` — Maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
or
@ -191,14 +193,14 @@ or
**Returned value**
- Depending on the arguments returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values the corresponding keys.
- Depending on the arguments, returns a [map](../../sql-reference/data-types/map.md) or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): keys in sorted order, and values for the corresponding keys.
**Example**
Query with mapped arrays:
```sql
select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type;
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Result:
@ -390,5 +392,43 @@ Result:
└─────────────────────────────┘
```
## mapExtractKeyLike {#mapExtractKeyLike}
Returns a map containing only the elements whose keys match the specified pattern.
**Syntax**
```sql
mapExtractKeyLike(map, pattern)
```
**Parameters**
- `map` — Map. [Map](../../sql-reference/data-types/map.md).
- `pattern` — String pattern to match.
**Returned value**
- A map containing the elements whose keys match the specified pattern. If no elements match the pattern, an empty map is returned.
**Example**
Query:
```sql
CREATE TABLE test (a Map(String,String)) ENGINE = Memory;
INSERT INTO test VALUES ({'abc':'abc','def':'def'}), ({'hij':'hij','klm':'klm'});
SELECT mapExtractKeyLike(a, 'a%') FROM test;
```
Result:
```text
┌─mapExtractKeyLike(a, 'a%')─┐
│ {'abc':'abc'} │
│ {} │
└────────────────────────────┘
```
[Original article](https://clickhouse.com/docs/en/sql-reference/functions/tuple-map-functions/) <!--hide-->
View File
@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` - Adds projection description to tables metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes projection description from tables metadata and deletes projection files from disk.
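A brief sketch of the two operations above (the table, projection, and column names are hypothetical):
``` sql
ALTER TABLE visits ADD PROJECTION user_agg (SELECT user_id, count() GROUP BY user_id);
ALTER TABLE visits DROP PROJECTION user_agg;
```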
View File
@ -8,7 +8,7 @@ toc_title: DATABASE
Creates a new database.
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)]
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] [COMMENT 'Comment']
```
## Clauses {#clauses}
@ -26,4 +26,33 @@ ClickHouse creates the `db_name` database on all the servers of a specified clus
### ENGINE {#engine}
[MySQL](../../../engines/database-engines/mysql.md) allows you to retrieve data from the remote MySQL server. By default, ClickHouse uses its own [database engine](../../../engines/database-engines/index.md). There's also a [lazy](../../../engines/database-engines/lazy.md) engine.
By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/atomic.md) database engine. There are also [Lazy](../../../engines/database-engines/lazy.md), [MySQL](../../../engines/database-engines/mysql.md), [PostgreSQL](../../../engines/database-engines/postgresql.md), [MaterializedMySQL](../../../engines/database-engines/materialized-mysql.md), [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md), [Replicated](../../../engines/database-engines/replicated.md), [SQLite](../../../engines/database-engines/sqlite.md).
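For example (a minimal sketch; the database name is hypothetical, and the `Lazy` engine takes an expiration time in seconds):
``` sql
CREATE DATABASE IF NOT EXISTS db_lazy ENGINE = Lazy(3600);
```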
### COMMENT {#comment}
You can add a comment to the database when creating it.
The comment is supported for all database engines.
**Syntax**
``` sql
CREATE DATABASE db_name ENGINE = engine(...) COMMENT 'Comment'
```
**Example**
Query:
``` sql
CREATE DATABASE db_comment ENGINE = Memory COMMENT 'The temporary database';
SELECT name, comment FROM system.databases WHERE name = 'db_comment';
```
Result:
```text
┌─name───────┬─comment────────────────┐
│ db_comment │ The temporary database │
└────────────┴────────────────────────┘
```
View File
@ -3,18 +3,67 @@ toc_priority: 42
toc_title: DESCRIBE
---
# DESCRIBE TABLE Statement {#misc-describe-table}
# DESCRIBE TABLE {#misc-describe-table}
Returns information about table columns.
**Syntax**
``` sql
DESC|DESCRIBE TABLE [db.]table [INTO OUTFILE filename] [FORMAT format]
```
Returns the following `String` type columns:
The `DESCRIBE` statement returns a row for each table column with the following [String](../../sql-reference/data-types/string.md) values:
- `name` — Column name.
- `type`— Column type.
- `default_type` — Clause that is used in [default expression](../../sql-reference/statements/create/table.md#create-default-values) (`DEFAULT`, `MATERIALIZED` or `ALIAS`). Column contains an empty string, if the default expression isnt specified.
- `default_expression` — Value specified in the `DEFAULT` clause.
- `comment_expression` — Comment text.
- `name` — A column name.
- `type` — A column type.
- `default_type` — A clause that is used in the column [default expression](../../sql-reference/statements/create/table.md#create-default-values): `DEFAULT`, `MATERIALIZED` or `ALIAS`. If there is no default expression, an empty string is returned.
- `default_expression` — An expression specified after the `DEFAULT` clause.
- `comment` — A [column comment](../../sql-reference/statements/alter/column.md#alter_comment-column).
- `codec_expression` — A [codec](../../sql-reference/statements/create/table.md#codecs) that is applied to the column.
- `ttl_expression` — A [TTL](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl) expression.
- `is_subcolumn` — A flag that equals `1` for internal subcolumns. It is included in the result only if subcolumn description is enabled by the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
Nested data structures are output in “expanded” format. Each column is shown separately, with the name after a dot.
All columns in [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) data structures are described separately. The name of each column is prefixed with a parent column name and a dot.
To show internal subcolumns of other data types, use the [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
**Example**
Query:
``` sql
CREATE TABLE describe_example (
id UInt64, text String DEFAULT 'unknown' CODEC(ZSTD),
user Tuple (name String, age UInt8)
) ENGINE = MergeTree() ORDER BY id;
DESCRIBE TABLE describe_example;
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns=1;
```
Result:
``` text
┌─name─┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐
│ id │ UInt64 │ │ │ │ │ │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │
└──────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘
```
The second query additionally shows subcolumns:
``` text
┌─name──────┬─type──────────────────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┬─is_subcolumn─┐
│ id │ UInt64 │ │ │ │ │ │ 0 │
│ text │ String │ DEFAULT │ 'unknown' │ │ ZSTD(1) │ │ 0 │
│ user │ Tuple(name String, age UInt8) │ │ │ │ │ │ 0 │
│ user.name │ String │ │ │ │ │ │ 1 │
│ user.age │ UInt8 │ │ │ │ │ │ 1 │
└───────────┴───────────────────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┴──────────────┘
```
**See Also**
- [describe_include_subcolumns](../../operations/settings/settings.md#describe_include_subcolumns) setting.
View File
@ -559,7 +559,7 @@ CREATE TABLE IF NOT EXISTS example_table
- If `input_format_defaults_for_omitted_fields = 1`, the default value for `x` equals `0`, but the default value for `a` equals `x * 2`.
!!! note "Warning"
When inserting data with `insert_sample_with_metadata = 1`, ClickHouse consumes more computational resources compared to insertion with `insert_sample_with_metadata = 0`.
When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHouse consumes more computational resources compared to insertion with `input_format_defaults_for_omitted_fields = 0`.
### Selecting Data {#selecting-data}
View File
@ -10,7 +10,7 @@ toc_title: "\u8A2D\u5B9A\u30D5\u30A1\u30A4\u30EB"
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml`. Other files must be in the `/etc/clickhouse-server/config.d` directory.
!!! note "Note"
All the configuration files should be in XML format. Also, they should usually have the same root element `<yandex>`.
All the configuration files should be in XML format. Also, they should usually have the same root element `<clickhouse>`.
Some settings specified in the main configuration file can be overridden in other configuration files. The `replace` or `remove` attributes can be specified for the elements of these configuration files.
@ -36,7 +36,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -47,7 +47,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in a config file but ZooKeeper is unavailable at server start, the server loads the configuration from the preprocessed file.
View File
@ -335,14 +335,14 @@ SELECT * FROM system.metrics LIMIT 10
To enable metrics history collection in `system.metric_log`, create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</yandex>
</clickhouse>
```
**Example**
View File
@ -46,7 +46,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
## Format of Zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<yandex>
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -59,13 +59,13 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
<port>2181</port>
</node>
</zookeeper>
</yandex>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<yandex>
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -168,7 +168,7 @@ $ clickhouse-copier copier --daemon --config zookeeper.xml --task-path /task/pat
</table_visits>
...
</tables>
</yandex>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.
View File
@ -28,7 +28,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
The configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<layout>
@ -38,7 +38,7 @@ ClickHouseは、辞書のエラーに対して例外を生成します。 エラ
</layout>
...
</dictionary>
</yandex>
</clickhouse>
```
Corresponding [DDL-query](../../statements/create.md#create-dictionary-query):
@ -208,7 +208,7 @@ dictGetT('dict_name', 'attr_name', id, date)
Configuration example:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
@ -237,7 +237,7 @@ dictGetT('dict_name', 'attr_name', id, date)
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
View File
@ -12,7 +12,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
If the dictionary is configured using an xml-file, the configuration looks like this:
``` xml
<yandex>
<clickhouse>
<dictionary>
...
<source>
@ -23,7 +23,7 @@ toc_title: "\u5916\u90E8\u8F9E\u66F8\u306E\u30BD\u30FC\u30B9"
...
</dictionary>
...
</yandex>
</clickhouse>
```
In the case of a [DDL-query](../../statements/create.md#create-dictionary-query), the equivalent configuration looks like this:
@ -272,7 +272,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
The dictionary configuration in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -301,7 +301,7 @@ ClickHouseの辞書構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
@ -367,7 +367,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Configuring the dictionary in ClickHouse:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -397,7 +397,7 @@ ClickHouseでの辞書の構成:
</attribute>
</structure>
</dictionary>
</yandex>
</clickhouse>
```
or
View File
@ -28,7 +28,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
The dictionary configuration file has the following format:
``` xml
<yandex>
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--Optional element. File name with substitutions-->
@ -40,7 +40,7 @@ toc_title: "\u4E00\u822C\u7684\u306A\u8AAC\u660E"
<!-- There can be any number of <dictionary> sections in the configuration file. -->
</dictionary>
</yandex>
</clickhouse>
```
You can [configure](external-dicts-dict.md) any number of dictionaries in the same file.
View File
@ -50,7 +50,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
Configuration of the external dictionary:
``` xml
<yandex>
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -74,7 +74,7 @@ ClickHouseは、属性の値を解析できない場合、または値が属性
</structure>
<lifetime>0</lifetime>
</dictionary>
</yandex>
</clickhouse>
```
Perform the query:
View File
@ -8,7 +8,7 @@ toc_title: "Конфигурационные файлы"
ClickHouse supports multi-file configuration management. The main server configuration file is `/etc/clickhouse-server/config.xml` or `/etc/clickhouse-server/config.yaml`. Other files must be in the `/etc/clickhouse-server/config.d` directory. Note that configuration files can be written in XML or YAML format, but mixing these formats in one file is not supported. For example, you can store the main configuration files as `config.xml` and `users.xml`, and write additional files in the `config.d` and `users.d` directories in `.yaml` format.
All XML files should have the same root element, usually `<yandex>`. For YAML, the `yandex:` element should be absent, since the parser inserts it automatically.
All XML files should have the same root element, usually `<clickhouse>`. For YAML, the `clickhouse:` element should be absent, since the parser inserts it automatically.
## Override {#override}
@ -22,13 +22,13 @@ ClickHouse поддерживает многофайловое управлен
It is also possible to specify attributes as environment variables by using `from_env="VARIABLE_NAME"`:
```xml
<yandex>
<clickhouse>
<macros>
<replica from_env="REPLICA" />
<layer from_env="LAYER" />
<shard from_env="SHARD" />
</macros>
</yandex>
</clickhouse>
```
## Substitutions {#substitution}
@ -40,7 +40,7 @@ ClickHouse поддерживает многофайловое управлен
Example of an XML substitution:
```xml
<yandex>
<clickhouse>
<!-- Appends XML subtree found at `/profiles-in-zookeeper` ZK path to `<profiles>` element. -->
<profiles from_zk="/profiles-in-zookeeper" />
@ -49,7 +49,7 @@ ClickHouse поддерживает многофайловое управлен
<include from_zk="/users-in-zookeeper" />
<include from_zk="/other-users-in-zookeeper" />
</users>
</yandex>
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the `from_zk = "/path/to/node"` attribute for an element. The element value is replaced with the contents of the `/path/to/node` node in ZooKeeper. You can also put an entire XML subtree in a ZooKeeper node; it will be fully inserted into the source element.
@ -66,7 +66,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
```
``` xml
<yandex>
<clickhouse>
<users>
<alice>
<profile>analytics</profile>
@ -77,7 +77,7 @@ $ cat /etc/clickhouse-server/users.d/alice.xml
<quota>analytics</quota>
</alice>
</users>
</yandex>
</clickhouse>
```
For each config file, the server also generates `file-preprocessed.xml` files when starting. These files contain all the completed substitutions and overrides, and they are intended for informational use. If ZooKeeper substitutions were used in the config files but ZooKeeper is unavailable at server start, the server loads the configuration from the preprocessed file.
View File
@ -24,32 +24,32 @@ ClickHouse предоставляет возможность аутентифи
Examples of what the `config.xml` file should look like:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos />
</yandex>
</clickhouse>
```
Or, with the principal specified:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<principal>HTTP/clickhouse.example.com@EXAMPLE.COM</principal>
</kerberos>
</yandex>
</clickhouse>
```
Or, with realm filtering:
```xml
<yandex>
<clickhouse>
<!- ... -->
<kerberos>
<realm>EXAMPLE.COM</realm>
</kerberos>
</yandex>
</clickhouse>
```
!!! warning "Important"
@ -81,7 +81,7 @@ ClickHouse предоставляет возможность аутентифи
An example of what the Kerberos configuration in `users.xml` looks like:
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -92,7 +92,7 @@ ClickHouse предоставляет возможность аутентифи
</kerberos>
</my_user>
</users>
</yandex>
</clickhouse>
```
View File
@ -14,7 +14,7 @@
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<ldap_servers>
<!- Typical LDAP server. -->
@ -45,7 +45,7 @@
<enable_tls>no</enable_tls>
</my_ad_server>
</ldap_servers>
</yandex>
</clickhouse>
```
Note that you can define multiple LDAP servers inside the `ldap_servers` section using different names.
@ -90,7 +90,7 @@
**Example**
```xml
<yandex>
<clickhouse>
<!- ... -->
<users>
<!- ... -->
@ -101,7 +101,7 @@
</ldap>
</my_user>
</users>
</yandex>
</clickhouse>
```
Note that the user `my_user` refers to `my_ldap_server`. This LDAP server must be configured in the main `config.xml` file as described earlier.
@ -125,7 +125,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
In `config.xml`.
```xml
<yandex>
<clickhouse>
<!- ... -->
<user_directories>
<!- Typical LDAP server. -->
@ -156,7 +156,7 @@ CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
</role_mapping>
</ldap>
</user_directories>
</yandex>
</clickhouse>
```
Note that `my_ldap_server`, specified in the `ldap` section inside the `user_directories` section, must be configured in the `config.xml` file as described earlier (see [LDAP Server Definition](#ldap-server-definition)).
View File
@ -467,6 +467,26 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<listen_host>127.0.0.1</listen_host>
```
## listen_backlog {#server_configuration_parameters-listen_backlog}
The backlog (queue size for connections waiting to be accepted) of the listening socket.
Default value: `4096` (as in linux [5.4+](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=19f92a030ca6d772ab44b22ee6a01378a8cb32d4)).
Usually this value does not need to be changed, because:
- the default value is large enough,
- the server has a separate thread for accepting client connections.
So even if `TcpExtListenOverflows` (from `nstat`) is non-zero and grows for the ClickHouse server, this is not a reason to increase the default value, since:
- usually, if 4096 is not enough, it indicates internal ClickHouse scaling problems, so it is better to report the issue,
- and it does not mean that the server can handle more connections later (and even if it could, by that point the clients may have already disconnected).
Examples:
``` xml
<listen_backlog>4096</listen_backlog>
```
## logger {#server_configuration_parameters-logger}
Logging settings.
@ -754,14 +774,14 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
To manually enable metrics history collection in the [`system.metric_log`](../../operations/system-tables/metric_log.md) table, create `/etc/clickhouse-server/config.d/metric_log.xml` with the following content:
``` xml
<yandex>
<clickhouse>
<metric_log>
<database>system</database>
<table>metric_log</table>
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
<collect_interval_milliseconds>1000</collect_interval_milliseconds>
</metric_log>
</clickhouse>
```
**Disabling**
@ -769,9 +789,9 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
To disable the `metric_log` setting, create the file `/etc/clickhouse-server/config.d/disable_metric_log.xml` with the following content:
``` xml
<clickhouse>
<metric_log remove="1" />
</clickhouse>
```
## replicated\_merge\_tree {#server_configuration_parameters-replicated_merge_tree}
@ -1007,7 +1027,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
**Example**
```xml
<clickhouse>
<text_log>
<level>notice</level>
<database>system</database>
@ -1016,7 +1036,7 @@ ClickHouse проверяет условия для `min_part_size` и `min_part
<!-- <partition_by>event_date</partition_by> -->
<engine>Engine = MergeTree PARTITION BY event_date ORDER BY event_time TTL event_date + INTERVAL 30 day</engine>
</text_log>
</clickhouse>
```


@ -1361,6 +1361,32 @@ load_balancing = round_robin
Default value: `3`.
## compile_aggregate_expressions {#compile_aggregate_expressions}
Enables or disables compilation of aggregate functions to native code at query runtime. Enabling this setting can improve query performance.
Possible values:
- 0 — aggregate functions are not compiled to native code.
- 1 — aggregate functions are compiled to native code during query execution.
Default value: `1`.
**See also**
- [min_count_to_compile_aggregate_expression](#min_count_to_compile_aggregate_expression)
## min_count_to_compile_aggregate_expression {#min_count_to_compile_aggregate_expression}
The minimum number of calls of an aggregate function with an identical expression after which the function is compiled to native code during query execution. Works only if the [compile_aggregate_expressions](#compile_aggregate_expressions) setting is enabled.
Possible values:
- Positive integer.
- 0 — aggregate functions are always compiled during query execution.
Default value: `3`.
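As an illustration, here is a hedged sketch of combining both settings in a single query (the table `t` and column `x` are hypothetical):
``` sql
-- Compile the aggregate expressions immediately instead of waiting for repeated calls.
SELECT sum(x), avg(x)
FROM t
SETTINGS compile_aggregate_expressions = 1, min_count_to_compile_aggregate_expression = 0;
```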
## input_format_skip_unknown_fields {#input-format-skip-unknown-fields}
If set to true, input data from columns with unknown names is skipped during INSERT. Otherwise, this situation raises an exception.
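A minimal sketch (the table and the input row are illustrative):
``` sql
CREATE TABLE events (id UInt32, name String) ENGINE = Memory;
SET input_format_skip_unknown_fields = 1;
-- The "extra" field does not exist in the table, so it is skipped instead of raising an exception.
INSERT INTO events FORMAT JSONEachRow {"id": 1, "name": "click", "extra": 42}
```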
@ -1705,6 +1731,32 @@ ClickHouse генерирует исключение
Default value: 0.
## distributed_push_down_limit {#distributed-push-down-limit}
Enables or disables [LIMIT](#limit) applied to each shard separately.
This helps to avoid:
- sending extra rows over the network;
- processing rows beyond the limit on the initiator.
Starting from version 21.9 you can no longer get inaccurate results, because `distributed_push_down_limit` changes query execution only if at least one of the following conditions is met:
- `distributed_group_by_no_merge` > 0.
- the query **does not contain** `GROUP BY`/`DISTINCT`/`LIMIT BY`, but contains `ORDER BY`/`LIMIT`.
- the query **contains** `GROUP BY`/`DISTINCT`/`LIMIT BY` with `ORDER BY`/`LIMIT` and:
    - the [optimize_skip_unused_shards](#optimize-skip-unused-shards) setting is enabled.
    - the `optimize_distributed_group_by_sharding_key` setting is enabled.
Possible values:
- 0 — disabled.
- 1 — enabled.
Default value: `1`.
See also:
- [optimize_skip_unused_shards](#optimize-skip-unused-shards)
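A hedged sketch of enabling the setting for a single query (the distributed table `dist_hits` is hypothetical):
``` sql
-- LIMIT is applied on each shard, so fewer rows travel over the network.
SELECT id FROM dist_hits ORDER BY id LIMIT 10
SETTINGS distributed_push_down_limit = 1;
```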
## optimize_skip_unused_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in the `WHERE/PREWHERE` clause. It is assumed that the data is distributed by the sharding key; otherwise the query returns incorrect results.
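For instance, a sketch under the assumption that `dist_table` is sharded by `user_id`:
``` sql
-- Only the shards that can contain user_id = 123 are queried.
SELECT count() FROM dist_table WHERE user_id = 123
SETTINGS optimize_skip_unused_shards = 1;
```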
@ -3641,6 +3693,21 @@ SELECT * FROM positional_arguments ORDER BY 2,3;
- the [optimize_move_to_prewhere](#optimize_move_to_prewhere) setting
## describe_include_subcolumns {#describe_include_subcolumns}
Enables or disables the description of subcolumns in a [DESCRIBE](../../sql-reference/statements/describe-table.md) query. The setting affects, for example, elements of a [Tuple](../../sql-reference/data-types/tuple.md) or subcolumns of the [Map](../../sql-reference/data-types/map.md#map-subcolumns), [Nullable](../../sql-reference/data-types/nullable.md#finding-null) or [Array](../../sql-reference/data-types/array.md#array-size) types.
Possible values:
- 0 — subcolumns are not included in the result of `DESCRIBE` queries.
- 1 — subcolumns are included in the result of `DESCRIBE` queries.
Default value: `0`.
**Example**
See an example of the [DESCRIBE](../../sql-reference/statements/describe-table.md) query.
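A quick sketch (the table and its columns are illustrative):
``` sql
CREATE TABLE describe_example (id UInt64, user Tuple(name String, age UInt8)) ENGINE = Memory;
-- With the setting enabled, DESCRIBE also lists subcolumns such as user.name and user.age.
DESCRIBE TABLE describe_example SETTINGS describe_include_subcolumns = 1;
```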
## async_insert {#async-insert}
Enables or disables asynchronous inserts. Works only for inserts over the HTTP protocol. Note that deduplication is not performed for such inserts.


@ -19,7 +19,7 @@ toc_title: "Хранение данных на внешних дисках"
Configuration example:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
@ -41,7 +41,7 @@ toc_title: "Хранение данных на внешних дисках"
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</clickhouse>
```
Required parameters:
@ -93,7 +93,7 @@ toc_title: "Хранение данных на внешних дисках"
Configuration example:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<disk_s3>
@ -110,7 +110,7 @@ toc_title: "Хранение данных на внешних дисках"
</disk_s3_encrypted>
</disks>
</storage_configuration>
</clickhouse>
```
## Storing Data on a Web Server {#storing-data-on-webserver}
@ -124,7 +124,7 @@ toc_title: "Хранение данных на внешних дисках"
A ready-to-use test example. Add this configuration to the config:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<web>
@ -142,7 +142,7 @@ toc_title: "Хранение данных на внешних дисках"
</web>
</policies>
</storage_configuration>
</clickhouse>
```
Then execute this query:


@ -24,6 +24,11 @@ Cтолбцы:
- `is_in_primary_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag that indicates whether the column is in the primary key.
- `is_in_sampling_key` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag that indicates whether the column is in the sampling key.
- `compression_codec` ([String](../../sql-reference/data-types/string.md)) — compression codec name.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, `NULL` is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, `NULL` is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — the base of the number system of the precision of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is 2 for integer types and 10 for `Decimal` types. Otherwise, `NULL` is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, `NULL` is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — decimal precision of `DateTime64` data. For other data types, `NULL` is returned.
**Example**
@ -34,10 +39,11 @@ SELECT * FROM system.columns LIMIT 2 FORMAT Vertical;
```text
Row 1:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_catalog
type: String
position: 1
default_kind:
default_expression:
data_compressed_bytes: 0
@ -49,13 +55,19 @@ is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
Row 2:
──────
database: INFORMATION_SCHEMA
table: COLUMNS
name: table_schema
type: String
position: 2
default_kind:
default_expression:
data_compressed_bytes: 0
@ -67,4 +79,9 @@ is_in_sorting_key: 0
is_in_primary_key: 0
is_in_sampling_key: 0
compression_codec:
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: ᴺᵁᴸᴸ
numeric_precision_radix: ᴺᵁᴸᴸ
numeric_scale: ᴺᵁᴸᴸ
datetime_precision: ᴺᵁᴸᴸ
```


@ -34,7 +34,7 @@ toc_title: "Системные таблицы"
Example:
```xml
<clickhouse>
<query_log>
<database>system</database>
<table>query_log</table>
@ -45,7 +45,7 @@ toc_title: "Системные таблицы"
-->
<flush_interval_milliseconds>7500</flush_interval_milliseconds>
</query_log>
</clickhouse>
```
By default, the table size is not limited. You can manage the table size by using [TTL](../../sql-reference/statements/alter/ttl.md#manipuliatsii-s-ttl-tablitsy) to remove outdated log entries. You can also use the partitioning feature of `MergeTree` tables.
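For example, a hedged sketch of limiting retention with TTL (the 30-day period is illustrative):
``` sql
ALTER TABLE system.query_log MODIFY TTL event_date + INTERVAL 30 DAY;
```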


@ -0,0 +1,210 @@
# INFORMATION_SCHEMA {#information-schema}
`INFORMATION_SCHEMA` (or `information_schema`) is a system database that contains views. Using these views, you can get information about the metadata of database objects. These views read data from the columns of the [system.columns](../../operations/system-tables/columns.md), [system.databases](../../operations/system-tables/databases.md) and [system.tables](../../operations/system-tables/tables.md) system tables.
The structure and contents of the system tables may change between versions of the ClickHouse DBMS, but the support of `information_schema` makes it possible to change the structure of system tables without changing the way metadata is accessed. Metadata queries do not depend on the DBMS used.
``` sql
SHOW TABLES FROM INFORMATION_SCHEMA;
```
``` text
┌─name─────┐
│ COLUMNS │
│ SCHEMATA │
│ TABLES │
│ VIEWS │
└──────────┘
```
`INFORMATION_SCHEMA` contains the following views:
- [COLUMNS](#columns)
- [SCHEMATA](#schemata)
- [TABLES](#tables)
- [VIEWS](#views)
## COLUMNS {#columns}
Contains columns read from the [system.columns](../../operations/system-tables/columns.md) system table, as well as columns that are not supported in ClickHouse or do not make sense (always `NULL`) but are required by the standard.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `column_name` ([String](../../sql-reference/data-types/string.md)) — column name.
- `ordinal_position` ([UInt64](../../sql-reference/data-types/int-uint.md)) — ordinal position of the column in the table (numbering starts from 1).
- `column_default` ([String](../../sql-reference/data-types/string.md)) — expression for the default value, or an empty string.
- `is_nullable` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag that indicates whether the column type is `Nullable`.
- `data_type` ([String](../../sql-reference/data-types/string.md)) — column type.
- `character_maximum_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, `NULL` is returned.
- `character_octet_length` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — maximum length in bytes for binary data, character data, or text data and images. In ClickHouse it makes sense only for the `FixedString` data type. Otherwise, `NULL` is returned.
- `numeric_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — accuracy of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is the bit width for integer types and the decimal precision for `Decimal` types. Otherwise, `NULL` is returned.
- `numeric_precision_radix` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — the base of the number system of the precision of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it is 2 for integer types and 10 for `Decimal` types. Otherwise, `NULL` is returned.
- `numeric_scale` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — scale of approximate numeric data, exact numeric data, integer data, or monetary data. In ClickHouse it makes sense only for `Decimal` types. Otherwise, `NULL` is returned.
- `datetime_precision` ([Nullable](../../sql-reference/data-types/nullable.md)([UInt64](../../sql-reference/data-types/int-uint.md))) — decimal precision of `DateTime64` data. For other data types, `NULL` is returned.
- `character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `collation_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `domain_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
**Example**
Query:
``` sql
SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE (table_schema=currentDatabase() OR table_schema='') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: describe_example
column_name: id
ordinal_position: 1
column_default:
is_nullable: 0
data_type: UInt64
character_maximum_length: ᴺᵁᴸᴸ
character_octet_length: ᴺᵁᴸᴸ
numeric_precision: 64
numeric_precision_radix: 2
numeric_scale: 0
datetime_precision: ᴺᵁᴸᴸ
character_set_catalog: ᴺᵁᴸᴸ
character_set_schema: ᴺᵁᴸᴸ
character_set_name: ᴺᵁᴸᴸ
collation_catalog: ᴺᵁᴸᴸ
collation_schema: ᴺᵁᴸᴸ
collation_name: ᴺᵁᴸᴸ
domain_catalog: ᴺᵁᴸᴸ
domain_schema: ᴺᵁᴸᴸ
domain_name: ᴺᵁᴸᴸ
```
## SCHEMATA {#schemata}
Contains columns read from the [system.databases](../../operations/system-tables/databases.md) system table, as well as columns that are not supported in ClickHouse or do not make sense (always `NULL`) but are required by the standard.
Columns:
- `catalog_name` ([String](../../sql-reference/data-types/string.md)) — database name.
- `schema_name` ([String](../../sql-reference/data-types/string.md)) — database name.
- `schema_owner` ([String](../../sql-reference/data-types/string.md)) — schema owner name, always `'default'`.
- `default_character_set_catalog` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `default_character_set_schema` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `default_character_set_name` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
- `sql_path` ([Nullable](../../sql-reference/data-types/nullable.md)([String](../../sql-reference/data-types/string.md))) — `NULL`, not supported.
**Example**
Query:
``` sql
SELECT * FROM information_schema.schemata WHERE schema_name ILIKE 'information_schema' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
catalog_name: INFORMATION_SCHEMA
schema_name: INFORMATION_SCHEMA
schema_owner: default
default_character_set_catalog: ᴺᵁᴸᴸ
default_character_set_schema: ᴺᵁᴸᴸ
default_character_set_name: ᴺᵁᴸᴸ
sql_path: ᴺᵁᴸᴸ
```
## TABLES {#tables}
Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `table_type` ([Enum8](../../sql-reference/data-types/enum.md)) — table type. Possible values:
    - `BASE TABLE`
    - `VIEW`
    - `FOREIGN TABLE`
    - `LOCAL TEMPORARY`
    - `SYSTEM VIEW`
**Example**
Query:
``` sql
SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE (table_schema = currentDatabase() OR table_schema = '') AND table_name NOT LIKE '%inner%' LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: describe_example
table_type: BASE TABLE
```
## VIEWS {#views}
Contains columns read from the [system.tables](../../operations/system-tables/tables.md) system table when the [View](../../engines/table-engines/special/view.md) engine is used.
Columns:
- `table_catalog` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_schema` ([String](../../sql-reference/data-types/string.md)) — name of the database in which the table is located.
- `table_name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `view_definition` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for the view.
- `check_option` ([String](../../sql-reference/data-types/string.md)) — `NONE`, no checking.
- `is_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the view is not updated.
- `is_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — shows whether the created view is [materialized](../../sql-reference/statements/create/view/#materialized). Possible values:
    - `NO` — an ordinary view was created.
    - `YES` — a materialized view was created.
- `is_trigger_updatable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not updated.
- `is_trigger_deletable` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, the trigger is not deleted.
- `is_trigger_insertable_into` ([Enum8](../../sql-reference/data-types/enum.md)) — `NO`, no data is inserted into the trigger.
**Example**
Query:
``` sql
CREATE VIEW v (n Nullable(Int32), f Float64) AS SELECT n, f FROM t;
CREATE MATERIALIZED VIEW mv ENGINE = Null AS SELECT * FROM system.one;
SELECT * FROM information_schema.views WHERE table_schema = currentDatabase() LIMIT 1 FORMAT Vertical;
```
Result:
``` text
Row 1:
──────
table_catalog: default
table_schema: default
table_name: mv
view_definition: SELECT * FROM system.one
check_option: NONE
is_updatable: NO
is_insertable_into: YES
is_trigger_updatable: NO
is_trigger_deletable: NO
is_trigger_insertable_into: NO
```


@ -9,20 +9,37 @@
Columns:
- `database` ([String](../../sql-reference/data-types/string.md)) — name of the database the table is in.
- `name` ([String](../../sql-reference/data-types/string.md)) — table name.
- `engine` ([String](../../sql-reference/data-types/string.md)) — table engine (without parameters).
- `is_temporary` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag that indicates whether the table is temporary.
- `data_path` ([String](../../sql-reference/data-types/string.md)) — path to the table data in the file system.
- `metadata_path` ([String](../../sql-reference/data-types/string.md)) — path to the table metadata in the file system.
- `metadata_modification_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — time of the latest modification of the table metadata.
- `dependencies_database` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — database dependencies.
- `dependencies_table` ([Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md))) — table dependencies ([MaterializedView](../../engines/table-engines/special/materializedview.md) tables based on the current table).
- `create_table_query` ([String](../../sql-reference/data-types/string.md)) — the query that was used to create the table.
- `engine_full` ([String](../../sql-reference/data-types/string.md)) — parameters of the table engine.
- `as_select` ([String](../../sql-reference/data-types/string.md)) — `SELECT` query for the view.
- `partition_key` ([String](../../sql-reference/data-types/string.md)) — the partition key of the table.
- `sorting_key` ([String](../../sql-reference/data-types/string.md)) — the sorting key of the table.
- `primary_key` ([String](../../sql-reference/data-types/string.md)) — the primary key of the table.
- `sampling_key` ([String](../../sql-reference/data-types/string.md)) — the sampling key of the table.
- `storage_policy` ([String](../../sql-reference/data-types/string.md)) — the data storage policy:
- [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes)
@ -41,6 +58,8 @@
- `comment` ([String](../../sql-reference/data-types/string.md)) — comment for the table.
- `has_own_data` ([UInt8](../../sql-reference/data-types/int-uint.md)) — flag that indicates whether the table itself stores some data on disk or only accesses some other source.
The `system.tables` table is used when executing the `SHOW TABLES` query.
**Example**
@ -64,6 +83,7 @@ dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE base.t1 (`n` UInt64) ENGINE = MergeTree ORDER BY n SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY n SETTINGS index_granularity = 8192
as_select: SELECT database AS table_catalog
partition_key:
sorting_key: n
primary_key: n
@ -74,6 +94,7 @@ total_bytes: 99
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
Row 2:
──────
@ -89,6 +110,7 @@ dependencies_database: []
dependencies_table: []
create_table_query: CREATE TABLE default.`53r93yleapyears` (`id` Int8, `febdays` Int8) ENGINE = MergeTree ORDER BY id SETTINGS index_granularity = 8192
engine_full: MergeTree ORDER BY id SETTINGS index_granularity = 8192
as_select: SELECT name AS catalog_name
partition_key:
sorting_key: id
primary_key: id
@ -99,4 +121,5 @@ total_bytes: 155
lifetime_rows: ᴺᵁᴸᴸ
lifetime_bytes: ᴺᵁᴸᴸ
comment:
has_own_data: 0
```


@ -44,7 +44,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
## Zookeeper.xml Format {#format-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
@ -57,13 +57,13 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Configuration of Copying Tasks {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
@ -176,7 +176,7 @@ $ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --bas
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` tracks changes in `/task/path/description` and applies them on the fly. For example, if you change the value of `max_workers`, the number of processes running tasks will also change.


@ -26,7 +26,7 @@ toc_title: "Хранение словарей в памяти"
The general view of the configuration:
``` xml
<clickhouse>
<dictionary>
...
<layout>
@ -36,7 +36,7 @@ toc_title: "Хранение словарей в памяти"
</layout>
...
</dictionary>
</clickhouse>
```
The corresponding [DDL query](../../statements/create/dictionary.md#create-dictionary-query):
@ -284,7 +284,7 @@ RANGE(MIN first MAX last)
Configuration example:
``` xml
<clickhouse>
<dictionary>
...
@ -313,7 +313,7 @@ RANGE(MIN first MAX last)
</structure>
</dictionary>
</clickhouse>
```
or


@ -10,7 +10,7 @@ toc_title: "Источники внешних словарей"
The general view of the XML configuration:
``` xml
<clickhouse>
<dictionary>
...
<source>
@ -21,7 +21,7 @@ toc_title: "Источники внешних словарей"
...
</dictionary>
...
</clickhouse>
```
The corresponding [DDL query](../../statements/create/dictionary.md#create-dictionary-query):
@ -311,7 +311,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
Dictionary configuration in ClickHouse:
``` xml
<clickhouse>
<dictionary>
<name>table_name</name>
<source>
@ -340,7 +340,7 @@ $ sudo apt-get install -y unixodbc odbcinst odbc-postgresql
</attribute>
</structure>
</dictionary>
</clickhouse>
```
or
@ -416,7 +416,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
Dictionary configuration in ClickHouse:
``` xml
<clickhouse>
<dictionary>
<name>test</name>
<source>
@ -446,7 +446,7 @@ $ sudo apt-get install tdsodbc freetds-bin sqsh
</attribute>
</structure>
</dictionary>
</clickhouse>
```
or


@ -26,7 +26,7 @@ ClickHouse:
The dictionary configuration file has the following format:
``` xml
<clickhouse>
<comment>An optional element with any content. Ignored by the ClickHouse server.</comment>
<!--An optional element, the name of a file with substitutions-->
@ -42,7 +42,7 @@ ClickHouse:
<dictionary>
<!-- Dictionary configuration -->
</dictionary>
</clickhouse>
```
You can [configure](external-dicts-dict.md) any number of dictionaries in a single file.


@ -53,7 +53,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
Configuration of the external dictionary:
``` xml
<clickhouse>
<dictionary>
<name>ext-dict-test</name>
<source>
@ -77,7 +77,7 @@ dictGetOrNull('dict_name', attr_name, id_expr)
</structure>
<lifetime>0</lifetime>
</dictionary>
</clickhouse>
```
Let's execute the query:
@ -113,7 +113,7 @@ LIMIT 3;
Configuration of the external dictionary:
``` xml
<clickhouse>
<dictionary>
<name>ext-dict-mult</name>
<source>
@ -142,7 +142,7 @@ LIMIT 3;
</structure>
<lifetime>0</lifetime>
</dictionary>
</clickhouse>
```
Let's execute the query:


@ -0,0 +1,376 @@
---
toc_title: "Functions for Working with S2 Indexes"
---
# Functions for Working with S2 Indexes {#s2index}
[S2](https://s2geometry.io/) is a geocoding system in which all geographical data is represented on a three-dimensional sphere (similar to a globe).
In the S2 library, points are represented as S2 indexes — specific numbers that internally encode a point on the surface of the three-dimensional unit sphere, unlike traditional (latitude, longitude) pairs. To get the S2 index for a point given as (latitude, longitude), use the [geoToS2](#geotos2) function. You can also use the [s2ToGeo](#s2togeo) function to get the geographical coordinates corresponding to a given S2 point index.
## geoToS2 {#geotos2}
Returns the [S2](#s2index) point index corresponding to the given coordinates in `(longitude, latitude)` format.
**Syntax**
``` sql
geoToS2(lon, lat)
```
**Arguments**
- `lon` — longitude. [Float64](../../../sql-reference/data-types/float.md).
- `lat` — latitude. [Float64](../../../sql-reference/data-types/float.md).
**Returned value**
- S2 point index.
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT geoToS2(37.79506683, 55.71290588) AS s2Index;
```
Result:
``` text
┌─────────────s2Index─┐
│ 4704772434919038107 │
└─────────────────────┘
```
## s2ToGeo {#s2togeo}
Returns the geographical coordinates `(longitude, latitude)` corresponding to the given [S2](#s2index) point index.
**Syntax**
``` sql
s2ToGeo(s2index)
```
**Arguments**
- `s2index` — [S2](#s2index) index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- A tuple of two values: `tuple(lon,lat)`.
Type: `lon` — [Float64](../../../sql-reference/data-types/float.md). `lat` — [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT s2ToGeo(4704772434919038107) AS s2Coordinates;
```
Result:
``` text
┌─s2Coordinates────────────────────────┐
│ (37.79506681471008,55.7129059052841) │
└──────────────────────────────────────┘
```
## s2GetNeighbors {#s2getneighbors}
Returns the [S2](#s2index) indexes of the cells that are neighbors of the given S2 index. Each cell in the S2 system is a rectangle bounded by four sides, so every cell has 4 neighboring cells.
**Syntax**
``` sql
s2GetNeighbors(s2index)
```
**Arguments**
- `s2index` — [S2](#s2index) index. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- An array containing 4 values — the S2 indexes of the neighboring cells: `array[s2index1, s2index2, s2index3, s2index4]`.
Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2GetNeighbors(5074766849661468672) AS s2Neighbors;
```
Result:
``` text
┌─s2Neighbors───────────────────────────────────────────────────────────────────────┐
│ [5074766987100422144,5074766712222515200,5074767536856236032,5074767261978329088] │
└───────────────────────────────────────────────────────────────────────────────────┘
```
## s2CellsIntersect {#s2cellsintersect}
Determines whether the two given cells intersect or not.
**Syntax**
``` sql
s2CellsIntersect(s2index1, s2index2)
```
**Arguments**
- `s2index1`, `s2index2` — S2 indexes of the first and second cells. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- 1 — the cells intersect.
- 0 — the cells do not intersect.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2CellsIntersect(9926595209846587392, 9926594385212866560) AS intersect;
```
Result:
``` text
┌─intersect─┐
│ 1 │
└───────────┘
```
## s2CapContains {#s2capcontains}
Determines whether the given cap contains the specified point. A cap is a portion of a sphere cut off by a plane; it is defined by a point on the sphere and a radius in degrees.
**Syntax**
``` sql
s2CapContains(center, degrees, point)
```
**Arguments**
- `center` — S2 index of the point that defines the center of the cap. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `degrees` — radius of the cap in degrees. [Float64](../../../sql-reference/data-types/float.md).
- `point` — S2 index of the point being checked. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- 1 — the cap contains the point.
- 0 — the cap does not contain the point.
Type: [UInt8](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2CapContains(1157339245694594829, 1.0, 1157347770437378819) AS capContains;
```
Result:
``` text
┌─capContains─┐
│ 1 │
└─────────────┘
```
## s2CapUnion {#s2capunion}
Determines the smallest cap that contains the two given caps. A cap is a portion of a sphere cut off by a plane; it is defined by a point on the sphere and a radius in degrees.
**Syntax**
``` sql
s2CapUnion(center1, radius1, center2, radius2)
```
**Arguments**
- `center1`, `center2` — S2 indexes of the points that define the two cap centers. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius1`, `radius2` — radii of the two caps in degrees. [Float64](../../../sql-reference/data-types/float.md).
**Returned values**
- `center` — S2 index of the point corresponding to the center of the smallest cap containing the two given caps. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `radius` — radius in degrees of the smallest cap containing the two given caps. Type: [Float64](../../../sql-reference/data-types/float.md).
**Example**
Query:
``` sql
SELECT s2CapUnion(3814912406305146967, 1.0, 1157347770437378819, 1.0) AS capUnion;
```
Result:
``` text
┌─capUnion───────────────────────────────┐
│ (4534655147792050737,60.2088283994957) │
└────────────────────────────────────────┘
```
## s2RectAdd {#s2rectadd}
Grows the bounding rectangle to include the point given by an S2 index. In the S2 system, a rectangle is represented by an S2Region type called `S2LatLngRect`, which defines a rectangle in latitude-longitude space.
**Syntax**
``` sql
s2RectAdd(s2pointLow, s2pointHigh, s2Point)
```
**Arguments**
- `s2PointLow` — S2 index of the low point that defines the bounding rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — S2 index of the high point that defines the bounding rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — S2 index of the target point to be contained by the grown bounding rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2PointLow` — low S2 cell identifier corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — high S2 cell identifier corresponding to the grown rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectAdd(5178914411069187297, 5177056748191934217, 5179056748191934217) AS rectAdd;
```
Result:
``` text
┌─rectAdd───────────────────────────────────┐
│ (5179062030687166815,5177056748191934217) │
└───────────────────────────────────────────┘
```
## s2RectContains {#s2rectcontains}
Determines whether the given rectangle contains the specified S2 point. In the S2 system, a rectangle is represented by an S2Region type called `S2LatLngRect`, which defines a rectangle in latitude-longitude space.
**Syntax**
``` sql
s2RectContains(s2PointLow, s2PointHi, s2Point)
```
**Arguments**
- `s2PointLow` — S2 index of the lowest point that defines the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2PointHigh` — S2 index of the highest point that defines the rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Point` — S2 index of the point being checked. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- 1 — the rectangle contains the given point.
- 0 — the rectangle does not contain the given point.
**Example**
Query:
``` sql
SELECT s2RectContains(5179062030687166815, 5177056748191934217, 5177914411069187297) AS rectContains;
```
Result:
``` text
┌─rectContains─┐
│ 0 │
└──────────────┘
```
## s2RectUnion {#s2rectunion}
Returns the smallest rectangle containing the union of the two given rectangles. In the S2 system, a rectangle is represented by an S2Region type called `S2LatLngRect`, which defines a rectangle in latitude-longitude space.
**Syntax**
``` sql
s2RectUnion(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
```
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` — S2 index values of the lowest and highest points that define the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — S2 index values of the lowest and highest points that define the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` — low cell identifier corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — high cell identifier corresponding to the union rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectUnion(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectUnion;
```
Result:
``` text
┌─rectUnion─────────────────────────────────┐
│ (5179062030687166815,5177056748191934217) │
└───────────────────────────────────────────┘
```
## s2RectIntersection {#s2rectintersection}
Returns the smallest rectangle containing the intersection of the two given rectangles. In the S2 system, a rectangle is represented by an S2Region type called `S2LatLngRect`, which defines a rectangle in latitude-longitude space.
**Syntax**
``` sql
s2RectIntersection(s2Rect1PointLow, s2Rect1PointHi, s2Rect2PointLow, s2Rect2PointHi)
```
**Arguments**
- `s2Rect1PointLow`, `s2Rect1PointHi` — S2 index values of the lowest and highest points that define the first rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2Rect2PointLow`, `s2Rect2PointHi` — S2 index values of the lowest and highest points that define the second rectangle. [UInt64](../../../sql-reference/data-types/int-uint.md).
**Returned values**
- `s2UnionRect2PointLow` — low cell identifier corresponding to the resulting rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
- `s2UnionRect2PointHi` — high cell identifier corresponding to the resulting rectangle. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
**Example**
Query:
``` sql
SELECT s2RectIntersection(5178914411069187297, 5177056748191934217, 5179062030687166815, 5177056748191934217) AS rectIntersection;
```
Result:
``` text
┌─rectIntersection──────────────────────────┐
│ (5178914411069187297,5177056748191934217) │
└───────────────────────────────────────────┘
```


@ -16,80 +16,3 @@ toc_title: "Функции машинного обучения"
### Stochastic Logistic Regression {#stochastic-logistic-regression}
The [stochasticLogisticRegression](../../sql-reference/functions/machine-learning-functions.md#agg_functions-stochasticlogisticregression) aggregate function implements stochastic gradient descent for the binary classification problem.


@ -28,7 +28,7 @@ stem('language', word)
Query:
``` sql
SELECT arrayMap(x -> stem('en', x), ['I', 'think', 'it', 'is', 'a', 'blessing', 'in', 'disguise']) as res;
```
Result:


@ -173,7 +173,7 @@ roundBankers(4.5) = 4
roundBankers(3.55, 1) = 3.6
roundBankers(3.65, 1) = 3.6
roundBankers(10.35, 1) = 10.4
roundBankers(10.755, 2) = 10.76
```
**See also**


@ -212,8 +212,8 @@ SELECT splitByNonAlpha(' 1! a, b. ');
## arrayStringConcat(arr\[, separator\]) {#arraystringconcatarr-separator}
Concatenates the string representations of the array elements using the `separator` string.
`separator` is an optional parameter: a constant string, an empty string by default.
Returns the concatenated string.
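For example:
``` sql
SELECT arrayStringConcat(['Click', 'House'], '-') AS res;
```
``` text
┌─res─────────┐
│ Click-House │
└─────────────┘
```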
## alphaTokens(s) {#alphatokenss}
@ -270,3 +270,32 @@ SELECT ngrams('ClickHouse', 3);
└───────────────────────────────────────────────────┘
```
## tokens {#tokens}
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
**Arguments**
- `input_string` — any set of bytes. [String](../../sql-reference/data-types/string.md).
**Returned values**
Returns an array of tokens.
Type: [Array](../data-types/array.md).
**Example**
Query:
``` sql
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
```
Result:
``` text
┌─tokens────────────────────────────┐
│ ['test1','test2','test3','test4'] │
└───────────────────────────────────┘
```


@ -164,6 +164,80 @@ SELECT tupleHammingDistance(wordShingleMinHash(string), wordShingleMinHashCaseIn
└─────────────────┘
```
## tupleToNameValuePairs {#tupletonamevaluepairs}
Turns a named tuple into a list of (name, value) pairs. For a `Tuple(a T, b T, ..., c T)` it returns `Array(Tuple(String, T), ...)`, in which `Strings` are the names of the named fields and `T` are the corresponding values. All values in the tuple must be of the same type.
**Syntax**
``` sql
tupleToNameValuePairs(tuple)
```
**Arguments**
- `tuple` — a named tuple. [Tuple](../../sql-reference/data-types/tuple.md) with any type of values.
**Returned value**
- A list of (name, value) pairs.
Type: [Array](../../sql-reference/data-types/array.md)([Tuple](../../sql-reference/data-types/tuple.md)([String](../../sql-reference/data-types/string.md), ...)).
**Example**
Query:
``` sql
CREATE TABLE tupletest (`col` Tuple(user_ID UInt64, session_ID UInt64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple(100, 2502)), (tuple(1, 100));
SELECT tupleToNameValuePairs(col) FROM tupletest;
```
Result:
``` text
┌─tupleToNameValuePairs(col)────────────┐
│ [('user_ID',100),('session_ID',2502)] │
│ [('user_ID',1),('session_ID',100)] │
└───────────────────────────────────────┘
```
This function makes it possible to output columns as rows:
``` sql
CREATE TABLE tupletest (`col` Tuple(CPU Float64, Memory Float64, Disk Float64)) ENGINE = Memory;
INSERT INTO tupletest VALUES (tuple(3.3, 5.5, 6.6));
SELECT arrayJoin(tupleToNameValuePairs(col)) FROM tupletest;
```
Result:
``` text
┌─arrayJoin(tupleToNameValuePairs(col))─┐
│ ('CPU',3.3) │
│ ('Memory',5.5) │
│ ('Disk',6.6) │
└───────────────────────────────────────┘
```
If an ordinary tuple is passed to the function, ClickHouse uses the indexes of the values as their names:
``` sql
SELECT tupleToNameValuePairs(tuple(3, 2, 1));
```
Result:
``` text
┌─tupleToNameValuePairs(tuple(3, 2, 1))─┐
│ [('1',3),('2',2),('3',1)] │
└───────────────────────────────────────┘
```
## tuplePlus {#tupleplus}
Calculates the sum of the corresponding values of two tuples of the same size.
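A quick sketch of the expected behavior:
``` sql
SELECT tuplePlus((1, 2), (2, 3)) AS res;
```
``` text
┌─res───┐
│ (3,5) │
└───────┘
```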
@ -443,7 +517,6 @@ dotProduct(tuple1, tuple2)
- `tuple1` — first tuple. [Tuple](../../sql-reference/data-types/tuple.md).
- `tuple2` — second tuple. [Tuple](../../sql-reference/data-types/tuple.md).
**Returned value**
- The scalar (dot) product.


@ -108,7 +108,7 @@ SELECT mapAdd(([toUInt8(1), 2], [1, 1]), ([toUInt8(1), 2], [1, 1])) as res, toTy
SELECT mapAdd(map(1,1), map(1,1));
```
Result:
```text
┌─mapAdd(map(1, 1), map(1, 1))─┐
@ -128,13 +128,13 @@ mapSubtract(Tuple(Array, Array), Tuple(Array, Array) [, ...])
**Arguments**
The arguments are [Map](../../sql-reference/data-types/map.md) containers or [tuples](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the elements of the first array represent the keys and the second array contains the values for each key.
All key arrays must be of the same type, and all value arrays must contain elements that can be cast to a common type ([Int64](../../sql-reference/data-types/int-uint.md#int-ranges), [UInt64](../../sql-reference/data-types/int-uint.md#uint-ranges) or [Float64](../../sql-reference/data-types/float.md#float32-float64)).
The common promoted type is used as the type of the resulting array.
**Returned value**
- Depending on the arguments, returns one [Map](../../sql-reference/data-types/map.md) or [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2), where the first array contains the sorted keys and the second array contains the values.
**Example**
@ -152,6 +152,20 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
└────────────────┴───────────────────────────────────┘
```
Query with a `Map` container:
```sql
SELECT mapSubtract(map(1,1), map(1,1));
```
Result:
```text
┌─mapSubtract(map(1, 1), map(1, 1))─┐
│ {1:0} │
└───────────────────────────────────┘
```
## mapPopulateSeries {#function-mappopulateseries}
Fills in missing keys in a map (a pair of key and value arrays) whose keys are integers. It also supports specifying the maximum key, which is used to extend the keys array.
@ -160,6 +174,7 @@ SELECT mapSubtract(([toUInt8(1), 2], [toInt32(1), 1]), ([toUInt8(1), 2], [toInt3
``` sql
mapPopulateSeries(keys, values[, max])
mapPopulateSeries(map[, max])
```
Generates a map where the keys are a series of numbers from the minimum key to the maximum key (or the `max` argument, if specified) taken from the `keys` array with a step size of one, and the corresponding values are taken from the `values` array. If no value is specified for a key, a default value is used in the resulting map.
@ -168,19 +183,28 @@ mapPopulateSeries(keys, values[, max])
**Arguments**
The arguments are a [Map](../../sql-reference/data-types/map.md) container or two [arrays](../../sql-reference/data-types/array.md#data-type-array), where the first array represents the keys and the second array contains the values for each key.
Mapped arrays:
- `keys` — array of keys. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
- `values` — array of values. [Array](../../sql-reference/data-types/array.md#data-type-array)([Int](../../sql-reference/data-types/int-uint.md#int-ranges)).
- `max` — maximum key value. Optional. [Int8, Int16, Int32, Int64, Int128, Int256](../../sql-reference/data-types/int-uint.md#int-ranges).
or
- `map` — a `Map` container with integer keys. [Map](../../sql-reference/data-types/map.md).
**Returned value**
- Depending on the arguments, returns a [Map](../../sql-reference/data-types/map.md) container or a [tuple](../../sql-reference/data-types/tuple.md#tuplet1-t2) of two [arrays](../../sql-reference/data-types/array.md#data-type-array): the keys in sorted order, and the values corresponding to those keys.
**Example**
Query with mapped arrays:
```sql
SELECT mapPopulateSeries([1,2,4], [11,22,44], 5) AS res, toTypeName(res) AS type;
```
Result:
@ -191,6 +215,20 @@ select mapPopulateSeries([1,2,4], [11,22,44], 5) as res, toTypeName(res) as type
└──────────────────────────────┴───────────────────────────────────┘
```
Query with a `Map` container:
```sql
SELECT mapPopulateSeries(map(1, 10, 5, 20), 6);
```
Result:
```text
┌─mapPopulateSeries(map(1, 10, 5, 20), 6)─┐
│ {1:10,2:0,3:0,4:0,5:20,6:0} │
└─────────────────────────────────────────┘
```
## mapContains {#mapcontains}
Determines whether the `map` container contains the key `key`.
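A short sketch:
``` sql
SELECT mapContains(map('name', 'clickhouse', 'type', 'db'), 'name') AS res;
```
``` text
┌─res─┐
│   1 │
└─────┘
```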
@ -319,4 +357,3 @@ SELECT mapValues(a) FROM test;
│ ['twelve','6.0'] │
└──────────────────┘
```


@ -7,7 +7,7 @@ toc_title: PROJECTION
The following operations with [projections](../../../engines/table-engines/mergetree-family/mergetree.md#projections) are available:
- `ALTER TABLE [db].name ADD PROJECTION name ( SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY] )` — adds the projection description to the table metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` — removes the projection description from the table metadata and deletes the projection files from disk.
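As an illustration of the first operation, a hedged sketch (the table and columns are hypothetical):
``` sql
ALTER TABLE visits ADD PROJECTION p_agg
(
    SELECT user_id, sum(duration) GROUP BY user_id
);
```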


@ -8,27 +8,51 @@ toc_title: "База данных"
Creates a database.
``` sql
CREATE DATABASE [IF NOT EXISTS] db_name [ON CLUSTER cluster] [ENGINE = engine(...)] [COMMENT 'Comment']
```
## Clauses {#clauses}
### IF NOT EXISTS {#if-not-exists}
If the `db_name` database already exists, ClickHouse does not create a new database and:
- Does not throw an exception if the clause is specified.
- Throws an exception if the clause is not specified.
### ON CLUSTER {#on-cluster}
ClickHouse creates the `db_name` database on all servers of the specified cluster. More details are given in the [Distributed DDL](../../../sql-reference/distributed-ddl.md) article.
### ENGINE {#engine}
By default, ClickHouse uses its own [Atomic](../../../engines/database-engines/atomic.md) database engine. There are also the [Lazy](../../../engines/database-engines/lazy.md), [MySQL](../../../engines/database-engines/mysql.md), [PostgresSQL](../../../engines/database-engines/postgresql.md), [MaterializedMySQL](../../../engines/database-engines/materialized-mysql.md), [MaterializedPostgreSQL](../../../engines/database-engines/materialized-postgresql.md), [Replicated](../../../engines/database-engines/replicated.md) and [SQLite](../../../engines/database-engines/sqlite.md) database engines.
### COMMENT {#comment}
You can add a comment to the database when creating it.
The comment is supported for all database engines.
**Syntax**
``` sql
CREATE DATABASE db_name ENGINE = engine(...) COMMENT 'Comment'
```
**Example**
Query:
``` sql
CREATE DATABASE db_comment ENGINE = Memory COMMENT 'The temporary database';
SELECT name, comment FROM system.databases WHERE name = 'db_comment';
```
Result:
```text
┌─name───────┬─comment────────────────┐
│ db_comment │ The temporary database │
└────────────┴────────────────────────┘
```


@ -8,10 +8,10 @@ toc_title: "Словарь"
``` sql
CREATE DICTIONARY [IF NOT EXISTS] [db.]dictionary_name [ON CLUSTER cluster]
(
key1 type1 [DEFAULT|EXPRESSION expr1] [IS_OBJECT_ID],
key2 type2 [DEFAULT|EXPRESSION expr2],
attr1 type2 [DEFAULT|EXPRESSION expr3] [HIERARCHICAL|INJECTIVE],
attr2 type2 [DEFAULT|EXPRESSION expr4] [HIERARCHICAL|INJECTIVE]
)
PRIMARY KEY key1, key2
SOURCE(SOURCE_NAME([param1 value1 ... paramN valueN]))
